summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.bzrignore54
-rw-r--r--.tree-is-private0
-rwxr-xr-xBUILD/SETUP.sh7
-rwxr-xr-xBUILD/compile-amd64-gprof-no-ndb7
-rwxr-xr-xBUILD/compile-dist1
-rwxr-xr-xBitKeeper/triggers/post-commit4
-rwxr-xr-xBitKeeper/triggers/pre-delta15
-rw-r--r--Makefile.am2
-rw-r--r--client/mysqladmin.cc4
-rw-r--r--client/mysqltest.c59
-rw-r--r--configure.in17
-rw-r--r--dbug/dbug.c32
-rwxr-xr-xdbug/dbug_add_tags.pl4
-rw-r--r--include/Makefile.am6
-rw-r--r--include/atomic/nolock.h29
-rw-r--r--include/atomic/rwlock.h23
-rw-r--r--include/atomic/x86-gcc.h31
-rw-r--r--include/atomic/x86-msvc.h12
-rw-r--r--include/ft_global.h11
-rw-r--r--include/keycache.h3
-rw-r--r--include/lf.h260
-rw-r--r--include/m_string.h7
-rw-r--r--include/maria.h448
-rw-r--r--include/my_atomic.h209
-rw-r--r--include/my_base.h78
-rw-r--r--include/my_bit.h107
-rw-r--r--include/my_dbug.h2
-rw-r--r--include/my_global.h17
-rw-r--r--include/my_handler.h62
-rw-r--r--include/my_pthread.h19
-rw-r--r--include/my_sys.h53
-rw-r--r--include/myisam.h274
-rw-r--r--include/myisamchk.h166
-rw-r--r--include/mysql_com.h12
-rw-r--r--include/wqueue.h26
-rwxr-xr-xlibmysql/CMakeLists.txt4
-rw-r--r--libmysql/Makefile.shared4
-rw-r--r--mysql-test/extra/rpl_tests/rpl_flsh_tbls.test4
-rw-r--r--mysql-test/extra/rpl_tests/rpl_insert_delayed.test2
-rw-r--r--mysql-test/include/have_maria.inc4
-rw-r--r--mysql-test/include/maria_empty_logs.inc33
-rw-r--r--mysql-test/include/maria_make_snapshot.inc48
-rw-r--r--mysql-test/include/maria_make_snapshot_for_comparison.inc30
-rw-r--r--mysql-test/include/maria_make_snapshot_for_feeding_recovery.inc35
-rw-r--r--mysql-test/include/maria_verify_recovery.inc96
-rw-r--r--mysql-test/include/ps_conv.inc2
-rw-r--r--mysql-test/include/wait_until_connected_again.inc6
-rw-r--r--mysql-test/lib/mtr_process.pl40
-rw-r--r--mysql-test/lib/mtr_report.pl2
-rwxr-xr-xmysql-test/mysql-test-run.pl63
-rw-r--r--mysql-test/r/alter_table.result2
-rw-r--r--mysql-test/r/binlog_unsafe.result1
-rw-r--r--mysql-test/r/create.result4
-rw-r--r--mysql-test/r/have_maria.require2
-rw-r--r--mysql-test/r/innodb.result14
-rw-r--r--mysql-test/r/maria-big.result63
-rw-r--r--mysql-test/r/maria-connect.result24
-rw-r--r--mysql-test/r/maria-purge.result92
-rw-r--r--mysql-test/r/maria-recovery-bitmap.result29
-rw-r--r--mysql-test/r/maria-recovery.result219
-rw-r--r--mysql-test/r/maria.result2081
-rw-r--r--mysql-test/r/merge.result2
-rw-r--r--mysql-test/r/mix2_myisam.result28
-rw-r--r--mysql-test/r/myisam.result12
-rw-r--r--mysql-test/r/mysqldump.result6
-rw-r--r--mysql-test/r/old-mode.result14
-rw-r--r--mysql-test/r/ps_2myisam.result2
-rw-r--r--mysql-test/r/ps_3innodb.result2
-rw-r--r--mysql-test/r/ps_4heap.result2
-rw-r--r--mysql-test/r/ps_5merge.result4
-rw-r--r--mysql-test/r/ps_maria.result3146
-rw-r--r--mysql-test/r/query_cache.result6
-rw-r--r--mysql-test/r/subselect.result6
-rw-r--r--mysql-test/suite/ndb/r/ps_7ndb.result2
-rw-r--r--mysql-test/suite/rpl/r/rpl_insert.result2
-rw-r--r--mysql-test/suite/rpl/r/rpl_row_flsh_tbls.result8
-rw-r--r--mysql-test/suite/rpl/r/rpl_row_insert_delayed.result2
-rw-r--r--mysql-test/suite/rpl/r/rpl_stm_flsh_tbls.result8
-rw-r--r--mysql-test/suite/rpl/r/rpl_stm_insert_delayed.result4
-rw-r--r--mysql-test/suite/rpl/r/rpl_switch_stm_row_mixed.result6
-rw-r--r--mysql-test/suite/rpl/t/rpl_innodb_bug28430.test1
-rw-r--r--mysql-test/suite/rpl/t/rpl_insert.test2
-rw-r--r--mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test2
-rw-r--r--mysql-test/suite/rpl/t/rpl_stm_flsh_tbls.test2
-rw-r--r--mysql-test/suite/rpl/t/rpl_switch_stm_row_mixed.test2
-rw-r--r--mysql-test/t/alter_table.test2
-rw-r--r--mysql-test/t/binlog_unsafe.test3
-rw-r--r--mysql-test/t/delayed.test9
-rw-r--r--mysql-test/t/disabled.def1
-rw-r--r--mysql-test/t/events_logs_tests.test2
-rw-r--r--mysql-test/t/maria-big.test28
-rw-r--r--mysql-test/t/maria-connect.test42
-rw-r--r--mysql-test/t/maria-purge.test104
-rw-r--r--mysql-test/t/maria-recovery-bitmap-master.opt2
-rw-r--r--mysql-test/t/maria-recovery-bitmap.test79
-rw-r--r--mysql-test/t/maria-recovery-master.opt2
-rw-r--r--mysql-test/t/maria-recovery.test187
-rw-r--r--mysql-test/t/maria.test1334
-rw-r--r--mysql-test/t/merge.test14
-rw-r--r--mysql-test/t/myisam.test2
-rw-r--r--mysql-test/t/mysqldump.test6
-rw-r--r--mysql-test/t/old-mode-master.opt1
-rw-r--r--mysql-test/t/old-mode.test16
-rw-r--r--mysql-test/t/ps_maria.test47
-rw-r--r--mysql-test/t/query_cache.test6
-rw-r--r--mysql-test/t/query_cache_merge.test2
-rw-r--r--mysql-test/t/subselect.test6
-rwxr-xr-xmysys/CMakeLists.txt4
-rw-r--r--mysys/Makefile.am11
-rw-r--r--mysys/array.c139
-rw-r--r--mysys/checksum.c12
-rw-r--r--mysys/lf_alloc-pin.c529
-rw-r--r--mysys/lf_dynarray.c208
-rw-r--r--mysys/lf_hash.c493
-rw-r--r--mysys/mf_iocache.c4
-rw-r--r--mysys/mf_keycache.c37
-rw-r--r--mysys/mf_keycaches.c277
-rw-r--r--mysys/mf_tempfile.c1
-rw-r--r--mysys/my_atomic.c9
-rw-r--r--mysys/my_bit.c100
-rw-r--r--mysys/my_bitmap.c1
-rw-r--r--mysys/my_compress.c5
-rw-r--r--mysys/my_create.c7
-rw-r--r--mysys/my_delete.c3
-rw-r--r--mysys/my_error.c9
-rw-r--r--mysys/my_fopen.c2
-rw-r--r--mysys/my_getopt.c31
-rw-r--r--mysys/my_handler.c136
-rw-r--r--mysys/my_init.c13
-rw-r--r--mysys/my_lock.c14
-rw-r--r--mysys/my_open.c74
-rw-r--r--mysys/my_pread.c13
-rw-r--r--mysys/my_read.c8
-rw-r--r--mysys/my_realloc.c12
-rw-r--r--mysys/my_rename.c17
-rw-r--r--mysys/my_rnd.c55
-rw-r--r--mysys/my_safehash.c297
-rw-r--r--mysys/my_safehash.h58
-rw-r--r--mysys/my_seek.c4
-rw-r--r--mysys/my_static.c2
-rw-r--r--mysys/my_symlink.c2
-rw-r--r--mysys/my_sync.c82
-rw-r--r--mysys/my_thr_init.c21
-rw-r--r--mysys/my_uuid.c178
-rw-r--r--mysys/my_write.c2
-rw-r--r--mysys/safemalloc.c22
-rw-r--r--mysys/thr_lock.c20
-rw-r--r--mysys/thr_mutex.c74
-rw-r--r--mysys/wqueue.c169
-rw-r--r--plugin/daemon_example/daemon_example.cc4
-rwxr-xr-xserver-tools/instance-manager/CMakeLists.txt2
-rw-r--r--server-tools/instance-manager/listener.cc6
-rw-r--r--server-tools/instance-manager/mysql_connection.cc9
-rw-r--r--server-tools/instance-manager/mysql_connection.h3
-rw-r--r--sql-bench/example9
-rw-r--r--sql/field.h6
-rw-r--r--sql/filesort.cc10
-rw-r--r--sql/gen_lex_hash.cc4
-rw-r--r--sql/ha_partition.cc20
-rw-r--r--sql/ha_partition.h2
-rw-r--r--sql/handler.cc15
-rw-r--r--sql/handler.h26
-rw-r--r--sql/item.h12
-rw-r--r--sql/item_func.cc5
-rw-r--r--sql/item_func.h3
-rw-r--r--sql/item_strfunc.cc8
-rw-r--r--sql/lex.h6
-rw-r--r--sql/lock.cc8
-rw-r--r--sql/log.cc5
-rw-r--r--sql/log_event_old.h12
-rw-r--r--sql/my_lock.c45
-rw-r--r--sql/mysql_priv.h10
-rw-r--r--sql/mysqld.cc137
-rw-r--r--sql/opt_range.cc4
-rw-r--r--sql/password.c42
-rw-r--r--sql/set_var.cc23
-rw-r--r--sql/set_var.h4
-rw-r--r--sql/sql_class.cc3
-rw-r--r--sql/sql_class.h75
-rw-r--r--sql/sql_crypt.cc2
-rw-r--r--sql/sql_crypt.h2
-rw-r--r--sql/sql_delete.cc20
-rw-r--r--sql/sql_insert.cc16
-rw-r--r--sql/sql_parse.cc7
-rw-r--r--sql/sql_plugin.cc3
-rw-r--r--sql/sql_select.cc313
-rw-r--r--sql/sql_select.h2
-rw-r--r--sql/sql_show.cc272
-rw-r--r--sql/sql_sort.h12
-rw-r--r--sql/sql_table.cc25
-rw-r--r--sql/sql_test.cc4
-rw-r--r--sql/sql_union.cc4
-rw-r--r--sql/sql_update.cc2
-rw-r--r--sql/sql_yacc.yy29
-rw-r--r--sql/table.cc12
-rw-r--r--sql/table.h1
-rw-r--r--sql/unireg.cc8
-rw-r--r--storage/csv/ha_tina.cc12
-rw-r--r--storage/csv/ha_tina.h4
-rw-r--r--storage/maria/CMakeLists.txt64
-rw-r--r--storage/maria/Makefile.am185
-rw-r--r--storage/maria/ft_maria.c48
-rw-r--r--storage/maria/ha_maria.cc2746
-rw-r--r--storage/maria/ha_maria.h155
-rw-r--r--storage/maria/lockman.c786
-rw-r--r--storage/maria/lockman.h76
-rw-r--r--storage/maria/ma_bitmap.c2510
-rw-r--r--storage/maria/ma_blockrec.c6100
-rw-r--r--storage/maria/ma_blockrec.h253
-rw-r--r--storage/maria/ma_cache.c107
-rw-r--r--storage/maria/ma_changed.c33
-rw-r--r--storage/maria/ma_check.c5959
-rw-r--r--storage/maria/ma_check_standalone.h106
-rw-r--r--storage/maria/ma_checkpoint.c1196
-rw-r--r--storage/maria/ma_checkpoint.h92
-rw-r--r--storage/maria/ma_checksum.c89
-rw-r--r--storage/maria/ma_close.c160
-rw-r--r--storage/maria/ma_commit.c141
-rw-r--r--storage/maria/ma_commit.h18
-rw-r--r--storage/maria/ma_control_file.c509
-rw-r--r--storage/maria/ma_control_file.h90
-rw-r--r--storage/maria/ma_create.c1344
-rw-r--r--storage/maria/ma_dbug.c193
-rw-r--r--storage/maria/ma_delete.c1421
-rw-r--r--storage/maria/ma_delete_all.c158
-rw-r--r--storage/maria/ma_delete_table.c111
-rw-r--r--storage/maria/ma_dynrec.c1976
-rw-r--r--storage/maria/ma_extra.c578
-rw-r--r--storage/maria/ma_ft_boolean_search.c975
-rw-r--r--storage/maria/ma_ft_eval.c254
-rw-r--r--storage/maria/ma_ft_eval.h41
-rw-r--r--storage/maria/ma_ft_nlq_search.c375
-rw-r--r--storage/maria/ma_ft_parser.c426
-rw-r--r--storage/maria/ma_ft_stem.c18
-rw-r--r--storage/maria/ma_ft_test1.c317
-rw-r--r--storage/maria/ma_ft_test1.h420
-rw-r--r--storage/maria/ma_ft_update.c357
-rw-r--r--storage/maria/ma_ftdefs.h152
-rw-r--r--storage/maria/ma_fulltext.h27
-rw-r--r--storage/maria/ma_info.c141
-rw-r--r--storage/maria/ma_init.c69
-rw-r--r--storage/maria/ma_key.c572
-rw-r--r--storage/maria/ma_key_recover.c1071
-rw-r--r--storage/maria/ma_key_recover.h103
-rw-r--r--storage/maria/ma_keycache.c164
-rw-r--r--storage/maria/ma_locking.c582
-rw-r--r--storage/maria/ma_loghandler.c7637
-rw-r--r--storage/maria/ma_loghandler.h437
-rw-r--r--storage/maria/ma_loghandler_lsn.h105
-rw-r--r--storage/maria/ma_open.c1731
-rw-r--r--storage/maria/ma_packrec.c1722
-rw-r--r--storage/maria/ma_page.c341
-rwxr-xr-xstorage/maria/ma_pagecache.c4517
-rw-r--r--storage/maria/ma_pagecache.h307
-rw-r--r--storage/maria/ma_pagecaches.c105
-rw-r--r--storage/maria/ma_pagecrc.c302
-rw-r--r--storage/maria/ma_panic.c147
-rw-r--r--storage/maria/ma_preload.c104
-rw-r--r--storage/maria/ma_range.c297
-rw-r--r--storage/maria/ma_recovery.c3159
-rw-r--r--storage/maria/ma_recovery.h34
-rw-r--r--storage/maria/ma_rename.c139
-rw-r--r--storage/maria/ma_rfirst.c26
-rw-r--r--storage/maria/ma_rkey.c199
-rw-r--r--storage/maria/ma_rlast.c26
-rw-r--r--storage/maria/ma_rnext.c122
-rw-r--r--storage/maria/ma_rnext_same.c107
-rw-r--r--storage/maria/ma_rprev.c88
-rw-r--r--storage/maria/ma_rrnd.c44
-rw-r--r--storage/maria/ma_rsame.c69
-rw-r--r--storage/maria/ma_rsamepos.c58
-rw-r--r--storage/maria/ma_rt_index.c1196
-rw-r--r--storage/maria/ma_rt_index.h49
-rw-r--r--storage/maria/ma_rt_key.c113
-rw-r--r--storage/maria/ma_rt_key.h32
-rw-r--r--storage/maria/ma_rt_mbr.c807
-rw-r--r--storage/maria/ma_rt_mbr.h38
-rw-r--r--storage/maria/ma_rt_split.c374
-rw-r--r--storage/maria/ma_rt_test.c475
-rw-r--r--storage/maria/ma_scan.c73
-rw-r--r--storage/maria/ma_search.c2032
-rw-r--r--storage/maria/ma_sort.c1059
-rw-r--r--storage/maria/ma_sp_defs.h47
-rw-r--r--storage/maria/ma_sp_key.c286
-rw-r--r--storage/maria/ma_sp_test.c568
-rw-r--r--storage/maria/ma_static.c82
-rw-r--r--storage/maria/ma_statrec.c290
-rw-r--r--storage/maria/ma_test1.c885
-rw-r--r--storage/maria/ma_test2.c1197
-rw-r--r--storage/maria/ma_test3.c501
-rw-r--r--storage/maria/ma_test_all.res14
-rwxr-xr-xstorage/maria/ma_test_all.sh7
-rwxr-xr-xstorage/maria/ma_test_recovery213
-rw-r--r--storage/maria/ma_test_recovery.expected942
-rw-r--r--storage/maria/ma_unique.c235
-rw-r--r--storage/maria/ma_update.c241
-rw-r--r--storage/maria/ma_write.c2115
-rw-r--r--storage/maria/maria_chk.c1808
-rw-r--r--storage/maria/maria_def.h1095
-rw-r--r--storage/maria/maria_ftdump.c279
-rw-r--r--storage/maria/maria_pack.c3230
-rw-r--r--storage/maria/maria_read_log.c268
-rwxr-xr-xstorage/maria/maria_rename.sh17
-rw-r--r--storage/maria/plug.in8
-rw-r--r--storage/maria/tablockman.c676
-rw-r--r--storage/maria/tablockman.h87
-rwxr-xr-xstorage/maria/test_pack10
-rw-r--r--storage/maria/trnman.c746
-rw-r--r--storage/maria/trnman.h59
-rw-r--r--storage/maria/trnman_public.h62
-rw-r--r--storage/maria/unittest/Makefile.am102
-rw-r--r--storage/maria/unittest/lockman-t.c309
-rw-r--r--storage/maria/unittest/lockman1-t.c335
-rw-r--r--storage/maria/unittest/lockman2-t.c362
-rw-r--r--storage/maria/unittest/ma_control_file-t.c583
-rw-r--r--storage/maria/unittest/ma_loghandler_examples.c52
-rw-r--r--storage/maria/unittest/ma_maria_log_cleanup.c49
-rw-r--r--storage/maria/unittest/ma_pagecache_consist.c485
-rw-r--r--storage/maria/unittest/ma_pagecache_single.c635
-rw-r--r--storage/maria/unittest/ma_test_loghandler-t.c627
-rw-r--r--storage/maria/unittest/ma_test_loghandler_first_lsn-t.c147
-rw-r--r--storage/maria/unittest/ma_test_loghandler_max_lsn-t.c140
-rw-r--r--storage/maria/unittest/ma_test_loghandler_multigroup-t.c650
-rw-r--r--storage/maria/unittest/ma_test_loghandler_multithread-t.c479
-rw-r--r--storage/maria/unittest/ma_test_loghandler_noflush-t.c132
-rw-r--r--storage/maria/unittest/ma_test_loghandler_nologs-t.c179
-rw-r--r--storage/maria/unittest/ma_test_loghandler_pagecache-t.c186
-rw-r--r--storage/maria/unittest/ma_test_loghandler_purge-t.c176
-rw-r--r--storage/maria/unittest/test_file.c77
-rw-r--r--storage/maria/unittest/test_file.h14
-rw-r--r--storage/maria/unittest/trnman-t.c195
-rw-r--r--storage/myisam/Makefile.am4
-rw-r--r--storage/myisam/ft_boolean_search.c11
-rw-r--r--storage/myisam/ft_eval.c2
-rw-r--r--storage/myisam/ft_myisam.c36
-rw-r--r--storage/myisam/ft_nlq_search.c2
-rw-r--r--storage/myisam/ft_parser.c2
-rw-r--r--storage/myisam/ft_static.c14
-rw-r--r--storage/myisam/ft_stopwords.c19
-rw-r--r--storage/myisam/ft_test1.c4
-rw-r--r--storage/myisam/ft_update.c4
-rw-r--r--storage/myisam/fulltext.h10
-rw-r--r--storage/myisam/ha_myisam.cc77
-rw-r--r--storage/myisam/ha_myisam.h10
-rw-r--r--storage/myisam/mi_cache.c4
-rw-r--r--storage/myisam/mi_check.c201
-rw-r--r--storage/myisam/mi_checksum.c27
-rw-r--r--storage/myisam/mi_close.c1
-rw-r--r--storage/myisam/mi_create.c52
-rw-r--r--storage/myisam/mi_dbug.c3
-rw-r--r--storage/myisam/mi_delete.c14
-rw-r--r--storage/myisam/mi_dynrec.c20
-rw-r--r--storage/myisam/mi_extra.c9
-rw-r--r--storage/myisam/mi_key.c2
-rw-r--r--storage/myisam/mi_locking.c56
-rw-r--r--storage/myisam/mi_log.c2
-rw-r--r--storage/myisam/mi_open.c36
-rw-r--r--storage/myisam/mi_packrec.c7
-rw-r--r--storage/myisam/mi_range.c2
-rw-r--r--storage/myisam/mi_rkey.c2
-rw-r--r--storage/myisam/mi_search.c10
-rw-r--r--storage/myisam/mi_test1.c8
-rw-r--r--storage/myisam/mi_test2.c12
-rw-r--r--storage/myisam/mi_unique.c2
-rw-r--r--storage/myisam/mi_update.c2
-rw-r--r--storage/myisam/mi_write.c12
-rw-r--r--storage/myisam/myisamchk.c36
-rw-r--r--storage/myisam/myisamdef.h866
-rw-r--r--storage/myisam/myisamlog.c2
-rw-r--r--storage/myisam/myisampack.c8
-rw-r--r--storage/myisam/plug.in12
-rw-r--r--storage/myisam/rt_index.c4
-rw-r--r--storage/myisam/sort.c14
-rw-r--r--storage/myisam/sp_test.c2
-rw-r--r--storage/myisammrg/ha_myisammrg.cc9
-rw-r--r--storage/myisammrg/ha_myisammrg.h4
-rw-r--r--strings/llstr.c1
-rw-r--r--support-files/compiler_warnings.supp5
-rw-r--r--support-files/magic15
-rw-r--r--unittest/Makefile.am2
-rw-r--r--unittest/mysys/Makefile.am8
-rw-r--r--unittest/mysys/my_atomic-t.c261
-rw-r--r--unittest/mytap/tap.c8
-rw-r--r--unittest/mytap/tap.h43
-rw-r--r--unittest/unit.pl12
385 files changed, 97324 insertions, 2442 deletions
diff --git a/.bzrignore b/.bzrignore
index 6e09cdd7b5d..fcafdf8915a 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -1,6 +1,7 @@
*-t
*.Plo
*.Po
+*.Tpo
*.a
*.bb
*.bbg
@@ -1079,6 +1080,7 @@ libmysqld/ha_innobase.cc
libmysqld/ha_innodb.cc
libmysqld/ha_isam.cc
libmysqld/ha_isammrg.cc
+libmysqld/ha_maria.cc
libmysqld/ha_myisam.cc
libmysqld/ha_myisammrg.cc
libmysqld/ha_ndbcluster.cc
@@ -1220,6 +1222,7 @@ linked_tools_sources
locked
ltmain.sh
man/*.1
+maria_log_control
merge/*.ds?
merge/*.vcproj
missing
@@ -2429,6 +2432,45 @@ storage/innobase/ut/.deps/ut0rnd.Po
storage/innobase/ut/.deps/ut0ut.Po
storage/innobase/ut/.deps/ut0vec.Po
storage/innobase/ut/.deps/ut0wqueue.Po
+storage/maria/*.MAD
+storage/maria/*.MAI
+storage/maria/ma_rt_test
+storage/maria/ma_sp_test
+storage/maria/ma_test1
+storage/maria/ma_test2
+storage/maria/ma_test3
+storage/maria/ma_test_all
+storage/maria/maria.log
+storage/maria/maria_chk
+storage/maria/maria_control
+storage/maria/maria_ftdump
+storage/maria/maria_log
+storage/maria/maria_log.*
+storage/maria/maria_pack
+storage/maria/maria_read_log
+storage/maria/tmp/*
+storage/maria/unittest/ma_pagecache_consist_1k-t-big
+storage/maria/unittest/ma_pagecache_consist_1kHC-t-big
+storage/maria/unittest/ma_pagecache_consist_1kRD-t-big
+storage/maria/unittest/ma_pagecache_consist_1kWR-t-big
+storage/maria/unittest/ma_pagecache_consist_64k-t-big
+storage/maria/unittest/ma_pagecache_consist_64kHC-t-big
+storage/maria/unittest/ma_pagecache_consist_64kRD-t-big
+storage/maria/unittest/ma_pagecache_consist_64kWR-t-big
+storage/maria/unittest/ma_pagecache_single_64k-t-big
+storage/maria/unittest/ma_test_loghandler_long-t-big
+storage/maria/unittest/maria_control
+storage/maria/unittest/mf_pagecache_consist_1k-t-big
+storage/maria/unittest/mf_pagecache_consist_1kHC-t-big
+storage/maria/unittest/mf_pagecache_consist_1kRD-t-big
+storage/maria/unittest/mf_pagecache_consist_1kWR-t-big
+storage/maria/unittest/mf_pagecache_consist_64k-t-big
+storage/maria/unittest/mf_pagecache_consist_64kHC-t-big
+storage/maria/unittest/mf_pagecache_consist_64kRD-t-big
+storage/maria/unittest/mf_pagecache_consist_64kWR-t-big
+storage/maria/unittest/mf_pagecache_single_64k-t-big
+storage/maria/unittest/page_cache_test_file_1
+storage/maria/unittest/pagecache_debug.log
storage/myisam/.deps/ft_boolean_search.Po
storage/myisam/.deps/ft_nlq_search.Po
storage/myisam/.deps/ft_parser.Po
@@ -2981,13 +3023,25 @@ unittest/examples/.deps/simple-t.Po
unittest/examples/.deps/skip-t.Po
unittest/examples/.deps/skip_all-t.Po
unittest/examples/.deps/todo-t.Po
+unittest/maria_control
unittest/mysys/*.t
unittest/mysys/.deps/base64-t.Po
unittest/mysys/.deps/bitmap-t.Po
unittest/mysys/.deps/my_atomic-t.Po
+unittest/mysys/mf_pagecache_consist_1k-t-big
+unittest/mysys/mf_pagecache_consist_1kHC-t-big
+unittest/mysys/mf_pagecache_consist_1kRD-t-big
+unittest/mysys/mf_pagecache_consist_1kWR-t-big
+unittest/mysys/mf_pagecache_consist_64k-t-big
+unittest/mysys/mf_pagecache_consist_64kHC-t-big
+unittest/mysys/mf_pagecache_consist_64kRD-t-big
+unittest/mysys/mf_pagecache_consist_64kWR-t-big
+unittest/mysys/mf_pagecache_single_64k-t-big
unittest/mytap/.deps/tap.Po
unittest/mytap/t/*.t
unittest/mytap/t/.deps/basic-t.Po
+unittest/page_cache_test_file_1
+unittest/pagecache_debug.log
unittest/unit
vi.h
vio/*.ds?
diff --git a/.tree-is-private b/.tree-is-private
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/.tree-is-private
diff --git a/BUILD/SETUP.sh b/BUILD/SETUP.sh
index e940f7a3371..34bdfecf995 100755
--- a/BUILD/SETUP.sh
+++ b/BUILD/SETUP.sh
@@ -145,6 +145,7 @@ base_configs="--prefix=$prefix --enable-assembler "
base_configs="$base_configs --with-extra-charsets=complex "
base_configs="$base_configs --enable-thread-safe-client "
base_configs="$base_configs --with-big-tables"
+base_configs="$base_configs --with-plugin-maria"
if test -d "$path/../cmd-line-utils/readline"
then
@@ -170,10 +171,10 @@ max_configs="$SSL_LIBRARY --with-plugins=max --with-embedded-server"
# CPU and platform specific compilation flags.
#
alpha_cflags="$check_cpu_cflags -Wa,-m$cpu_flag"
-amd64_cflags="$check_cpu_cflags"
+amd64_cflags="$check_cpu_cflags -DSTACK_DIRECTION=-1"
amd64_cxxflags="" # If dropping '--with-big-tables', add here "-DBIG_TABLES"
-pentium_cflags="$check_cpu_cflags"
-pentium64_cflags="$check_cpu_cflags -m64"
+pentium_cflags="$check_cpu_cflags -DSTACK_DIRECTION=-1"
+pentium64_cflags="$check_cpu_cflags -m64 -DSTACK_DIRECTION=-1"
ppc_cflags="$check_cpu_cflags"
sparc_cflags=""
diff --git a/BUILD/compile-amd64-gprof-no-ndb b/BUILD/compile-amd64-gprof-no-ndb
new file mode 100755
index 00000000000..9fd4c67155c
--- /dev/null
+++ b/BUILD/compile-amd64-gprof-no-ndb
@@ -0,0 +1,7 @@
+#! /bin/sh
+path=`dirname $0`
+. "$path/SETUP.sh"
+extra_flags="$amd64_cflags -pg -g"
+extra_configs="$amd64_configs $max_no_ndb_configs --disable-shared $static_link"
+
+. "$path/FINISH.sh"
diff --git a/BUILD/compile-dist b/BUILD/compile-dist
index d9103e0a419..d0a49bbd78d 100755
--- a/BUILD/compile-dist
+++ b/BUILD/compile-dist
@@ -40,5 +40,6 @@ fi
# Make sure to enable all features that affect "make dist"
./configure \
+ --with-maria-storage-engine \
--with-ndbcluster
make
diff --git a/BitKeeper/triggers/post-commit b/BitKeeper/triggers/post-commit
index c3a61ed2dde..abc9de5cb4e 100755
--- a/BitKeeper/triggers/post-commit
+++ b/BitKeeper/triggers/post-commit
@@ -11,7 +11,7 @@ FROM=$COMMITTER@mysql.com
COMMITS=commits@lists.mysql.com
DOCS=docs-commit@mysql.com
LIMIT=10000
-VERSION="5.1"
+VERSION="maria"
BKROOT=`bk root`
if [ -x /usr/sbin/sendmail ]; then
@@ -103,7 +103,7 @@ see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
EOF
bk changes -v -r+
bk rset -r+ -ah | bk gnupatch -h -dup -T
- ) | bk sed -e ${LIMIT}q > $BKROOT/BitKeeper/tmp/commits.txt
+ ) > $BKROOT/BitKeeper/tmp/commits.txt
$SENDMAIL -t < $BKROOT/BitKeeper/tmp/commits.txt
diff --git a/BitKeeper/triggers/pre-delta b/BitKeeper/triggers/pre-delta
index cd861703bb5..d6afe0905e7 100755
--- a/BitKeeper/triggers/pre-delta
+++ b/BitKeeper/triggers/pre-delta
@@ -20,3 +20,18 @@ then
exit 1
fi
+# detect if C/C++ files have new trailing white space
+trailingblank=`echo $BK_FILE | egrep '\.(c|.h)'`
+if [ -n "$trailingblank" ]
+then
+ trailingblank=`bk diffs $BK_FILE | grep '^> .*[[:space:]]$'`
+ if [ -n "$trailingblank" ]
+ then
+ echo "bk diffs $BK_FILE | grep '^> .*[[:space:]]$'"
+ echo "reported white space at end of some added/modified lines"
+ echo ""
+ echo "Checkin FAILED!"
+ echo "Fix the problem and retry."
+ exit 1
+ fi
+fi
diff --git a/Makefile.am b/Makefile.am
index ad94c9ea325..698e49f61f0 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -111,7 +111,7 @@ test-embedded:
echo "no program found for 'embedded' tests - skipped testing" ; \
fi
-test: test-unit test-ns test-pr
+test: test-ns test-pr
test-full: test test-nr test-ps
diff --git a/client/mysqladmin.cc b/client/mysqladmin.cc
index b3b699f61fd..01329c550d6 100644
--- a/client/mysqladmin.cc
+++ b/client/mysqladmin.cc
@@ -534,7 +534,7 @@ static int execute_commands(MYSQL *mysql,int argc, char **argv)
If this behaviour is ever changed, Docs should be notified.
*/
- struct rand_struct rand_st;
+ struct my_rnd_struct rand_st;
for (; argc > 0 ; argv++,argc--)
{
@@ -846,7 +846,7 @@ static int execute_commands(MYSQL *mysql,int argc, char **argv)
time_t start_time;
/* Do initialization the same way as we do in mysqld */
start_time=time((time_t*) 0);
- randominit(&rand_st,(ulong) start_time,(ulong) start_time/2);
+ my_rnd_init(&rand_st,(ulong) start_time,(ulong) start_time/2);
if (argc < 2)
{
diff --git a/client/mysqltest.c b/client/mysqltest.c
index e0662fc2251..dbf6c999478 100644
--- a/client/mysqltest.c
+++ b/client/mysqltest.c
@@ -63,7 +63,8 @@
enum {
OPT_SKIP_SAFEMALLOC=OPT_MAX_CLIENT_OPTION,
OPT_PS_PROTOCOL, OPT_SP_PROTOCOL, OPT_CURSOR_PROTOCOL, OPT_VIEW_PROTOCOL,
- OPT_MAX_CONNECT_RETRIES, OPT_MARK_PROGRESS, OPT_LOG_DIR, OPT_TAIL_LINES
+ OPT_MAX_CONNECT_RETRIES, OPT_MARK_PROGRESS, OPT_LOG_DIR, OPT_TAIL_LINES,
+ OPT_GLOBAL_SUBST
};
static int record= 0, opt_sleep= -1;
@@ -104,6 +105,9 @@ static char delimiter[MAX_DELIMITER_LENGTH]= ";";
static uint delimiter_length= 1;
static char TMPDIR[FN_REFLEN];
+static char global_subst_from[200];
+static char global_subst_to[200];
+static char *global_subst= NULL;
/* Block stack */
enum block_cmd {
@@ -168,6 +172,10 @@ static void init_re(void);
static int match_re(my_regex_t *, char *);
static void free_re(void);
+static int replace(DYNAMIC_STRING *ds_str,
+ const char *search_str, ulong search_len,
+ const char *replace_str, ulong replace_len);
+
DYNAMIC_ARRAY q_lines;
#include "sslopt-vars.h"
@@ -1542,6 +1550,7 @@ int dyn_string_cmp(DYNAMIC_STRING* ds, const char *fname)
void check_result(DYNAMIC_STRING* ds)
{
+ int res;
const char* mess= "Result content mismatch\n";
DBUG_ENTER("check_result");
@@ -1551,7 +1560,32 @@ void check_result(DYNAMIC_STRING* ds)
if (access(result_file_name, F_OK) != 0)
die("The specified result file does not exist: '%s'", result_file_name);
- switch (dyn_string_cmp(ds, result_file_name)) {
+ res= dyn_string_cmp(ds, result_file_name);
+ if (global_subst && res != RESULT_OK)
+ {
+ /**
+ @todo MARIA_HACK
+ This serves for when a test is run with --default-storage-engine=X
+ where X is not MyISAM: tests using SHOW CREATE TABLE will always fail
+ because SHOW CREATE TABLE prints X instead of MyISAM. With
+ --global-subst=X,MyISAM , such trivial differences are eliminated and
+ test may be reported as passing.
+ --global-subst is only a quick way to run a lot of existing tests
+ with Maria and find bugs; it is not good enough for reaching the main
+ trees when Maria is merged into them.
+ --global-subst should be removed.
+ */
+ uint global_subst_from_len= strlen(global_subst_from);
+ uint global_subst_to_len= strlen(global_subst_to);
+ while (replace(ds,
+ global_subst_from, global_subst_from_len,
+ global_subst_to, global_subst_to_len) == 0)
+ /* do nothing */ ;
+ /* let's compare again to see if it is ok now */
+ res= dyn_string_cmp(ds, result_file_name);
+ }
+ switch(res)
+ {
case RESULT_OK:
break; /* ok */
case RESULT_LENGTH_MISMATCH:
@@ -1997,9 +2031,9 @@ void var_set_query_get_value(struct st_command *command, VAR *var)
static DYNAMIC_STRING ds_col;
static DYNAMIC_STRING ds_row;
const struct command_arg query_get_value_args[] = {
- "query", ARG_STRING, TRUE, &ds_query, "Query to run",
- "column name", ARG_STRING, TRUE, &ds_col, "Name of column",
- "row number", ARG_STRING, TRUE, &ds_row, "Number for row"
+ {"query", ARG_STRING, TRUE, &ds_query, "Query to run"},
+ {"column name", ARG_STRING, TRUE, &ds_col, "Name of column"},
+ {"row number", ARG_STRING, TRUE, &ds_row, "Number for row"},
};
DBUG_ENTER("var_set_query_get_value");
@@ -5006,6 +5040,11 @@ static struct my_option my_long_options[] =
{"debug-info", OPT_DEBUG_INFO, "Print some debug info at exit.",
(uchar**) &debug_info_flag, (uchar**) &debug_info_flag,
0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"global-subst", OPT_GLOBAL_SUBST, "argument should be 'X,Y' ;"
+ " substitute string X with another Y accross the whole test's current"
+ " result before comparing with expected result file",
+ (uchar**) &global_subst, (uchar**) &global_subst, 0,
+ GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"host", 'h', "Connect to host.", (uchar**) &opt_host, (uchar**) &opt_host, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"include", 'i', "Include SQL before each test case.", (uchar**) &opt_include,
@@ -5264,6 +5303,16 @@ int parse_args(int argc, char **argv)
if (debug_check_flag)
my_end_arg= MY_CHECK_ERROR;
+ if (global_subst != NULL)
+ {
+ char *comma= strstr(global_subst, ",");
+ if (comma == NULL)
+ die("wrong --global-subst, must be X,Y");
+ memcpy(global_subst_from, global_subst, (comma-global_subst));
+ global_subst_from[comma-global_subst]= 0;
+ memcpy(global_subst_to, comma+1, strlen(comma));
+ }
+
return 0;
}
diff --git a/configure.in b/configure.in
index a63f8d6b44c..3eb0856e495 100644
--- a/configure.in
+++ b/configure.in
@@ -808,7 +808,7 @@ AC_HEADER_STDC
AC_HEADER_SYS_WAIT
AC_CHECK_HEADERS(fcntl.h float.h floatingpoint.h ieeefp.h limits.h \
memory.h pwd.h select.h \
- stdlib.h stddef.h \
+ stdlib.h stddef.h sys/stat.h \
strings.h string.h synch.h sys/mman.h sys/socket.h netinet/in.h arpa/inet.h \
sys/timeb.h sys/types.h sys/un.h sys/vadvise.h sys/wait.h term.h \
unistd.h utime.h sys/utime.h termio.h termios.h sched.h crypt.h alloca.h \
@@ -2013,7 +2013,7 @@ AC_CHECK_FUNCS(alarm bcmp bfill bmove bsearch bzero \
pthread_setprio_np pthread_setschedparam pthread_sigmask readlink \
realpath rename rint rwlock_init setupterm \
shmget shmat shmdt shmctl sigaction sigemptyset sigaddset \
- sighold sigset sigthreadmask port_create sleep \
+ sighold sigset sigthreadmask port_create sleep thr_yield \
snprintf socket stpcpy strcasecmp strerror strsignal strnlen strpbrk strstr \
strtol strtoll strtoul strtoull tell tempnam thr_setconcurrency vidattr \
posix_fallocate)
@@ -2321,6 +2321,17 @@ MYSQL_CHECK_SSL
# functions tested above
#--------------------------------------------------------------------
+# MyISAM is declared here,not in storage/myisam/plug.in
+# because we want it to be the first in the list of plugins,
+# Maria needs it. When it'll be fixed the declaration below can
+# be removed and restored (uncommented) in storage/myisam/plug.in
+MYSQL_STORAGE_ENGINE(myisam,no, [MyISAM Storage Engine],
+ [Traditional non-transactional MySQL tables])
+MYSQL_PLUGIN_DIRECTORY(myisam, [storage/myisam])
+MYSQL_PLUGIN_STATIC(myisam, [libmyisam.a])
+MYSQL_PLUGIN_MANDATORY(myisam) dnl Default
+MYSQL_PLUGIN_DEPENDS_ON_MYSQL_INTERNALS(myisam, [ha_myisam.cc])
+
MYSQL_STORAGE_ENGINE(partition, partition, [Partition Support],
[MySQL Partitioning Support], [max,max-no-ndb])
@@ -2547,8 +2558,6 @@ AC_SUBST(readline_basedir)
AC_SUBST(readline_link)
AC_SUBST(readline_h_ln_cmd)
-
-
# Include man pages, if desired, adapted to the configured parts.
if test X"$with_man" = Xyes
then
diff --git a/dbug/dbug.c b/dbug/dbug.c
index 7f4292d18b1..4ddf785b27c 100644
--- a/dbug/dbug.c
+++ b/dbug/dbug.c
@@ -71,7 +71,12 @@
*
*/
+/*
+ We can't have SAFE_MUTEX defined here as this will cause recursion
+ in pthread_mutex_lock
+*/
+#undef SAFE_MUTEX
#include <my_global.h>
#include <m_string.h>
#include <errno.h>
@@ -79,7 +84,6 @@
#include <process.h>
#endif
-
#ifndef DBUG_OFF
@@ -322,12 +326,11 @@ static unsigned long Clock(void);
#ifdef THREAD
#include <my_pthread.h>
-pthread_mutex_t THR_LOCK_dbug;
+static pthread_mutex_t THR_LOCK_dbug;
static CODE_STATE *code_state(void)
{
- CODE_STATE *cs=0;
- struct st_my_thread_var *tmp;
+ CODE_STATE *cs, **cs_ptr;
if (!init_done)
{
@@ -338,18 +341,17 @@ static CODE_STATE *code_state(void)
init_done=TRUE;
}
- if ((tmp=my_thread_var))
+ if (!(cs_ptr= (CODE_STATE**) my_thread_var_dbug()))
+ return 0; /* Thread not initialised */
+ if (!(cs= *cs_ptr))
{
- if (!(cs=(CODE_STATE *) tmp->dbug))
- {
- cs=(CODE_STATE*) DbugMalloc(sizeof(*cs));
- bzero((uchar*) cs,sizeof(*cs));
- cs->process= db_process ? db_process : "dbug";
- cs->func="?func";
- cs->file="?file";
- cs->stack=&init_settings;
- tmp->dbug= (void*) cs;
- }
+ cs=(CODE_STATE*) DbugMalloc(sizeof(*cs));
+ bzero((uchar*) cs,sizeof(*cs));
+ cs->process= db_process ? db_process : "dbug";
+ cs->func="?func";
+ cs->file="?file";
+ cs->stack=&init_settings;
+ *cs_ptr= cs;
}
return cs;
}
diff --git a/dbug/dbug_add_tags.pl b/dbug/dbug_add_tags.pl
index 141a2ed85f1..3e51a54c707 100755
--- a/dbug/dbug_add_tags.pl
+++ b/dbug/dbug_add_tags.pl
@@ -7,7 +7,7 @@ $ctags="exctags -x -f - --c-types=f -u";
sub get_tag {
local $.; local $_=<TAGS>;
($symbol, $line)= /^(.*\S)\s+function\s+(\d+)/;
- $symbol=$1 if /\s(\S+)\s*\(/;
+ $symbol=$1 if /[\s*]([^\s*]+)\s*\(/;
$line=1e50 unless $line;
}
@@ -51,7 +51,7 @@ while($src=shift)
$skip=!$semicolon;
$semicolon= /;\s*$/;
print && next if $skip ||
- (/^\s+\w+((::\w+)?|<\w+>)\s+\**\w+/ && !/^\s*return/);
+ (/^\s+\w+((::\w+)?|<\w+>)\s+\**\w+/ && !/^\s*return\b/);
last if /DBUG_ENTER/;
print "$tab DBUG_ENTER(\"$symbol\");\n";
print "\n" unless $_ eq "\n";
diff --git a/include/Makefile.am b/include/Makefile.am
index 8335da36e93..540fb9a54dd 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -26,8 +26,8 @@ pkginclude_HEADERS = $(HEADERS_ABI) my_dbug.h m_string.h my_sys.h \
my_getopt.h sslopt-longopts.h my_dir.h \
sslopt-vars.h sslopt-case.h sql_common.h keycache.h \
m_ctype.h my_attribute.h $(HEADERS_GEN)
-noinst_HEADERS = config-win.h config-netware.h \
- heap.h my_bitmap.h my_uctype.h \
+noinst_HEADERS = config-win.h config-netware.h lf.h my_bit.h \
+ heap.h maria.h myisamchk.h my_bitmap.h my_uctype.h \
myisam.h myisampack.h myisammrg.h ft_global.h\
mysys_err.h my_base.h help_start.h help_end.h \
my_nosys.h my_alarm.h queues.h rijndael.h sha1.h \
@@ -36,7 +36,7 @@ noinst_HEADERS = config-win.h config-netware.h \
mysql_version.h.in my_handler.h my_time.h \
my_vle.h my_user.h my_atomic.h atomic/nolock.h \
atomic/rwlock.h atomic/x86-gcc.h atomic/x86-msvc.h \
- my_libwrap.h
+ my_libwrap.h wqueue.h
# Remove built files and the symlinked directories
CLEANFILES = $(BUILT_SOURCES) readline openssl
diff --git a/include/atomic/nolock.h b/include/atomic/nolock.h
index f15c8b13b7f..9b5cbecbd0a 100644
--- a/include/atomic/nolock.h
+++ b/include/atomic/nolock.h
@@ -13,24 +13,25 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-#if defined(__i386__) || defined(_M_IX86)
-
-#ifdef MY_ATOMIC_MODE_DUMMY
-# define LOCK ""
-#else
-# define LOCK "lock"
-#endif
-
-#ifdef __GNUC__
-#include "x86-gcc.h"
-#elif defined(_MSC_VER)
-#include "x86-msvc.h"
-#endif
+#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__)
+
+# ifdef MY_ATOMIC_MODE_DUMMY
+# define LOCK_prefix ""
+# else
+# define LOCK_prefix "lock"
+# endif
+
+# ifdef __GNUC__
+# include "x86-gcc.h"
+# elif defined(_MSC_VER)
+# error Broken!
+# include "x86-msvc.h"
+# endif
#endif
#ifdef make_atomic_cas_body
-typedef struct { } my_atomic_rwlock_t;
+typedef struct { } my_atomic_rwlock_t __attribute__ ((unused));
#define my_atomic_rwlock_destroy(name)
#define my_atomic_rwlock_init(name)
#define my_atomic_rwlock_rdlock(name)
diff --git a/include/atomic/rwlock.h b/include/atomic/rwlock.h
index 18b77e93d80..cb41952b70c 100644
--- a/include/atomic/rwlock.h
+++ b/include/atomic/rwlock.h
@@ -13,7 +13,7 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t;
+typedef struct {pthread_mutex_t rw;} my_atomic_rwlock_t;
#ifdef MY_ATOMIC_MODE_DUMMY
/*
@@ -31,17 +31,22 @@ typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t;
#define my_atomic_rwlock_wrunlock(name)
#define MY_ATOMIC_MODE "dummy (non-atomic)"
#else
-#define my_atomic_rwlock_destroy(name) pthread_rwlock_destroy(& (name)->rw)
-#define my_atomic_rwlock_init(name) pthread_rwlock_init(& (name)->rw, 0)
-#define my_atomic_rwlock_rdlock(name) pthread_rwlock_rdlock(& (name)->rw)
-#define my_atomic_rwlock_wrlock(name) pthread_rwlock_wrlock(& (name)->rw)
-#define my_atomic_rwlock_rdunlock(name) pthread_rwlock_unlock(& (name)->rw)
-#define my_atomic_rwlock_wrunlock(name) pthread_rwlock_unlock(& (name)->rw)
-#define MY_ATOMIC_MODE "rwlocks"
+/*
+ we're using read-write lock macros but map them to mutex locks, and they're
+ faster. Still, having semantically rich API we can change the
+ underlying implementation, if necessary.
+*/
+#define my_atomic_rwlock_destroy(name) pthread_mutex_destroy(& (name)->rw)
+#define my_atomic_rwlock_init(name) pthread_mutex_init(& (name)->rw, 0)
+#define my_atomic_rwlock_rdlock(name) pthread_mutex_lock(& (name)->rw)
+#define my_atomic_rwlock_wrlock(name) pthread_mutex_lock(& (name)->rw)
+#define my_atomic_rwlock_rdunlock(name) pthread_mutex_unlock(& (name)->rw)
+#define my_atomic_rwlock_wrunlock(name) pthread_mutex_unlock(& (name)->rw)
+#define MY_ATOMIC_MODE "mutex"
#endif
#define make_atomic_add_body(S) int ## S sav; sav= *a; *a+= v; v=sav;
-#define make_atomic_swap_body(S) int ## S sav; sav= *a; *a= v; v=sav;
+#define make_atomic_fas_body(S) int ## S sav; sav= *a; *a= v; v=sav;
#define make_atomic_cas_body(S) if ((ret= (*a == *cmp))) *a= set; else *cmp=*a;
#define make_atomic_load_body(S) ret= *a;
#define make_atomic_store_body(S) *a= v;
diff --git a/include/atomic/x86-gcc.h b/include/atomic/x86-gcc.h
index d79dadbf05e..5a34bc22f9e 100644
--- a/include/atomic/x86-gcc.h
+++ b/include/atomic/x86-gcc.h
@@ -19,10 +19,18 @@
architectures support double-word (128-bit) cas.
*/
-#ifdef MY_ATOMIC_NO_XADD
-#define MY_ATOMIC_MODE "gcc-x86" LOCK "-no-xadd"
+#ifdef __x86_64__
+# ifdef MY_ATOMIC_NO_XADD
+# define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix "-no-xadd"
+# else
+# define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix
+# endif
#else
-#define MY_ATOMIC_MODE "gcc-x86" LOCK
+# ifdef MY_ATOMIC_NO_XADD
+# define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix "-no-xadd"
+# else
+# define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix
+# endif
#endif
/* fix -ansi errors while maintaining readability */
@@ -32,12 +40,12 @@
#ifndef MY_ATOMIC_NO_XADD
#define make_atomic_add_body(S) \
- asm volatile (LOCK "; xadd %0, %1;" : "+r" (v) , "+m" (*a))
+ asm volatile (LOCK_prefix "; xadd %0, %1;" : "+r" (v) , "+m" (*a))
#endif
-#define make_atomic_swap_body(S) \
- asm volatile ("; xchg %0, %1;" : "+r" (v) , "+m" (*a))
+#define make_atomic_fas_body(S) \
+ asm volatile ("xchg %0, %1;" : "+r" (v) , "+m" (*a))
#define make_atomic_cas_body(S) \
- asm volatile (LOCK "; cmpxchg %3, %0; setz %2;" \
+ asm volatile (LOCK_prefix "; cmpxchg %3, %0; setz %2;" \
: "+m" (*a), "+a" (*cmp), "=q" (ret): "r" (set))
#ifdef MY_ATOMIC_MODE_DUMMY
@@ -46,13 +54,16 @@
#else
/*
Actually 32-bit reads/writes are always atomic on x86
- But we add LOCK here anyway to force memory barriers
+ But we add LOCK_prefix here anyway to force memory barriers
*/
#define make_atomic_load_body(S) \
ret=0; \
- asm volatile (LOCK "; cmpxchg %2, %0" \
+ asm volatile (LOCK_prefix "; cmpxchg %2, %0" \
: "+m" (*a), "+a" (ret): "r" (ret))
#define make_atomic_store_body(S) \
- asm volatile ("; xchg %0, %1;" : "+m" (*a) : "r" (v))
+ asm volatile ("; xchg %0, %1;" : "+m" (*a), "+r" (v))
#endif
+/* TODO test on intel whether the below helps. on AMD it makes no difference */
+//#define LF_BACKOFF ({asm volatile ("rep; nop"); 1; })
+
diff --git a/include/atomic/x86-msvc.h b/include/atomic/x86-msvc.h
index c4885bb8451..2a2cfe70de9 100644
--- a/include/atomic/x86-msvc.h
+++ b/include/atomic/x86-msvc.h
@@ -25,24 +25,24 @@
#ifndef _atomic_h_cleanup_
#define _atomic_h_cleanup_ "atomic/x86-msvc.h"
-#define MY_ATOMIC_MODE "msvc-x86" LOCK
+#define MY_ATOMIC_MODE "msvc-x86" LOCK_prefix
#define make_atomic_add_body(S) \
_asm { \
_asm mov reg_ ## S, v \
- _asm LOCK xadd *a, reg_ ## S \
+ _asm LOCK_prefix xadd *a, reg_ ## S \
_asm movzx v, reg_ ## S \
}
#define make_atomic_cas_body(S) \
_asm { \
_asm mov areg_ ## S, *cmp \
_asm mov reg2_ ## S, set \
- _asm LOCK cmpxchg *a, reg2_ ## S \
+ _asm LOCK_prefix cmpxchg *a, reg2_ ## S \
_asm mov *cmp, areg_ ## S \
_asm setz al \
_asm movzx ret, al \
}
-#define make_atomic_swap_body(S) \
+#define make_atomic_fas_body(S) \
_asm { \
_asm mov reg_ ## S, v \
_asm xchg *a, reg_ ## S \
@@ -55,13 +55,13 @@
#else
/*
Actually 32-bit reads/writes are always atomic on x86
- But we add LOCK here anyway to force memory barriers
+ But we add LOCK_prefix here anyway to force memory barriers
*/
#define make_atomic_load_body(S) \
_asm { \
_asm mov areg_ ## S, 0 \
_asm mov reg2_ ## S, areg_ ## S \
- _asm LOCK cmpxchg *a, reg2_ ## S \
+ _asm LOCK_prefix cmpxchg *a, reg2_ ## S \
_asm mov ret, areg_ ## S \
}
#define make_atomic_store_body(S) \
diff --git a/include/ft_global.h b/include/ft_global.h
index 752371d6bc6..dba8a6e75e5 100644
--- a/include/ft_global.h
+++ b/include/ft_global.h
@@ -65,6 +65,17 @@ void ft_free_stopwords(void);
FT_INFO *ft_init_search(uint,void *, uint, uchar *, uint,CHARSET_INFO *, uchar *);
my_bool ft_boolean_check_syntax_string(const uchar *);
+/* Internal symbols for fulltext between maria and MyISAM */
+
+#define HA_FT_WTYPE HA_KEYTYPE_FLOAT
+#define HA_FT_WLEN 4
+#define FT_SEGS 2
+
+#define ft_sintXkorr(A) mi_sint4korr(A)
+#define ft_intXstore(T,A) mi_int4store(T,A)
+
+extern const HA_KEYSEG ft_keysegs[FT_SEGS];
+
#ifdef __cplusplus
}
#endif
diff --git a/include/keycache.h b/include/keycache.h
index a6005bae878..8c4ee4afe81 100644
--- a/include/keycache.h
+++ b/include/keycache.h
@@ -132,7 +132,8 @@ extern void end_key_cache(KEY_CACHE *keycache, my_bool cleanup);
/* Functions to handle multiple key caches */
extern my_bool multi_keycache_init(void);
extern void multi_keycache_free(void);
-extern KEY_CACHE *multi_key_cache_search(uchar *key, uint length);
+extern KEY_CACHE *multi_key_cache_search(uchar *key, uint length,
+ KEY_CACHE *def);
extern my_bool multi_key_cache_set(const uchar *key, uint length,
KEY_CACHE *key_cache);
extern void multi_key_cache_change(KEY_CACHE *old_data,
diff --git a/include/lf.h b/include/lf.h
new file mode 100644
index 00000000000..d4c5c64a01b
--- /dev/null
+++ b/include/lf.h
@@ -0,0 +1,260 @@
+/* Copyright (C) 2007 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _lf_h
+#define _lf_h
+
+#include <my_atomic.h>
+
+/*
+ Helpers to define both func() and _func(), where
+ func() is a _func() protected by my_atomic_rwlock_wrlock()
+*/
+
+#define lock_wrap(f, t, proto_args, args, lock) \
+t _ ## f proto_args; \
+static inline t f proto_args \
+{ \
+ t ret; \
+ my_atomic_rwlock_wrlock(lock); \
+ ret= _ ## f args; \
+ my_atomic_rwlock_wrunlock(lock); \
+ return ret; \
+}
+
+#define lock_wrap_void(f, proto_args, args, lock) \
+void _ ## f proto_args; \
+static inline void f proto_args \
+{ \
+ my_atomic_rwlock_wrlock(lock); \
+ _ ## f args; \
+ my_atomic_rwlock_wrunlock(lock); \
+}
+
+#define nolock_wrap(f, t, proto_args, args) \
+t _ ## f proto_args; \
+static inline t f proto_args \
+{ \
+ return _ ## f args; \
+}
+
+#define nolock_wrap_void(f, proto_args, args) \
+void _ ## f proto_args; \
+static inline void f proto_args \
+{ \
+ _ ## f args; \
+}
+
+/*
+ wait-free dynamic array, see lf_dynarray.c
+
+ 4 levels of 256 elements each mean 4311810304 elements in an array - it
+ should be enough for a while
+*/
+#define LF_DYNARRAY_LEVEL_LENGTH 256
+#define LF_DYNARRAY_LEVELS 4
+
+typedef struct {
+ void * volatile level[LF_DYNARRAY_LEVELS];
+ uint size_of_element;
+ my_atomic_rwlock_t lock;
+} LF_DYNARRAY;
+
+typedef int (*lf_dynarray_func)(void *, void *);
+
+void lf_dynarray_init(LF_DYNARRAY *array, uint element_size);
+void lf_dynarray_destroy(LF_DYNARRAY *array);
+
+nolock_wrap(lf_dynarray_value, void *,
+ (LF_DYNARRAY *array, uint idx),
+ (array, idx))
+lock_wrap(lf_dynarray_lvalue, void *,
+ (LF_DYNARRAY *array, uint idx),
+ (array, idx),
+ &array->lock)
+nolock_wrap(lf_dynarray_iterate, int,
+ (LF_DYNARRAY *array, lf_dynarray_func func, void *arg),
+ (array, func, arg))
+
+/*
+ pin manager for memory allocator, lf_alloc-pin.c
+*/
+
+#define LF_PINBOX_PINS 4
+#define LF_PURGATORY_SIZE 10
+
+typedef void lf_pinbox_free_func(void *, void *, void*);
+
+typedef struct {
+ LF_DYNARRAY pinarray;
+ lf_pinbox_free_func *free_func;
+ void *free_func_arg;
+ uint free_ptr_offset;
+ uint32 volatile pinstack_top_ver; /* this is a versioned pointer */
+ uint32 volatile pins_in_array; /* number of elements in array */
+} LF_PINBOX;
+
+typedef struct {
+ void * volatile pin[LF_PINBOX_PINS];
+ LF_PINBOX *pinbox;
+ void *stack_ends_here;
+ void *purgatory;
+ uint32 purgatory_count;
+ uint32 volatile link;
+/* we want sizeof(LF_PINS) to be 128 to avoid false sharing */
+ char pad[128-sizeof(uint32)*2
+ -sizeof(LF_PINBOX *)
+ -sizeof(void*)
+ -sizeof(void *)*(LF_PINBOX_PINS+1)];
+} LF_PINS;
+
+/*
+ shortcut macros to do an atomic_wrlock on a structure that uses pins
+ (e.g. lf_hash).
+*/
+#define lf_rwlock_by_pins(PINS) \
+ my_atomic_rwlock_wrlock(&(PINS)->pinbox->pinarray.lock)
+#define lf_rwunlock_by_pins(PINS) \
+ my_atomic_rwlock_wrunlock(&(PINS)->pinbox->pinarray.lock)
+
+/*
+ compile-time assert, to require "no less than N" pins
+ it's enough if it'll fail on at least one compiler, so
+ we'll enable it on GCC only, which supports zero-length arrays.
+*/
+#if defined(__GNUC__) && defined(MY_LF_EXTRA_DEBUG)
+#define LF_REQUIRE_PINS(N) \
+ static const char require_pins[LF_PINBOX_PINS-N] \
+ __attribute__ ((unused)); \
+ static const int LF_NUM_PINS_IN_THIS_FILE= N;
+#define _lf_pin(PINS, PIN, ADDR) \
+ ( \
+ assert(PIN < LF_NUM_PINS_IN_THIS_FILE), \
+ my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)) \
+ )
+#else
+#define LF_REQUIRE_PINS(N)
+#define _lf_pin(PINS, PIN, ADDR) my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR))
+#endif
+
+#define _lf_unpin(PINS, PIN) _lf_pin(PINS, PIN, NULL)
+#define lf_pin(PINS, PIN, ADDR) \
+ do { \
+ lf_rwlock_by_pins(PINS); \
+ _lf_pin(PINS, PIN, ADDR); \
+ lf_rwunlock_by_pins(PINS); \
+ } while (0)
+#define lf_unpin(PINS, PIN) lf_pin(PINS, PIN, NULL)
+#define _lf_assert_pin(PINS, PIN) assert((PINS)->pin[PIN] != 0)
+#define _lf_assert_unpin(PINS, PIN) assert((PINS)->pin[PIN] == 0)
+
+void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
+ lf_pinbox_free_func *free_func, void * free_func_arg);
+void lf_pinbox_destroy(LF_PINBOX *pinbox);
+
+lock_wrap(lf_pinbox_get_pins, LF_PINS *,
+ (LF_PINBOX *pinbox, void *stack_end),
+ (pinbox, stack_end),
+ &pinbox->pinarray.lock)
+lock_wrap_void(lf_pinbox_put_pins,
+ (LF_PINS *pins),
+ (pins),
+ &pins->pinbox->pinarray.lock)
+lock_wrap_void(lf_pinbox_free,
+ (LF_PINS *pins, void *addr),
+ (pins, addr),
+ &pins->pinbox->pinarray.lock)
+
+/*
+ memory allocator, lf_alloc-pin.c
+*/
+
+struct st_lf_alloc_node {
+ struct st_lf_alloc_node *next;
+};
+
+typedef struct st_lf_allocator {
+ LF_PINBOX pinbox;
+ struct st_lf_alloc_node * volatile top;
+ uint element_size;
+ uint32 volatile mallocs;
+} LF_ALLOCATOR;
+
+void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset);
+void lf_alloc_destroy(LF_ALLOCATOR *allocator);
+uint lf_alloc_pool_count(LF_ALLOCATOR *allocator);
+/*
+ shortcut macros to access underlying pinbox functions from an LF_ALLOCATOR
+ see _lf_pinbox_get_pins() and _lf_pinbox_put_pins()
+*/
+#define _lf_alloc_free(PINS, PTR) _lf_pinbox_free((PINS), (PTR))
+#define lf_alloc_free(PINS, PTR) lf_pinbox_free((PINS), (PTR))
+#define _lf_alloc_get_pins(A, ST) _lf_pinbox_get_pins(&(A)->pinbox, (ST))
+#define lf_alloc_get_pins(A, ST) lf_pinbox_get_pins(&(A)->pinbox, (ST))
+#define _lf_alloc_put_pins(PINS) _lf_pinbox_put_pins(PINS)
+#define lf_alloc_put_pins(PINS) lf_pinbox_put_pins(PINS)
+#define lf_alloc_direct_free(ALLOC, ADDR) my_free((uchar*)(ADDR), MYF(0))
+
+lock_wrap(lf_alloc_new, void *,
+ (LF_PINS *pins),
+ (pins),
+ &pins->pinbox->pinarray.lock)
+
+/*
+ extendible hash, lf_hash.c
+*/
+#include <hash.h>
+
+#define LF_HASH_UNIQUE 1
+
+typedef struct {
+ LF_DYNARRAY array; /* hash itself */
+ LF_ALLOCATOR alloc; /* allocator for elements */
+ hash_get_key get_key; /* see HASH */
+ CHARSET_INFO *charset; /* see HASH */
+ uint key_offset, key_length; /* see HASH */
+ uint element_size, flags; /* LF_HASH_UNIQUE, etc */
+ int32 volatile size; /* size of array */
+ int32 volatile count; /* number of elements in the hash */
+} LF_HASH;
+
+void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
+ uint key_offset, uint key_length, hash_get_key get_key,
+ CHARSET_INFO *charset);
+void lf_hash_destroy(LF_HASH *hash);
+int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data);
+void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen);
+int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen);
+/*
+ shortcut macros to access underlying pinbox functions from an LF_HASH
+ see _lf_pinbox_get_pins() and _lf_pinbox_put_pins()
+*/
+#define _lf_hash_get_pins(HASH, ST) _lf_alloc_get_pins(&(HASH)->alloc, (ST))
+#define lf_hash_get_pins(HASH, ST) lf_alloc_get_pins(&(HASH)->alloc, (ST))
+#define _lf_hash_put_pins(PINS) _lf_pinbox_put_pins(PINS)
+#define lf_hash_put_pins(PINS) lf_pinbox_put_pins(PINS)
+#define lf_hash_search_unpin(PINS) lf_unpin((PINS), 2)
+/*
+ cleanup
+*/
+
+#undef lock_wrap_void
+#undef lock_wrap
+#undef nolock_wrap_void
+#undef nolock_wrap
+
+#endif
+
diff --git a/include/m_string.h b/include/m_string.h
index 9d2a30917bd..c24bfd7aa6c 100644
--- a/include/m_string.h
+++ b/include/m_string.h
@@ -67,7 +67,7 @@
# define bcopy(s, d, n) memcpy((d), (s), (n))
# define bcmp(A,B,C) memcmp((A),(B),(C))
# define bzero(A,B) memset((A),0,(B))
-# define bmove_align(A,B,C) memcpy((A),(B),(C))
+# define bmove_align(A,B,C) memcpy((A),(B),(C))
#endif
#if defined(__cplusplus)
@@ -129,7 +129,10 @@ extern size_t bcmp(const uchar *s1,const uchar *s2,size_t len);
extern size_t my_bcmp(const uchar *s1,const uchar *s2,size_t len);
#undef bcmp
#define bcmp(A,B,C) my_bcmp((A),(B),(C))
-#endif
+#define bzero_if_purify(A,B) bzero(A,B)
+#else
+#define bzero_if_purify(A,B)
+#endif /* HAVE_purify */
#ifndef bmove512
extern void bmove512(uchar *dst,const uchar *src,size_t len);
diff --git a/include/maria.h b/include/maria.h
new file mode 100644
index 00000000000..6e9ae4d8391
--- /dev/null
+++ b/include/maria.h
@@ -0,0 +1,448 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* This file should be included when using maria functions */
+
+#ifndef _maria_h
+#define _maria_h
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifndef _my_base_h
+#include <my_base.h>
+#endif
+#ifndef _m_ctype_h
+#include <m_ctype.h>
+#endif
+#include "../storage/maria/ma_pagecache.h"
+#include "my_handler.h"
+#include "ft_global.h"
+#include <myisamchk.h>
+#include <mysql/plugin.h>
+
+/*
+ Limit max keys according to HA_MAX_POSSIBLE_KEY; See myisamchk.h for details
+*/
+
+#if MAX_INDEXES > HA_MAX_POSSIBLE_KEY
+#define MARIA_MAX_KEY HA_MAX_POSSIBLE_KEY /* Max allowed keys */
+#else
+#define MARIA_MAX_KEY MAX_INDEXES /* Max allowed keys */
+#endif
+
+#define MARIA_MAX_MSG_BUF 1024 /* used in CHECK TABLE, REPAIR TABLE */
+#define MARIA_NAME_IEXT ".MAI"
+#define MARIA_NAME_DEXT ".MAD"
+/* Max extra space to use when sorting keys */
+#define MARIA_MAX_TEMP_LENGTH 2*1024L*1024L*1024L
+/* Possible values for maria_block_size (must be power of 2) */
+#define MARIA_KEY_BLOCK_LENGTH 8192 /* default key block length */
+#define MARIA_MIN_KEY_BLOCK_LENGTH 1024 /* Min key block length */
+#define MARIA_MAX_KEY_BLOCK_LENGTH 32768
+/* Minimal page cache when we only want to be able to scan a table */
+#define MARIA_MIN_PAGE_CACHE_SIZE (8192L*16L)
+
+/*
+ In the following macros '_keyno_' is 0 .. keys-1.
+ If there can be more keys than bits in the key_map, the highest bit
+ is for all upper keys. They cannot be switched individually.
+ This means that clearing of high keys is ignored, setting one high key
+ sets all high keys.
+*/
+#define MARIA_KEYMAP_BITS (8 * SIZEOF_LONG_LONG)
+#define MARIA_KEYMAP_HIGH_MASK (ULL(1) << (MARIA_KEYMAP_BITS - 1))
+#define maria_get_mask_all_keys_active(_keys_) \
+ (((_keys_) < MARIA_KEYMAP_BITS) ? \
+ ((ULL(1) << (_keys_)) - ULL(1)) : \
+ (~ ULL(0)))
+#if MARIA_MAX_KEY > MARIA_KEYMAP_BITS
+#define maria_is_key_active(_keymap_,_keyno_) \
+ (((_keyno_) < MARIA_KEYMAP_BITS) ? \
+ test((_keymap_) & (ULL(1) << (_keyno_))) : \
+ test((_keymap_) & MARIA_KEYMAP_HIGH_MASK))
+#define maria_set_key_active(_keymap_,_keyno_) \
+ (_keymap_)|= (((_keyno_) < MARIA_KEYMAP_BITS) ? \
+ (ULL(1) << (_keyno_)) : \
+ MARIA_KEYMAP_HIGH_MASK)
+#define maria_clear_key_active(_keymap_,_keyno_) \
+ (_keymap_)&= (((_keyno_) < MARIA_KEYMAP_BITS) ? \
+ (~ (ULL(1) << (_keyno_))) : \
+ (~ (ULL(0))) /*ignore*/ )
+#else
+#define maria_is_key_active(_keymap_,_keyno_) \
+ test((_keymap_) & (ULL(1) << (_keyno_)))
+#define maria_set_key_active(_keymap_,_keyno_) \
+ (_keymap_)|= (ULL(1) << (_keyno_))
+#define maria_clear_key_active(_keymap_,_keyno_) \
+ (_keymap_)&= (~ (ULL(1) << (_keyno_)))
+#endif
+#define maria_is_any_key_active(_keymap_) \
+ test((_keymap_))
+#define maria_is_all_keys_active(_keymap_,_keys_) \
+ ((_keymap_) == maria_get_mask_all_keys_active(_keys_))
+#define maria_set_all_keys_active(_keymap_,_keys_) \
+ (_keymap_)= maria_get_mask_all_keys_active(_keys_)
+#define maria_clear_all_keys_active(_keymap_) \
+ (_keymap_)= 0
+#define maria_intersect_keys_active(_to_,_from_) \
+ (_to_)&= (_from_)
+#define maria_is_any_intersect_keys_active(_keymap1_,_keys_,_keymap2_) \
+ ((_keymap1_) & (_keymap2_) & \
+ maria_get_mask_all_keys_active(_keys_))
+#define maria_copy_keys_active(_to_,_maxkeys_,_from_) \
+ (_to_)= (maria_get_mask_all_keys_active(_maxkeys_) & \
+ (_from_))
+
+ /* Param to/from maria_info */
+
+typedef ulonglong MARIA_RECORD_POS;
+
+typedef struct st_maria_info
+{
+ ha_rows records; /* Records in database */
+ ha_rows deleted; /* Deleted records in database */
+ MARIA_RECORD_POS recpos; /* Pos for last used record */
+ MARIA_RECORD_POS newrecpos; /* Pos if we write new record */
+ MARIA_RECORD_POS dup_key_pos; /* Position to record with dup key */
+ my_off_t data_file_length; /* Length of data file */
+ my_off_t max_data_file_length, index_file_length;
+ my_off_t max_index_file_length, delete_length;
+ ulonglong auto_increment;
+ ulonglong key_map; /* Which keys are used */
+ time_t create_time; /* When table was created */
+ time_t check_time;
+ time_t update_time;
+ ulong record_offset;
+ double *rec_per_key; /* for sql optimizing */
+ ulong reclength; /* Recordlength */
+ ulong mean_reclength; /* Mean recordlength (if packed) */
+ char *data_file_name, *index_file_name;
+ enum data_file_type data_file_type;
+ uint keys; /* Number of keys in use */
+ uint options; /* HA_OPTION_... used */
+ uint reflength;
+ int errkey, /* With key was dupplicated on err */
+ sortkey; /* clustered by this key */
+ File filenr; /* (uniq) filenr for datafile */
+} MARIA_INFO;
+
+
+typedef struct st_maria_create_info
+{
+ const char *index_file_name, *data_file_name; /* If using symlinks */
+ ha_rows max_rows;
+ ha_rows reloc_rows;
+ ulonglong auto_increment;
+ ulonglong data_file_length;
+ ulonglong key_file_length;
+ /* Size of null bitmap at start of row */
+ uint null_bytes;
+ uint old_options;
+ enum data_file_type org_data_file_type;
+ uint8 language;
+ my_bool with_auto_increment, transactional;
+} MARIA_CREATE_INFO;
+
+struct st_maria_share;
+struct st_maria_handler; /* For referense */
+typedef struct st_maria_handler MARIA_HA;
+struct st_maria_s_param;
+
+typedef struct st_maria_keydef /* Key definition with open & info */
+{
+ struct st_maria_share *share; /* Pointer to base (set in open) */
+ uint16 keysegs; /* Number of key-segment */
+ uint16 flag; /* NOSAME, PACK_USED */
+
+ uint8 key_alg; /* BTREE, RTREE */
+ uint8 key_nr; /* key number (auto) */
+ uint16 block_length; /* Length of keyblock (auto) */
+ uint16 underflow_block_length; /* When to execute underflow */
+ uint16 keylength; /* Tot length of keyparts (auto) */
+ uint16 minlength; /* min length of (packed) key (auto) */
+ uint16 maxlength; /* max length of (packed) key (auto) */
+ uint32 write_comp_flag; /* compare flag for write key (auto) */
+ uint32 version; /* For concurrent read/write */
+ uint32 ftparser_nr; /* distinct ftparser number */
+
+ HA_KEYSEG *seg, *end;
+ struct st_mysql_ftparser *parser; /* Fulltext [pre]parser */
+ int (*bin_search)(MARIA_HA *info,
+ struct st_maria_keydef *keyinfo, uchar *page,
+ const uchar *key, uint key_len, uint comp_flag,
+ uchar **ret_pos,
+ uchar *buff, my_bool *was_last_key);
+ uint(*get_key)(struct st_maria_keydef *keyinfo, uint nod_flag,
+ uchar **page, uchar *key);
+ int (*pack_key)(struct st_maria_keydef *keyinfo, uint nod_flag,
+ uchar *next_key, uchar *org_key, uchar *prev_key,
+ const uchar *key, struct st_maria_s_param *s_temp);
+ void (*store_key)(struct st_maria_keydef *keyinfo, uchar *key_pos,
+ struct st_maria_s_param *s_temp);
+ int (*ck_insert)(MARIA_HA *inf, uint k_nr, uchar *k, uint klen);
+ int (*ck_delete)(MARIA_HA *inf, uint k_nr, uchar *k, uint klen);
+} MARIA_KEYDEF;
+
+
+#define MARIA_UNIQUE_HASH_LENGTH 4
+
+typedef struct st_maria_unique_def /* Segment definition of unique */
+{
+ uint16 keysegs; /* Number of key-segment */
+ uint8 key; /* Mapped to which key */
+ uint8 null_are_equal;
+ HA_KEYSEG *seg, *end;
+} MARIA_UNIQUEDEF;
+
+typedef struct st_maria_decode_tree /* Decode huff-table */
+{
+ uint16 *table;
+ uint quick_table_bits;
+ uchar *intervalls;
+} MARIA_DECODE_TREE;
+
+
+struct st_maria_bit_buff;
+
+/*
+ Note that null markers should always be first in a row !
+ When creating a column, one should only specify:
+ type, length, null_bit and null_pos
+*/
+
+typedef struct st_maria_columndef /* column information */
+{
+ enum en_fieldtype type;
+ uint32 offset; /* Offset to position in row */
+ uint16 length; /* length of field */
+ uint16 column_nr;
+ /* Intern variable (size of total storage area for the row) */
+ uint16 fill_length;
+ uint16 null_pos; /* Position for null marker */
+ uint16 empty_pos; /* Position for empty marker */
+ uint8 null_bit; /* If column may be NULL */
+ /* Intern. Set if column should be zero packed (part of empty_bits) */
+ uint8 empty_bit;
+
+#ifndef NOT_PACKED_DATABASES
+ void(*unpack)(struct st_maria_columndef *rec,
+ struct st_maria_bit_buff *buff,
+ uchar *start, uchar *end);
+ enum en_fieldtype base_type;
+ uint space_length_bits, pack_type;
+ MARIA_DECODE_TREE *huff_tree;
+#endif
+} MARIA_COLUMNDEF;
+
+
+extern ulong maria_block_size, maria_checkpoint_frequency;
+extern ulong maria_concurrent_insert;
+extern my_bool maria_flush, maria_single_user, maria_page_checksums;
+extern my_bool maria_delay_key_write;
+extern my_off_t maria_max_temp_length;
+extern ulong maria_bulk_insert_tree_size, maria_data_pointer_size;
+extern PAGECACHE maria_pagecache_var, *maria_pagecache;
+extern MY_TMPDIR *maria_tmpdir;
+
+ /* Prototypes for maria-functions */
+
+extern int maria_init(void);
+extern void maria_end(void);
+extern int maria_close(MARIA_HA *file);
+extern int maria_delete(MARIA_HA *file, const uchar *buff);
+extern MARIA_HA *maria_open(const char *name, int mode,
+ uint wait_if_locked);
+extern MARIA_HA *maria_clone(struct st_maria_share *share, int mode);
+extern int maria_panic(enum ha_panic_function function);
+extern int maria_rfirst(MARIA_HA *file, uchar *buf, int inx);
+extern int maria_rkey(MARIA_HA *file, uchar *buf, int inx,
+ const uchar *key, key_part_map keypart_map,
+ enum ha_rkey_function search_flag);
+extern int maria_rlast(MARIA_HA *file, uchar *buf, int inx);
+extern int maria_rnext(MARIA_HA *file, uchar *buf, int inx);
+extern int maria_rnext_same(MARIA_HA *info, uchar *buf);
+extern int maria_rprev(MARIA_HA *file, uchar *buf, int inx);
+extern int maria_rrnd(MARIA_HA *file, uchar *buf,
+ MARIA_RECORD_POS pos);
+extern int maria_scan_init(MARIA_HA *file);
+extern int maria_scan(MARIA_HA *file, uchar *buf);
+extern void maria_scan_end(MARIA_HA *file);
+extern int maria_rsame(MARIA_HA *file, uchar *record, int inx);
+extern int maria_rsame_with_pos(MARIA_HA *file, uchar *record,
+ int inx, MARIA_RECORD_POS pos);
+extern int maria_update(MARIA_HA *file, const uchar *old,
+ uchar *new_record);
+extern int maria_write(MARIA_HA *file, uchar *buff);
+extern MARIA_RECORD_POS maria_position(MARIA_HA *file);
+extern int maria_status(MARIA_HA *info, MARIA_INFO *x, uint flag);
+extern int maria_lock_database(MARIA_HA *file, int lock_type);
+extern int maria_create(const char *name, enum data_file_type record_type,
+ uint keys, MARIA_KEYDEF *keydef,
+ uint columns, MARIA_COLUMNDEF *columndef,
+ uint uniques, MARIA_UNIQUEDEF *uniquedef,
+ MARIA_CREATE_INFO *create_info, uint flags);
+extern int maria_delete_table(const char *name);
+extern int maria_rename(const char *from, const char *to);
+extern int maria_extra(MARIA_HA *file,
+ enum ha_extra_function function, void *extra_arg);
+extern int maria_reset(MARIA_HA *file);
+extern ha_rows maria_records_in_range(MARIA_HA *info, int inx,
+ key_range *min_key, key_range *max_key);
+extern int maria_is_changed(MARIA_HA *info);
+extern int maria_delete_all_rows(MARIA_HA *info);
+extern uint maria_get_pointer_length(ulonglong file_length, uint def);
+extern int maria_commit(MARIA_HA *info);
+extern int maria_begin(MARIA_HA *info);
+extern void maria_disable_logging(MARIA_HA *info);
+extern void maria_enable_logging(MARIA_HA *info);
+
+/* this is used to pass to mysql_mariachk_table */
+
+#define MARIA_CHK_REPAIR 1 /* equivalent to mariachk -r */
+#define MARIA_CHK_VERIFY 2 /* Verify, run repair if failure */
+
+typedef uint maria_bit_type;
+
+typedef struct st_maria_bit_buff
+{ /* Used for packing of record */
+ maria_bit_type current_byte;
+ uint bits;
+ uchar *pos, *end, *blob_pos, *blob_end;
+ uint error;
+} MARIA_BIT_BUFF;
+
+
+typedef struct st_maria_sort_info
+{
+#ifdef THREAD
+ /* sync things */
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+#endif
+ MARIA_HA *info, *new_info;
+ HA_CHECK *param;
+ char *buff;
+ SORT_KEY_BLOCKS *key_block, *key_block_end;
+ SORT_FT_BUF *ft_buf;
+ my_off_t filelength, dupp, buff_length;
+ ulonglong page;
+ ha_rows max_records;
+ uint current_key, total_keys;
+ uint got_error, threads_running;
+ myf myf_rw;
+ enum data_file_type new_data_file_type, org_data_file_type;
+} MARIA_SORT_INFO;
+
+typedef struct st_maria_sort_param
+{
+ pthread_t thr;
+ IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
+ DYNAMIC_ARRAY buffpek;
+ MARIA_BIT_BUFF bit_buff; /* For parallel repair of packrec. */
+
+ MARIA_KEYDEF *keyinfo;
+ MARIA_SORT_INFO *sort_info;
+ HA_KEYSEG *seg;
+ uchar **sort_keys;
+ uchar *rec_buff;
+ void *wordlist, *wordptr;
+ MEM_ROOT wordroot;
+ char *record;
+ MY_TMPDIR *tmpdir;
+
+ /*
+ The next two are used to collect statistics, see maria_update_key_parts for
+ description.
+ */
+ ulonglong unique[HA_MAX_KEY_SEG+1];
+ ulonglong notnull[HA_MAX_KEY_SEG+1];
+
+ MARIA_RECORD_POS pos,max_pos,filepos,start_recpos, current_filepos;
+ uint key, key_length,real_key_length,sortbuff_size;
+ uint maxbuffers, keys, find_length, sort_keys_length;
+ my_bool fix_datafile, master;
+ my_bool calc_checksum; /* calculate table checksum */
+ size_t rec_buff_size;
+
+ int (*key_cmp)(struct st_maria_sort_param *, const void *, const void *);
+ int (*key_read)(struct st_maria_sort_param *, uchar *);
+ int (*key_write)(struct st_maria_sort_param *, const uchar *);
+ void (*lock_in_memory)(HA_CHECK *);
+ int (*write_keys)(struct st_maria_sort_param *, register uchar **,
+ uint , struct st_buffpek *, IO_CACHE *);
+ uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint);
+ int (*write_key)(struct st_maria_sort_param *, IO_CACHE *,uchar *,
+ uint, uint);
+} MARIA_SORT_PARAM;
+
+
+/* functions in maria_check */
+void maria_chk_init(HA_CHECK *param);
+int maria_chk_status(HA_CHECK *param, MARIA_HA *info);
+int maria_chk_del(HA_CHECK *param, MARIA_HA *info, uint test_flag);
+int maria_chk_size(HA_CHECK *param, MARIA_HA *info);
+int maria_chk_key(HA_CHECK *param, MARIA_HA *info);
+int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, int extend);
+int maria_repair(HA_CHECK *param, MARIA_HA *info, char * name, uint rep_quick);
+int maria_sort_index(HA_CHECK *param, MARIA_HA *info, char * name);
+int maria_repair_by_sort(HA_CHECK *param, MARIA_HA *info,
+ const char *name, uint rep_quick);
+int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
+ const char *name, uint rep_quick);
+int maria_change_to_newfile(const char *filename, const char *old_ext,
+ const char *new_ext, myf myflags);
+void maria_lock_memory(HA_CHECK *param);
+int maria_update_state_info(HA_CHECK *param, MARIA_HA *info, uint update);
+void maria_update_key_parts(MARIA_KEYDEF *keyinfo, double *rec_per_key_part,
+ ulonglong *unique, ulonglong *notnull,
+ ulonglong records);
+int maria_filecopy(HA_CHECK *param, File to, File from, my_off_t start,
+ my_off_t length, const char *type);
+int maria_movepoint(MARIA_HA *info, uchar *record, my_off_t oldpos,
+ my_off_t newpos, uint prot_key);
+int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile);
+int maria_test_if_almost_full(MARIA_HA *info);
+int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename);
+int maria_disable_indexes(MARIA_HA *info);
+int maria_enable_indexes(MARIA_HA *info);
+int maria_indexes_are_disabled(MARIA_HA *info);
+void maria_disable_non_unique_index(MARIA_HA *info, ha_rows rows);
+my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows, ulonglong key_map,
+ my_bool force);
+
+int maria_init_bulk_insert(MARIA_HA *info, ulong cache_size, ha_rows rows);
+void maria_flush_bulk_insert(MARIA_HA *info, uint inx);
+void maria_end_bulk_insert(MARIA_HA *info);
+int maria_assign_to_pagecache(MARIA_HA *info, ulonglong key_map,
+ PAGECACHE *key_cache);
+void maria_change_pagecache(PAGECACHE *old_key_cache,
+ PAGECACHE *new_key_cache);
+int maria_preload(MARIA_HA *info, ulonglong key_map, my_bool ignore_leaves);
+
+/* fulltext functions */
+FT_INFO *maria_ft_init_search(uint,void *, uint, uchar *, uint,
+ CHARSET_INFO *, uchar *);
+
+/* 'Almost-internal' Maria functions */
+
+void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info,
+ my_bool repair);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/include/my_atomic.h b/include/my_atomic.h
index a1347d26401..59297750cb7 100644
--- a/include/my_atomic.h
+++ b/include/my_atomic.h
@@ -13,6 +13,40 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+/*
+ This header defines five atomic operations:
+
+ my_atomic_add#(&var, what)
+ add 'what' to *var, and return the old value of *var
+
+ my_atomic_fas#(&var, what)
+ 'Fetch And Store'
+ store 'what' in *var, and return the old value of *var
+
+ my_atomic_cas#(&var, &old, new)
+ 'Compare And Swap'
+ if *var is equal to *old, then store 'new' in *var, and return TRUE
+ otherwise store *var in *old, and return FALSE
+
+ my_atomic_load#(&var)
+ return *var
+
+ my_atomic_store#(&var, what)
+ store 'what' in *var
+
+ '#' is substituted by a size suffix - 8, 16, 32, or ptr
+ (e.g. my_atomic_add8, my_atomic_fas32, my_atomic_casptr).
+
+ NOTE This operations are not always atomic, so they always must be
+ enclosed in my_atomic_rwlock_rdlock(lock)/my_atomic_rwlock_rdunlock(lock)
+ or my_atomic_rwlock_wrlock(lock)/my_atomic_rwlock_wrunlock(lock).
+ Hint: if a code block makes intensive use of atomic ops, it make sense
+ to take/release rwlock once for the whole block, not for every statement.
+
+ On architectures where these operations are really atomic, rwlocks will
+ be optimized away.
+*/
+
#ifndef my_atomic_rwlock_init
#define intptr void *
@@ -26,70 +60,124 @@
#endif
#ifndef make_atomic_add_body
-#define make_atomic_add_body(S) \
+#define make_atomic_add_body(S) \
int ## S tmp=*a; \
while (!my_atomic_cas ## S(a, &tmp, tmp+v)); \
v=tmp;
#endif
+/*
+ transparent_union doesn't work in g++
+ Bug ?
+
+ Darwin's gcc doesn't want to put pointers in a transparent_union
+ when built with -arch ppc64. Complains:
+ warning: 'transparent_union' attribute ignored
+*/
+#if defined(__GNUC__) && !defined(__cplusplus) && \
+ ! (defined(__APPLE__) && defined(_ARCH_PPC64))
+/*
+ we want to be able to use my_atomic_xxx functions with
+ both signed and unsigned integers. But gcc will issue a warning
+ "passing arg N of `my_atomic_XXX' as [un]signed due to prototype"
+ if the signedness of the argument doesn't match the prototype, or
+ "pointer targets in passing argument N of my_atomic_XXX differ in signedness"
+ if int* is used where uint* is expected (or vice versa).
+ Let's shut these warnings up
+*/
+#define make_transparent_unions(S) \
+ typedef union { \
+ int ## S i; \
+ uint ## S u; \
+ } U_ ## S __attribute__ ((transparent_union)); \
+ typedef union { \
+ int ## S volatile *i; \
+ uint ## S volatile *u; \
+ } Uv_ ## S __attribute__ ((transparent_union));
+#define uintptr intptr
+make_transparent_unions(8)
+make_transparent_unions(16)
+make_transparent_unions(32)
+make_transparent_unions(ptr)
+#undef uintptr
+#undef make_transparent_unions
+#define a U_a.i
+#define cmp U_cmp.i
+#define v U_v.i
+#define set U_set.i
+#else
+#define U_8 int8
+#define U_16 int16
+#define U_32 int32
+#define U_ptr intptr
+#define Uv_8 int8
+#define Uv_16 int16
+#define Uv_32 int32
+#define Uv_ptr intptr
+#define U_a volatile *a
+#define U_cmp *cmp
+#define U_v v
+#define U_set set
+#endif /* __GCC__ transparent_union magic */
+
#ifdef HAVE_INLINE
-#define make_atomic_add(S) \
-static inline int ## S my_atomic_add ## S( \
- int ## S volatile *a, int ## S v) \
-{ \
- make_atomic_add_body(S); \
- return v; \
+#define make_atomic_add(S) \
+STATIC_INLINE int ## S my_atomic_add ## S( \
+ Uv_ ## S U_a, U_ ## S U_v) \
+{ \
+ make_atomic_add_body(S); \
+ return v; \
}
-#define make_atomic_swap(S) \
-static inline int ## S my_atomic_swap ## S( \
- int ## S volatile *a, int ## S v) \
-{ \
- make_atomic_swap_body(S); \
- return v; \
+#define make_atomic_fas(S) \
+STATIC_INLINE int ## S my_atomic_fas ## S( \
+ Uv_ ## S U_a, U_ ## S U_v) \
+{ \
+ make_atomic_fas_body(S); \
+ return v; \
}
-#define make_atomic_cas(S) \
-static inline int my_atomic_cas ## S(int ## S volatile *a, \
- int ## S *cmp, int ## S set) \
-{ \
- int8 ret; \
- make_atomic_cas_body(S); \
- return ret; \
+#define make_atomic_cas(S) \
+STATIC_INLINE int my_atomic_cas ## S(Uv_ ## S U_a, \
+ Uv_ ## S U_cmp, U_ ## S U_set) \
+{ \
+ int8 ret; \
+ make_atomic_cas_body(S); \
+ return ret; \
}
-#define make_atomic_load(S) \
-static inline int ## S my_atomic_load ## S(int ## S volatile *a) \
-{ \
- int ## S ret; \
- make_atomic_load_body(S); \
- return ret; \
+#define make_atomic_load(S) \
+STATIC_INLINE int ## S my_atomic_load ## S(Uv_ ## S U_a) \
+{ \
+ int ## S ret; \
+ make_atomic_load_body(S); \
+ return ret; \
}
-#define make_atomic_store(S) \
-static inline void my_atomic_store ## S( \
- int ## S volatile *a, int ## S v) \
-{ \
- make_atomic_store_body(S); \
+#define make_atomic_store(S) \
+STATIC_INLINE void my_atomic_store ## S( \
+ Uv_ ## S U_a, U_ ## S U_v) \
+{ \
+ make_atomic_store_body(S); \
}
#else /* no inline functions */
-#define make_atomic_add(S) \
-extern int ## S my_atomic_add ## S(int ## S volatile *a, int ## S v);
+#define make_atomic_add(S) \
+extern int ## S my_atomic_add ## S(Uv_ ## S, U_ ## S);
-#define make_atomic_swap(S) \
-extern int ## S my_atomic_swap ## S(int ## S volatile *a, int ## S v);
+#define make_atomic_fas(S) \
+extern int ## S my_atomic_fas ## S(Uv_ ## S, U_ ## S);
-#define make_atomic_cas(S) \
-extern int my_atomic_cas ## S(int ## S volatile *a, int ## S *cmp, int ## S set);
+#define make_atomic_cas(S) \
+extern int my_atomic_cas ## S(Uv_ ## S, Uv_ ## S, U_ ## S);
-#define make_atomic_load(S) \
-extern int ## S my_atomic_load ## S(int ## S volatile *a);
+#define make_atomic_load(S) \
+extern int ## S my_atomic_load ## S(Uv_ ## S);
-#define make_atomic_store(S) \
-extern void my_atomic_store ## S(int ## S volatile *a, int ## S v);
+#define make_atomic_store(S) \
+extern void my_atomic_store ## S(Uv_ ## S, U_ ## S);
#endif
@@ -112,26 +200,47 @@ make_atomic_store(16)
make_atomic_store(32)
make_atomic_store(ptr)
-make_atomic_swap( 8)
-make_atomic_swap(16)
-make_atomic_swap(32)
-make_atomic_swap(ptr)
+make_atomic_fas( 8)
+make_atomic_fas(16)
+make_atomic_fas(32)
+make_atomic_fas(ptr)
+#ifdef _atomic_h_cleanup_
+#include _atomic_h_cleanup_
+#undef _atomic_h_cleanup_
+#endif
+
+#undef U_8
+#undef U_16
+#undef U_32
+#undef U_ptr
+#undef a
+#undef cmp
+#undef v
+#undef set
+#undef U_a
+#undef U_cmp
+#undef U_v
+#undef U_set
#undef make_atomic_add
#undef make_atomic_cas
#undef make_atomic_load
#undef make_atomic_store
-#undef make_atomic_swap
+#undef make_atomic_fas
#undef make_atomic_add_body
#undef make_atomic_cas_body
#undef make_atomic_load_body
#undef make_atomic_store_body
-#undef make_atomic_swap_body
+#undef make_atomic_fas_body
#undef intptr
-#ifdef _atomic_h_cleanup_
-#include _atomic_h_cleanup_
-#undef _atomic_h_cleanup_
+/*
+ the macro below defines (as an expression) the code that
+ will be run in spin-loops. Intel manuals recummend to have PAUSE there.
+ It is expected to be defined in include/atomic/ *.h files
+*/
+#ifndef LF_BACKOFF
+#define LF_BACKOFF (1)
#endif
#define MY_ATOMIC_OK 0
diff --git a/include/my_base.h b/include/my_base.h
index 947f7695215..fdf8d248879 100644
--- a/include/my_base.h
+++ b/include/my_base.h
@@ -14,7 +14,6 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/* This file includes constants used with all databases */
-/* Author: Michael Widenius */
#ifndef _my_base_h
#define _my_base_h
@@ -48,10 +47,11 @@
#define HA_OPEN_FOR_REPAIR 32 /* open even if crashed */
#define HA_OPEN_FROM_SQL_LAYER 64
#define HA_OPEN_MMAP 128 /* open memory mapped */
+#define HA_OPEN_COPY 256 /* Open copy (for repair) */
/* Internal temp table, used for temporary results */
-#define HA_OPEN_INTERNAL_TABLE 256
+#define HA_OPEN_INTERNAL_TABLE 512
- /* The following is parameter to ha_rkey() how to use key */
+/* The following is parameter to ha_rkey() how to use key */
/*
We define a complete-field prefix of a key value as a prefix where
@@ -137,7 +137,7 @@ enum ha_extra_function {
HA_EXTRA_RESET_STATE, /* Reset positions */
HA_EXTRA_IGNORE_DUP_KEY, /* Dup keys don't rollback everything*/
HA_EXTRA_NO_IGNORE_DUP_KEY,
- HA_EXTRA_PREPARE_FOR_DELETE,
+ HA_EXTRA_PREPARE_FOR_DROP,
HA_EXTRA_PREPARE_FOR_UPDATE, /* Remove read cache if problems */
HA_EXTRA_PRELOAD_BUFFER_SIZE, /* Set buffer size for preloading */
/*
@@ -193,7 +193,9 @@ enum ha_extra_function {
begin and end of a statement.
*/
HA_EXTRA_ATTACH_CHILDREN,
- HA_EXTRA_DETACH_CHILDREN
+ HA_EXTRA_DETACH_CHILDREN,
+ /* Inform handler that we will do a rename */
+ HA_EXTRA_PREPARE_FOR_RENAME
};
/* The following is parameter to ha_panic() */
@@ -300,8 +302,12 @@ enum ha_base_keytype {
#define HA_OPTION_NO_PACK_KEYS 128 /* Reserved for MySQL */
#define HA_OPTION_CREATE_FROM_ENGINE 256
#define HA_OPTION_RELIES_ON_SQL_LAYER 512
-#define HA_OPTION_TEMP_COMPRESS_RECORD ((uint) 16384) /* set by isamchk */
-#define HA_OPTION_READ_ONLY_DATA ((uint) 32768) /* Set by isamchk */
+#define HA_OPTION_NULL_FIELDS 1024
+#define HA_OPTION_PAGE_CHECKSUM 2048
+#define HA_OPTION_TEMP_COMPRESS_RECORD (1L << 15) /* set by isamchk */
+#define HA_OPTION_READ_ONLY_DATA (1L << 16) /* Set by isamchk */
+#define HA_OPTION_NO_CHECKSUM (1L << 17)
+#define HA_OPTION_NO_DELAY_KEY_WRITE (1L << 18)
/* Bits in flag to create() */
@@ -310,6 +316,7 @@ enum ha_base_keytype {
#define HA_CREATE_TMP_TABLE 4
#define HA_CREATE_CHECKSUM 8
#define HA_CREATE_KEEP_FILES 16 /* don't overwrite .MYD and MYI */
+#define HA_CREATE_PAGE_CHECKSUM 32
#define HA_CREATE_DELAY_KEY_WRITE 64
#define HA_CREATE_RELIES_ON_SQL_LAYER 128
@@ -356,14 +363,18 @@ enum ha_base_keytype {
*/
#define HA_STATUS_AUTO 64
- /* Errorcodes given by functions */
+/*
+ Errorcodes given by handler functions
+
+ opt_sum_query() assumes these codes are > 1
+ Do not add error numbers before HA_ERR_FIRST.
+ If necessary to add lower numbers, change HA_ERR_FIRST accordingly.
+*/
+#define HA_ERR_FIRST 120 /* Copy of first error nr.*/
-/* opt_sum_query() assumes these codes are > 1 */
-/* Do not add error numbers before HA_ERR_FIRST. */
-/* If necessary to add lower numbers, change HA_ERR_FIRST accordingly. */
-#define HA_ERR_FIRST 120 /*Copy first error nr.*/
#define HA_ERR_KEY_NOT_FOUND 120 /* Didn't find key on read or update */
#define HA_ERR_FOUND_DUPP_KEY 121 /* Dupplicate key on write */
+#define HA_ERR_INTERNAL_ERROR 122 /* Internal error */
#define HA_ERR_RECORD_CHANGED 123 /* Uppdate with is recoverable */
#define HA_ERR_WRONG_INDEX 124 /* Wrong index given to function */
#define HA_ERR_CRASHED 126 /* Indexfile is crashed */
@@ -382,7 +393,7 @@ enum ha_base_keytype {
#define HA_WRONG_CREATE_OPTION 140 /* Wrong create option */
#define HA_ERR_FOUND_DUPP_UNIQUE 141 /* Dupplicate unique on write */
#define HA_ERR_UNKNOWN_CHARSET 142 /* Can't open charset */
-#define HA_ERR_WRONG_MRG_TABLE_DEF 143 /* conflicting MyISAM tables in MERGE */
+#define HA_ERR_WRONG_MRG_TABLE_DEF 143 /* conflicting tables in MERGE */
#define HA_ERR_CRASHED_ON_REPAIR 144 /* Last (automatic?) repair failed */
#define HA_ERR_CRASHED_ON_USAGE 145 /* Table must be repaired */
#define HA_ERR_LOCK_WAIT_TIMEOUT 146
@@ -397,30 +408,38 @@ enum ha_base_keytype {
#define HA_ERR_NO_SUCH_TABLE 155 /* The table does not exist in engine */
#define HA_ERR_TABLE_EXIST 156 /* The table existed in storage engine */
#define HA_ERR_NO_CONNECTION 157 /* Could not connect to storage engine */
-#define HA_ERR_NULL_IN_SPATIAL 158 /* NULLs are not supported in spatial index */
+/* NULLs are not supported in spatial index */
+#define HA_ERR_NULL_IN_SPATIAL 158
#define HA_ERR_TABLE_DEF_CHANGED 159 /* The table changed in storage engine */
#define HA_ERR_NO_PARTITION_FOUND 160 /* There's no partition in table for
given value */
#define HA_ERR_RBR_LOGGING_FAILED 161 /* Row-based binlogging of row failed */
-#define HA_ERR_DROP_INDEX_FK 162 /* Index needed in foreign key constr. */
-#define HA_ERR_FOREIGN_DUPLICATE_KEY 163 /* Upholding foreign key constraints
- would lead to a duplicate key
- error in some other table. */
-#define HA_ERR_TABLE_NEEDS_UPGRADE 164 /* The table changed in storage engine */
-#define HA_ERR_TABLE_READONLY 165 /* The table is not writable */
+#define HA_ERR_DROP_INDEX_FK 162 /* Index needed in foreign key constr */
+/*
+ Upholding foreign key constraints would lead to a duplicate key error
+ in some other table.
+*/
+#define HA_ERR_FOREIGN_DUPLICATE_KEY 163
+/* The table changed in storage engine */
+#define HA_ERR_TABLE_NEEDS_UPGRADE 164
+#define HA_ERR_TABLE_READONLY 165 /* The table is not writable */
#define HA_ERR_AUTOINC_READ_FAILED 166 /* Failed to get next autoinc value */
#define HA_ERR_AUTOINC_ERANGE 167 /* Failed to set row autoinc value */
#define HA_ERR_GENERIC 168 /* Generic error */
-#define HA_ERR_RECORD_IS_THE_SAME 169 /* row not actually updated :
- new values same as the old values */
-
-#define HA_ERR_LOGGING_IMPOSSIBLE 170 /* It is not possible to log this
- statement */
+/* row not actually updated: new values same as the old values */
+#define HA_ERR_RECORD_IS_THE_SAME 169
+/* It is not possible to log this statement */
+#define HA_ERR_LOGGING_IMPOSSIBLE 170
#define HA_ERR_CORRUPT_EVENT 171 /* The event was corrupt, leading to
illegal data being read */
-#define HA_ERR_LAST 171 /*Copy last error nr.*/
-/* Add error numbers before HA_ERR_LAST and change it accordingly. */
+#define HA_ERR_NEW_FILE 172 /* New file format */
+#define HA_ERR_INITIALIZATION 173 /* Error during initialization */
+#define HA_ERR_FILE_TOO_SHORT 174 /* File too short */
+#define HA_ERR_WRONG_CRC 175 /* Wrong CRC on page */
+#define HA_ERR_LAST 175 /* Copy of last error nr */
+
+/* Number of different errors */
#define HA_ERR_ERRORS (HA_ERR_LAST - HA_ERR_FIRST + 1)
/* Other constants */
@@ -484,7 +503,7 @@ enum en_fieldtype {
};
enum data_file_type {
- STATIC_RECORD,DYNAMIC_RECORD,COMPRESSED_RECORD
+ STATIC_RECORD, DYNAMIC_RECORD, COMPRESSED_RECORD, BLOCK_RECORD
};
/* For key ranges */
@@ -536,4 +555,7 @@ typedef ulong ha_rows;
#define HA_VARCHAR_PACKLENGTH(field_length) ((field_length) < 256 ? 1 :2)
+/* invalidator function reference for Query Cache */
+typedef void (* invalidator_by_filename)(const char * filename);
+
#endif /* _my_base_h */
diff --git a/include/my_bit.h b/include/my_bit.h
new file mode 100644
index 00000000000..58e8bb39683
--- /dev/null
+++ b/include/my_bit.h
@@ -0,0 +1,107 @@
+/*
+ Some useful bit functions
+*/
+
+#ifdef HAVE_INLINE
+
+extern const char _my_bits_nbits[256];
+extern const uchar _my_bits_reverse_table[256];
+
+/*
+ Find smallest X in 2^X >= value
+ This can be used to divide a number with value by doing a shift instead
+*/
+
+STATIC_INLINE uint my_bit_log2(ulong value)
+{
+ uint bit;
+ for (bit=0 ; value > 1 ; value>>=1, bit++) ;
+ return bit;
+}
+
+STATIC_INLINE uint my_count_bits(ulonglong v)
+{
+#if SIZEOF_LONG_LONG > 4
+ /* The following code is a bit faster on 16 bit machines than if we would
+ only shift v */
+ ulong v2=(ulong) (v >> 32);
+ return (uint) (uchar) (_my_bits_nbits[(uchar) v] +
+ _my_bits_nbits[(uchar) (v >> 8)] +
+ _my_bits_nbits[(uchar) (v >> 16)] +
+ _my_bits_nbits[(uchar) (v >> 24)] +
+ _my_bits_nbits[(uchar) (v2)] +
+ _my_bits_nbits[(uchar) (v2 >> 8)] +
+ _my_bits_nbits[(uchar) (v2 >> 16)] +
+ _my_bits_nbits[(uchar) (v2 >> 24)]);
+#else
+ return (uint) (uchar) (_my_bits_nbits[(uchar) v] +
+ _my_bits_nbits[(uchar) (v >> 8)] +
+ _my_bits_nbits[(uchar) (v >> 16)] +
+ _my_bits_nbits[(uchar) (v >> 24)]);
+#endif
+}
+
+STATIC_INLINE uint my_count_bits_ushort(ushort v)
+{
+ return _my_bits_nbits[v];
+}
+
+
+/*
+ Next highest power of two
+
+ SYNOPSIS
+ my_round_up_to_next_power()
+ v Value to check
+
+ RETURN
+ Next or equal power of 2
+ Note: 0 will return 0
+
+ NOTES
+ Algorithm by Sean Anderson, according to:
+ http://graphics.stanford.edu/~seander/bithacks.html
+ (Orignal code public domain)
+
+ Comments shows how this works with 01100000000000000000000000001011
+*/
+
+STATIC_INLINE uint32 my_round_up_to_next_power(uint32 v)
+{
+ v--; /* 01100000000000000000000000001010 */
+ v|= v >> 1; /* 01110000000000000000000000001111 */
+ v|= v >> 2; /* 01111100000000000000000000001111 */
+ v|= v >> 4; /* 01111111110000000000000000001111 */
+ v|= v >> 8; /* 01111111111111111100000000001111 */
+ v|= v >> 16; /* 01111111111111111111111111111111 */
+ return v+1; /* 10000000000000000000000000000000 */
+}
+
+STATIC_INLINE uint32 my_clear_highest_bit(uint32 v)
+{
+ uint32 w=v >> 1;
+ w|= w >> 1;
+ w|= w >> 2;
+ w|= w >> 4;
+ w|= w >> 8;
+ w|= w >> 16;
+ return v & w;
+}
+
+STATIC_INLINE uint32 my_reverse_bits(uint32 key)
+{
+ return
+ (_my_bits_reverse_table[ key & 255] << 24) |
+ (_my_bits_reverse_table[(key>> 8) & 255] << 16) |
+ (_my_bits_reverse_table[(key>>16) & 255] << 8) |
+ _my_bits_reverse_table[(key>>24) ];
+}
+
+#else
+extern uint my_bit_log2(ulong value);
+extern uint32 my_round_up_to_next_power(uint32 v);
+uint32 my_clear_highest_bit(uint32 v);
+uint32 my_reverse_bits(uint32 key);
+extern uint my_count_bits(ulonglong v);
+extern uint my_count_bits_ushort(ushort v);
+#endif
diff --git a/include/my_dbug.h b/include/my_dbug.h
index 514cd17099b..a77e439b5db 100644
--- a/include/my_dbug.h
+++ b/include/my_dbug.h
@@ -101,7 +101,7 @@ extern FILE *_db_fp_(void);
#define DBUG_LONGJMP(a1) longjmp(a1)
#define DBUG_DUMP(keyword,a1,a2)
#define DBUG_END()
-#define DBUG_ASSERT(A)
+#define DBUG_ASSERT(A) do { } while(0)
#define DBUG_LOCK_FILE
#define DBUG_FILE (stderr)
#define DBUG_UNLOCK_FILE
diff --git a/include/my_global.h b/include/my_global.h
index 4b0786aa826..1287ae3ac2a 100644
--- a/include/my_global.h
+++ b/include/my_global.h
@@ -242,6 +242,8 @@
#endif
#undef inline_test_2
#undef inline_test_1
+/* helper macro for "instantiating" inline functions */
+#define STATIC_INLINE static inline
/*
The following macros are used to control inlining a bit more than
@@ -429,6 +431,9 @@ C_MODE_END
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
#ifdef HAVE_SYS_TIMEB_H
#include <sys/timeb.h> /* Avoid warnings on SCO */
#endif
@@ -571,7 +576,7 @@ typedef unsigned short ushort;
#define CMP_NUM(a,b) (((a) < (b)) ? -1 : ((a) == (b)) ? 0 : 1)
#define sgn(a) (((a) < 0) ? -1 : ((a) > 0) ? 1 : 0)
-#define swap_variables(t, a, b) { register t dummy; dummy= a; a= b; b= dummy; }
+#define swap_variables(t, a, b) { register t swap_dummy; swap_dummy= a; a= b; b= swap_dummy; }
#define test(a) ((a) ? 1 : 0)
#define set_if_bigger(a,b) do { if ((a) < (b)) (a)=(b); } while(0)
#define set_if_smaller(a,b) do { if ((a) > (b)) (a)=(b); } while(0)
@@ -1015,6 +1020,8 @@ typedef long long intptr;
#error sizeof(void *) is neither sizeof(int) nor sizeof(long) nor sizeof(long long)
#endif
+#define MY_ERRPTR ((void*)(intptr)1)
+
#ifdef USE_RAID
/*
The following is done with a if to not get problems with pre-processors
@@ -1476,6 +1483,7 @@ do { doubleget_union _tmp; \
#define dlerror() ""
#endif
+
#ifndef __NETWARE__
/*
* Include standard definitions of operator new and delete.
@@ -1506,6 +1514,13 @@ inline void operator delete[](void*, void*) { /* Do nothing */ }
#if !defined(max)
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+/*
+ Only Linux is known to need an explicit sync of the directory to make sure a
+ file creation/deletion/renaming in(from,to) this directory durable.
+*/
+#ifdef TARGET_OS_LINUX
+#define NEED_EXPLICIT_SYNC_DIR 1
#endif
#endif /* my_global_h */
diff --git a/include/my_handler.h b/include/my_handler.h
index d7cd0567f9c..21e57ce8170 100644
--- a/include/my_handler.h
+++ b/include/my_handler.h
@@ -18,10 +18,30 @@
#ifndef _my_handler_h
#define _my_handler_h
-#include "my_base.h"
-#include "m_ctype.h"
#include "myisampack.h"
+/*
+ There is a hard limit for the maximum number of keys as there are only
+ 8 bits in the index file header for the number of keys in a table.
+ This means that 0..255 keys can exist for a table. The idea of
+ HA_MAX_POSSIBLE_KEY is to ensure that one can use myisamchk & tools on
+ a MyISAM table for which one has more keys than MyISAM is normally
+ compiled for. If you don't have this, you will get a core dump when
+ running myisamchk compiled for 128 keys on a table with 255 keys.
+*/
+
+#define HA_MAX_POSSIBLE_KEY 255 /* For myisamchk */
+/*
+ The following defines can be increased if necessary.
+ But beware the dependency of HA_MAX_POSSIBLE_KEY_BUFF and HA_MAX_KEY_LENGTH.
+*/
+
+#define HA_MAX_KEY_LENGTH 1000 /* Max length in bytes */
+#define HA_MAX_KEY_SEG 16 /* Max segments for key */
+
+#define HA_MAX_POSSIBLE_KEY_BUFF (HA_MAX_KEY_LENGTH + 24+ 6+6)
+#define HA_MAX_KEY_BUFF (HA_MAX_KEY_LENGTH+HA_MAX_KEY_SEG*6+8+8)
+
typedef struct st_HA_KEYSEG /* Key-portion */
{
CHARSET_INFO *charset;
@@ -38,33 +58,35 @@ typedef struct st_HA_KEYSEG /* Key-portion */
} HA_KEYSEG;
#define get_key_length(length,key) \
-{ if ((uchar) *(key) != 255) \
- length= (uint) (uchar) *((key)++); \
+{ if (*(uchar*) (key) != 255) \
+ length= (uint) *(uchar*) ((key)++); \
else \
- { length=mi_uint2korr((key)+1); (key)+=3; } \
+ { length= mi_uint2korr((key)+1); (key)+=3; } \
}
#define get_key_length_rdonly(length,key) \
-{ if ((uchar) *(key) != 255) \
- length= ((uint) (uchar) *((key))); \
+{ if (*(uchar*) (key) != 255) \
+ length= ((uint) *(uchar*) ((key))); \
else \
- { length=mi_uint2korr((key)+1); } \
+ { length= mi_uint2korr((key)+1); } \
}
#define get_key_pack_length(length,length_pack,key) \
-{ if ((uchar) *(key) != 255) \
- { length= (uint) (uchar) *((key)++); length_pack=1; }\
+{ if (*(uchar*) (key) != 255) \
+ { length= (uint) *(uchar*) ((key)++); length_pack= 1; }\
else \
- { length=mi_uint2korr((key)+1); (key)+=3; length_pack=3; } \
+ { length=mi_uint2korr((key)+1); (key)+= 3; length_pack= 3; } \
}
#define store_key_length_inc(key,length) \
{ if ((length) < 255) \
- { *(key)++=(length); } \
+ { *(key)++= (length); } \
else \
{ *(key)=255; mi_int2store((key)+1,(length)); (key)+=3; } \
}
+#define size_to_store_key_length(length) ((length) < 255 ? 1 : 3)
+
#define get_rec_bits(bit_ptr, bit_ofs, bit_len) \
(((((uint16) (bit_ptr)[1] << 8) | (uint16) (bit_ptr)[0]) >> (bit_ofs)) & \
((1 << (bit_len)) - 1))
@@ -81,12 +103,20 @@ typedef struct st_HA_KEYSEG /* Key-portion */
#define clr_rec_bits(bit_ptr, bit_ofs, bit_len) \
set_rec_bits(0, bit_ptr, bit_ofs, bit_len)
-extern int mi_compare_text(CHARSET_INFO *, uchar *, uint, uchar *, uint ,
- my_bool, my_bool);
-extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
- register uchar *b, uint key_length, uint nextflag,
+extern int ha_compare_text(CHARSET_INFO *, const uchar *, uint,
+ const uchar *, uint , my_bool, my_bool);
+extern int ha_key_cmp(register HA_KEYSEG *keyseg, register const uchar *a,
+ register const uchar *b, uint key_length, uint nextflag,
uint *diff_pos);
extern HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a);
+extern void my_handler_error_register(void);
+extern void my_handler_error_unregister(void);
+/*
+ Inside an in-memory data record, memory pointers to pieces of the
+ record (like BLOBs) are stored in their native byte order and in
+ this amount of bytes.
+*/
+#define portable_sizeof_char_ptr 8
#endif /* _my_handler_h */
diff --git a/include/my_pthread.h b/include/my_pthread.h
index 88abddc9e25..6ec745ff206 100644
--- a/include/my_pthread.h
+++ b/include/my_pthread.h
@@ -172,6 +172,7 @@ void pthread_exit(void *a); /* was #define pthread_exit(A) ExitThread(A)*/
#define pthread_detach_this_thread()
#define pthread_condattr_init(A)
#define pthread_condattr_destroy(A)
+#define pthread_yield() Sleep(0) /* according to MSDN */
#define my_pthread_getprio(thread_id) pthread_dummy(0)
@@ -397,6 +398,17 @@ void my_pthread_attr_getstacksize(pthread_attr_t *attrib, size_t *size);
int my_pthread_mutex_trylock(pthread_mutex_t *mutex);
#endif
+#if !defined(HAVE_PTHREAD_YIELD_ONE_ARG) && !defined(HAVE_PTHREAD_YIELD_ZERO_ARG)
+/* no pthread_yield() available */
+#ifdef HAVE_SCHED_YIELD
+#define pthread_yield() sched_yield()
+#elif defined(HAVE_PTHREAD_YIELD_NP) /* can be Mac OS X */
+#define pthread_yield() pthread_yield_np()
+#elif defined(HAVE_THR_YIELD)
+#define pthread_yield() thr_yield()
+#endif
+#endif
+
/*
The defines set_timespec and set_timespec_nsec should be used
for calculating an absolute time at which
@@ -447,7 +459,7 @@ int my_pthread_mutex_trylock(pthread_mutex_t *mutex);
typedef struct st_safe_mutex_t
{
pthread_mutex_t global,mutex;
- const char *file;
+ const char *file, *name;
uint line,count;
pthread_t thread;
#ifdef SAFE_MUTEX_DETECT_DESTROY
@@ -472,7 +484,7 @@ typedef struct st_safe_mutex_info_t
#endif /* SAFE_MUTEX_DETECT_DESTROY */
int safe_mutex_init(safe_mutex_t *mp, const pthread_mutexattr_t *attr,
- const char *file, uint line);
+ const char *file, uint line, const char *name);
int safe_mutex_lock(safe_mutex_t *mp, my_bool try_lock, const char *file, uint line);
int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line);
int safe_mutex_destroy(safe_mutex_t *mp,const char *file, uint line);
@@ -495,7 +507,7 @@ void safe_mutex_end(FILE *file);
#undef pthread_cond_wait
#undef pthread_cond_timedwait
#undef pthread_mutex_trylock
-#define pthread_mutex_init(A,B) safe_mutex_init((A),(B),__FILE__,__LINE__)
+#define pthread_mutex_init(A,B) safe_mutex_init((A),(B),__FILE__,__LINE__,#A)
#define pthread_mutex_lock(A) safe_mutex_lock((A), FALSE, __FILE__, __LINE__)
#define pthread_mutex_unlock(A) safe_mutex_unlock((A),__FILE__,__LINE__)
#define pthread_mutex_destroy(A) safe_mutex_destroy((A),__FILE__,__LINE__)
@@ -681,6 +693,7 @@ struct st_my_thread_var
};
extern struct st_my_thread_var *_my_thread_var(void) __attribute__ ((const));
+extern void **my_thread_var_dbug();
extern uint my_thread_end_wait_time;
#define my_thread_var (_my_thread_var())
#define my_errno my_thread_var->thr_errno
diff --git a/include/my_sys.h b/include/my_sys.h
index e13c4cde78b..7c39e69ad6e 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -51,6 +51,7 @@ extern int NEAR my_errno; /* Last error in mysys */
#define MY_WME 16 /* Write message on error */
#define MY_WAIT_IF_FULL 32 /* Wait and try again if disk full error */
#define MY_IGNORE_BADFD 32 /* my_sync: ignore 'bad descriptor' errors */
+#define MY_SYNC_DIR 1024 /* my_create/delete/rename: sync directory */
#define MY_RAID 64 /* Support for RAID */
#define MY_FULL_IO 512 /* For my_read - loop intil I/O is complete */
#define MY_DONT_CHECK_FILESIZE 128 /* Option to init_io_cache() */
@@ -61,12 +62,14 @@ extern int NEAR my_errno; /* Last error in mysys */
#define MY_HOLD_ORIGINAL_MODES 128 /* my_copy() holds to file modes */
#define MY_REDEL_MAKE_BACKUP 256
#define MY_SEEK_NOT_DONE 32 /* my_lock may have to do a seek */
-#define MY_DONT_WAIT 64 /* my_lock() don't wait if can't lock */
+#define MY_SHORT_WAIT 64 /* my_lock() don't wait if can't lock */
+#define MY_FORCE_LOCK 128 /* use my_lock() even if disable_locking */
+#define MY_NO_WAIT 256 /* my_lock() don't wait at all */
#define MY_ZEROFILL 32 /* my_malloc(), fill array with zero */
#define MY_ALLOW_ZERO_PTR 64 /* my_realloc() ; zero ptr -> malloc */
#define MY_FREE_ON_ERROR 128 /* my_realloc() ; Free old ptr on error */
#define MY_HOLD_ON_ERROR 256 /* my_realloc() ; Return old ptr on error */
-#define MY_DONT_OVERWRITE_FILE 1024 /* my_copy: Don't overwrite file */
+#define MY_DONT_OVERWRITE_FILE 2048 /* my_copy: Don't overwrite file */
#define MY_THREADSAFE 2048 /* my_seek(): lock fd mutex */
#define MY_CHECK_ERROR 1 /* Params to my_end; Check open-close */
@@ -89,6 +92,8 @@ extern int NEAR my_errno; /* Last error in mysys */
#define ME_COLOUR1 ((1 << ME_HIGHBYTE)) /* Possibly error-colours */
#define ME_COLOUR2 ((2 << ME_HIGHBYTE))
#define ME_COLOUR3 ((3 << ME_HIGHBYTE))
+#define ME_JUST_INFO 1024 /**< not error but just info */
+#define ME_JUST_WARNING 2048 /**< not error but just warning */
/* Bits in last argument to fn_format */
#define MY_REPLACE_DIR 1 /* replace dir in name with 'dir' */
@@ -207,11 +212,13 @@ extern int errno; /* declare errno */
extern char NEAR errbuff[NRERRBUFFS][ERRMSGSIZE];
extern char *home_dir; /* Home directory for user */
extern const char *my_progname; /* program-name (printed in errors) */
+extern const char *my_progname_short; /* like above but without directory */
extern char NEAR curr_dir[]; /* Current directory for user */
extern int (*error_handler_hook)(uint my_err, const char *str,myf MyFlags);
extern int (*fatal_error_handler_hook)(uint my_err, const char *str,
myf MyFlags);
extern uint my_file_limit;
+extern ulong my_thread_stack_size;
#ifdef HAVE_LARGE_PAGES
extern my_bool my_use_large_pages;
@@ -276,7 +283,18 @@ enum cache_type
enum flush_type
{
- FLUSH_KEEP, FLUSH_RELEASE, FLUSH_IGNORE_CHANGED, FLUSH_FORCE_WRITE
+ FLUSH_KEEP, /* flush block and keep it in the cache */
+ FLUSH_RELEASE, /* flush block and remove it from the cache */
+ FLUSH_IGNORE_CHANGED, /* remove block from the cache */
+ /* as my_disable_flush_pagecache_blocks is always 0, it is
+ strictly equivalent to FLUSH_KEEP */
+ FLUSH_FORCE_WRITE,
+ /**
+ @brief like FLUSH_KEEP but return immediately if file is already being
+ flushed (even partially) by another thread; only for page cache,
+ forbidden for key cache.
+ */
+ FLUSH_KEEP_LAZY
};
typedef struct st_record_cache /* Used when cacheing records */
@@ -530,6 +548,7 @@ my_off_t my_b_safe_tell(IO_CACHE* info); /* picks the correct tell() */
*(info)->current_pos)
typedef uint32 ha_checksum;
+extern ha_checksum my_crc_dbug_check;
/* Define the type of function to be passed to process_default_option_files */
typedef int (*Process_option_func)(void *ctx, const char *group_name,
@@ -627,6 +646,8 @@ extern FILE *my_fdopen(File Filedes,const char *name, int Flags,myf MyFlags);
extern int my_fclose(FILE *fd,myf MyFlags);
extern int my_chsize(File fd,my_off_t newlength, int filler, myf MyFlags);
extern int my_sync(File fd, myf my_flags);
+extern int my_sync_dir(const char *dir_name, myf my_flags);
+extern int my_sync_dir_by_file(const char *file_name, myf my_flags);
extern int my_error _VARARGS((int nr,myf MyFlags, ...));
extern int my_printf_error _VARARGS((uint my_err, const char *format,
myf MyFlags, ...))
@@ -661,7 +682,7 @@ extern char *my_tmpdir(MY_TMPDIR *tmpdir);
extern void free_tmpdir(MY_TMPDIR *tmpdir);
extern void my_remember_signal(int signal_number,sig_handler (*func)(int));
-extern size_t dirname_part(char * to, const char *name, size_t *to_res_length);
+extern size_t dirname_part(char * to,const char *name, size_t *to_res_length);
extern size_t dirname_length(const char *name);
#define base_name(A) (A+dirname_length(A))
extern int test_if_hard_path(const char *dir_name);
@@ -709,7 +730,7 @@ extern sig_handler sigtstp_handler(int signal_number);
extern void handle_recived_signals(void);
extern sig_handler my_set_alarm_variable(int signo);
-extern void my_string_ptr_sort(uchar *base, uint items, size_t size);
+extern void my_string_ptr_sort(uchar *base,uint items,size_t size);
extern void radixsort_for_str_ptr(uchar* base[], uint number_of_elements,
size_t size_of_element,uchar *buffer[]);
extern qsort_t my_qsort(void *base_ptr, size_t total_elems, size_t size,
@@ -777,6 +798,7 @@ extern my_bool insert_dynamic(DYNAMIC_ARRAY *array,uchar * element);
extern uchar *alloc_dynamic(DYNAMIC_ARRAY *array);
extern uchar *pop_dynamic(DYNAMIC_ARRAY*);
extern my_bool set_dynamic(DYNAMIC_ARRAY *array,uchar * element,uint array_index);
+extern my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements);
extern void get_dynamic(DYNAMIC_ARRAY *array,uchar * element,uint array_index);
extern void delete_dynamic(DYNAMIC_ARRAY *array);
extern void delete_dynamic_element(DYNAMIC_ARRAY *array, uint array_index);
@@ -843,11 +865,9 @@ extern int unpackfrm(uchar **, size_t *, const uchar *);
extern ha_checksum my_checksum(ha_checksum crc, const uchar *mem,
size_t count);
-extern uint my_bit_log2(ulong value);
-extern uint32 my_round_up_to_next_power(uint32 v);
-extern uint my_count_bits(ulonglong v);
-extern uint my_count_bits_ushort(ushort v);
+extern void my_debug_put_break_here(void);
extern void my_sleep(ulong m_seconds);
+extern ulong crc32(ulong crc, const uchar *buf, uint len);
extern uint my_set_max_open_files(uint files);
void my_free_open_file_info(void);
@@ -865,7 +885,7 @@ extern int my_getncpus();
#ifndef MAP_NOSYNC
#define MAP_NOSYNC 0
#endif
-#ifndef MAP_NORESERVE
+#ifndef MAP_NORESERVE
#define MAP_NORESERVE 0 /* For irix and AIX */
#endif
@@ -904,6 +924,19 @@ int my_getpagesize(void);
int my_msync(int, void *, size_t, int);
+#define MY_UUID_SIZE 16
+void my_uuid_init(ulong seed1, ulong seed2);
+void my_uuid(uchar *guid);
+void my_uuid_end();
+
+struct my_rnd_struct {
+ unsigned long seed1,seed2,max_value;
+ double max_value_dbl;
+};
+
+void my_rnd_init(struct my_rnd_struct *rand_st, ulong seed1, ulong seed2);
+double my_rnd(struct my_rnd_struct *rand_st);
+
/* character sets */
extern uint get_charset_number(const char *cs_name, uint cs_flags);
extern uint get_collation_number(const char *name);
diff --git a/include/myisam.h b/include/myisam.h
index 6a76a298dee..292a0015964 100644
--- a/include/myisam.h
+++ b/include/myisam.h
@@ -31,33 +31,19 @@ extern "C" {
#include "keycache.h"
#endif
#include "my_handler.h"
+#include <myisamchk.h>
#include <mysql/plugin.h>
/*
- There is a hard limit for the maximum number of keys as there are only
- 8 bits in the index file header for the number of keys in a table.
- This means that 0..255 keys can exist for a table. The idea of
- MI_MAX_POSSIBLE_KEY is to ensure that one can use myisamchk & tools on
- a MyISAM table for which one has more keys than MyISAM is normally
- compiled for. If you don't have this, you will get a core dump when
- running myisamchk compiled for 128 keys on a table with 255 keys.
+ Limit max keys according to HA_MAX_POSSIBLE_KEY; See myisamchk.h for details
*/
-#define MI_MAX_POSSIBLE_KEY 255 /* For myisam_chk */
-#if MAX_INDEXES > MI_MAX_POSSIBLE_KEY
-#define MI_MAX_KEY MI_MAX_POSSIBLE_KEY /* Max allowed keys */
+
+#if MAX_INDEXES > HA_MAX_POSSIBLE_KEY
+#define MI_MAX_KEY HA_MAX_POSSIBLE_KEY /* Max allowed keys */
#else
#define MI_MAX_KEY MAX_INDEXES /* Max allowed keys */
#endif
-#define MI_MAX_POSSIBLE_KEY_BUFF (1024+6+6) /* For myisam_chk */
-/*
- The following defines can be increased if necessary.
- But beware the dependency of MI_MAX_POSSIBLE_KEY_BUFF and MI_MAX_KEY_LENGTH.
-*/
-#define MI_MAX_KEY_LENGTH 1000 /* Max length in bytes */
-#define MI_MAX_KEY_SEG 16 /* Max segments for key */
-
-#define MI_MAX_KEY_BUFF (MI_MAX_KEY_LENGTH+MI_MAX_KEY_SEG*6+8+8)
#define MI_MAX_MSG_BUF 1024 /* used in CHECK TABLE, REPAIR TABLE */
#define MI_NAME_IEXT ".MYI"
#define MI_NAME_DEXT ".MYD"
@@ -69,8 +55,6 @@ extern "C" {
#define MI_MIN_KEY_BLOCK_LENGTH 1024 /* Min key block length */
#define MI_MAX_KEY_BLOCK_LENGTH 16384
-#define mi_portable_sizeof_char_ptr 8
-
/*
In the following macros '_keyno_' is 0 .. keys-1.
If there can be more keys than bits in the key_map, the highest bit
@@ -241,7 +225,7 @@ struct st_mi_bit_buff;
typedef struct st_columndef /* column information */
{
- int16 type; /* en_fieldtype */
+ enum en_fieldtype type;
uint16 length; /* length of field */
uint32 offset; /* Offset to position in row */
uint8 null_bit; /* If column may be 0 */
@@ -256,9 +240,6 @@ typedef struct st_columndef /* column information */
#endif
} MI_COLUMNDEF;
-/* invalidator function reference for Query Cache */
-typedef void (* invalidator_by_filename)(const char * filename);
-
extern char * myisam_log_filename; /* Name of logfile */
extern ulong myisam_block_size;
extern ulong myisam_concurrent_insert;
@@ -302,7 +283,7 @@ extern int mi_extra(struct st_myisam_info *file,
enum ha_extra_function function,
void *extra_arg);
extern int mi_reset(struct st_myisam_info *file);
-extern ha_rows mi_records_in_range(MI_INFO *info, int inx,
+extern ha_rows mi_records_in_range(MI_INFO *info,int inx,
key_range *min_key, key_range *max_key);
extern int mi_log(int activate_log);
extern int mi_is_changed(struct st_myisam_info *info);
@@ -310,195 +291,117 @@ extern int mi_delete_all_rows(struct st_myisam_info *info);
extern ulong _mi_calc_blob_length(uint length , const uchar *pos);
extern uint mi_get_pointer_length(ulonglong file_length, uint def);
-/* this is used to pass to mysql_myisamchk_table -- by Sasha Pachev */
+/* this is used to pass to mysql_myisamchk_table */
#define MYISAMCHK_REPAIR 1 /* equivalent to myisamchk -r */
#define MYISAMCHK_VERIFY 2 /* Verify, run repair if failure */
-/*
- Definitions needed for myisamchk.c
-
- Entries marked as "QQ to be removed" are NOT used to
- pass check/repair options to mi_check.c. They are used
- internally by myisamchk.c or/and ha_myisam.cc and should NOT
- be stored together with other flags. They should be removed
- from the following list to make addition of new flags possible.
-*/
-
-#define T_AUTO_INC 1
-#define T_AUTO_REPAIR 2 /* QQ to be removed */
-#define T_BACKUP_DATA 4
-#define T_CALC_CHECKSUM 8
-#define T_CHECK 16 /* QQ to be removed */
-#define T_CHECK_ONLY_CHANGED 32 /* QQ to be removed */
-#define T_CREATE_MISSING_KEYS 64
-#define T_DESCRIPT 128
-#define T_DONT_CHECK_CHECKSUM 256
-#define T_EXTEND 512
-#define T_FAST (1L << 10) /* QQ to be removed */
-#define T_FORCE_CREATE (1L << 11) /* QQ to be removed */
-#define T_FORCE_UNIQUENESS (1L << 12)
-#define T_INFO (1L << 13)
-#define T_MEDIUM (1L << 14)
-#define T_QUICK (1L << 15) /* QQ to be removed */
-#define T_READONLY (1L << 16) /* QQ to be removed */
-#define T_REP (1L << 17)
-#define T_REP_BY_SORT (1L << 18) /* QQ to be removed */
-#define T_REP_PARALLEL (1L << 19) /* QQ to be removed */
-#define T_RETRY_WITHOUT_QUICK (1L << 20)
-#define T_SAFE_REPAIR (1L << 21)
-#define T_SILENT (1L << 22)
-#define T_SORT_INDEX (1L << 23) /* QQ to be removed */
-#define T_SORT_RECORDS (1L << 24) /* QQ to be removed */
-#define T_STATISTICS (1L << 25)
-#define T_UNPACK (1L << 26)
-#define T_UPDATE_STATE (1L << 27)
-#define T_VERBOSE (1L << 28)
-#define T_VERY_SILENT (1L << 29)
-#define T_WAIT_FOREVER (1L << 30)
-#define T_WRITE_LOOP ((ulong) 1L << 31)
-
-#define T_REP_ANY (T_REP | T_REP_BY_SORT | T_REP_PARALLEL)
-
-/*
- Flags used by myisamchk.c or/and ha_myisam.cc that are NOT passed
- to mi_check.c follows:
-*/
-
-#define TT_USEFRM 1
-#define TT_FOR_UPGRADE 2
+typedef uint mi_bit_type;
-#define O_NEW_INDEX 1 /* Bits set in out_flag */
-#define O_NEW_DATA 2
-#define O_DATA_LOST 4
+typedef struct st_mi_bit_buff
+{ /* Used for packing of record */
+ mi_bit_type current_byte;
+ uint bits;
+ uchar *pos, *end, *blob_pos, *blob_end;
+ uint error;
+} MI_BIT_BUFF;
-/* these struct is used by my_check to tell it what to do */
-typedef struct st_sort_key_blocks /* Used when sorting */
+typedef struct st_sort_info
{
- uchar *buff,*end_pos;
- uchar lastkey[MI_MAX_POSSIBLE_KEY_BUFF];
- uint last_length;
- int inited;
-} SORT_KEY_BLOCKS;
-
-
-/*
- MyISAM supports several statistics collection methods. Currently statistics
- collection method is not stored in MyISAM file and has to be specified for
- each table analyze/repair operation in MI_CHECK::stats_method.
-*/
+#ifdef THREAD
+ /* sync things */
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+#endif
+ MI_INFO *info;
+ HA_CHECK *param;
+ uchar *buff;
+ SORT_KEY_BLOCKS *key_block, *key_block_end;
+ SORT_FT_BUF *ft_buf;
+ my_off_t filelength, dupp, buff_length;
+ ha_rows max_records;
+ uint current_key, total_keys;
+ uint got_error, threads_running;
+ myf myf_rw;
+ enum data_file_type new_data_file_type;
+} MI_SORT_INFO;
-typedef enum
-{
- /* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */
- MI_STATS_METHOD_NULLS_NOT_EQUAL,
- /* Treat NULLs as equal when collecting statistics (like 4.0 did) */
- MI_STATS_METHOD_NULLS_EQUAL,
- /* Ignore NULLs - count only tuples without NULLs in the index components */
- MI_STATS_METHOD_IGNORE_NULLS
-} enum_mi_stats_method;
-
-typedef struct st_mi_check_param
+typedef struct st_mi_sort_param
{
- ulonglong auto_increment_value;
- ulonglong max_data_file_length;
- ulonglong keys_in_use;
- ulonglong max_record_length;
- my_off_t search_after_block;
- my_off_t new_file_pos,key_file_blocks;
- my_off_t keydata,totaldata,key_blocks,start_check_pos;
- ha_rows total_records,total_deleted;
- ha_checksum record_checksum,glob_crc;
- ulong use_buffers,read_buffer_length,write_buffer_length,
- sort_buffer_length,sort_key_blocks;
- uint out_flag,warning_printed,error_printed,verbose;
- uint opt_sort_key,total_files,max_level;
- uint testflag, key_cache_block_size;
- uint8 language;
- my_bool using_global_keycache, opt_lock_memory, opt_follow_links;
- my_bool retry_repair, force_sort;
- char temp_filename[FN_REFLEN],*isam_file_name;
- MY_TMPDIR *tmpdir;
- int tmpfile_createflag;
- myf myf_rw;
- IO_CACHE read_cache;
+ pthread_t thr;
+ IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
+ DYNAMIC_ARRAY buffpek;
+ MI_BIT_BUFF bit_buff; /* For parallel repair of packrec. */
+ MI_KEYDEF *keyinfo;
+ MI_SORT_INFO *sort_info;
+ HA_KEYSEG *seg;
+ uchar **sort_keys;
+ uchar *rec_buff;
+ void *wordlist, *wordptr;
+ MEM_ROOT wordroot;
+ uchar *record;
+ MY_TMPDIR *tmpdir;
+
/*
The next two are used to collect statistics, see update_key_parts for
description.
*/
- ulonglong unique_count[MI_MAX_KEY_SEG+1];
- ulonglong notnull_count[MI_MAX_KEY_SEG+1];
-
- ha_checksum key_crc[MI_MAX_POSSIBLE_KEY];
- ulong rec_per_key_part[MI_MAX_KEY_SEG*MI_MAX_POSSIBLE_KEY];
- void *thd;
- const char *db_name, *table_name;
- const char *op_name;
- enum_mi_stats_method stats_method;
-} MI_CHECK;
-
-typedef struct st_sort_ft_buf
-{
- uchar *buf, *end;
- int count;
- uchar lastkey[MI_MAX_KEY_BUFF];
-} SORT_FT_BUF;
+ ulonglong unique[HA_MAX_KEY_SEG+1];
+ ulonglong notnull[HA_MAX_KEY_SEG+1];
+
+ my_off_t pos,max_pos,filepos,start_recpos;
+ uint key, key_length,real_key_length,sortbuff_size;
+ uint maxbuffers, keys, find_length, sort_keys_length;
+ my_bool fix_datafile, master;
+ my_bool calc_checksum; /* calculate table checksum */
+
+ int (*key_cmp)(struct st_mi_sort_param *, const void *, const void *);
+ int (*key_read)(struct st_mi_sort_param *,void *);
+ int (*key_write)(struct st_mi_sort_param *, const void *);
+ void (*lock_in_memory)(HA_CHECK *);
+ NEAR int (*write_keys)(struct st_mi_sort_param *, register uchar **,
+ uint , struct st_buffpek *, IO_CACHE *);
+ NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint);
+ NEAR int (*write_key)(struct st_mi_sort_param *, IO_CACHE *,uchar *,
+ uint, uint);
+} MI_SORT_PARAM;
-typedef struct st_sort_info
-{
- my_off_t filelength,dupp,buff_length;
- ha_rows max_records;
- uint current_key, total_keys;
- myf myf_rw;
- enum data_file_type new_data_file_type;
- MI_INFO *info;
- MI_CHECK *param;
- uchar *buff;
- SORT_KEY_BLOCKS *key_block,*key_block_end;
- SORT_FT_BUF *ft_buf;
- /* sync things */
- uint got_error, threads_running;
-#ifdef THREAD
- pthread_mutex_t mutex;
- pthread_cond_t cond;
-#endif
-} SORT_INFO;
/* functions in mi_check */
-void myisamchk_init(MI_CHECK *param);
-int chk_status(MI_CHECK *param, MI_INFO *info);
-int chk_del(MI_CHECK *param, register MI_INFO *info, uint test_flag);
-int chk_size(MI_CHECK *param, MI_INFO *info);
-int chk_key(MI_CHECK *param, MI_INFO *info);
-int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend);
-int mi_repair(MI_CHECK *param, register MI_INFO *info,
+void myisamchk_init(HA_CHECK *param);
+int chk_status(HA_CHECK *param, MI_INFO *info);
+int chk_del(HA_CHECK *param, register MI_INFO *info, uint test_flag);
+int chk_size(HA_CHECK *param, MI_INFO *info);
+int chk_key(HA_CHECK *param, MI_INFO *info);
+int chk_data_link(HA_CHECK *param, MI_INFO *info,int extend);
+int mi_repair(HA_CHECK *param, register MI_INFO *info,
char * name, int rep_quick);
-int mi_sort_index(MI_CHECK *param, register MI_INFO *info, char * name);
-int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
+int mi_sort_index(HA_CHECK *param, register MI_INFO *info, char * name);
+int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info,
const char * name, int rep_quick);
-int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
+int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info,
const char * name, int rep_quick);
int change_to_newfile(const char * filename, const char * old_ext,
const char * new_ext, uint raid_chunks,
myf myflags);
-int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type,
+int lock_file(HA_CHECK *param, File file, my_off_t start, int lock_type,
const char *filetype, const char *filename);
-void lock_memory(MI_CHECK *param);
-void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
+void lock_memory(HA_CHECK *param);
+void update_auto_increment_key(HA_CHECK *param, MI_INFO *info,
my_bool repair);
-int update_state_info(MI_CHECK *param, MI_INFO *info,uint update);
+int update_state_info(HA_CHECK *param, MI_INFO *info,uint update);
void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part,
ulonglong *unique, ulonglong *notnull,
ulonglong records);
-int filecopy(MI_CHECK *param, File to,File from,my_off_t start,
+int filecopy(HA_CHECK *param, File to,File from,my_off_t start,
my_off_t length, const char *type);
int movepoint(MI_INFO *info,uchar *record,my_off_t oldpos,
my_off_t newpos, uint prot_key);
-int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile);
+int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile);
int test_if_almost_full(MI_INFO *info);
-int recreate_table(MI_CHECK *param, MI_INFO **org_info, char *filename);
+int recreate_table(HA_CHECK *param, MI_INFO **org_info, char *filename);
void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows);
my_bool mi_test_if_sort_rep(MI_INFO *info, ha_rows rows, ulonglong key_map,
my_bool force);
@@ -512,6 +415,13 @@ void mi_change_key_cache(KEY_CACHE *old_key_cache,
KEY_CACHE *new_key_cache);
int mi_preload(MI_INFO *info, ulonglong key_map, my_bool ignore_leaves);
+int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile);
+int flush_pending_blocks(MI_SORT_PARAM *param);
+int sort_ft_buf_flush(MI_SORT_PARAM *sort_param);
+int thr_write_keys(MI_SORT_PARAM *sort_param);
+int sort_write_record(MI_SORT_PARAM *sort_param);
+int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, ulong);
+
#ifdef __cplusplus
}
#endif
diff --git a/include/myisamchk.h b/include/myisamchk.h
new file mode 100644
index 00000000000..3c5d59884be
--- /dev/null
+++ b/include/myisamchk.h
@@ -0,0 +1,166 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Definitions needed for myisamchk/mariachk.c */
+
+/*
+ Entries marked as "QQ to be removed" are NOT used to
+ pass check/repair options to xxx_check.c. They are used
+ internally by xxxchk.c or/and ha_xxxx.cc and should NOT
+ be stored together with other flags. They should be removed
+ from the following list to make addition of new flags possible.
+*/
+
+#ifndef _myisamchk_h
+#define _myisamchk_h
+
+#define T_AUTO_INC 1
+#define T_AUTO_REPAIR 2 /* QQ to be removed */
+#define T_BACKUP_DATA 4
+#define T_CALC_CHECKSUM 8
+#define T_CHECK 16 /* QQ to be removed */
+#define T_CHECK_ONLY_CHANGED 32 /* QQ to be removed */
+#define T_CREATE_MISSING_KEYS 64
+#define T_DESCRIPT 128
+#define T_DONT_CHECK_CHECKSUM 256
+#define T_EXTEND 512
+#define T_FAST (1L << 10) /* QQ to be removed */
+#define T_FORCE_CREATE (1L << 11) /* QQ to be removed */
+#define T_FORCE_UNIQUENESS (1L << 12)
+#define T_INFO (1L << 13)
+#define T_MEDIUM (1L << 14)
+#define T_QUICK (1L << 15) /* QQ to be removed */
+#define T_READONLY (1L << 16) /* QQ to be removed */
+#define T_REP (1L << 17)
+#define T_REP_BY_SORT (1L << 18) /* QQ to be removed */
+#define T_REP_PARALLEL (1L << 19) /* QQ to be removed */
+#define T_RETRY_WITHOUT_QUICK (1L << 20)
+#define T_SAFE_REPAIR (1L << 21)
+#define T_SILENT (1L << 22)
+#define T_SORT_INDEX (1L << 23) /* QQ to be removed */
+#define T_SORT_RECORDS (1L << 24) /* QQ to be removed */
+#define T_STATISTICS (1L << 25)
+#define T_UNPACK (1L << 26)
+#define T_UPDATE_STATE (1L << 27)
+#define T_VERBOSE (1L << 28)
+#define T_VERY_SILENT (1L << 29)
+#define T_WAIT_FOREVER (1L << 30)
+#define T_WRITE_LOOP ((ulong) 1L << 31)
+
+#define T_REP_ANY (T_REP | T_REP_BY_SORT | T_REP_PARALLEL)
+
+/*
+ Flags used by xxxxchk.c or/and ha_xxxx.cc that are NOT passed
+ to xxxcheck.c follows:
+*/
+
+#define TT_USEFRM 1
+#define TT_FOR_UPGRADE 2
+
+#define O_NEW_INDEX 1 /* Bits set in out_flag */
+#define O_NEW_DATA 2
+#define O_DATA_LOST 4
+
+typedef struct st_sort_key_blocks /* Used when sorting */
+{
+ uchar *buff, *end_pos;
+ uchar lastkey[HA_MAX_POSSIBLE_KEY_BUFF];
+ uint last_length;
+ int inited;
+} SORT_KEY_BLOCKS;
+
+
+/*
+ MARIA/MYISAM supports several statistics collection
+ methods. Currently statistics collection method is not stored in
+ MARIA file and has to be specified for each table analyze/repair
+ operation in MI_CHECK::stats_method.
+*/
+
+typedef enum
+{
+ /* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */
+ MI_STATS_METHOD_NULLS_NOT_EQUAL,
+ /* Treat NULLs as equal when collecting statistics (like 4.0 did) */
+ MI_STATS_METHOD_NULLS_EQUAL,
+ /* Ignore NULLs - count only tuples without NULLs in the index components */
+ MI_STATS_METHOD_IGNORE_NULLS
+} enum_handler_stats_method;
+
+
+typedef struct st_handler_check_param
+{
+ char *isam_file_name;
+ MY_TMPDIR *tmpdir;
+ void *thd;
+ const char *db_name, *table_name, *op_name;
+ ulonglong auto_increment_value;
+ ulonglong max_data_file_length;
+ ulonglong keys_in_use;
+ ulonglong max_record_length;
+ /*
+ The next two are used to collect statistics, see update_key_parts for
+ description.
+ */
+ ulonglong unique_count[HA_MAX_KEY_SEG + 1];
+ ulonglong notnull_count[HA_MAX_KEY_SEG + 1];
+
+ my_off_t search_after_block;
+ my_off_t new_file_pos, key_file_blocks;
+ my_off_t keydata, totaldata, key_blocks, start_check_pos;
+ my_off_t used, empty, splits, del_length, link_used;
+ ha_rows total_records, total_deleted, records,del_blocks;
+ ha_rows full_page_count, tail_count;
+ ha_checksum record_checksum, glob_crc;
+ ha_checksum key_crc[HA_MAX_POSSIBLE_KEY];
+ ha_checksum tmp_key_crc[HA_MAX_POSSIBLE_KEY];
+ ha_checksum tmp_record_checksum;
+ ulonglong org_key_map;
+ size_t use_buffers, read_buffer_length, write_buffer_length;
+ size_t sort_buffer_length, sort_key_blocks;
+ ulong rec_per_key_part[HA_MAX_KEY_SEG * HA_MAX_POSSIBLE_KEY];
+ double new_rec_per_key_part[HA_MAX_KEY_SEG * HA_MAX_POSSIBLE_KEY];
+ uint out_flag, warning_printed, error_printed, verbose;
+ uint opt_sort_key, total_files, max_level;
+ uint testflag, key_cache_block_size, pagecache_block_size;
+ int tmpfile_createflag, err_count;
+ myf myf_rw;
+ uint8 language;
+ my_bool using_global_keycache, opt_lock_memory, opt_follow_links;
+ my_bool retry_repair, force_sort, calc_checksum, static_row_size;
+ char temp_filename[FN_REFLEN];
+ IO_CACHE read_cache;
+ enum_handler_stats_method stats_method;
+} HA_CHECK;
+
+
+typedef struct st_sort_ftbuf
+{
+ uchar *buf, *end;
+ int count;
+ uchar lastkey[HA_MAX_KEY_BUFF];
+} SORT_FT_BUF;
+
+
+typedef struct st_buffpek {
+ my_off_t file_pos; /* Where we are in the sort file */
+ uchar *base, *key; /* Key pointers */
+ ha_rows count; /* Number of rows in table */
+ ulong mem_count; /* numbers of keys in memory */
+ ulong max_keys; /* Max keys in buffert */
+} BUFFPEK;
+
+#endif /* _myisamchk_h */
diff --git a/include/mysql_com.h b/include/mysql_com.h
index 7eefad44716..06f568fe06b 100644
--- a/include/mysql_com.h
+++ b/include/mysql_com.h
@@ -365,11 +365,7 @@ void my_net_set_read_timeout(NET *net, uint timeout);
struct sockaddr;
int my_connect(my_socket s, const struct sockaddr *name, unsigned int namelen,
unsigned int timeout);
-
-struct rand_struct {
- unsigned long seed1,seed2,max_value;
- double max_value_dbl;
-};
+struct my_rnd_struct;
#ifdef __cplusplus
}
@@ -423,10 +419,8 @@ extern "C" {
implemented in sql/password.c
*/
-void randominit(struct rand_struct *, unsigned long seed1,
- unsigned long seed2);
-double my_rnd(struct rand_struct *);
-void create_random_string(char *to, unsigned int length, struct rand_struct *rand_st);
+void create_random_string(char *to, unsigned int length,
+ struct my_rnd_struct *rand_st);
void hash_password(unsigned long *to, const char *password, unsigned int password_len);
void make_scrambled_password_323(char *to, const char *password);
diff --git a/include/wqueue.h b/include/wqueue.h
new file mode 100644
index 00000000000..bacabb8c401
--- /dev/null
+++ b/include/wqueue.h
@@ -0,0 +1,26 @@
+
+#ifndef _wqueue_h
+#define _wqueue_h
+
+#include <my_global.h>
+#include <my_pthread.h>
+
+/* info about requests in a waiting queue */
+typedef struct st_pagecache_wqueue
+{
+ struct st_my_thread_var *last_thread; /* circular list of waiting
+ threads */
+} WQUEUE;
+
+#ifdef THREAD
+void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread);
+void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread);
+void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread);
+void wqueue_add_and_wait(WQUEUE *wqueue,
+ struct st_my_thread_var *thread,
+ pthread_mutex_t *lock);
+void wqueue_release_queue(WQUEUE *wqueue);
+
+#endif
+
+#endif
diff --git a/libmysql/CMakeLists.txt b/libmysql/CMakeLists.txt
index b761907ec9d..8ed723ea419 100755
--- a/libmysql/CMakeLists.txt
+++ b/libmysql/CMakeLists.txt
@@ -71,10 +71,10 @@ SET(CLIENT_SOURCES ../mysys/array.c ../strings/bchange.c ../strings/bmove.c
../strings/ctype-simple.c ../strings/ctype-sjis.c ../strings/ctype-tis620.c
../strings/ctype-uca.c ../strings/ctype-ucs2.c ../strings/ctype-ujis.c
../strings/ctype-utf8.c ../strings/ctype-win1250ch.c ../strings/ctype.c
- ../mysys/default.c errmsg.c ../mysys/errors.c
+ ../mysys/default.c errmsg.c ../mysys/errors.c ../mysys/my_sync.c
../mysys/hash.c ../mysys/my_sleep.c ../mysys/default_modify.c
get_password.c ../strings/int2str.c ../strings/is_prefix.c
- libmysql.c ../mysys/list.c ../strings/llstr.c
+ libmysql.c ../mysys/list.c ../strings/llstr.c ../mysys/my_rnd.c
../strings/longlong2str.c manager.c ../mysys/mf_arr_appstr.c ../mysys/mf_cache.c
../mysys/mf_dirname.c ../mysys/mf_fn_ext.c ../mysys/mf_format.c
../mysys/mf_iocache.c ../mysys/mf_iocache2.c ../mysys/mf_loadpath.c
diff --git a/libmysql/Makefile.shared b/libmysql/Makefile.shared
index 53b24c4fb42..cbee8673164 100644
--- a/libmysql/Makefile.shared
+++ b/libmysql/Makefile.shared
@@ -66,9 +66,9 @@ mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \
my_compress.lo array.lo my_once.lo list.lo my_net.lo \
charset.lo charset-def.lo hash.lo mf_iocache.lo \
mf_iocache2.lo my_seek.lo my_sleep.lo \
- my_pread.lo mf_cache.lo md5.lo sha1.lo \
+ my_pread.lo mf_cache.lo md5.lo sha1.lo my_rnd.lo \
my_getopt.lo my_gethostbyname.lo my_port.lo \
- my_rename.lo my_chsize.lo my_getsystime.lo
+ my_rename.lo my_chsize.lo my_sync.lo my_getsystime.lo
sqlobjects = net.lo
sql_cmn_objects = pack.lo client.lo my_time.lo
diff --git a/mysql-test/extra/rpl_tests/rpl_flsh_tbls.test b/mysql-test/extra/rpl_tests/rpl_flsh_tbls.test
index ee6b0ed1426..209ef6189d1 100644
--- a/mysql-test/extra/rpl_tests/rpl_flsh_tbls.test
+++ b/mysql-test/extra/rpl_tests/rpl_flsh_tbls.test
@@ -9,9 +9,9 @@
let $SERVER_VERSION=`select version()`;
-create table t1 (a int);
+create table t1 (a int) ENGINE=MyISAM;
insert into t1 values (10);
-create table t2 (a int);
+create table t2 (a int) ENGINE=MyISAM;
create table t3 (a int) engine=merge union(t1);
create table t4 (a int);
# We force the slave to open t3 (because we want to try confusing him) with this :
diff --git a/mysql-test/extra/rpl_tests/rpl_insert_delayed.test b/mysql-test/extra/rpl_tests/rpl_insert_delayed.test
index e492903afad..16e6bb3c960 100644
--- a/mysql-test/extra/rpl_tests/rpl_insert_delayed.test
+++ b/mysql-test/extra/rpl_tests/rpl_insert_delayed.test
@@ -17,7 +17,7 @@ select @@global.binlog_format;
# happened only in statement-based binlogging.
#
-CREATE TABLE t1 (id INT primary key auto_increment, name VARCHAR(64));
+CREATE TABLE t1 (id INT primary key auto_increment, name VARCHAR(64)) ENGINE=MyISAM;
let $query = "INSERT DELAYED INTO t1 VALUES (null, 'Dr. No'), (null, 'From Russia With Love'), (null, 'Goldfinger'), (null, 'Thunderball'), (null, 'You Only Live Twice')";
--exec $MYSQL_SLAP --silent --concurrency=5 --iterations=200 --query=$query --delimiter=";"
diff --git a/mysql-test/include/have_maria.inc b/mysql-test/include/have_maria.inc
new file mode 100644
index 00000000000..955e2305ca5
--- /dev/null
+++ b/mysql-test/include/have_maria.inc
@@ -0,0 +1,4 @@
+-- require r/have_maria.require
+disable_query_log;
+show variables like "have_maria";
+enable_query_log;
diff --git a/mysql-test/include/maria_empty_logs.inc b/mysql-test/include/maria_empty_logs.inc
new file mode 100644
index 00000000000..7613cea2a52
--- /dev/null
+++ b/mysql-test/include/maria_empty_logs.inc
@@ -0,0 +1,33 @@
+# Maria help script.
+# Cleans up all logs to give recovery a fresh start.
+
+# API: none, just uses vardir, port and socket.
+
+connection admin;
+
+-- echo * shut down mysqld, removed logs, restarted it
+append_file $MYSQLTEST_VARDIR/tmp/master0.expect;
+wait-maria_empty_logs.inc
+EOF
+
+--exec $MYSQLADMIN --no-defaults -S $MASTER_MYSOCK -P $MASTER_MYPORT -u root --password= shutdown 2>&1;
+
+remove_file $MYSQLTEST_VARDIR/master-data/maria_log_control;
+remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000001;
+-- error 0,1 # maybe there is just one log
+remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000002;
+# Hope there were not more than these logs.
+
+-- error 0,1
+remove_file $MYSQLTEST_VARDIR/master-data/maria_recovery.trace;
+
+append_file $MYSQLTEST_VARDIR/tmp/master0.expect;
+restart-maria_empty_logs.inc
+EOF
+
+--source include/wait_until_connected_again.inc
+
+connection default;
+# the effect of "use" is lost after a restart so we are back into db "test",
+# because connection 'default' was created with db "test".
+use mysqltest;
diff --git a/mysql-test/include/maria_make_snapshot.inc b/mysql-test/include/maria_make_snapshot.inc
new file mode 100644
index 00000000000..b457f3e1a68
--- /dev/null
+++ b/mysql-test/include/maria_make_snapshot.inc
@@ -0,0 +1,48 @@
+# Maria helper script
+# Copies table' data and index file to other directory, or back, or compares.
+# The other directory looks like a database directory, so that we can
+# read copies from inside mysqld, that's also why we copy the frm.
+
+# "mms" is a namespace for Maria_Make_Snapshot
+
+# API:
+# 1) set one of
+# $mms_copy : to copy table from database to spare directory
+# $mms_reverse : to copy it back
+# $mms_compare_physically : to compare both byte-for-byte
+# 2) set $mms_table_to_use to a number N: table will be mysqltest.tN
+# 3) set $mms_purpose to say what this copy is for (influences the naming
+# of the spare directory).
+
+if ($mms_copy)
+{
+ --echo * copied t$mms_table_to_use for $mms_purpose
+ copy_file $MYSQLTEST_VARDIR/master-data/mysqltest/t$mms_table_to_use.MAD $MYSQLTEST_VARDIR/master-data/mysqltest_for_$mms_purpose/t$mms_table_to_use.MAD;
+ copy_file $MYSQLTEST_VARDIR/master-data/mysqltest/t$mms_table_to_use.MAI $MYSQLTEST_VARDIR/master-data/mysqltest_for_$mms_purpose/t$mms_table_to_use.MAI;
+ copy_file $MYSQLTEST_VARDIR/master-data/mysqltest/t$mms_table_to_use.frm $MYSQLTEST_VARDIR/master-data/mysqltest_for_$mms_purpose/t$mms_table_to_use.frm;
+}
+
+if ($mms_reverse_copy)
+{
+ # do not call this without flushing target table first!
+ --echo * copied t$mms_table_to_use back for $mms_purpose
+ -- error 0,1
+ remove_file $MYSQLTEST_VARDIR/master-data/mysqltest/t$mms_table_to_use.MAD;
+ copy_file $MYSQLTEST_VARDIR/master-data/mysqltest_for_$mms_purpose/t$mms_table_to_use.MAD $MYSQLTEST_VARDIR/master-data/mysqltest/t$mms_table_to_use.MAD;
+ -- error 0,1
+ remove_file $MYSQLTEST_VARDIR/master-data/mysqltest/t$mms_table_to_use.MAI;
+ copy_file $MYSQLTEST_VARDIR/master-data/mysqltest_for_$mms_purpose/t$mms_table_to_use.MAI $MYSQLTEST_VARDIR/master-data/mysqltest/t$mms_table_to_use.MAI;
+}
+
+if ($mms_compare_physically)
+{
+ # After the UNDO phase this is normally impossible
+ # (UNDO execution has created new log records => pages have new LSNs).
+ # So, do this only when testing REDO phase.
+ # If UNDO phase, we nevertheless compare checksums
+ # (see maria_verify_recovery.inc).
+ --echo * compared t$mms_table_to_use to old version
+ diff_files $MYSQLTEST_VARDIR/master-data/mysqltest/t$mms_table_to_use.MAD $MYSQLTEST_VARDIR/master-data/mysqltest_for_$mms_purpose/t$mms_table_to_use.MAD;
+# index file not yet recovered
+# diff_files $MYSQLTEST_VARDIR/master-data/mysqltest/t$mms_table_to_use.MAI $MYSQLTEST_VARDIR/master-data/mysqltest_for_$mms_purpose/t$mms_table_to_use.MAI;
+}
diff --git a/mysql-test/include/maria_make_snapshot_for_comparison.inc b/mysql-test/include/maria_make_snapshot_for_comparison.inc
new file mode 100644
index 00000000000..71b821b5212
--- /dev/null
+++ b/mysql-test/include/maria_make_snapshot_for_comparison.inc
@@ -0,0 +1,30 @@
+# Maria helper script
+# Copies clean tables' data and index file to other directory
+# Tables are t1...t[$mms_tables]
+# They are later used as a reference to see if recovery works.
+
+# API:
+# set $mms_tables to N, the script will cover tables mysqltest.t1,...tN
+
+connection admin;
+
+let $mms_table_to_use=$mms_tables;
+let $mms_purpose=comparison;
+let $mms_copy=1;
+
+--disable_query_log
+--disable_warnings
+eval drop database if exists mysqltest_for_$mms_purpose;
+--enable_warnings
+eval create database mysqltest_for_$mms_purpose;
+--enable_query_log
+
+while ($mms_table_to_use)
+{
+ # to serve as a reference, table must be in a clean state
+ eval flush table t$mms_table_to_use;
+ -- source include/maria_make_snapshot.inc
+ dec $mms_table_to_use;
+}
+let $mms_copy=0;
+connection default;
diff --git a/mysql-test/include/maria_make_snapshot_for_feeding_recovery.inc b/mysql-test/include/maria_make_snapshot_for_feeding_recovery.inc
new file mode 100644
index 00000000000..2ca6fb07808
--- /dev/null
+++ b/mysql-test/include/maria_make_snapshot_for_feeding_recovery.inc
@@ -0,0 +1,35 @@
+# Maria helper script
+# Copies tables' data and index file to other directory, and control file.
+# Tables are t1...t[$mms_tables].
+# Later, mysqld is shutdown, and that snapshot is put back into the
+# datadir, control file too ("flashing recovery's brain"), and recovery is let
+# to run on it (see maria_verify_recovery.inc).
+
+# API:
+# set $mms_tables to N, the script will cover tables mysqltest.t1,...tN
+
+connection admin;
+
+let $mms_table_to_use=$mms_tables;
+let $mms_purpose=feeding_recovery;
+let $mms_copy=1;
+
+--disable_query_log
+--disable_warnings
+eval drop database if exists mysqltest_for_$mms_purpose;
+--enable_warnings
+eval create database mysqltest_for_$mms_purpose;
+--enable_query_log
+
+while ($mms_table_to_use)
+{
+ -- source include/maria_make_snapshot.inc
+ dec $mms_table_to_use;
+}
+let $mms_copy=0;
+
+-- error 0,1
+remove_file $MYSQLTEST_VARDIR/tmp/mms_for_$mms_purpose.maria_log_control;
+copy_file $MYSQLTEST_VARDIR/master-data/maria_log_control $MYSQLTEST_VARDIR/tmp/mms_for_$mms_purpose.maria_log_control;
+
+connection default;
diff --git a/mysql-test/include/maria_verify_recovery.inc b/mysql-test/include/maria_verify_recovery.inc
new file mode 100644
index 00000000000..48d799162a4
--- /dev/null
+++ b/mysql-test/include/maria_verify_recovery.inc
@@ -0,0 +1,96 @@
+# Maria helper script.
+# Runs recovery, compare with expected table data.
+
+# API:
+# 1) set $mms_tables to N, the script will cover tables mysqltest.t1,...tN
+# 2) set $mvr_debug_option to the crash way
+# 3) set $mvr_crash_statement to the statement which will trigger a crash
+# 4) set $mvr_restore_old_snapshot to 1 if you want recovery to run on
+# an old copy of tables and of the control file, 0 for normal recovery.
+# 5) set $mms_compare_physically to 1 if you want a physical byte-for-byte
+# comparison with expected table. Checksum comparison is always done.
+# "mvr" is a namespace for Maria_Verify_Recovery
+
+connection admin;
+
+# we may do a copy-back of tables before comparison, so save comparison
+# request made by caller:
+let $mms_compare_physically_save=$mms_compare_physically;
+let $mms_compare_physically=0;
+
+# warn mtr that mysqld is going to die and should not be restarted immediately
+#append_file $MYSQLTEST_VARDIR/tmp/master0.expect;
+#wait-maria_verify_recovery.inc
+#EOF
+# todo: remove this "system" and uncomment above when BUG#32296 is fixed
+system echo wait-maria_verify_recovery.inc >> $MYSQLTEST_VARDIR/tmp/master0.expect;
+
+# flush page cache and log, only log, or nothing, and kill mysqld with
+# abort().
+# When we restore an old snapshot, we could just kill mysqld nicely,
+# but that would implicitely commit all work, which the tester may
+# not want (tester may want to observe rollback happening).
+
+eval SET SESSION debug=$mvr_debug_option;
+--echo * crashing mysqld intentionally
+--error 2013
+eval $mvr_crash_statement; # this will crash (DBUG magic)
+
+if ($mvr_restore_old_snapshot)
+{
+
+ # copy snapshot made by maria_make_snapshot_for_feeding_recovery back
+ # into datadir.
+
+ let $mms_table_to_use=$mms_tables;
+ let $mms_purpose=feeding_recovery;
+ let $mms_reverse_copy=1;
+ while ($mms_table_to_use)
+ {
+ -- source include/maria_make_snapshot.inc
+ dec $mms_table_to_use;
+ }
+ let $mms_reverse_copy=0;
+
+ # also copy back control file, to force recovery to start from an early
+ # point, ignoring further checkpoints.
+ -- error 0,1
+ remove_file $MYSQLTEST_VARDIR/master-data/maria_log_control;
+ copy_file $MYSQLTEST_VARDIR/tmp/mms_for_$mms_purpose.maria_log_control $MYSQLTEST_VARDIR/master-data/maria_log_control;
+}
+
+--echo * recovery happens
+# let mtr restart mysqld (and thus execute the maria log)
+#append_file $MYSQLTEST_VARDIR/tmp/master0.expect;
+#restart-maria_verify_recovery.inc
+#EOF
+system echo restart-maria_verify_recovery.inc >> $MYSQLTEST_VARDIR/tmp/master0.expect;
+
+--source include/wait_until_connected_again.inc
+
+# Compare that tables of $mms_tables are identical to old.
+# We always compare with CHECKSUM TABLE, and if requested (which makes sense
+# only for testing the REDO phase, as UNDO phase generates new records so new
+# LSNs on pages.) with a physical byte-for-byte comparison.
+let $mms_table_to_use=$mms_tables;
+let $mms_purpose=comparison;
+let $mms_compare_physically=$mms_compare_physically_save;
+while ($mms_table_to_use)
+{
+ eval check table t$mms_table_to_use extended;
+ --echo * testing that checksum after recovery is as expected
+ let $new_checksum=`CHECKSUM TABLE t$mms_table_to_use`;
+ let $old_checksum=`CHECKSUM TABLE mysqltest_for_$mms_purpose.t$mms_table_to_use`;
+ # the $ text variables above are of the form "db.tablename\tchecksum",
+ # as db differs, we use substring().
+ --disable_query_log
+ eval select if(substring("$new_checksum",instr("$new_checksum",".t1")) = substring("$old_checksum",instr("$old_checksum",".t1")),"ok","failure") as "Checksum-check";
+ --enable_query_log
+ # this script may compare physically or do nothing
+ -- source include/maria_make_snapshot.inc
+ dec $mms_table_to_use;
+}
+
+connection default;
+# the effect of "use" is lost after a restart so we are back into db "test"
+use mysqltest;
diff --git a/mysql-test/include/ps_conv.inc b/mysql-test/include/ps_conv.inc
index 195d1061664..8cbe9450063 100644
--- a/mysql-test/include/ps_conv.inc
+++ b/mysql-test/include/ps_conv.inc
@@ -52,7 +52,7 @@ set @arg14= 'abc';
set @arg14= NULL ;
set @arg15= CAST('abc' as binary) ;
set @arg15= NULL ;
-create table t5 as select
+eval create table t5 engine = MyISAM as select
8 as const01, @arg01 as param01,
8.0 as const02, @arg02 as param02,
80.00000000000e-1 as const03, @arg03 as param03,
diff --git a/mysql-test/include/wait_until_connected_again.inc b/mysql-test/include/wait_until_connected_again.inc
index dc96f646cb3..d803dd2f699 100644
--- a/mysql-test/include/wait_until_connected_again.inc
+++ b/mysql-test/include/wait_until_connected_again.inc
@@ -1,12 +1,14 @@
#
# Include this script to wait until the connection to the
-# server has been restored or timeout occurs
+# server has been restored or timeout occurs.
+# You should have done --enable_reconnect first
--disable_result_log
--disable_query_log
let $counter= 500;
+let $mysql_errno= 1;
while ($mysql_errno)
{
- --error 0,2002,2006
+ --error 0,2002,2003,2006
show status;
dec $counter;
diff --git a/mysql-test/lib/mtr_process.pl b/mysql-test/lib/mtr_process.pl
index 8fd900330da..566ccfe0778 100644
--- a/mysql-test/lib/mtr_process.pl
+++ b/mysql-test/lib/mtr_process.pl
@@ -474,12 +474,6 @@ sub mtr_kill_leftovers () {
}
}
}
- else
- {
- mtr_warning("Found non pid file $elem in $rundir")
- if -f "$rundir/$elem";
- next;
- }
}
closedir(RUNDIR);
@@ -886,15 +880,33 @@ sub check_expected_crash_and_restart($)
mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid");
$mysqld->{'pid'}= 0;
- # Check if crash expected and restart if it was
+ # Check if crash expected, and restart if it was
my $expect_file= "$::opt_vardir/tmp/" . "$mysqld->{'type'}" .
"$mysqld->{'idx'}" . ".expect";
- if ( -f $expect_file )
+ while ( 1 )
{
- mtr_verbose("Crash was expected, file $expect_file exists");
- mysqld_start($mysqld, $mysqld->{'start_opts'},
- $mysqld->{'start_slave_master_info'});
- unlink($expect_file);
+ if ( -f $expect_file )
+ {
+ mtr_verbose("Crash was expected, file $expect_file exists");
+ my $expect_file_handler;
+ open($expect_file_handler, "<$expect_file") or die;
+ my @expect_lines= <$expect_file_handler>;
+ close $expect_file_handler;
+ # look at most recent order by the test
+ my $expect_content= pop @expect_lines;
+ chomp $expect_content;
+ if ( $expect_content =~ /^wait/ )
+ {
+ mtr_verbose("Test asks that we wait before restart");
+ # Millisceond sleep emulated with select
+ select(undef, undef, undef, (0.1));
+ next;
+ }
+ unlink($expect_file);
+ mysqld_start($mysqld, $mysqld->{'start_opts'},
+ $mysqld->{'start_slave_master_info'});
+ }
+ last;
}
return;
@@ -914,8 +926,8 @@ sub check_expected_crash_and_restart($)
if ( -f $expect_file )
{
mtr_verbose("Crash was expected, file $expect_file exists");
- ndbmgmd_start($cluster);
unlink($expect_file);
+ ndbmgmd_start($cluster);
}
return;
}
@@ -933,9 +945,9 @@ sub check_expected_crash_and_restart($)
if ( -f $expect_file )
{
mtr_verbose("Crash was expected, file $expect_file exists");
+ unlink($expect_file);
ndbd_start($cluster, $ndbd->{'idx'},
$ndbd->{'start_extra_args'});
- unlink($expect_file);
}
return;
}
diff --git a/mysql-test/lib/mtr_report.pl b/mysql-test/lib/mtr_report.pl
index 73598fc1bac..310acd9b3ac 100644
--- a/mysql-test/lib/mtr_report.pl
+++ b/mysql-test/lib/mtr_report.pl
@@ -216,7 +216,7 @@ sub mtr_report_stats ($) {
# the "var/log/*.err" files. We save this info in "var/log/warnings"
# ----------------------------------------------------------------------
- if ( ! $::glob_use_running_server )
+ if ( ! $::glob_use_running_server && !$::opt_extern)
{
# Save and report if there was any fatal warnings/errors in err logs
diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl
index fb94cb80ee5..7e6094d6592 100755
--- a/mysql-test/mysql-test-run.pl
+++ b/mysql-test/mysql-test-run.pl
@@ -175,6 +175,7 @@ our $opt_big_test= 0;
our @opt_combination;
our @opt_extra_mysqld_opt;
+our @opt_extra_mysqltest_opt;
our $opt_compress;
our $opt_ssl;
@@ -590,6 +591,9 @@ sub command_line_setup () {
# Extra options used when starting mysqld
'mysqld=s' => \@opt_extra_mysqld_opt,
+ # Extra options used when starting mysqld
+ 'mysqltest=s' => \@opt_extra_mysqltest_opt,
+
# Run test on running server
'extern' => \$opt_extern,
'ndb-connectstring=s' => \$opt_ndbconnectstring,
@@ -954,7 +958,7 @@ sub command_line_setup () {
# --------------------------------------------------------------------------
if ($opt_extern)
{
- mtr_report("Disable instance manager when running with extern mysqld");
+ # mtr_report("Disable instance manager when running with extern mysqld");
$opt_skip_im= 1;
}
elsif ( $mysql_version_id < 50000 )
@@ -1323,19 +1327,6 @@ sub command_line_setup () {
$path_ndb_testrun_log= "$opt_vardir/log/ndb_testrun.log";
$path_snapshot= "$opt_tmpdir/snapshot_$opt_master_myport/";
-
- if ( $opt_valgrind and $opt_debug )
- {
- # When both --valgrind and --debug is selected, send
- # all output to the trace file, making it possible to
- # see the exact location where valgrind complains
- foreach my $mysqld (@{$master}, @{$slave})
- {
- my $sidx= $mysqld->{idx} ? "$mysqld->{idx}" : "";
- $mysqld->{path_myerr}=
- "$opt_vardir/log/" . $mysqld->{type} . "$sidx.trace";
- }
- }
}
#
@@ -2117,7 +2108,10 @@ sub environment_setup () {
$ENV{'MYSQL_FIX_SYSTEM_TABLES'}= $cmdline_mysql_fix_system_tables;
}
- $ENV{'MYSQL_FIX_PRIVILEGE_TABLES'}= $file_mysql_fix_privilege_tables;
+ if ( !$opt_extern )
+ {
+ $ENV{'MYSQL_FIX_PRIVILEGE_TABLES'}= $file_mysql_fix_privilege_tables;
+ }
# ----------------------------------------------------
# Setup env so childs can execute my_print_defaults
@@ -2417,6 +2411,25 @@ sub setup_vardir() {
{
unlink($name);
}
+ if ( $opt_valgrind and $opt_debug )
+ {
+ # When both --valgrind and --debug is selected, send
+ # all output to the trace file, making it possible to
+ # see the exact location where valgrind complains
+ foreach my $mysqld (@{$master}, @{$slave})
+ {
+ my $sidx= $mysqld->{idx} ? "$mysqld->{idx}" : "";
+ my $trace_name= "$opt_vardir/log/" . $mysqld->{type} . "$sidx.trace";
+ open(LOG, ">$mysqld->{path_myerr}") or die "Can't create $mysqld->{path_myerr}\n";
+ print LOG "
+NOTE: When running with --valgrind --debug the output from the .err file is
+stored together with the trace file to make it easier to find the exact
+position for valgrind errors.
+See trace file $trace_name.\n";
+ close(LOG);
+ $mysqld->{path_myerr}= $trace_name;
+ }
+ }
}
@@ -3102,6 +3115,7 @@ sub install_db ($$) {
mtr_add_arg($args, "--datadir=%s", $data_dir);
mtr_add_arg($args, "--loose-skip-innodb");
mtr_add_arg($args, "--loose-skip-ndbcluster");
+ mtr_add_arg($args, "--sync-frm=0");
mtr_add_arg($args, "--tmpdir=.");
mtr_add_arg($args, "--core-file");
@@ -3857,6 +3871,7 @@ sub mysqld_arguments ($$$$) {
mtr_add_arg($args, "%s--datadir=%s", $prefix,
$mysqld->{'path_myddir'});
+ mtr_add_arg($args, "--sync-frm=0"); # Faster test
if ( $mysql_version_id >= 50106 )
{
@@ -4843,6 +4858,11 @@ sub run_mysqltest ($) {
mtr_add_arg($args, "--skip-ssl");
}
+ foreach my $arg ( @opt_extra_mysqltest_opt )
+ {
+ mtr_add_arg($args, "%s", $arg);
+ }
+
# ----------------------------------------------------------------------
# If embedded server, we create server args to give mysqltest to pass on
# ----------------------------------------------------------------------
@@ -4959,12 +4979,7 @@ sub gdb_arguments {
{
# write init file for mysqld
mtr_tofile($gdb_init_file,
- "set args $str\n" .
- "break mysql_parse\n" .
- "commands 1\n" .
- "disable 1\n" .
- "end\n" .
- "run");
+ "set args $str\n");
}
if ( $opt_manual_gdb )
@@ -5024,11 +5039,7 @@ sub ddd_arguments {
# write init file for mysqld
mtr_tofile($gdb_init_file,
"file $$exe\n" .
- "set args $str\n" .
- "break mysql_parse\n" .
- "commands 1\n" .
- "disable 1\n" .
- "end");
+ "set args $str\n");
}
if ( $opt_manual_ddd )
diff --git a/mysql-test/r/alter_table.result b/mysql-test/r/alter_table.result
index 68b636829fc..b536d343615 100644
--- a/mysql-test/r/alter_table.result
+++ b/mysql-test/r/alter_table.result
@@ -298,7 +298,7 @@ t1 0 a 1 a A 3 NULL NULL YES BTREE
t1 0 a 2 b A 300 NULL NULL YES BTREE
t1 1 b 1 b A 100 NULL NULL YES BTREE
drop table t1;
-CREATE TABLE t1 (i int(10), index(i) );
+CREATE TABLE t1 (i int(10), index(i) ) ENGINE=MyISAM;
ALTER TABLE t1 DISABLE KEYS;
INSERT DELAYED INTO t1 VALUES(1),(2),(3);
ALTER TABLE t1 ENABLE KEYS;
diff --git a/mysql-test/r/binlog_unsafe.result b/mysql-test/r/binlog_unsafe.result
index 47284ed8bc3..8467a18aa6b 100644
--- a/mysql-test/r/binlog_unsafe.result
+++ b/mysql-test/r/binlog_unsafe.result
@@ -11,3 +11,4 @@ Level Warning
Code 1592
Message Statement is not safe to log in statement format.
DROP TABLE t1,t2,t3;
+DROP VIEW v1;
diff --git a/mysql-test/r/create.result b/mysql-test/r/create.result
index 0613c9ba488..bbae455c91a 100644
--- a/mysql-test/r/create.result
+++ b/mysql-test/r/create.result
@@ -1717,7 +1717,7 @@ t1 CREATE TABLE `t1` (
`TIME` bigint(7) NOT NULL DEFAULT '0',
`STATE` varchar(64) DEFAULT NULL,
`INFO` longtext
-) ENGINE=MyISAM DEFAULT CHARSET=utf8
+) ENGINE=MARIA DEFAULT CHARSET=utf8
drop table t1;
create temporary table t1 like information_schema.processlist;
show create table t1;
@@ -1731,7 +1731,7 @@ t1 CREATE TEMPORARY TABLE `t1` (
`TIME` bigint(7) NOT NULL DEFAULT '0',
`STATE` varchar(64) DEFAULT NULL,
`INFO` longtext
-) ENGINE=MyISAM DEFAULT CHARSET=utf8
+) ENGINE=MARIA DEFAULT CHARSET=utf8
drop table t1;
create table t1 like information_schema.character_sets;
show create table t1;
diff --git a/mysql-test/r/have_maria.require b/mysql-test/r/have_maria.require
new file mode 100644
index 00000000000..02988af6976
--- /dev/null
+++ b/mysql-test/r/have_maria.require
@@ -0,0 +1,2 @@
+Variable_name Value
+have_maria YES
diff --git a/mysql-test/r/innodb.result b/mysql-test/r/innodb.result
index d0c586de8d4..0ca4ffc943e 100644
--- a/mysql-test/r/innodb.result
+++ b/mysql-test/r/innodb.result
@@ -1416,7 +1416,7 @@ insert t2 select * from t1;
insert t3 select * from t1;
checksum table t1, t2, t3, t4 quick;
Table Checksum
-test.t1 2948697075
+test.t1 3442722830
test.t2 NULL
test.t3 NULL
test.t4 NULL
@@ -1424,17 +1424,17 @@ Warnings:
Error 1146 Table 'test.t4' doesn't exist
checksum table t1, t2, t3, t4;
Table Checksum
-test.t1 2948697075
-test.t2 2948697075
-test.t3 2948697075
+test.t1 3442722830
+test.t2 3442722830
+test.t3 3442722830
test.t4 NULL
Warnings:
Error 1146 Table 'test.t4' doesn't exist
checksum table t1, t2, t3, t4 extended;
Table Checksum
-test.t1 2948697075
-test.t2 2948697075
-test.t3 2948697075
+test.t1 3442722830
+test.t2 3442722830
+test.t3 3442722830
test.t4 NULL
Warnings:
Error 1146 Table 'test.t4' doesn't exist
diff --git a/mysql-test/r/maria-big.result b/mysql-test/r/maria-big.result
new file mode 100644
index 00000000000..8f7bbc74123
--- /dev/null
+++ b/mysql-test/r/maria-big.result
@@ -0,0 +1,63 @@
+set storage_engine=maria;
+affected rows: 0
+set global maria_log_file_size=4294967295;
+affected rows: 0
+drop table if exists t1, t2;
+affected rows: 0
+create table t1(a char(3));
+affected rows: 0
+insert into t1 values("abc");
+affected rows: 1
+insert into t1 select "def" from t1;
+affected rows: 1
+info: Records: 1 Duplicates: 0 Warnings: 0
+insert into t1 select "ghi" from t1;
+affected rows: 2
+info: Records: 2 Duplicates: 0 Warnings: 0
+insert into t1 select "jkl" from t1;
+affected rows: 4
+info: Records: 4 Duplicates: 0 Warnings: 0
+insert into t1 select "mno" from t1;
+affected rows: 8
+info: Records: 8 Duplicates: 0 Warnings: 0
+insert into t1 select "pqr" from t1;
+affected rows: 16
+info: Records: 16 Duplicates: 0 Warnings: 0
+insert into t1 select "stu" from t1;
+affected rows: 32
+info: Records: 32 Duplicates: 0 Warnings: 0
+insert into t1 select "vwx" from t1;
+affected rows: 64
+info: Records: 64 Duplicates: 0 Warnings: 0
+insert into t1 select "yza" from t1;
+affected rows: 128
+info: Records: 128 Duplicates: 0 Warnings: 0
+insert into t1 select "ceg" from t1;
+affected rows: 256
+info: Records: 256 Duplicates: 0 Warnings: 0
+insert into t1 select "ikm" from t1;
+affected rows: 512
+info: Records: 512 Duplicates: 0 Warnings: 0
+insert into t1 select "oqs" from t1;
+affected rows: 1024
+info: Records: 1024 Duplicates: 0 Warnings: 0
+select count(*) from t1;
+count(*)
+2048
+affected rows: 1
+insert into t1 select "uwy" from t1;
+affected rows: 2048
+info: Records: 2048 Duplicates: 0 Warnings: 0
+create table t2 select * from t1;
+affected rows: 4096
+info: Records: 4096 Duplicates: 0 Warnings: 0
+select count(*) from t1;
+count(*)
+4096
+affected rows: 1
+select count(*) from t2;
+count(*)
+4096
+affected rows: 1
+drop table t1, t2;
+affected rows: 0
diff --git a/mysql-test/r/maria-connect.result b/mysql-test/r/maria-connect.result
new file mode 100644
index 00000000000..93beb1f13c5
--- /dev/null
+++ b/mysql-test/r/maria-connect.result
@@ -0,0 +1,24 @@
+set global storage_engine=maria;
+set session storage_engine=maria;
+set global maria_log_file_size=4294967295;
+drop table if exists t1;
+SET SQL_WARNINGS=1;
+RESET MASTER;
+set binlog_format=statement;
+CREATE TABLE t1 (a int primary key);
+insert t1 values (1),(2),(3);
+insert t1 values (4),(2),(5);
+ERROR 23000: Duplicate entry '2' for key 'PRIMARY'
+select * from t1;
+a
+1
+2
+3
+4
+SHOW BINLOG EVENTS FROM 106;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 106 Query 1 204 use `test`; CREATE TABLE t1 (a int primary key)
+master-bin.000001 204 Query 1 295 use `test`; insert t1 values (1),(2),(3)
+master-bin.000001 295 Query 1 386 use `test`; insert t1 values (4),(2),(5)
+drop table t1;
+set binlog_format=default;
diff --git a/mysql-test/r/maria-purge.result b/mysql-test/r/maria-purge.result
new file mode 100644
index 00000000000..ade5426b327
--- /dev/null
+++ b/mysql-test/r/maria-purge.result
@@ -0,0 +1,92 @@
+set global storage_engine=maria;
+set session storage_engine=maria;
+set global maria_log_file_size=4294967296;
+drop table if exists t1,t2;
+SET SQL_WARNINGS=1;
+CREATE TABLE t1 (
+STRING_DATA char(255) default NULL
+);
+CREATE TABLE t2 (
+STRING_DATA char(255) default NULL
+);
+INSERT INTO t1 VALUES ('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA');
+INSERT INTO t1 VALUES ('DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD');
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+set global maria_log_file_size=16777216;
+set global maria_checkpoint_interval=30;
+SHOW ENGINE maria logs;
+Type Name Status
+maria master-data/maria_log.00000002 in use
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+set global maria_checkpoint_interval=30;
+SHOW ENGINE maria logs;
+Type Name Status
+maria master-data/maria_log.00000004 in use
+set global maria_log_file_size=16777216;
+select @@global.maria_log_file_size;
+@@global.maria_log_file_size
+16777216
+set global maria_checkpoint_interval=30;
+SHOW ENGINE maria logs;
+Type Name Status
+maria master-data/maria_log.00000004 in use
+set global maria_log_file_size=8388608;
+select @@global.maria_log_file_size;
+@@global.maria_log_file_size
+8388608
+set global maria_log_purge_type=at_flush;
+insert into t1 select * from t2;
+set global maria_checkpoint_interval=30;
+SHOW ENGINE maria logs;
+Type Name Status
+maria master-data/maria_log.00000004 free
+maria master-data/maria_log.00000005 free
+maria master-data/maria_log.00000006 free
+maria master-data/maria_log.00000007 free
+maria master-data/maria_log.00000008 in use
+flush logs;
+SHOW ENGINE maria logs;
+Type Name Status
+maria master-data/maria_log.00000008 in use
+set global maria_log_file_size=16777216;
+set global maria_log_purge_type=external;
+insert into t1 select * from t2;
+set global maria_checkpoint_interval=30;
+SHOW ENGINE maria logs;
+Type Name Status
+maria master-data/maria_log.00000008 free
+maria master-data/maria_log.00000009 in use
+flush logs;
+SHOW ENGINE maria logs;
+Type Name Status
+maria master-data/maria_log.00000008 free
+maria master-data/maria_log.00000009 in use
+set global maria_log_purge_type=immediate;
+insert into t1 select * from t2;
+set global maria_checkpoint_interval=30;
+SHOW ENGINE maria logs;
+Type Name Status
+maria master-data/maria_log.00000011 in use
+drop table t1, t2;
diff --git a/mysql-test/r/maria-recovery-bitmap.result b/mysql-test/r/maria-recovery-bitmap.result
new file mode 100644
index 00000000000..4eb1d2f491b
--- /dev/null
+++ b/mysql-test/r/maria-recovery-bitmap.result
@@ -0,0 +1,29 @@
+drop database if exists mysqltest;
+create database mysqltest;
+use mysqltest;
+* shut down mysqld, removed logs, restarted it
+use mysqltest;
+create table t1 (a varchar(10000)) engine=maria;
+* TEST of over-allocated bitmap not flushed by checkpoint
+insert into t1 values ("bbbbbbb");
+flush table t1;
+* copied t1 for comparison
+insert into t1 values ("bbbbbbb");
+delete from t1 limit 1;
+set session debug="+d,info,enter,exit,maria_over_alloc_bitmap";
+insert into t1 values ("aaaaaaaaa");
+set global maria_checkpoint_interval=1;
+SET SESSION debug="+d,maria_crash";
+* crashing mysqld intentionally
+set global maria_checkpoint_interval=1;
+ERROR HY000: Lost connection to MySQL server during query
+* recovery happens
+check table t1 extended;
+Table Op Msg_type Msg_text
+mysqltest.t1 check status OK
+* testing that checksum after recovery is as expected
+Checksum-check
+ok
+use mysqltest;
+drop database mysqltest_for_comparison;
+drop database mysqltest;
diff --git a/mysql-test/r/maria-recovery.result b/mysql-test/r/maria-recovery.result
new file mode 100644
index 00000000000..3a884dcae63
--- /dev/null
+++ b/mysql-test/r/maria-recovery.result
@@ -0,0 +1,219 @@
+set global maria_log_file_size=4294967295;
+drop database if exists mysqltest;
+create database mysqltest;
+use mysqltest;
+* shut down mysqld, removed logs, restarted it
+use mysqltest;
+create table t1 (a varchar(1000)) engine=maria;
+* TEST of REDO: see if recovery can reconstruct if we give it an old table
+* copied t1 for feeding_recovery
+insert into t1 values ("00000000");
+flush table t1;
+* copied t1 for comparison
+SET SESSION debug="+d,maria_flush_whole_log,maria_crash";
+* crashing mysqld intentionally
+set global maria_checkpoint_interval=1;
+ERROR HY000: Lost connection to MySQL server during query
+* copied t1 back for feeding_recovery
+* recovery happens
+check table t1 extended;
+Table Op Msg_type Msg_text
+mysqltest.t1 check status OK
+* testing that checksum after recovery is as expected
+Checksum-check
+ok
+* compared t1 to old version
+use mysqltest;
+select * from t1;
+a
+00000000
+* TEST of REDO+UNDO: normal recovery test (no moving tables under its feet)
+insert into t1 values ("00000000");
+flush table t1;
+* copied t1 for comparison
+lock tables t1 write;
+insert into t1 values ("aaaaaaaaa");
+SET SESSION debug="+d,maria_crash";
+* crashing mysqld intentionally
+set global maria_checkpoint_interval=1;
+ERROR HY000: Lost connection to MySQL server during query
+* recovery happens
+check table t1 extended;
+Table Op Msg_type Msg_text
+mysqltest.t1 check status OK
+* testing that checksum after recovery is as expected
+Checksum-check
+ok
+use mysqltest;
+select * from t1;
+a
+00000000
+00000000
+insert into t1 values ("00000000");
+flush table t1;
+* copied t1 for comparison
+lock tables t1 write;
+insert into t1 values ("aaaaaaaaa");
+SET SESSION debug="+d,maria_flush_whole_page_cache,maria_crash";
+* crashing mysqld intentionally
+set global maria_checkpoint_interval=1;
+ERROR HY000: Lost connection to MySQL server during query
+* recovery happens
+check table t1 extended;
+Table Op Msg_type Msg_text
+mysqltest.t1 check status OK
+* testing that checksum after recovery is as expected
+Checksum-check
+ok
+use mysqltest;
+select * from t1;
+a
+00000000
+00000000
+00000000
+insert into t1 values ("00000000");
+flush table t1;
+* copied t1 for comparison
+lock tables t1 write;
+insert into t1 values ("aaaaaaaaa");
+SET SESSION debug="+d,maria_flush_states,maria_flush_whole_log,maria_crash";
+* crashing mysqld intentionally
+set global maria_checkpoint_interval=1;
+ERROR HY000: Lost connection to MySQL server during query
+* recovery happens
+check table t1 extended;
+Table Op Msg_type Msg_text
+mysqltest.t1 check status OK
+* testing that checksum after recovery is as expected
+Checksum-check
+ok
+use mysqltest;
+select * from t1;
+a
+00000000
+00000000
+00000000
+00000000
+insert into t1 values ("00000000");
+flush table t1;
+* copied t1 for comparison
+lock tables t1 write;
+insert into t1 values ("aaaaaaaaa");
+SET SESSION debug="+d,maria_flush_whole_log,maria_crash";
+* crashing mysqld intentionally
+set global maria_checkpoint_interval=1;
+ERROR HY000: Lost connection to MySQL server during query
+* recovery happens
+check table t1 extended;
+Table Op Msg_type Msg_text
+mysqltest.t1 check status OK
+* testing that checksum after recovery is as expected
+Checksum-check
+ok
+use mysqltest;
+select * from t1;
+a
+00000000
+00000000
+00000000
+00000000
+00000000
+drop table t1;
+* TEST of two REDOs for same page in one REDO group
+* shut down mysqld, removed logs, restarted it
+use mysqltest;
+CREATE TABLE t1 (
+i int,
+b blob default NULL,
+c varchar(6000) default NULL
+) ENGINE=MARIA CHECKSUM=1;
+* copied t1 for feeding_recovery
+INSERT INTO t1 VALUES (1, REPEAT('a', 5000), REPEAT('b', 5000));
+UPDATE t1 SET i=3, b=CONCAT(b,'c') WHERE i=1;
+SELECT LENGTH(b) FROM t1 WHERE i=3;
+LENGTH(b)
+5001
+flush table t1;
+* copied t1 for comparison
+SET SESSION debug="+d,maria_flush_whole_log,maria_crash";
+* crashing mysqld intentionally
+set global maria_checkpoint_interval=1;
+ERROR HY000: Lost connection to MySQL server during query
+* copied t1 back for feeding_recovery
+* recovery happens
+check table t1 extended;
+Table Op Msg_type Msg_text
+mysqltest.t1 check status OK
+* testing that checksum after recovery is as expected
+Checksum-check
+ok
+use mysqltest;
+SELECT LENGTH(b) FROM t1 WHERE i=3;
+LENGTH(b)
+5001
+drop table t1;
+* TEST of INSERT vs state.auto_increment
+* shut down mysqld, removed logs, restarted it
+use mysqltest;
+CREATE TABLE t1 (
+i int auto_increment primary key,
+c varchar(6),
+key(c)
+) ENGINE=MARIA;
+insert into t1 values(null,"b");
+* copied t1 for feeding_recovery
+insert into t1 values(null,"a"), (null,"c"), (null,"d");
+delete from t1 where c="d";
+flush table t1;
+* copied t1 for comparison
+SET SESSION debug="+d,maria_flush_whole_log,maria_crash";
+* crashing mysqld intentionally
+set global maria_checkpoint_interval=1;
+ERROR HY000: Lost connection to MySQL server during query
+* copied t1 back for feeding_recovery
+* recovery happens
+check table t1 extended;
+Table Op Msg_type Msg_text
+mysqltest.t1 check status OK
+* testing that checksum after recovery is as expected
+Checksum-check
+ok
+use mysqltest;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL AUTO_INCREMENT,
+ `c` varchar(6) DEFAULT NULL,
+ PRIMARY KEY (`i`),
+ KEY `c` (`c`)
+) ENGINE=MARIA AUTO_INCREMENT=5 DEFAULT CHARSET=latin1
+* TEST of UPDATE vs state.auto_increment
+* copied t1 for feeding_recovery
+update t1 set i=15 where c="a";
+flush table t1;
+* copied t1 for comparison
+SET SESSION debug="+d,maria_flush_whole_log,maria_crash";
+* crashing mysqld intentionally
+set global maria_checkpoint_interval=1;
+ERROR HY000: Lost connection to MySQL server during query
+* copied t1 back for feeding_recovery
+* recovery happens
+check table t1 extended;
+Table Op Msg_type Msg_text
+mysqltest.t1 check status OK
+* testing that checksum after recovery is as expected
+Checksum-check
+ok
+use mysqltest;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL AUTO_INCREMENT,
+ `c` varchar(6) DEFAULT NULL,
+ PRIMARY KEY (`i`),
+ KEY `c` (`c`)
+) ENGINE=MARIA AUTO_INCREMENT=16 DEFAULT CHARSET=latin1
+drop table t1;
+drop database mysqltest_for_feeding_recovery;
+drop database mysqltest_for_comparison;
+drop database mysqltest;
diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result
new file mode 100644
index 00000000000..ca72c687ec9
--- /dev/null
+++ b/mysql-test/r/maria.result
@@ -0,0 +1,2081 @@
+set global storage_engine=maria;
+set session storage_engine=maria;
+set global maria_log_file_size=4294967295;
+drop table if exists t1,t2;
+SET SQL_WARNINGS=1;
+CREATE TABLE t1 (
+STRING_DATA char(255) default NULL,
+KEY string_data (STRING_DATA)
+);
+INSERT INTO t1 VALUES ('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA');
+INSERT INTO t1 VALUES ('DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD');
+INSERT INTO t1 VALUES ('FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF');
+INSERT INTO t1 VALUES ('FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG');
+INSERT INTO t1 VALUES ('HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH');
+INSERT INTO t1 VALUES ('WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW');
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a));
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+repair table t1;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+delete from t1 where (a & 1);
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+repair table t1;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+flush table t1;
+repair table t1;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+drop table t1;
+create table t1 (a int not null auto_increment, b int not null, primary key (a), index(b));
+insert into t1 (b) values (1),(2),(2),(2),(2);
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 0 PRIMARY 1 a A 5 NULL NULL BTREE
+t1 1 b 1 b A 1 NULL NULL BTREE
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status Table is already up to date
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 0 PRIMARY 1 a A 5 NULL NULL BTREE
+t1 1 b 1 b A 1 NULL NULL BTREE
+drop table t1;
+create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b));
+insert into t1 values (3,3,3),(1,1,1),(2,2,2),(4,4,4);
+explain select * from t1 order by a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort
+explain select * from t1 order by b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort
+explain select * from t1 order by c;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort
+explain select a from t1 order by a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 4 NULL 4 Using index
+explain select b from t1 order by b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL b 4 NULL 4 Using index
+explain select a,b from t1 order by b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort
+explain select a,b from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4
+explain select a,b,c from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4
+drop table t1;
+set autocommit=0;
+begin;
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1), (2), (3);
+LOCK TABLES t1 WRITE;
+INSERT INTO t1 VALUES (1), (2), (3);
+commit;
+set autocommit=1;
+UNLOCK TABLES;
+OPTIMIZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+DROP TABLE t1;
+create table t1 ( t1 char(255), key(t1(250)));
+insert t1 values ('137513751375137513751375137513751375137569516951695169516951695169516951695169');
+insert t1 values ('178417841784178417841784178417841784178403420342034203420342034203420342034203');
+insert t1 values ('213872387238723872387238723872387238723867376737673767376737673767376737673767');
+insert t1 values ('242624262426242624262426242624262426242607890789078907890789078907890789078907');
+insert t1 values ('256025602560256025602560256025602560256011701170117011701170117011701170117011');
+insert t1 values ('276027602760276027602760276027602760276001610161016101610161016101610161016101');
+insert t1 values ('281528152815281528152815281528152815281564956495649564956495649564956495649564');
+insert t1 values ('292129212921292129212921292129212921292102100210021002100210021002100210021002');
+insert t1 values ('380638063806380638063806380638063806380634483448344834483448344834483448344834');
+insert t1 values ('411641164116411641164116411641164116411616301630163016301630163016301630163016');
+insert t1 values ('420842084208420842084208420842084208420899889988998899889988998899889988998899');
+insert t1 values ('438443844384438443844384438443844384438482448244824482448244824482448244824482');
+insert t1 values ('443244324432443244324432443244324432443239613961396139613961396139613961396139');
+insert t1 values ('485448544854485448544854485448544854485477847784778477847784778477847784778477');
+insert t1 values ('494549454945494549454945494549454945494555275527552755275527552755275527552755');
+insert t1 values ('538647864786478647864786478647864786478688918891889188918891889188918891889188');
+insert t1 values ('565556555655565556555655565556555655565554845484548454845484548454845484548454');
+insert t1 values ('607860786078607860786078607860786078607856665666566656665666566656665666566656');
+insert t1 values ('640164016401640164016401640164016401640141274127412741274127412741274127412741');
+insert t1 values ('719471947194719471947194719471947194719478717871787178717871787178717871787178');
+insert t1 values ('742574257425742574257425742574257425742549604960496049604960496049604960496049');
+insert t1 values ('887088708870887088708870887088708870887035963596359635963596359635963596359635');
+insert t1 values ('917791779177917791779177917791779177917773857385738573857385738573857385738573');
+insert t1 values ('933293329332933293329332933293329332933278987898789878987898789878987898789878');
+insert t1 values ('963896389638963896389638963896389638963877807780778077807780778077807780778077');
+delete from t1 where t1>'2';
+insert t1 values ('70'), ('84'), ('60'), ('20'), ('76'), ('89'), ('49'), ('50'),
+('88'), ('61'), ('42'), ('98'), ('39'), ('30'), ('25'), ('66'), ('61'), ('48'),
+('80'), ('84'), ('98'), ('19'), ('91'), ('42'), ('47');
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+create table t1 (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8
+int, i9 int, i10 int, i11 int, i12 int, i13 int, i14 int, i15 int, i16 int, i17
+int, i18 int, i19 int, i20 int, i21 int, i22 int, i23 int, i24 int, i25 int,
+i26 int, i27 int, i28 int, i29 int, i30 int, i31 int, i32 int, i33 int, i34
+int, i35 int, i36 int, i37 int, i38 int, i39 int, i40 int, i41 int, i42 int,
+i43 int, i44 int, i45 int, i46 int, i47 int, i48 int, i49 int, i50 int, i51
+int, i52 int, i53 int, i54 int, i55 int, i56 int, i57 int, i58 int, i59 int,
+i60 int, i61 int, i62 int, i63 int, i64 int, i65 int, i66 int, i67 int, i68
+int, i69 int, i70 int, i71 int, i72 int, i73 int, i74 int, i75 int, i76 int,
+i77 int, i78 int, i79 int, i80 int, i81 int, i82 int, i83 int, i84 int, i85
+int, i86 int, i87 int, i88 int, i89 int, i90 int, i91 int, i92 int, i93 int,
+i94 int, i95 int, i96 int, i97 int, i98 int, i99 int, i100 int, i101 int, i102
+int, i103 int, i104 int, i105 int, i106 int, i107 int, i108 int, i109 int, i110
+int, i111 int, i112 int, i113 int, i114 int, i115 int, i116 int, i117 int, i118
+int, i119 int, i120 int, i121 int, i122 int, i123 int, i124 int, i125 int, i126
+int, i127 int, i128 int, i129 int, i130 int, i131 int, i132 int, i133 int, i134
+int, i135 int, i136 int, i137 int, i138 int, i139 int, i140 int, i141 int, i142
+int, i143 int, i144 int, i145 int, i146 int, i147 int, i148 int, i149 int, i150
+int, i151 int, i152 int, i153 int, i154 int, i155 int, i156 int, i157 int, i158
+int, i159 int, i160 int, i161 int, i162 int, i163 int, i164 int, i165 int, i166
+int, i167 int, i168 int, i169 int, i170 int, i171 int, i172 int, i173 int, i174
+int, i175 int, i176 int, i177 int, i178 int, i179 int, i180 int, i181 int, i182
+int, i183 int, i184 int, i185 int, i186 int, i187 int, i188 int, i189 int, i190
+int, i191 int, i192 int, i193 int, i194 int, i195 int, i196 int, i197 int, i198
+int, i199 int, i200 int, i201 int, i202 int, i203 int, i204 int, i205 int, i206
+int, i207 int, i208 int, i209 int, i210 int, i211 int, i212 int, i213 int, i214
+int, i215 int, i216 int, i217 int, i218 int, i219 int, i220 int, i221 int, i222
+int, i223 int, i224 int, i225 int, i226 int, i227 int, i228 int, i229 int, i230
+int, i231 int, i232 int, i233 int, i234 int, i235 int, i236 int, i237 int, i238
+int, i239 int, i240 int, i241 int, i242 int, i243 int, i244 int, i245 int, i246
+int, i247 int, i248 int, i249 int, i250 int, i251 int, i252 int, i253 int, i254
+int, i255 int, i256 int, i257 int, i258 int, i259 int, i260 int, i261 int, i262
+int, i263 int, i264 int, i265 int, i266 int, i267 int, i268 int, i269 int, i270
+int, i271 int, i272 int, i273 int, i274 int, i275 int, i276 int, i277 int, i278
+int, i279 int, i280 int, i281 int, i282 int, i283 int, i284 int, i285 int, i286
+int, i287 int, i288 int, i289 int, i290 int, i291 int, i292 int, i293 int, i294
+int, i295 int, i296 int, i297 int, i298 int, i299 int, i300 int, i301 int, i302
+int, i303 int, i304 int, i305 int, i306 int, i307 int, i308 int, i309 int, i310
+int, i311 int, i312 int, i313 int, i314 int, i315 int, i316 int, i317 int, i318
+int, i319 int, i320 int, i321 int, i322 int, i323 int, i324 int, i325 int, i326
+int, i327 int, i328 int, i329 int, i330 int, i331 int, i332 int, i333 int, i334
+int, i335 int, i336 int, i337 int, i338 int, i339 int, i340 int, i341 int, i342
+int, i343 int, i344 int, i345 int, i346 int, i347 int, i348 int, i349 int, i350
+int, i351 int, i352 int, i353 int, i354 int, i355 int, i356 int, i357 int, i358
+int, i359 int, i360 int, i361 int, i362 int, i363 int, i364 int, i365 int, i366
+int, i367 int, i368 int, i369 int, i370 int, i371 int, i372 int, i373 int, i374
+int, i375 int, i376 int, i377 int, i378 int, i379 int, i380 int, i381 int, i382
+int, i383 int, i384 int, i385 int, i386 int, i387 int, i388 int, i389 int, i390
+int, i391 int, i392 int, i393 int, i394 int, i395 int, i396 int, i397 int, i398
+int, i399 int, i400 int, i401 int, i402 int, i403 int, i404 int, i405 int, i406
+int, i407 int, i408 int, i409 int, i410 int, i411 int, i412 int, i413 int, i414
+int, i415 int, i416 int, i417 int, i418 int, i419 int, i420 int, i421 int, i422
+int, i423 int, i424 int, i425 int, i426 int, i427 int, i428 int, i429 int, i430
+int, i431 int, i432 int, i433 int, i434 int, i435 int, i436 int, i437 int, i438
+int, i439 int, i440 int, i441 int, i442 int, i443 int, i444 int, i445 int, i446
+int, i447 int, i448 int, i449 int, i450 int, i451 int, i452 int, i453 int, i454
+int, i455 int, i456 int, i457 int, i458 int, i459 int, i460 int, i461 int, i462
+int, i463 int, i464 int, i465 int, i466 int, i467 int, i468 int, i469 int, i470
+int, i471 int, i472 int, i473 int, i474 int, i475 int, i476 int, i477 int, i478
+int, i479 int, i480 int, i481 int, i482 int, i483 int, i484 int, i485 int, i486
+int, i487 int, i488 int, i489 int, i490 int, i491 int, i492 int, i493 int, i494
+int, i495 int, i496 int, i497 int, i498 int, i499 int, i500 int, i501 int, i502
+int, i503 int, i504 int, i505 int, i506 int, i507 int, i508 int, i509 int, i510
+int, i511 int, i512 int, i513 int, i514 int, i515 int, i516 int, i517 int, i518
+int, i519 int, i520 int, i521 int, i522 int, i523 int, i524 int, i525 int, i526
+int, i527 int, i528 int, i529 int, i530 int, i531 int, i532 int, i533 int, i534
+int, i535 int, i536 int, i537 int, i538 int, i539 int, i540 int, i541 int, i542
+int, i543 int, i544 int, i545 int, i546 int, i547 int, i548 int, i549 int, i550
+int, i551 int, i552 int, i553 int, i554 int, i555 int, i556 int, i557 int, i558
+int, i559 int, i560 int, i561 int, i562 int, i563 int, i564 int, i565 int, i566
+int, i567 int, i568 int, i569 int, i570 int, i571 int, i572 int, i573 int, i574
+int, i575 int, i576 int, i577 int, i578 int, i579 int, i580 int, i581 int, i582
+int, i583 int, i584 int, i585 int, i586 int, i587 int, i588 int, i589 int, i590
+int, i591 int, i592 int, i593 int, i594 int, i595 int, i596 int, i597 int, i598
+int, i599 int, i600 int, i601 int, i602 int, i603 int, i604 int, i605 int, i606
+int, i607 int, i608 int, i609 int, i610 int, i611 int, i612 int, i613 int, i614
+int, i615 int, i616 int, i617 int, i618 int, i619 int, i620 int, i621 int, i622
+int, i623 int, i624 int, i625 int, i626 int, i627 int, i628 int, i629 int, i630
+int, i631 int, i632 int, i633 int, i634 int, i635 int, i636 int, i637 int, i638
+int, i639 int, i640 int, i641 int, i642 int, i643 int, i644 int, i645 int, i646
+int, i647 int, i648 int, i649 int, i650 int, i651 int, i652 int, i653 int, i654
+int, i655 int, i656 int, i657 int, i658 int, i659 int, i660 int, i661 int, i662
+int, i663 int, i664 int, i665 int, i666 int, i667 int, i668 int, i669 int, i670
+int, i671 int, i672 int, i673 int, i674 int, i675 int, i676 int, i677 int, i678
+int, i679 int, i680 int, i681 int, i682 int, i683 int, i684 int, i685 int, i686
+int, i687 int, i688 int, i689 int, i690 int, i691 int, i692 int, i693 int, i694
+int, i695 int, i696 int, i697 int, i698 int, i699 int, i700 int, i701 int, i702
+int, i703 int, i704 int, i705 int, i706 int, i707 int, i708 int, i709 int, i710
+int, i711 int, i712 int, i713 int, i714 int, i715 int, i716 int, i717 int, i718
+int, i719 int, i720 int, i721 int, i722 int, i723 int, i724 int, i725 int, i726
+int, i727 int, i728 int, i729 int, i730 int, i731 int, i732 int, i733 int, i734
+int, i735 int, i736 int, i737 int, i738 int, i739 int, i740 int, i741 int, i742
+int, i743 int, i744 int, i745 int, i746 int, i747 int, i748 int, i749 int, i750
+int, i751 int, i752 int, i753 int, i754 int, i755 int, i756 int, i757 int, i758
+int, i759 int, i760 int, i761 int, i762 int, i763 int, i764 int, i765 int, i766
+int, i767 int, i768 int, i769 int, i770 int, i771 int, i772 int, i773 int, i774
+int, i775 int, i776 int, i777 int, i778 int, i779 int, i780 int, i781 int, i782
+int, i783 int, i784 int, i785 int, i786 int, i787 int, i788 int, i789 int, i790
+int, i791 int, i792 int, i793 int, i794 int, i795 int, i796 int, i797 int, i798
+int, i799 int, i800 int, i801 int, i802 int, i803 int, i804 int, i805 int, i806
+int, i807 int, i808 int, i809 int, i810 int, i811 int, i812 int, i813 int, i814
+int, i815 int, i816 int, i817 int, i818 int, i819 int, i820 int, i821 int, i822
+int, i823 int, i824 int, i825 int, i826 int, i827 int, i828 int, i829 int, i830
+int, i831 int, i832 int, i833 int, i834 int, i835 int, i836 int, i837 int, i838
+int, i839 int, i840 int, i841 int, i842 int, i843 int, i844 int, i845 int, i846
+int, i847 int, i848 int, i849 int, i850 int, i851 int, i852 int, i853 int, i854
+int, i855 int, i856 int, i857 int, i858 int, i859 int, i860 int, i861 int, i862
+int, i863 int, i864 int, i865 int, i866 int, i867 int, i868 int, i869 int, i870
+int, i871 int, i872 int, i873 int, i874 int, i875 int, i876 int, i877 int, i878
+int, i879 int, i880 int, i881 int, i882 int, i883 int, i884 int, i885 int, i886
+int, i887 int, i888 int, i889 int, i890 int, i891 int, i892 int, i893 int, i894
+int, i895 int, i896 int, i897 int, i898 int, i899 int, i900 int, i901 int, i902
+int, i903 int, i904 int, i905 int, i906 int, i907 int, i908 int, i909 int, i910
+int, i911 int, i912 int, i913 int, i914 int, i915 int, i916 int, i917 int, i918
+int, i919 int, i920 int, i921 int, i922 int, i923 int, i924 int, i925 int, i926
+int, i927 int, i928 int, i929 int, i930 int, i931 int, i932 int, i933 int, i934
+int, i935 int, i936 int, i937 int, i938 int, i939 int, i940 int, i941 int, i942
+int, i943 int, i944 int, i945 int, i946 int, i947 int, i948 int, i949 int, i950
+int, i951 int, i952 int, i953 int, i954 int, i955 int, i956 int, i957 int, i958
+int, i959 int, i960 int, i961 int, i962 int, i963 int, i964 int, i965 int, i966
+int, i967 int, i968 int, i969 int, i970 int, i971 int, i972 int, i973 int, i974
+int, i975 int, i976 int, i977 int, i978 int, i979 int, i980 int, i981 int, i982
+int, i983 int, i984 int, i985 int, i986 int, i987 int, i988 int, i989 int, i990
+int, i991 int, i992 int, i993 int, i994 int, i995 int, i996 int, i997 int, i998
+int, i999 int, i1000 int, b blob) row_format=dynamic;
+insert into t1 values (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "Sergei");
+update t1 set b=repeat('a',256);
+update t1 set i1=0, i2=0, i3=0, i4=0, i5=0, i6=0, i7=0;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+delete from t1 where i8=1;
+select i1,i2 from t1;
+i1 i2
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+CREATE TABLE `t1` (
+`post_id` mediumint(8) unsigned NOT NULL auto_increment,
+`topic_id` mediumint(8) unsigned NOT NULL default '0',
+`post_time` datetime NOT NULL default '0000-00-00 00:00:00',
+`post_text` text NOT NULL,
+`icon_url` varchar(10) NOT NULL default '',
+`sign` tinyint(1) unsigned NOT NULL default '0',
+`post_edit` varchar(150) NOT NULL default '',
+`poster_login` varchar(35) NOT NULL default '',
+`ip` varchar(15) NOT NULL default '',
+PRIMARY KEY (`post_id`),
+KEY `post_time` (`post_time`),
+KEY `ip` (`ip`),
+KEY `poster_login` (`poster_login`),
+KEY `topic_id` (`topic_id`),
+FULLTEXT KEY `post_text` (`post_text`)
+) TRANSACTIONAL=0;
+INSERT INTO t1 (post_text) VALUES ('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test');
+REPAIR TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255), KEY t1 (a, b, c, d, e));
+ERROR 42000: Specified key was too long; max key length is 1112 bytes
+CREATE TABLE t1 (a varchar(32000), unique key(a));
+ERROR 42000: Specified key was too long; max key length is 1112 bytes
+CREATE TABLE t1 (a varchar(1), b varchar(1), key (a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b));
+ERROR 42000: Too many key parts specified; max 16 parts allowed
+CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255));
+ALTER TABLE t1 ADD INDEX t1 (a, b, c, d, e);
+ERROR 42000: Specified key was too long; max key length is 1112 bytes
+DROP TABLE t1;
+CREATE TABLE t1 (a int not null, b int, c int, key(b), key(c), key(a,b), key(c,a));
+INSERT into t1 values (0, null, 0), (0, null, 1), (0, null, 2), (0, null,3), (1,1,4);
+create table t2 (a int not null, b int, c int, key(b), key(c), key(a));
+INSERT into t2 values (1,1,1), (2,2,2);
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 b 1 b A 5 NULL NULL YES BTREE
+t1 1 c 1 c A 5 NULL NULL YES BTREE
+t1 1 a 1 a A 1 NULL NULL BTREE
+t1 1 a 2 b A 5 NULL NULL YES BTREE
+t1 1 c_2 1 c A 5 NULL NULL YES BTREE
+t1 1 c_2 2 a A 5 NULL NULL BTREE
+explain select * from t1,t2 where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL a NULL NULL NULL 2
+1 SIMPLE t1 ref a a 4 test.t2.a 3
+explain select * from t1,t2 force index(a) where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL a NULL NULL NULL 2
+1 SIMPLE t1 ref a a 4 test.t2.a 3
+explain select * from t1 force index(a),t2 force index(a) where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL a NULL NULL NULL 2
+1 SIMPLE t1 ref a a 4 test.t2.a 3
+explain select * from t1,t2 where t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL b NULL NULL NULL 2
+1 SIMPLE t1 ref b b 5 test.t2.b 1 Using where
+explain select * from t1,t2 force index(c) where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 2
+1 SIMPLE t1 ref a a 4 test.t2.a 3
+explain select * from t1 where a=0 or a=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 4 NULL 4 Using where
+explain select * from t1 force index (a) where a=0 or a=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 4 NULL 4 Using where
+explain select * from t1 where c=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref c,c_2 c 5 const 1 Using where
+explain select * from t1 use index() where c=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using where
+drop table t1,t2;
+create table t1 (a int not null auto_increment primary key, b varchar(255));
+insert into t1 (b) values (repeat('a',100)),(repeat('b',100)),(repeat('c',100));
+update t1 set b=repeat(left(b,1),200) where a=1;
+delete from t1 where (a & 1)= 0;
+update t1 set b=repeat('e',200) where a=1;
+flush tables;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+update t1 set b=repeat(left(b,1),255) where a between 1 and 5;
+update t1 set b=repeat(left(b,1),10) where a between 32 and 43;
+update t1 set b=repeat(left(b,1),2) where a between 64 and 66;
+update t1 set b=repeat(left(b,1),65) where a between 67 and 70;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+insert into t1 (b) values (repeat('z',100));
+update t1 set b="test" where left(b,1) > 'n';
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+create table t1 ( a text not null, key a (a(20)));
+insert into t1 values ('aaa '),('aaa'),('aa');
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+repair table t1;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+select concat(a,'.') from t1 where a='aaa';
+concat(a,'.')
+aaa .
+aaa.
+select concat(a,'.') from t1 where binary a='aaa';
+concat(a,'.')
+aaa.
+update t1 set a='bbb' where a='aaa';
+select concat(a,'.') from t1;
+concat(a,'.')
+bbb.
+bbb.
+aa.
+drop table t1;
+create table t1 ( a text not null, key a (a(20))) row_format=dynamic;
+insert into t1 values ('aaa '),('aaa'),('aa');
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+repair table t1;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+select concat(a,'.') from t1 where a='aaa';
+concat(a,'.')
+aaa .
+aaa.
+select concat(a,'.') from t1 where binary a='aaa';
+concat(a,'.')
+aaa.
+update t1 set a='bbb' where a='aaa';
+select concat(a,'.') from t1;
+concat(a,'.')
+bbb.
+bbb.
+aa.
+drop table t1;
+create table t1(a text not null, b text not null, c text not null, index (a(10),b(10),c(10)));
+insert into t1 values('807780', '477', '165');
+insert into t1 values('807780', '477', '162');
+insert into t1 values('807780', '472', '162');
+select * from t1 where a='807780' and b='477' and c='165';
+a b c
+807780 477 165
+drop table t1;
+CREATE TABLE t1 (a varchar(150) NOT NULL, KEY (a));
+INSERT t1 VALUES ("can \tcan");
+INSERT t1 VALUES ("can can");
+INSERT t1 VALUES ("can");
+SELECT * FROM t1;
+a
+can can
+can
+can can
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+DROP TABLE t1;
+create table t1 (a blob);
+insert into t1 values('a '),('a');
+select concat(a,'.') from t1 where a='a';
+concat(a,'.')
+a.
+select concat(a,'.') from t1 where a='a ';
+concat(a,'.')
+a .
+alter table t1 add key(a(2));
+select concat(a,'.') from t1 where a='a';
+concat(a,'.')
+a.
+select concat(a,'.') from t1 where a='a ';
+concat(a,'.')
+a .
+drop table t1;
+create table t1 (a int not null auto_increment primary key, b text not null, unique b (b(20)));
+insert into t1 (b) values ('a'),('b'),('c');
+select concat(b,'.') from t1;
+concat(b,'.')
+a.
+b.
+c.
+update t1 set b='b ' where a=2;
+update t1 set b='b ' where a > 1;
+ERROR 23000: Duplicate entry 'b ' for key 'b'
+insert into t1 (b) values ('b');
+ERROR 23000: Duplicate entry 'b' for key 'b'
+select * from t1;
+a b
+1 a
+2 b
+3 c
+delete from t1 where b='b';
+select a,concat(b,'.') from t1;
+a concat(b,'.')
+1 a.
+3 c.
+drop table t1;
+create table t1 (a int not null);
+create table t2 (a int not null, primary key (a));
+insert into t1 values (1);
+insert into t2 values (1),(2);
+select sql_big_result distinct t1.a from t1,t2 order by t2.a;
+a
+1
+select distinct t1.a from t1,t2 order by t2.a;
+a
+1
+select sql_big_result distinct t1.a from t1,t2;
+a
+1
+explain select sql_big_result distinct t1.a from t1,t2 order by t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 system NULL NULL NULL NULL 1 Using temporary
+1 SIMPLE t2 index NULL PRIMARY 4 NULL 2 Using index; Distinct
+explain select distinct t1.a from t1,t2 order by t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 system NULL NULL NULL NULL 1 Using temporary
+1 SIMPLE t2 index NULL PRIMARY 4 NULL 2 Using index; Distinct
+drop table t1,t2;
+create table t1 (
+c1 varchar(32),
+key (c1)
+);
+alter table t1 disable keys;
+insert into t1 values ('a'), ('b');
+select c1 from t1 order by c1 limit 1;
+c1
+a
+drop table t1;
+create table t1 (a int not null, primary key(a)) ROW_FORMAT=FIXED;
+create table t2 (a int not null, b int not null, primary key(a,b)) ROW_FORMAT=FIXED;
+insert into t1 values (1),(2),(3),(4),(5),(6);
+insert into t2 values (1,1),(2,1);
+set autocommit=0;
+begin;
+lock tables t1 read local, t2 read local;
+select straight_join * from t1,t2 force index (primary) where t1.a=t2.a;
+a a b
+1 1 1
+2 2 1
+insert into t2 values(2,0);
+commit;
+select straight_join * from t1,t2 force index (primary) where t1.a=t2.a;
+a a b
+1 1 1
+2 2 1
+drop table t1,t2;
+CREATE TABLE t1 (c1 varchar(250) NOT NULL) ROW_FORMAT=DYNAMIC;
+CREATE TABLE t2 (c1 varchar(250) NOT NULL, PRIMARY KEY (c1)) ROW_FORMAT=DYNAMIC;
+INSERT INTO t1 VALUES ('test000001'), ('test000002'), ('test000003');
+INSERT INTO t2 VALUES ('test000002'), ('test000003'), ('test000004');
+LOCK TABLES t1 READ LOCAL, t2 READ LOCAL;
+SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2
+WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1;
+t1c1 t2c1
+INSERT INTO t2 VALUES ('test000001'), ('test000005');
+SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2
+WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1;
+t1c1 t2c1
+DROP TABLE t1,t2;
+CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`));
+Got one of the listed errors
+create table t1 (a int, b varchar(200), c text not null) checksum=1;
+create table t2 (a int, b varchar(200), c text not null) checksum=0;
+insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, "");
+insert t2 select * from t1;
+checksum table t1, t2, t3 quick;
+Table Checksum
+test.t1 3442722830
+test.t2 NULL
+test.t3 NULL
+Warnings:
+Error 1146 Table 'test.t3' doesn't exist
+checksum table t1, t2, t3;
+Table Checksum
+test.t1 3442722830
+test.t2 3442722830
+test.t3 NULL
+Warnings:
+Error 1146 Table 'test.t3' doesn't exist
+checksum table t1, t2, t3 extended;
+Table Checksum
+test.t1 3442722830
+test.t2 3442722830
+test.t3 NULL
+Warnings:
+Error 1146 Table 'test.t3' doesn't exist
+drop table t1,t2;
+create table t1 (a int, key (a));
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A NULL NULL NULL YES BTREE
+alter table t1 disable keys;
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A NULL NULL NULL YES BTREE disabled
+create table t2 (a int);
+set @@rand_seed1=31415926,@@rand_seed2=2718281828;
+insert t1 select * from t2;
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A NULL NULL NULL YES BTREE disabled
+alter table t1 enable keys;
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 1000 NULL NULL YES BTREE
+alter table t1 engine=heap;
+alter table t1 disable keys;
+Warnings:
+Note 1031 Table storage engine for 't1' doesn't have this option
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a NULL 500 NULL NULL YES HASH
+drop table t1,t2;
+create table t1 ( a tinytext, b char(1), index idx (a(1),b) );
+insert into t1 values (null,''), (null,'');
+explain select count(*) from t1 where a is null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref idx idx 4 const 1 Using where
+select count(*) from t1 where a is null;
+count(*)
+2
+drop table t1;
+create table t1 (c1 int, c2 varchar(4) not null default '',
+key(c2(3))) default charset=utf8;
+insert into t1 values (1,'A'), (2, 'B'), (3, 'A');
+update t1 set c2='A B' where c1=2;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+create table t1 (c1 int);
+insert into t1 values (1),(2),(3),(4);
+checksum table t1;
+Table Checksum
+test.t1 149057747
+delete from t1 where c1 = 1;
+create table t2 as select * from t1;
+checksum table t1;
+Table Checksum
+test.t1 984116287
+checksum table t2;
+Table Checksum
+test.t2 984116287
+drop table t1, t2;
+CREATE TABLE t1 (
+twenty int(4),
+hundred int(4) NOT NULL
+) CHECKSUM=1;
+INSERT INTO t1 VALUES (11,91);
+check table t1 extended;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+checksum table t1;
+Table Checksum
+test.t1 3235292310
+checksum table t1 extended;
+Table Checksum
+test.t1 3235292310
+alter table t1 row_format=fixed;
+checksum table t1;
+Table Checksum
+test.t1 3235292310
+alter table t1 row_format=dynamic;
+checksum table t1;
+Table Checksum
+test.t1 4183529555
+alter table t1 engine=myisam;
+checksum table t1;
+Table Checksum
+test.t1 4183529555
+drop table t1;
+show variables like 'maria_stats_method';
+Variable_name Value
+maria_stats_method nulls_unequal
+create table t1 (a int, key(a));
+insert into t1 values (0),(1),(2),(3),(4);
+insert into t1 select NULL from t1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 10 NULL NULL YES BTREE
+insert into t1 values (11);
+delete from t1 where a=11;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 10 NULL NULL YES BTREE
+set maria_stats_method=nulls_equal;
+show variables like 'maria_stats_method';
+Variable_name Value
+maria_stats_method nulls_equal
+insert into t1 values (11);
+delete from t1 where a=11;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 5 NULL NULL YES BTREE
+insert into t1 values (11);
+delete from t1 where a=11;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 5 NULL NULL YES BTREE
+set maria_stats_method=DEFAULT;
+show variables like 'maria_stats_method';
+Variable_name Value
+maria_stats_method nulls_unequal
+insert into t1 values (11);
+delete from t1 where a=11;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 10 NULL NULL YES BTREE
+insert into t1 values (11);
+delete from t1 where a=11;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 10 NULL NULL YES BTREE
+drop table t1;
+set maria_stats_method=nulls_ignored;
+show variables like 'maria_stats_method';
+Variable_name Value
+maria_stats_method nulls_ignored
+create table t1 (
+a char(3), b char(4), c char(5), d char(6),
+key(a,b,c,d)
+);
+insert into t1 values ('bcd','def1', NULL, 'zz');
+insert into t1 values ('bcd','def2', NULL, 'zz');
+insert into t1 values ('bce','def1', 'yuu', NULL);
+insert into t1 values ('bce','def2', NULL, 'quux');
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 2 NULL NULL YES BTREE
+t1 1 a 2 b A 4 NULL NULL YES BTREE
+t1 1 a 3 c A 4 NULL NULL YES BTREE
+t1 1 a 4 d A 4 NULL NULL YES BTREE
+delete from t1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 0 NULL NULL YES BTREE
+t1 1 a 2 b A 0 NULL NULL YES BTREE
+t1 1 a 3 c A 0 NULL NULL YES BTREE
+t1 1 a 4 d A 0 NULL NULL YES BTREE
+set maria_stats_method=DEFAULT;
+drop table t1;
+create table t1(
+cip INT NOT NULL,
+time TIME NOT NULL,
+score INT NOT NULL DEFAULT 0,
+bob TINYBLOB
+);
+insert into t1 (cip, time) VALUES (1, '00:01'), (2, '00:02'), (3,'00:03');
+insert into t1 (cip, bob, time) VALUES (4, 'a', '00:04'), (5, 'b', '00:05'),
+(6, 'c', '00:06');
+select * from t1 where bob is null and cip=1;
+cip time score bob
+1 00:01:00 0 NULL
+create index bug on t1 (bob(22), cip, time);
+select * from t1 where bob is null and cip=1;
+cip time score bob
+1 00:01:00 0 NULL
+drop table t1;
+create table t1 (
+id1 int not null auto_increment,
+id2 int not null default '0',
+t text not null,
+primary key (id1),
+key x (id2, t(32))
+) engine=maria;
+insert into t1 (id2, t) values
+(10, 'abc'), (10, 'abc'), (10, 'abc'),
+(20, 'abc'), (20, 'abc'), (20, 'def'),
+(10, 'abc'), (10, 'abc');
+select count(*) from t1 where id2 = 10;
+count(*)
+5
+select count(id1) from t1 where id2 = 10;
+count(id1)
+5
+drop table t1;
+CREATE TABLE t1(a TINYINT, KEY(a));
+INSERT INTO t1 VALUES(1);
+SELECT MAX(a) FROM t1 IGNORE INDEX(a);
+MAX(a)
+1
+ALTER TABLE t1 DISABLE KEYS;
+SELECT MAX(a) FROM t1;
+MAX(a)
+1
+SELECT MAX(a) FROM t1 IGNORE INDEX(a);
+MAX(a)
+1
+DROP TABLE t1;
+CREATE TABLE t1(a CHAR(9), b VARCHAR(7));
+INSERT INTO t1(a) VALUES('xxxxxxxxx'),('xxxxxxxxx');
+UPDATE t1 AS ta1,t1 AS ta2 SET ta1.b='aaaaaa',ta2.b='bbbbbb';
+SELECT * FROM t1;
+a b
+xxxxxxxxx bbbbbb
+xxxxxxxxx bbbbbb
+DROP TABLE t1;
+SET @@maria_repair_threads=2;
+SHOW VARIABLES LIKE 'maria_repair%';
+Variable_name Value
+maria_repair_threads 2
+CREATE TABLE t1 (
+`_id` int(11) NOT NULL default '0',
+`url` text,
+`email` text,
+`description` text,
+`loverlap` int(11) default NULL,
+`roverlap` int(11) default NULL,
+`lneighbor_id` int(11) default NULL,
+`rneighbor_id` int(11) default NULL,
+`length_` int(11) default NULL,
+`sequence` mediumtext,
+`name` text,
+`_obj_class` text NOT NULL,
+PRIMARY KEY (`_id`),
+UNIQUE KEY `sequence_name_index` (`name`(50)),
+KEY (`length_`)
+) DEFAULT CHARSET=latin1;
+INSERT INTO t1 VALUES
+(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''),
+(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''),
+(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''),
+(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''),
+(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''),
+(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''),
+(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''),
+(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''),
+(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9','');
+SELECT _id FROM t1;
+_id
+1
+2
+3
+4
+5
+6
+7
+8
+9
+DELETE FROM t1 WHERE _id < 8;
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 MARIA 10 Paged 2 # # # # 0 # # # # # #
+CHECK TABLE t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+OPTIMIZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+CHECK TABLE t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 MARIA 10 Paged 2 # # # # 0 # # # # # #
+SELECT _id FROM t1;
+_id
+8
+9
+DROP TABLE t1;
+CREATE TABLE t1 (
+`_id` int(11) NOT NULL default '0',
+`url` text,
+`email` text,
+`description` text,
+`loverlap` int(11) default NULL,
+`roverlap` int(11) default NULL,
+`lneighbor_id` int(11) default NULL,
+`rneighbor_id` int(11) default NULL,
+`length_` int(11) default NULL,
+`sequence` mediumtext,
+`name` text,
+`_obj_class` text NOT NULL,
+PRIMARY KEY (`_id`),
+UNIQUE KEY `sequence_name_index` (`name`(50)),
+KEY (`length_`)
+) DEFAULT CHARSET=latin1;
+INSERT INTO t1 VALUES
+(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''),
+(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''),
+(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''),
+(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''),
+(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''),
+(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''),
+(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''),
+(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''),
+(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9','');
+SELECT _id FROM t1;
+_id
+1
+2
+3
+4
+5
+6
+7
+8
+9
+DELETE FROM t1 WHERE _id < 8;
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 MARIA 10 Paged 2 # # # # 0 # # # # # #
+CHECK TABLE t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+REPAIR TABLE t1 QUICK;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+CHECK TABLE t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 MARIA 10 Paged 2 # # # # 0 # # # # # #
+SELECT _id FROM t1;
+_id
+8
+9
+DROP TABLE t1;
+SET @@maria_repair_threads=1;
+SHOW VARIABLES LIKE 'maria_repair%';
+Variable_name Value
+maria_repair_threads 1
+drop table if exists t1,t2,t3;
+--- Testing varchar ---
+--- Testing varchar ---
+create table t1 (v varchar(10), c char(10), t text);
+insert into t1 values('+ ', '+ ', '+ ');
+set @a=repeat(' ',20);
+insert into t1 values (concat('+',@a),concat('+',@a),concat('+',@a));
+Warnings:
+Note 1265 Data truncated for column 'v' at row 1
+select concat('*',v,'*',c,'*',t,'*') from t1;
+concat('*',v,'*',c,'*',t,'*')
+*+ *+*+ *
+*+ *+*+ *
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(10) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+create table t2 like t1;
+show create table t2;
+Table Create Table
+t2 CREATE TABLE `t2` (
+ `v` varchar(10) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+create table t3 select * from t1;
+show create table t3;
+Table Create Table
+t3 CREATE TABLE `t3` (
+ `v` varchar(10) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+alter table t1 modify c varchar(10);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(10) DEFAULT NULL,
+ `c` varchar(10) DEFAULT NULL,
+ `t` text
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+alter table t1 modify v char(10);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` char(10) DEFAULT NULL,
+ `c` varchar(10) DEFAULT NULL,
+ `t` text
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+alter table t1 modify t varchar(10);
+Warnings:
+Note 1265 Data truncated for column 't' at row 2
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` char(10) DEFAULT NULL,
+ `c` varchar(10) DEFAULT NULL,
+ `t` varchar(10) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+select concat('*',v,'*',c,'*',t,'*') from t1;
+concat('*',v,'*',c,'*',t,'*')
+*+*+*+ *
+*+*+*+ *
+drop table t1,t2,t3;
+create table t1 (v varchar(10), c char(10), t text, key(v), key(c), key(t(10)));
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(10) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text,
+ KEY `v` (`v`),
+ KEY `c` (`c`),
+ KEY `t` (`t`(10))
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+select count(*) from t1;
+count(*)
+270
+insert into t1 values(concat('a',char(1)),concat('a',char(1)),concat('a',char(1)));
+select count(*) from t1 where v='a';
+count(*)
+10
+select count(*) from t1 where c='a';
+count(*)
+10
+select count(*) from t1 where t='a';
+count(*)
+10
+select count(*) from t1 where v='a ';
+count(*)
+10
+select count(*) from t1 where c='a ';
+count(*)
+10
+select count(*) from t1 where t='a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n';
+count(*)
+10
+select count(*) from t1 where v like 'a%';
+count(*)
+11
+select count(*) from t1 where c like 'a%';
+count(*)
+11
+select count(*) from t1 where t like 'a%';
+count(*)
+11
+select count(*) from t1 where v like 'a %';
+count(*)
+9
+explain select count(*) from t1 where v='a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 13 const # Using where; Using index
+explain select count(*) from t1 where c='a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref c c 11 const # Using where; Using index
+explain select count(*) from t1 where t='a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref t t 13 const # Using where
+explain select count(*) from t1 where v like 'a%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range v v 13 NULL # Using where; Using index
+explain select count(*) from t1 where v between 'a' and 'a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 13 const # Using where; Using index
+explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 13 const # Using where; Using index
+alter table t1 add unique(v);
+ERROR 23000: Duplicate entry '{ ' for key 'v_2'
+alter table t1 add key(v);
+select concat('*',v,'*',c,'*',t,'*') as qq from t1 where v='a';
+qq
+*a*a*a*
+*a *a*a *
+*a *a*a *
+*a *a*a *
+*a *a*a *
+*a *a*a *
+*a *a*a *
+*a *a*a *
+*a *a*a *
+*a *a*a *
+explain select * from t1 where v='a';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v,v_2 # 13 const # Using where
+select v,count(*) from t1 group by v limit 10;
+v count(*)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select v,count(c) from t1 group by v limit 10;
+v count(c)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select sql_big_result v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select sql_big_result v,count(c) from t1 group by v limit 10;
+v count(c)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select c,count(*) from t1 group by c limit 10;
+c count(*)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select c,count(t) from t1 group by c limit 10;
+c count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select sql_big_result c,count(t) from t1 group by c limit 10;
+c count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select t,count(*) from t1 group by t limit 10;
+t count(*)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select t,count(t) from t1 group by t limit 10;
+t count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select sql_big_result t,count(t) from t1 group by t limit 10;
+t count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+alter table t1 modify v varchar(300), drop key v, drop key v_2, add key v (v);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(300) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text,
+ KEY `c` (`c`),
+ KEY `t` (`t`(10)),
+ KEY `v` (`v`)
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+select count(*) from t1 where v='a';
+count(*)
+10
+select count(*) from t1 where v='a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n';
+count(*)
+10
+select count(*) from t1 where v like 'a%';
+count(*)
+11
+select count(*) from t1 where v like 'a %';
+count(*)
+9
+explain select count(*) from t1 where v='a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 303 const # Using where; Using index
+explain select count(*) from t1 where v like 'a%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range v v 303 NULL # Using where; Using index
+explain select count(*) from t1 where v between 'a' and 'a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 303 const # Using where; Using index
+explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 303 const # Using where; Using index
+explain select * from t1 where v='a';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 303 const # Using where
+select v,count(*) from t1 group by v limit 10;
+v count(*)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select sql_big_result v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+alter table t1 drop key v, add key v (v(30));
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(300) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text,
+ KEY `c` (`c`),
+ KEY `t` (`t`(10)),
+ KEY `v` (`v`(30))
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+select count(*) from t1 where v='a';
+count(*)
+10
+select count(*) from t1 where v='a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n';
+count(*)
+10
+select count(*) from t1 where v like 'a%';
+count(*)
+11
+select count(*) from t1 where v like 'a %';
+count(*)
+9
+explain select count(*) from t1 where v='a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 33 const # Using where
+explain select count(*) from t1 where v like 'a%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range v v 33 NULL # Using where
+explain select count(*) from t1 where v between 'a' and 'a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 33 const # Using where
+explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 33 const # Using where
+explain select * from t1 where v='a';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 33 const # Using where
+select v,count(*) from t1 group by v limit 10;
+v count(*)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select sql_big_result v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+alter table t1 modify v varchar(600), drop key v, add key v (v);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(600) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text,
+ KEY `c` (`c`),
+ KEY `t` (`t`(10)),
+ KEY `v` (`v`)
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+select v,count(*) from t1 group by v limit 10;
+v count(*)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select sql_big_result v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+drop table t1;
+create table t1 (a char(10), unique (a));
+insert into t1 values ('a ');
+insert into t1 values ('a ');
+ERROR 23000: Duplicate entry 'a' for key 'a'
+alter table t1 modify a varchar(10);
+insert into t1 values ('a '),('a '),('a '),('a ');
+ERROR 23000: Duplicate entry 'a ' for key 'a'
+insert into t1 values ('a ');
+ERROR 23000: Duplicate entry 'a ' for key 'a'
+insert into t1 values ('a ');
+ERROR 23000: Duplicate entry 'a ' for key 'a'
+insert into t1 values ('a ');
+ERROR 23000: Duplicate entry 'a ' for key 'a'
+update t1 set a='a ' where a like 'a%';
+select concat(a,'.') from t1;
+concat(a,'.')
+a .
+update t1 set a='abc ' where a like 'a ';
+select concat(a,'.') from t1;
+concat(a,'.')
+a .
+update t1 set a='a ' where a like 'a %';
+select concat(a,'.') from t1;
+concat(a,'.')
+a .
+update t1 set a='a ' where a like 'a ';
+select concat(a,'.') from t1;
+concat(a,'.')
+a .
+drop table t1;
+create table t1 (v varchar(10), c char(10), t text, key(v(5)), key(c(5)), key(t(5)));
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(10) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text,
+ KEY `v` (`v`(5)),
+ KEY `c` (`c`(5)),
+ KEY `t` (`t`(5))
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (v char(10) character set utf8);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` char(10) CHARACTER SET utf8 DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (v varchar(10), c char(10)) row_format=fixed;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(10) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=FIXED
+insert into t1 values('a','a'),('a ','a ');
+select concat('*',v,'*',c,'*') from t1;
+concat('*',v,'*',c,'*')
+*a*a*
+*a *a*
+drop table t1;
+create table t1 (v varchar(65530), key(v(10)));
+insert into t1 values(repeat('a',65530));
+select length(v) from t1 where v=repeat('a',65530);
+length(v)
+65530
+drop table t1;
+create table t1(a int, b varchar(12), key ba(b, a));
+insert into t1 values (1, 'A'), (20, NULL);
+explain select * from t1 where a=20 and b is null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref ba ba 20 const,const 1 Using where; Using index
+select * from t1 where a=20 and b is null;
+a b
+20 NULL
+drop table t1;
+create table t1 (v varchar(65530), key(v));
+Warnings:
+Warning 1071 Specified key was too long; max key length is 1112 bytes
+drop table if exists t1;
+create table t1 (v varchar(65536));
+Warnings:
+Note 1246 Converting column 'v' from VARCHAR to TEXT
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` mediumtext
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (v varchar(65530) character set utf8);
+Warnings:
+Note 1246 Converting column 'v' from VARCHAR to TEXT
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` mediumtext CHARACTER SET utf8
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (v varchar(65535));
+ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 65535. You have to change some columns to TEXT or BLOBs
+set @save_concurrent_insert=@@concurrent_insert;
+set global concurrent_insert=1;
+create table t1 (a int) ROW_FORMAT=FIXED;
+insert into t1 values (1),(2),(3),(4),(5);
+lock table t1 read local;
+insert into t1 values(6),(7);
+unlock tables;
+delete from t1 where a>=3 and a<=4;
+lock table t1 read local;
+set global concurrent_insert=2;
+insert into t1 values (8),(9);
+unlock tables;
+insert into t1 values (10),(11),(12);
+select * from t1;
+a
+1
+2
+11
+10
+5
+6
+7
+8
+9
+12
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+create table t1 (a int, b varchar(30) default "hello") ROW_FORMAT=DYNAMIC;
+insert into t1 (a) values (1),(2),(3),(4),(5);
+lock table t1 read local;
+insert into t1 (a) values(6),(7);
+unlock tables;
+delete from t1 where a>=3 and a<=4;
+lock table t1 read local;
+set global concurrent_insert=2;
+insert into t1 (a) values (8),(9);
+unlock tables;
+insert into t1 (a) values (10),(11),(12);
+select a from t1;
+a
+1
+2
+11
+10
+5
+6
+7
+8
+9
+12
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+set global concurrent_insert=@save_concurrent_insert;
+create table t1 (a int, key(a));
+insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Table is already up to date
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 8 NULL NULL YES BTREE
+alter table t1 disable keys;
+alter table t1 enable keys;
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 8 NULL NULL YES BTREE
+drop table t1;
+show create table t1;
+show create table t1;
+create table t1 (a int) select 42 a;
+select * from t1;
+a
+9
+select * from t1;
+a
+99
+select * from t1;
+a
+42
+drop table t1;
+End of 4.1 tests
+create table t1 (c1 int) pack_keys=0;
+create table t2 (c1 int) pack_keys=1;
+create table t3 (c1 int) pack_keys=default;
+create table t4 (c1 int) pack_keys=2;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '2' at line 1
+drop table t1, t2, t3;
+create table t1 (a int not null, key `a` (a) key_block_size=1024);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a int not null, key `a` (a) key_block_size=2048);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a varchar(2048), key `a` (a));
+Warnings:
+Warning 1071 Specified key was too long; max key length is 1112 bytes
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` varchar(2048) DEFAULT NULL,
+ KEY `a` (`a`(1112))
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a varchar(2048), key `a` (a) key_block_size=1024);
+Warnings:
+Warning 1071 Specified key was too long; max key length is 1112 bytes
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` varchar(2048) DEFAULT NULL,
+ KEY `a` (`a`(1112)) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=1024;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 1112 bytes
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` varchar(2048) DEFAULT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=8192,
+ KEY `b` (`b`(1112)) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=1024
+alter table t1 key_block_size=2048;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` varchar(2048) DEFAULT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=8192,
+ KEY `b` (`b`(1112)) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2048
+alter table t1 add c int, add key (c);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` varchar(2048) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=8192,
+ KEY `b` (`b`(1112)) KEY_BLOCK_SIZE=8192,
+ KEY `c` (`c`) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2048
+alter table t1 key_block_size=0;
+alter table t1 add d int, add key (d);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` varchar(2048) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ `d` int(11) DEFAULT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=8192,
+ KEY `b` (`b`(1112)) KEY_BLOCK_SIZE=8192,
+ KEY `c` (`c`) KEY_BLOCK_SIZE=8192,
+ KEY `d` (`d`)
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=8192;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 1112 bytes
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` varchar(2048) DEFAULT NULL,
+ KEY `a` (`a`),
+ KEY `b` (`b`(1112))
+) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=8192
+drop table t1;
+create table t1 (a int not null, b varchar(2048), key (a) key_block_size=1024, key(b)) key_block_size=8192;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 1112 bytes
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` varchar(2048) DEFAULT NULL,
+ KEY `a` (`a`),
+ KEY `b` (`b`(1112))
+) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=8192
+drop table t1;
+create table t1 (a int not null, b int, key (a) key_block_size=1024, key(b) key_block_size=8192) key_block_size=16384;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` int(11) DEFAULT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=8192,
+ KEY `b` (`b`) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=16384
+drop table t1;
+create table t1 (a int not null, key `a` (a) key_block_size=512);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a varchar(2048), key `a` (a) key_block_size=1000000000000000000);
+Warnings:
+Warning 1071 Specified key was too long; max key length is 1112 bytes
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` varchar(2048) DEFAULT NULL,
+ KEY `a` (`a`(1112)) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a int not null, key `a` (a) key_block_size=1025);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a int not null, key key_block_size=1024 (a));
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '=1024 (a))' at line 1
+create table t1 (a int not null, key `a` key_block_size=1024 (a));
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'key_block_size=1024 (a))' at line 1
+CREATE TABLE t1 (
+c1 INT,
+c2 VARCHAR(300),
+KEY (c1) KEY_BLOCK_SIZE 1024,
+KEY (c2) KEY_BLOCK_SIZE 8192
+);
+INSERT INTO t1 VALUES (10, REPEAT('a', CEIL(RAND(10) * 300))),
+(11, REPEAT('b', CEIL(RAND() * 300))),
+(12, REPEAT('c', CEIL(RAND() * 300))),
+(13, REPEAT('d', CEIL(RAND() * 300))),
+(14, REPEAT('e', CEIL(RAND() * 300))),
+(15, REPEAT('f', CEIL(RAND() * 300))),
+(16, REPEAT('g', CEIL(RAND() * 300))),
+(17, REPEAT('h', CEIL(RAND() * 300))),
+(18, REPEAT('i', CEIL(RAND() * 300))),
+(19, REPEAT('j', CEIL(RAND() * 300))),
+(20, REPEAT('k', CEIL(RAND() * 300))),
+(21, REPEAT('l', CEIL(RAND() * 300))),
+(22, REPEAT('m', CEIL(RAND() * 300))),
+(23, REPEAT('n', CEIL(RAND() * 300))),
+(24, REPEAT('o', CEIL(RAND() * 300))),
+(25, REPEAT('p', CEIL(RAND() * 300))),
+(26, REPEAT('q', CEIL(RAND() * 300))),
+(27, REPEAT('r', CEIL(RAND() * 300))),
+(28, REPEAT('s', CEIL(RAND() * 300))),
+(29, REPEAT('t', CEIL(RAND() * 300))),
+(30, REPEAT('u', CEIL(RAND() * 300))),
+(31, REPEAT('v', CEIL(RAND() * 300))),
+(32, REPEAT('w', CEIL(RAND() * 300))),
+(33, REPEAT('x', CEIL(RAND() * 300))),
+(34, REPEAT('y', CEIL(RAND() * 300))),
+(35, REPEAT('z', CEIL(RAND() * 300)));
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+REPAIR TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+DELETE FROM t1 WHERE c1 >= 10;
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+DROP TABLE t1;
+create table t1 (a int) transactional=0;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1 TRANSACTIONAL=0
+drop table t1;
+create table t1 (a int) row_format=dynamic transactional=0;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC TRANSACTIONAL=0
+drop table t1;
+create table t1 (a int) row_format=dynamic transactional=1;
+Warnings:
+Note 1478 Row format set to PAGE because of TRANSACTIONAL=1 option
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE TRANSACTIONAL=1
+alter table t1 row_format=PAGE;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE TRANSACTIONAL=1
+alter table t1 row_format=DYNAMIC;
+Warnings:
+Note 1478 Row format set to PAGE because of TRANSACTIONAL=1 option
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE TRANSACTIONAL=1
+alter table t1 transactional=0;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE TRANSACTIONAL=0
+alter table t1 row_format=DYNAMIC;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC TRANSACTIONAL=0
+drop table t1;
+create table t1 (a int) row_format=PAGE;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE
+drop table t1;
+create table t1 (a int) row_format=PAGE TRANSACTIONAL=DEFAULT;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE
+alter table t1 row_format=DYNAMIC;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC
+drop table t1;
+create table t1 (a int) row_format=page;
+insert delayed into t1 values(1);
+ERROR HY000: Table storage engine for 't1' doesn't have this option
+drop table t1;
+create table t1 (a int) row_format=page transactional=0;
+insert delayed into t1 values(1);
+flush table t1;
+select * from t1;
+a
+1
+select count(*) from t1;
+count(*)
+1
+drop table t1;
+create table t1 (a int) row_format=dynamic;
+insert delayed into t1 values(1);
+flush table t1;
+select * from t1;
+a
+1
+select count(*) from t1;
+count(*)
+1
+drop table t1;
+create table `t1` (
+t1_name varchar(255) default null,
+t1_id int(10) unsigned not null auto_increment,
+key (t1_name),
+primary key (t1_id)
+) engine=maria auto_increment = 1000 default charset=latin1;
+lock tables t1 write;
+INSERT INTO `t1` VALUES ('bla',1000),('bla',1001),('bla',1002);
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+unlock tables;
+create table t2 like t1;
+insert into t2 select * from t1;
+analyze table t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status Table is already up to date
+delete from t2;
+insert into t2 select * from t1;
+analyze table t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status Table is already up to date
+drop table t1,t2;
+create table t1 (a bigint auto_increment, primary key(a), b char(255), c varchar(20000));
+update t1 set b=repeat('a',100) where a between 1 and 100;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+update t1 set c=repeat('a',8192*2) where a between 200 and 202;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+CREATE TABLE t1 (a int, b int, v varchar(60000)) checksum=1 engine=maria;
+insert into t1 values (1,1,"aaa"),(1,2,null);
+checksum table t1;
+Table Checksum
+test.t1 1112804611
+lock table t1 write;
+insert into t1 values (1,3,repeat('c',30000)),(4,4,repeat('a',30000));
+update t1 set v="row5" where b=4;
+delete from t1 where b=3;
+select a, b, length(v) from t1;
+a b length(v)
+1 1 3
+1 2 NULL
+4 4 4
+drop table t1;
+CREATE TABLE t1 (
+auto int(5) unsigned NOT NULL auto_increment,
+string char(10) default "hello",
+tiny tinyint(4) DEFAULT '0' NOT NULL ,
+short smallint(6) DEFAULT '1' NOT NULL ,
+medium mediumint(8) DEFAULT '0' NOT NULL,
+long_int int(11) DEFAULT '0' NOT NULL,
+longlong bigint(13) DEFAULT '0' NOT NULL,
+real_float float(13,1) DEFAULT 0.0 NOT NULL,
+real_double double(16,4),
+utiny tinyint(3) unsigned DEFAULT '0' NOT NULL,
+ushort smallint(5) unsigned zerofill DEFAULT '00000' NOT NULL,
+umedium mediumint(8) unsigned DEFAULT '0' NOT NULL,
+ulong int(11) unsigned DEFAULT '0' NOT NULL,
+ulonglong bigint(13) unsigned DEFAULT '0' NOT NULL,
+time_stamp timestamp,
+date_field date,
+time_field time,
+date_time datetime,
+blob_col blob,
+tinyblob_col tinyblob,
+mediumblob_col mediumblob not null default '',
+longblob_col longblob not null default '',
+options enum('one','two','tree') not null ,
+flags set('one','two','tree') not null default '',
+PRIMARY KEY (auto),
+KEY (utiny),
+KEY (tiny),
+KEY (short),
+KEY any_name (medium),
+KEY (longlong),
+KEY (real_float),
+KEY (ushort),
+KEY (umedium),
+KEY (ulong),
+KEY (ulonglong,ulong),
+KEY (options,flags)
+) engine=maria;
+insert into t1 values (10,1,1,1,1,1,1,1,1,1,1,1,1,1,NULL,0,0,0,1,1,1,1,'one','one');
+create table t2 (primary key (auto)) engine=maria row_format=page select auto+1 as auto,1 as t1, 'a' as t2, repeat('a',256) as t3, binary repeat('b',256) as t4, repeat('a',4096) as t5, binary repeat('b',4096) as t6, '' as t7, binary '' as t8 from t1;
+check table t1,t2;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+test.t2 check status OK
+select t1,t2,length(t3),length(t4),length(t5),length(t6),t7,t8 from t2;
+t1 t2 length(t3) length(t4) length(t5) length(t6) t7 t8
+1 a 256 256 4096 4096
+drop table t1,t2;
+CREATE TABLE t1 (seq int, s1 int, s2 blob);
+insert into t1 values (1, 1, MD5(1));
+update t1 set s1=2 where seq=1;
+check table t1 extended;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+show variables like 'maria%';
+Variable_name Value
+maria_block_size 8192
+maria_checkpoint_interval 30
+maria_log_file_size 4294959104
+maria_log_purge_type immediate
+maria_max_sort_file_size 9223372036854775807
+maria_page_checksum ON
+maria_pagecache_age_threshold 300
+maria_pagecache_buffer_size 8388572
+maria_pagecache_division_limit 100
+maria_repair_threads 1
+maria_sort_buffer_size 8388608
+maria_stats_method nulls_unequal
+maria_sync_log_dir NEWFILE
+show status like 'maria%';
+Variable_name Value
+Maria_pagecache_blocks_not_flushed #
+Maria_pagecache_blocks_unused #
+Maria_pagecache_blocks_used #
+Maria_pagecache_read_requests #
+Maria_pagecache_reads #
+Maria_pagecache_write_requests #
+Maria_pagecache_writes #
+create table t1 (s varchar(25), fulltext(s)) TRANSACTIONAL= 1;
+ERROR HY000: Maria can't yet handle SPATIAL or FULLTEXT keys in transactional mode. For now use TRANSACTIONAL=0
+drop table if exists t1;
+create table t1 ( fid int not null auto_increment primary key,
+g geometry not null, spatial key(g));
+ERROR HY000: Maria can't yet handle SPATIAL or FULLTEXT keys in transactional mode. For now use TRANSACTIONAL=0
+drop table if exists t1;
+set global maria_log_file_size=4294967296;
+Warnings:
+Warning 1292 Truncated incorrect log_file_size value: '4294967296'
diff --git a/mysql-test/r/merge.result b/mysql-test/r/merge.result
index a709817b7ef..c1bd0670e0e 100644
--- a/mysql-test/r/merge.result
+++ b/mysql-test/r/merge.result
@@ -1,3 +1,5 @@
+set global storage_engine=myisam;
+set session storage_engine=myisam;
drop table if exists t1,t2,t3,t4,t5,t6;
drop database if exists mysqltest;
create table t1 (a int not null primary key auto_increment, message char(20));
diff --git a/mysql-test/r/mix2_myisam.result b/mysql-test/r/mix2_myisam.result
index cabc4de8d21..e0a3d1af089 100644
--- a/mysql-test/r/mix2_myisam.result
+++ b/mysql-test/r/mix2_myisam.result
@@ -1232,34 +1232,34 @@ insert t5 select * from t1;
insert t6 select * from t1;
checksum table t1, t2, t3, t4, t5, t6, t7 quick;
Table Checksum
-test.t1 2948697075
+test.t1 3442722830
test.t2 NULL
test.t3 NULL
test.t4 NULL
-test.t5 2948697075
+test.t5 3442722830
test.t6 NULL
test.t7 NULL
Warnings:
Error 1146 Table 'test.t7' doesn't exist
checksum table t1, t2, t3, t4, t5, t6, t7;
Table Checksum
-test.t1 2948697075
-test.t2 2948697075
-test.t3 2948697075
-test.t4 2948697075
-test.t5 2948697075
-test.t6 2948697075
+test.t1 3442722830
+test.t2 3442722830
+test.t3 3442722830
+test.t4 3442722830
+test.t5 3442722830
+test.t6 3442722830
test.t7 NULL
Warnings:
Error 1146 Table 'test.t7' doesn't exist
checksum table t1, t2, t3, t4, t5, t6, t7 extended;
Table Checksum
-test.t1 2948697075
-test.t2 2948697075
-test.t3 2948697075
-test.t4 2948697075
-test.t5 2948697075
-test.t6 2948697075
+test.t1 3442722830
+test.t2 3442722830
+test.t3 3442722830
+test.t4 3442722830
+test.t5 3442722830
+test.t6 3442722830
test.t7 NULL
Warnings:
Error 1146 Table 'test.t7' doesn't exist
diff --git a/mysql-test/r/myisam.result b/mysql-test/r/myisam.result
index 1c8b5e9d7d9..9c861b446a4 100644
--- a/mysql-test/r/myisam.result
+++ b/mysql-test/r/myisam.result
@@ -1,4 +1,4 @@
-drop table if exists t1,t2;
+drop table if exists t1,t2,t3;
SET SQL_WARNINGS=1;
CREATE TABLE t1 (
STRING_DATA char(255) default NULL,
@@ -551,22 +551,22 @@ insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, "");
insert t2 select * from t1;
checksum table t1, t2, t3 quick;
Table Checksum
-test.t1 2948697075
+test.t1 3442722830
test.t2 NULL
test.t3 NULL
Warnings:
Error 1146 Table 'test.t3' doesn't exist
checksum table t1, t2, t3;
Table Checksum
-test.t1 2948697075
-test.t2 2948697075
+test.t1 3442722830
+test.t2 3442722830
test.t3 NULL
Warnings:
Error 1146 Table 'test.t3' doesn't exist
checksum table t1, t2, t3 extended;
Table Checksum
-test.t1 2948697075
-test.t2 2948697075
+test.t1 3442722830
+test.t2 3442722830
test.t3 NULL
Warnings:
Error 1146 Table 'test.t3' doesn't exist
diff --git a/mysql-test/r/mysqldump.result b/mysql-test/r/mysqldump.result
index d07aed5317a..6d0e1aaab29 100644
--- a/mysql-test/r/mysqldump.result
+++ b/mysql-test/r/mysqldump.result
@@ -723,7 +723,7 @@ DROP TABLE t1;
#
# Test for --insert-ignore
#
-CREATE TABLE t1 (a int);
+CREATE TABLE t1 (a int) ENGINE=MyISAM;
INSERT INTO t1 VALUES (1),(2),(3);
INSERT INTO t1 VALUES (4),(5),(6);
@@ -3644,8 +3644,8 @@ CREATE TABLE t1(a int);
INSERT INTO t1 VALUES (1), (2);
mysqldump: Input filename too long: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
DROP TABLE t1;
-CREATE TABLE t2 (a int);
-CREATE TABLE t3 (a int);
+CREATE TABLE t2 (a int) ENGINE=MyISAM;
+CREATE TABLE t3 (a int) ENGINE=MyISAM;
CREATE TABLE t1 (a int) ENGINE=merge UNION=(t2, t3);
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
diff --git a/mysql-test/r/old-mode.result b/mysql-test/r/old-mode.result
new file mode 100644
index 00000000000..df2c0b6fee0
--- /dev/null
+++ b/mysql-test/r/old-mode.result
@@ -0,0 +1,14 @@
+drop table if exists t1,t2;
+create table t1 (a int, b varchar(200), c text not null) checksum=1;
+create table t2 (a int, b varchar(200), c text not null) checksum=0;
+insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, "");
+insert t2 select * from t1;
+checksum table t1, t2;
+Table Checksum
+test.t1 3442722830
+test.t2 2948697075
+checksum table t1, t2 extended;
+Table Checksum
+test.t1 2948697075
+test.t2 2948697075
+drop table t1,t2;
diff --git a/mysql-test/r/ps_2myisam.result b/mysql-test/r/ps_2myisam.result
index fbc6781e5e7..769c22a40d1 100644
--- a/mysql-test/r/ps_2myisam.result
+++ b/mysql-test/r/ps_2myisam.result
@@ -1756,7 +1756,7 @@ set @arg14= 'abc';
set @arg14= NULL ;
set @arg15= CAST('abc' as binary) ;
set @arg15= NULL ;
-create table t5 as select
+create table t5 engine = MyISAM as select
8 as const01, @arg01 as param01,
8.0 as const02, @arg02 as param02,
80.00000000000e-1 as const03, @arg03 as param03,
diff --git a/mysql-test/r/ps_3innodb.result b/mysql-test/r/ps_3innodb.result
index fcd0b5de9a0..5d176f86192 100644
--- a/mysql-test/r/ps_3innodb.result
+++ b/mysql-test/r/ps_3innodb.result
@@ -1739,7 +1739,7 @@ set @arg14= 'abc';
set @arg14= NULL ;
set @arg15= CAST('abc' as binary) ;
set @arg15= NULL ;
-create table t5 as select
+create table t5 engine = MyISAM as select
8 as const01, @arg01 as param01,
8.0 as const02, @arg02 as param02,
80.00000000000e-1 as const03, @arg03 as param03,
diff --git a/mysql-test/r/ps_4heap.result b/mysql-test/r/ps_4heap.result
index 862c0ff75c1..121e02fbe53 100644
--- a/mysql-test/r/ps_4heap.result
+++ b/mysql-test/r/ps_4heap.result
@@ -1740,7 +1740,7 @@ set @arg14= 'abc';
set @arg14= NULL ;
set @arg15= CAST('abc' as binary) ;
set @arg15= NULL ;
-create table t5 as select
+create table t5 engine = MyISAM as select
8 as const01, @arg01 as param01,
8.0 as const02, @arg02 as param02,
80.00000000000e-1 as const03, @arg03 as param03,
diff --git a/mysql-test/r/ps_5merge.result b/mysql-test/r/ps_5merge.result
index 51393cc8bc3..c0182bdc81b 100644
--- a/mysql-test/r/ps_5merge.result
+++ b/mysql-test/r/ps_5merge.result
@@ -1676,7 +1676,7 @@ set @arg14= 'abc';
set @arg14= NULL ;
set @arg15= CAST('abc' as binary) ;
set @arg15= NULL ;
-create table t5 as select
+create table t5 engine = MyISAM as select
8 as const01, @arg01 as param01,
8.0 as const02, @arg02 as param02,
80.00000000000e-1 as const03, @arg03 as param03,
@@ -4698,7 +4698,7 @@ set @arg14= 'abc';
set @arg14= NULL ;
set @arg15= CAST('abc' as binary) ;
set @arg15= NULL ;
-create table t5 as select
+create table t5 engine = MyISAM as select
8 as const01, @arg01 as param01,
8.0 as const02, @arg02 as param02,
80.00000000000e-1 as const03, @arg03 as param03,
diff --git a/mysql-test/r/ps_maria.result b/mysql-test/r/ps_maria.result
new file mode 100644
index 00000000000..e1e881f09f2
--- /dev/null
+++ b/mysql-test/r/ps_maria.result
@@ -0,0 +1,3146 @@
+set global maria_log_file_size=4294967295;
+use test;
+drop table if exists t1, t9 ;
+create table t1
+(
+a int, b varchar(30),
+primary key(a)
+) engine = 'MARIA' ;
+create table t9
+(
+c1 tinyint, c2 smallint, c3 mediumint, c4 int,
+c5 integer, c6 bigint, c7 float, c8 double,
+c9 double precision, c10 real, c11 decimal(7, 4), c12 numeric(8, 4),
+c13 date, c14 datetime, c15 timestamp, c16 time,
+c17 year, c18 tinyint, c19 bool, c20 char,
+c21 char(10), c22 varchar(30), c23 tinyblob, c24 tinytext,
+c25 blob, c26 text, c27 mediumblob, c28 mediumtext,
+c29 longblob, c30 longtext, c31 enum('one', 'two', 'three'),
+c32 set('monday', 'tuesday', 'wednesday'),
+primary key(c1)
+) engine = 'MARIA' ;
+delete from t1 ;
+insert into t1 values (1,'one');
+insert into t1 values (2,'two');
+insert into t1 values (3,'three');
+insert into t1 values (4,'four');
+commit ;
+delete from t9 ;
+insert into t9
+set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1,
+c10= 1, c11= 1, c12 = 1,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=true, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday';
+insert into t9
+set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9,
+c10= 9, c11= 9, c12 = 9,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=false, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday';
+commit ;
+test_sequence
+------ simple select tests ------
+prepare stmt1 from ' select * from t9 order by c1 ' ;
+execute stmt1;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def test t9 t9 c1 c1 1 4 1 N 49155 0 63
+def test t9 t9 c2 c2 2 6 1 Y 32768 0 63
+def test t9 t9 c3 c3 9 9 1 Y 32768 0 63
+def test t9 t9 c4 c4 3 11 1 Y 32768 0 63
+def test t9 t9 c5 c5 3 11 1 Y 32768 0 63
+def test t9 t9 c6 c6 8 20 1 Y 32768 0 63
+def test t9 t9 c7 c7 4 12 1 Y 32768 31 63
+def test t9 t9 c8 c8 5 22 1 Y 32768 31 63
+def test t9 t9 c9 c9 5 22 1 Y 32768 31 63
+def test t9 t9 c10 c10 5 22 1 Y 32768 31 63
+def test t9 t9 c11 c11 246 9 6 Y 0 4 63
+def test t9 t9 c12 c12 246 10 6 Y 0 4 63
+def test t9 t9 c13 c13 10 10 10 Y 128 0 63
+def test t9 t9 c14 c14 12 19 19 Y 128 0 63
+def test t9 t9 c15 c15 7 19 19 N 9441 0 63
+def test t9 t9 c16 c16 11 8 8 Y 128 0 63
+def test t9 t9 c17 c17 13 4 4 Y 32864 0 63
+def test t9 t9 c18 c18 1 4 1 Y 32768 0 63
+def test t9 t9 c19 c19 1 1 1 Y 32768 0 63
+def test t9 t9 c20 c20 254 1 1 Y 0 0 8
+def test t9 t9 c21 c21 254 10 10 Y 0 0 8
+def test t9 t9 c22 c22 253 30 30 Y 0 0 8
+def test t9 t9 c23 c23 252 255 8 Y 144 0 63
+def test t9 t9 c24 c24 252 255 8 Y 16 0 8
+def test t9 t9 c25 c25 252 65535 4 Y 144 0 63
+def test t9 t9 c26 c26 252 65535 4 Y 16 0 8
+def test t9 t9 c27 c27 252 16777215 10 Y 144 0 63
+def test t9 t9 c28 c28 252 16777215 10 Y 16 0 8
+def test t9 t9 c29 c29 252 4294967295 8 Y 144 0 63
+def test t9 t9 c30 c30 252 4294967295 8 Y 16 0 8
+def test t9 t9 c31 c31 254 5 3 Y 256 0 8
+def test t9 t9 c32 c32 254 24 7 Y 2048 0 8
+c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+9 9 9 9 9 9 9 9 9 9 9.0000 9.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 0 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext two tuesday
+set @arg00='SELECT' ;
+@arg00 a from t1 where a=1;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 a from t1 where a=1' at line 1
+prepare stmt1 from ' ? a from t1 where a=1 ';
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? a from t1 where a=1' at line 1
+set @arg00=1 ;
+select @arg00, b from t1 where a=1 ;
+@arg00 b
+1 one
+prepare stmt1 from ' select ?, b from t1 where a=1 ' ;
+execute stmt1 using @arg00 ;
+? b
+1 one
+set @arg00='lion' ;
+select @arg00, b from t1 where a=1 ;
+@arg00 b
+lion one
+prepare stmt1 from ' select ?, b from t1 where a=1 ' ;
+execute stmt1 using @arg00 ;
+? b
+lion one
+set @arg00=NULL ;
+select @arg00, b from t1 where a=1 ;
+@arg00 b
+NULL one
+prepare stmt1 from ' select ?, b from t1 where a=1 ' ;
+execute stmt1 using @arg00 ;
+? b
+NULL one
+set @arg00=1 ;
+select b, a - @arg00 from t1 where a=1 ;
+b a - @arg00
+one 0
+prepare stmt1 from ' select b, a - ? from t1 where a=1 ' ;
+execute stmt1 using @arg00 ;
+b a - ?
+one 0
+set @arg00=null ;
+select @arg00 as my_col ;
+my_col
+NULL
+prepare stmt1 from ' select ? as my_col';
+execute stmt1 using @arg00 ;
+my_col
+NULL
+select @arg00 + 1 as my_col ;
+my_col
+NULL
+prepare stmt1 from ' select ? + 1 as my_col';
+execute stmt1 using @arg00 ;
+my_col
+NULL
+select 1 + @arg00 as my_col ;
+my_col
+NULL
+prepare stmt1 from ' select 1 + ? as my_col';
+execute stmt1 using @arg00 ;
+my_col
+NULL
+set @arg00='MySQL' ;
+select substr(@arg00,1,2) from t1 where a=1 ;
+substr(@arg00,1,2)
+My
+prepare stmt1 from ' select substr(?,1,2) from t1 where a=1 ' ;
+execute stmt1 using @arg00 ;
+substr(?,1,2)
+My
+set @arg00=3 ;
+select substr('MySQL',@arg00,5) from t1 where a=1 ;
+substr('MySQL',@arg00,5)
+SQL
+prepare stmt1 from ' select substr(''MySQL'',?,5) from t1 where a=1 ' ;
+execute stmt1 using @arg00 ;
+substr('MySQL',?,5)
+SQL
+select substr('MySQL',1,@arg00) from t1 where a=1 ;
+substr('MySQL',1,@arg00)
+MyS
+prepare stmt1 from ' select substr(''MySQL'',1,?) from t1 where a=1 ' ;
+execute stmt1 using @arg00 ;
+substr('MySQL',1,?)
+MyS
+set @arg00='MySQL' ;
+select a , concat(@arg00,b) from t1 order by a;
+a concat(@arg00,b)
+1 MySQLone
+2 MySQLtwo
+3 MySQLthree
+4 MySQLfour
+prepare stmt1 from ' select a , concat(?,b) from t1 order by a ' ;
+execute stmt1 using @arg00;
+a concat(?,b)
+1 MySQLone
+2 MySQLtwo
+3 MySQLthree
+4 MySQLfour
+select a , concat(b,@arg00) from t1 order by a ;
+a concat(b,@arg00)
+1 oneMySQL
+2 twoMySQL
+3 threeMySQL
+4 fourMySQL
+prepare stmt1 from ' select a , concat(b,?) from t1 order by a ' ;
+execute stmt1 using @arg00;
+a concat(b,?)
+1 oneMySQL
+2 twoMySQL
+3 threeMySQL
+4 fourMySQL
+set @arg00='MySQL' ;
+select group_concat(@arg00,b order by a) from t1
+group by 'a' ;
+group_concat(@arg00,b order by a)
+MySQLone,MySQLtwo,MySQLthree,MySQLfour
+prepare stmt1 from ' select group_concat(?,b order by a) from t1
+group by ''a'' ' ;
+execute stmt1 using @arg00;
+group_concat(?,b order by a)
+MySQLone,MySQLtwo,MySQLthree,MySQLfour
+select group_concat(b,@arg00 order by a) from t1
+group by 'a' ;
+group_concat(b,@arg00 order by a)
+oneMySQL,twoMySQL,threeMySQL,fourMySQL
+prepare stmt1 from ' select group_concat(b,? order by a) from t1
+group by ''a'' ' ;
+execute stmt1 using @arg00;
+group_concat(b,? order by a)
+oneMySQL,twoMySQL,threeMySQL,fourMySQL
+set @arg00='first' ;
+set @arg01='second' ;
+set @arg02=NULL;
+select @arg00, @arg01 from t1 where a=1 ;
+@arg00 @arg01
+first second
+prepare stmt1 from ' select ?, ? from t1 where a=1 ' ;
+execute stmt1 using @arg00, @arg01 ;
+? ?
+first second
+execute stmt1 using @arg02, @arg01 ;
+? ?
+NULL second
+execute stmt1 using @arg00, @arg02 ;
+? ?
+first NULL
+execute stmt1 using @arg02, @arg02 ;
+? ?
+NULL NULL
+drop table if exists t5 ;
+create table t5 (id1 int(11) not null default '0',
+value2 varchar(100), value1 varchar(100)) ;
+insert into t5 values (1,'hh','hh'),(2,'hh','hh'),
+(1,'ii','ii'),(2,'ii','ii') ;
+prepare stmt1 from ' select id1,value1 from t5 where id1=? or value1=? order by id1,value1 ' ;
+set @arg00=1 ;
+set @arg01='hh' ;
+execute stmt1 using @arg00, @arg01 ;
+id1 value1
+1 hh
+1 ii
+2 hh
+drop table t5 ;
+drop table if exists t5 ;
+create table t5(session_id char(9) not null) ;
+insert into t5 values ('abc') ;
+prepare stmt1 from ' select * from t5
+where ?=''1111'' and session_id = ''abc'' ' ;
+set @arg00='abc' ;
+execute stmt1 using @arg00 ;
+session_id
+set @arg00='1111' ;
+execute stmt1 using @arg00 ;
+session_id
+abc
+set @arg00='abc' ;
+execute stmt1 using @arg00 ;
+session_id
+drop table t5 ;
+set @arg00='FROM' ;
+select a @arg00 t1 where a=1 ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 t1 where a=1' at line 1
+prepare stmt1 from ' select a ? t1 where a=1 ' ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? t1 where a=1' at line 1
+set @arg00='t1' ;
+select a from @arg00 where a=1 ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 where a=1' at line 1
+prepare stmt1 from ' select a from ? where a=1 ' ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? where a=1' at line 1
+set @arg00='WHERE' ;
+select a from t1 @arg00 a=1 ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 a=1' at line 1
+prepare stmt1 from ' select a from t1 ? a=1 ' ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? a=1' at line 1
+set @arg00=1 ;
+select a FROM t1 where a=@arg00 ;
+a
+1
+prepare stmt1 from ' select a FROM t1 where a=? ' ;
+execute stmt1 using @arg00 ;
+a
+1
+set @arg00=1000 ;
+execute stmt1 using @arg00 ;
+a
+set @arg00=NULL ;
+select a FROM t1 where a=@arg00 ;
+a
+prepare stmt1 from ' select a FROM t1 where a=? ' ;
+execute stmt1 using @arg00 ;
+a
+set @arg00=4 ;
+select a FROM t1 where a=sqrt(@arg00) ;
+a
+2
+prepare stmt1 from ' select a FROM t1 where a=sqrt(?) ' ;
+execute stmt1 using @arg00 ;
+a
+2
+set @arg00=NULL ;
+select a FROM t1 where a=sqrt(@arg00) ;
+a
+prepare stmt1 from ' select a FROM t1 where a=sqrt(?) ' ;
+execute stmt1 using @arg00 ;
+a
+set @arg00=2 ;
+set @arg01=3 ;
+select a FROM t1 where a in (@arg00,@arg01) order by a;
+a
+2
+3
+prepare stmt1 from ' select a FROM t1 where a in (?,?) order by a ';
+execute stmt1 using @arg00, @arg01;
+a
+2
+3
+set @arg00= 'one' ;
+set @arg01= 'two' ;
+set @arg02= 'five' ;
+prepare stmt1 from ' select b FROM t1 where b in (?,?,?) order by b ' ;
+execute stmt1 using @arg00, @arg01, @arg02 ;
+b
+one
+two
+prepare stmt1 from ' select b FROM t1 where b like ? ';
+set @arg00='two' ;
+execute stmt1 using @arg00 ;
+b
+two
+set @arg00='tw%' ;
+execute stmt1 using @arg00 ;
+b
+two
+set @arg00='%wo' ;
+execute stmt1 using @arg00 ;
+b
+two
+set @arg00=null ;
+insert into t9 set c1= 0, c5 = NULL ;
+select c5 from t9 where c5 > NULL ;
+c5
+prepare stmt1 from ' select c5 from t9 where c5 > ? ';
+execute stmt1 using @arg00 ;
+c5
+select c5 from t9 where c5 < NULL ;
+c5
+prepare stmt1 from ' select c5 from t9 where c5 < ? ';
+execute stmt1 using @arg00 ;
+c5
+select c5 from t9 where c5 = NULL ;
+c5
+prepare stmt1 from ' select c5 from t9 where c5 = ? ';
+execute stmt1 using @arg00 ;
+c5
+select c5 from t9 where c5 <=> NULL ;
+c5
+NULL
+prepare stmt1 from ' select c5 from t9 where c5 <=> ? ';
+execute stmt1 using @arg00 ;
+c5
+NULL
+delete from t9 where c1= 0 ;
+set @arg00='>' ;
+select a FROM t1 where a @arg00 1 ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 1' at line 1
+prepare stmt1 from ' select a FROM t1 where a ? 1 ' ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? 1' at line 1
+set @arg00=1 ;
+select a,b FROM t1 where a is not NULL
+AND b is not NULL group by a - @arg00 ;
+a b
+1 one
+2 two
+3 three
+4 four
+prepare stmt1 from ' select a,b FROM t1 where a is not NULL
+AND b is not NULL group by a - ? ' ;
+execute stmt1 using @arg00 ;
+a b
+1 one
+2 two
+3 three
+4 four
+set @arg00='two' ;
+select a,b FROM t1 where a is not NULL
+AND b is not NULL having b <> @arg00 order by a ;
+a b
+1 one
+3 three
+4 four
+prepare stmt1 from ' select a,b FROM t1 where a is not NULL
+AND b is not NULL having b <> ? order by a ' ;
+execute stmt1 using @arg00 ;
+a b
+1 one
+3 three
+4 four
+set @arg00=1 ;
+select a,b FROM t1 where a is not NULL
+AND b is not NULL order by a - @arg00 ;
+a b
+1 one
+2 two
+3 three
+4 four
+prepare stmt1 from ' select a,b FROM t1 where a is not NULL
+AND b is not NULL order by a - ? ' ;
+execute stmt1 using @arg00 ;
+a b
+1 one
+2 two
+3 three
+4 four
+set @arg00=2 ;
+select a,b from t1 order by 2 ;
+a b
+4 four
+1 one
+3 three
+2 two
+prepare stmt1 from ' select a,b from t1
+order by ? ';
+execute stmt1 using @arg00;
+a b
+4 four
+1 one
+3 three
+2 two
+set @arg00=1 ;
+execute stmt1 using @arg00;
+a b
+1 one
+2 two
+3 three
+4 four
+set @arg00=0 ;
+execute stmt1 using @arg00;
+ERROR 42S22: Unknown column '?' in 'order clause'
+set @arg00=1;
+prepare stmt1 from ' select a,b from t1 order by a
+limit 1 ';
+execute stmt1 ;
+a b
+1 one
+prepare stmt1 from ' select a,b from t1 order by a limit ? ';
+execute stmt1 using @arg00;
+a b
+1 one
+set @arg00='b' ;
+set @arg01=0 ;
+set @arg02=2 ;
+set @arg03=2 ;
+select sum(a), @arg00 from t1 where a > @arg01
+and b is not null group by substr(b,@arg02)
+having sum(a) <> @arg03 ;
+sum(a) @arg00
+3 b
+1 b
+4 b
+prepare stmt1 from ' select sum(a), ? from t1 where a > ?
+and b is not null group by substr(b,?)
+having sum(a) <> ? ';
+execute stmt1 using @arg00, @arg01, @arg02, @arg03;
+sum(a) ?
+3 b
+1 b
+4 b
+test_sequence
+------ join tests ------
+select first.a as a1, second.a as a2
+from t1 first, t1 second
+where first.a = second.a order by a1 ;
+a1 a2
+1 1
+2 2
+3 3
+4 4
+prepare stmt1 from ' select first.a as a1, second.a as a2
+ from t1 first, t1 second
+ where first.a = second.a order by a1 ';
+execute stmt1 ;
+a1 a2
+1 1
+2 2
+3 3
+4 4
+set @arg00='ABC';
+set @arg01='two';
+set @arg02='one';
+select first.a, @arg00, second.a FROM t1 first, t1 second
+where @arg01 = first.b or first.a = second.a or second.b = @arg02
+order by second.a, first.a;
+a @arg00 a
+1 ABC 1
+2 ABC 1
+3 ABC 1
+4 ABC 1
+2 ABC 2
+2 ABC 3
+3 ABC 3
+2 ABC 4
+4 ABC 4
+prepare stmt1 from ' select first.a, ?, second.a FROM t1 first, t1 second
+ where ? = first.b or first.a = second.a or second.b = ?
+ order by second.a, first.a';
+execute stmt1 using @arg00, @arg01, @arg02;
+a ? a
+1 ABC 1
+2 ABC 1
+3 ABC 1
+4 ABC 1
+2 ABC 2
+2 ABC 3
+3 ABC 3
+2 ABC 4
+4 ABC 4
+drop table if exists t2 ;
+create table t2 as select * from t1 ;
+set @query1= 'SELECT * FROM t2 join t1 on (t1.a=t2.a) order by t2.a ' ;
+set @query2= 'SELECT * FROM t2 natural join t1 order by t2.a ' ;
+set @query3= 'SELECT * FROM t2 join t1 using(a) order by t2.a ' ;
+set @query4= 'SELECT * FROM t2 left join t1 on(t1.a=t2.a) order by t2.a ' ;
+set @query5= 'SELECT * FROM t2 natural left join t1 order by t2.a ' ;
+set @query6= 'SELECT * FROM t2 left join t1 using(a) order by t2.a ' ;
+set @query7= 'SELECT * FROM t2 right join t1 on(t1.a=t2.a) order by t2.a ' ;
+set @query8= 'SELECT * FROM t2 natural right join t1 order by t2.a ' ;
+set @query9= 'SELECT * FROM t2 right join t1 using(a) order by t2.a ' ;
+the join statement is:
+SELECT * FROM t2 right join t1 using(a) order by t2.a
+prepare stmt1 from @query9 ;
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+the join statement is:
+SELECT * FROM t2 natural right join t1 order by t2.a
+prepare stmt1 from @query8 ;
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+the join statement is:
+SELECT * FROM t2 right join t1 on(t1.a=t2.a) order by t2.a
+prepare stmt1 from @query7 ;
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+the join statement is:
+SELECT * FROM t2 left join t1 using(a) order by t2.a
+prepare stmt1 from @query6 ;
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+the join statement is:
+SELECT * FROM t2 natural left join t1 order by t2.a
+prepare stmt1 from @query5 ;
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+the join statement is:
+SELECT * FROM t2 left join t1 on(t1.a=t2.a) order by t2.a
+prepare stmt1 from @query4 ;
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+the join statement is:
+SELECT * FROM t2 join t1 using(a) order by t2.a
+prepare stmt1 from @query3 ;
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+the join statement is:
+SELECT * FROM t2 natural join t1 order by t2.a
+prepare stmt1 from @query2 ;
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+the join statement is:
+SELECT * FROM t2 join t1 on (t1.a=t2.a) order by t2.a
+prepare stmt1 from @query1 ;
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+drop table t2 ;
+test_sequence
+------ subquery tests ------
+prepare stmt1 from ' select a, b FROM t1 outer_table where
+ a = (select a from t1 where b = ''two'') ';
+execute stmt1 ;
+a b
+2 two
+set @arg00='two' ;
+select a, b FROM t1 outer_table where
+a = (select a from t1 where b = 'two' ) and b=@arg00 ;
+a b
+2 two
+prepare stmt1 from ' select a, b FROM t1 outer_table where
+ a = (select a from t1 where b = ''two'') and b=? ';
+execute stmt1 using @arg00;
+a b
+2 two
+set @arg00='two' ;
+select a, b FROM t1 outer_table where
+a = (select a from t1 where b = @arg00 ) and b='two' ;
+a b
+2 two
+prepare stmt1 from ' select a, b FROM t1 outer_table where
+ a = (select a from t1 where b = ? ) and b=''two'' ' ;
+execute stmt1 using @arg00;
+a b
+2 two
+set @arg00=3 ;
+set @arg01='three' ;
+select a,b FROM t1 where (a,b) in (select 3, 'three');
+a b
+3 three
+select a FROM t1 where (a,b) in (select @arg00,@arg01);
+a
+3
+prepare stmt1 from ' select a FROM t1 where (a,b) in (select ?, ?) ';
+execute stmt1 using @arg00, @arg01;
+a
+3
+set @arg00=1 ;
+set @arg01='two' ;
+set @arg02=2 ;
+set @arg03='two' ;
+select a, @arg00, b FROM t1 outer_table where
+b=@arg01 and a = (select @arg02 from t1 where b = @arg03 ) ;
+a @arg00 b
+2 1 two
+prepare stmt1 from ' select a, ?, b FROM t1 outer_table where
+ b=? and a = (select ? from t1 where b = ? ) ' ;
+execute stmt1 using @arg00, @arg01, @arg02, @arg03 ;
+a ? b
+2 1 two
+prepare stmt1 from 'select c4 FROM t9 where
+ c13 = (select MAX(b) from t1 where a = ?) and c22 = ? ' ;
+execute stmt1 using @arg01, @arg02;
+c4
+prepare stmt1 from ' select a, b FROM t1 outer_table where
+ a = (select a from t1 where b = outer_table.b ) order by a ';
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+prepare stmt1 from ' SELECT a as ccc from t1 outr where a+1=
+ (SELECT 1+outr.a from t1 where outr.a+1=a+1 and a=1) ';
+execute stmt1 ;
+ccc
+1
+deallocate prepare stmt1 ;
+prepare stmt1 from ' SELECT a as ccc from t1 outr where a+1=
+ (SELECT 1+outr.a from t1 where outr.a+1=a+1 and a=1) ';
+execute stmt1 ;
+ccc
+1
+deallocate prepare stmt1 ;
+prepare stmt1 from ' SELECT a as ccc from t1 outr where a+1=
+ (SELECT 1+outr.a from t1 where outr.a+1=a+1 and a=1) ';
+execute stmt1 ;
+ccc
+1
+deallocate prepare stmt1 ;
+set @arg00='two' ;
+select a, b FROM t1 outer_table where
+a = (select a from t1 where b = outer_table.b ) and b=@arg00 ;
+a b
+2 two
+prepare stmt1 from ' select a, b FROM t1 outer_table where
+ a = (select a from t1 where b = outer_table.b) and b=? ';
+execute stmt1 using @arg00;
+a b
+2 two
+set @arg00=2 ;
+select a, b FROM t1 outer_table where
+a = (select a from t1 where a = @arg00 and b = outer_table.b) and b='two' ;
+a b
+2 two
+prepare stmt1 from ' select a, b FROM t1 outer_table where
+ a = (select a from t1 where a = ? and b = outer_table.b) and b=''two'' ' ;
+execute stmt1 using @arg00;
+a b
+2 two
+set @arg00=2 ;
+select a, b FROM t1 outer_table where
+a = (select a from t1 where outer_table.a = @arg00 and a=2) and b='two' ;
+a b
+2 two
+prepare stmt1 from ' select a, b FROM t1 outer_table where
+ a = (select a from t1 where outer_table.a = ? and a=2) and b=''two'' ' ;
+execute stmt1 using @arg00;
+a b
+2 two
+set @arg00=1 ;
+set @arg01='two' ;
+set @arg02=2 ;
+set @arg03='two' ;
+select a, @arg00, b FROM t1 outer_table where
+b=@arg01 and a = (select @arg02 from t1 where outer_table.b = @arg03
+and outer_table.a=a ) ;
+a @arg00 b
+2 1 two
+prepare stmt1 from ' select a, ?, b FROM t1 outer_table where
+ b=? and a = (select ? from t1 where outer_table.b = ?
+ and outer_table.a=a ) ' ;
+execute stmt1 using @arg00, @arg01, @arg02, @arg03 ;
+a ? b
+2 1 two
+set @arg00=1 ;
+set @arg01=0 ;
+select a, @arg00
+from ( select a - @arg00 as a from t1 where a=@arg00 ) as t2
+where a=@arg01;
+a @arg00
+0 1
+prepare stmt1 from ' select a, ?
+ from ( select a - ? as a from t1 where a=? ) as t2
+ where a=? ';
+execute stmt1 using @arg00, @arg00, @arg00, @arg01 ;
+a ?
+0 1
+drop table if exists t2 ;
+create table t2 as select * from t1;
+prepare stmt1 from ' select a in (select a from t2) from t1 ' ;
+execute stmt1 ;
+a in (select a from t2)
+1
+1
+1
+1
+drop table if exists t5, t6, t7 ;
+create table t5 (a int , b int) ;
+create table t6 like t5 ;
+create table t7 like t5 ;
+insert into t5 values (0, 100), (1, 2), (1, 3), (2, 2), (2, 7),
+(2, -1), (3, 10) ;
+insert into t6 values (0, 0), (1, 1), (2, 1), (3, 1), (4, 1) ;
+insert into t7 values (3, 3), (2, 2), (1, 1) ;
+prepare stmt1 from ' select a, (select count(distinct t5.b) as sum from t5, t6
+ where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b
+ group by t5.a order by sum limit 1) from t7 ' ;
+execute stmt1 ;
+a (select count(distinct t5.b) as sum from t5, t6
+ where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b
+ group by t5.a order by sum limit 1)
+3 1
+2 2
+1 2
+execute stmt1 ;
+a (select count(distinct t5.b) as sum from t5, t6
+ where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b
+ group by t5.a order by sum limit 1)
+3 1
+2 2
+1 2
+execute stmt1 ;
+a (select count(distinct t5.b) as sum from t5, t6
+ where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b
+ group by t5.a order by sum limit 1)
+3 1
+2 2
+1 2
+drop table t5, t6, t7 ;
+drop table if exists t2 ;
+create table t2 as select * from t9;
+set @stmt= ' SELECT
+ (SELECT SUM(c1 + c12 + 0.0) FROM t2
+ where (t9.c2 - 0e-3) = t2.c2
+ GROUP BY t9.c15 LIMIT 1) as scalar_s,
+ exists (select 1.0e+0 from t2
+ where t2.c3 * 9.0000000000 = t9.c4) as exists_s,
+ c5 * 4 in (select c6 + 0.3e+1 from t2) as in_s,
+ (c7 - 4, c8 - 4) in (select c9 + 4.0, c10 + 40e-1 from t2) as in_row_s
+FROM t9,
+(select c25 x, c32 y from t2) tt WHERE x = c25 ' ;
+prepare stmt1 from @stmt ;
+execute stmt1 ;
+execute stmt1 ;
+set @stmt= concat('explain ',@stmt);
+prepare stmt1 from @stmt ;
+execute stmt1 ;
+execute stmt1 ;
+set @stmt= ' SELECT
+ (SELECT SUM(c1+c12+?) FROM t2 where (t9.c2-?)=t2.c2
+ GROUP BY t9.c15 LIMIT 1) as scalar_s,
+ exists (select ? from t2
+ where t2.c3*?=t9.c4) as exists_s,
+ c5*? in (select c6+? from t2) as in_s,
+ (c7-?, c8-?) in (select c9+?, c10+? from t2) as in_row_s
+FROM t9,
+(select c25 x, c32 y from t2) tt WHERE x =c25 ' ;
+set @arg00= 0.0 ;
+set @arg01= 0e-3 ;
+set @arg02= 1.0e+0 ;
+set @arg03= 9.0000000000 ;
+set @arg04= 4 ;
+set @arg05= 0.3e+1 ;
+set @arg06= 4 ;
+set @arg07= 4 ;
+set @arg08= 4.0 ;
+set @arg09= 40e-1 ;
+prepare stmt1 from @stmt ;
+execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06,
+@arg07, @arg08, @arg09 ;
+execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06,
+@arg07, @arg08, @arg09 ;
+set @stmt= concat('explain ',@stmt);
+prepare stmt1 from @stmt ;
+execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06,
+@arg07, @arg08, @arg09 ;
+execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06,
+@arg07, @arg08, @arg09 ;
+drop table t2 ;
+select 1 < (select a from t1) ;
+ERROR 21000: Subquery returns more than 1 row
+prepare stmt1 from ' select 1 < (select a from t1) ' ;
+execute stmt1 ;
+ERROR 21000: Subquery returns more than 1 row
+select 1 as my_col ;
+my_col
+1
+test_sequence
+------ union tests ------
+prepare stmt1 from ' select a FROM t1 where a=1
+ union distinct
+ select a FROM t1 where a=1 ';
+execute stmt1 ;
+a
+1
+execute stmt1 ;
+a
+1
+prepare stmt1 from ' select a FROM t1 where a=1
+ union all
+ select a FROM t1 where a=1 ';
+execute stmt1 ;
+a
+1
+1
+prepare stmt1 from ' SELECT 1, 2 union SELECT 1 ' ;
+ERROR 21000: The used SELECT statements have a different number of columns
+prepare stmt1 from ' SELECT 1 union SELECT 1, 2 ' ;
+ERROR 21000: The used SELECT statements have a different number of columns
+prepare stmt1 from ' SELECT * from t1 union SELECT 1 ' ;
+ERROR 21000: The used SELECT statements have a different number of columns
+prepare stmt1 from ' SELECT 1 union SELECT * from t1 ' ;
+ERROR 21000: The used SELECT statements have a different number of columns
+set @arg00=1 ;
+select @arg00 FROM t1 where a=1
+union distinct
+select 1 FROM t1 where a=1;
+@arg00
+1
+prepare stmt1 from ' select ? FROM t1 where a=1
+ union distinct
+ select 1 FROM t1 where a=1 ' ;
+execute stmt1 using @arg00;
+?
+1
+set @arg00=1 ;
+select 1 FROM t1 where a=1
+union distinct
+select @arg00 FROM t1 where a=1;
+1
+1
+prepare stmt1 from ' select 1 FROM t1 where a=1
+ union distinct
+ select ? FROM t1 where a=1 ' ;
+execute stmt1 using @arg00;
+1
+1
+set @arg00='a' ;
+select @arg00 FROM t1 where a=1
+union distinct
+select @arg00 FROM t1 where a=1;
+@arg00
+a
+prepare stmt1 from ' select ? FROM t1 where a=1
+ union distinct
+ select ? FROM t1 where a=1 ';
+execute stmt1 using @arg00, @arg00;
+?
+a
+prepare stmt1 from ' select ?
+ union distinct
+ select ? ';
+execute stmt1 using @arg00, @arg00;
+?
+a
+set @arg00='a' ;
+set @arg01=1 ;
+set @arg02='a' ;
+set @arg03=2 ;
+select @arg00 FROM t1 where a=@arg01
+union distinct
+select @arg02 FROM t1 where a=@arg03;
+@arg00
+a
+prepare stmt1 from ' select ? FROM t1 where a=?
+ union distinct
+ select ? FROM t1 where a=? ' ;
+execute stmt1 using @arg00, @arg01, @arg02, @arg03;
+?
+a
+set @arg00=1 ;
+prepare stmt1 from ' select sum(a) + 200, ? from t1
+union distinct
+select sum(a) + 200, 1 from t1
+group by b ' ;
+execute stmt1 using @arg00;
+sum(a) + 200 ?
+210 1
+204 1
+201 1
+203 1
+202 1
+set @Oporto='Oporto' ;
+set @Lisboa='Lisboa' ;
+set @0=0 ;
+set @1=1 ;
+set @2=2 ;
+set @3=3 ;
+set @4=4 ;
+select @Oporto,@Lisboa,@0,@1,@2,@3,@4 ;
+@Oporto @Lisboa @0 @1 @2 @3 @4
+Oporto Lisboa 0 1 2 3 4
+select sum(a) + 200 as the_sum, @Oporto as the_town from t1
+group by b
+union distinct
+select sum(a) + 200, @Lisboa from t1
+group by b ;
+the_sum the_town
+204 Oporto
+201 Oporto
+203 Oporto
+202 Oporto
+204 Lisboa
+201 Lisboa
+203 Lisboa
+202 Lisboa
+prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1
+ group by b
+ union distinct
+ select sum(a) + 200, ? from t1
+ group by b ' ;
+execute stmt1 using @Oporto, @Lisboa;
+the_sum the_town
+204 Oporto
+201 Oporto
+203 Oporto
+202 Oporto
+204 Lisboa
+201 Lisboa
+203 Lisboa
+202 Lisboa
+select sum(a) + 200 as the_sum, @Oporto as the_town from t1
+where a > @1
+group by b
+union distinct
+select sum(a) + 200, @Lisboa from t1
+where a > @2
+group by b ;
+the_sum the_town
+204 Oporto
+203 Oporto
+202 Oporto
+204 Lisboa
+203 Lisboa
+prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1
+ where a > ?
+ group by b
+ union distinct
+ select sum(a) + 200, ? from t1
+ where a > ?
+ group by b ' ;
+execute stmt1 using @Oporto, @1, @Lisboa, @2;
+the_sum the_town
+204 Oporto
+203 Oporto
+202 Oporto
+204 Lisboa
+203 Lisboa
+select sum(a) + 200 as the_sum, @Oporto as the_town from t1
+where a > @1
+group by b
+having avg(a) > @2
+union distinct
+select sum(a) + 200, @Lisboa from t1
+where a > @2
+group by b
+having avg(a) > @3;
+the_sum the_town
+204 Oporto
+203 Oporto
+204 Lisboa
+prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1
+ where a > ?
+ group by b
+ having avg(a) > ?
+ union distinct
+ select sum(a) + 200, ? from t1
+ where a > ?
+ group by b
+ having avg(a) > ? ';
+execute stmt1 using @Oporto, @1, @2, @Lisboa, @2, @3;
+the_sum the_town
+204 Oporto
+203 Oporto
+204 Lisboa
+test_sequence
+------ explain select tests ------
+prepare stmt1 from ' explain select * from t9 ' ;
+execute stmt1;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def id 8 3 1 N 32929 0 63
+def select_type 253 19 6 N 1 31 8
+def table 253 64 2 Y 0 31 8
+def type 253 10 3 Y 0 31 8
+def possible_keys 253 4096 0 Y 0 31 8
+def key 253 64 0 Y 0 31 8
+def key_len 253 1365 0 Y 0 31 8
+def ref 253 1024 0 Y 0 31 8
+def rows 8 10 1 Y 32928 0 63
+def Extra 253 255 0 N 1 31 8
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t9 ALL NULL NULL NULL NULL 2
+drop table if exists t2 ;
+create table t2 (s varchar(25), fulltext(s)) TRANSACTIONAL= 0
+ENGINE = 'MARIA' ;
+insert into t2 values ('Gravedigger'), ('Greed'),('Hollow Dogs') ;
+commit ;
+prepare stmt1 from ' select s from t2 where match (s) against (?) ' ;
+set @arg00='Dogs' ;
+execute stmt1 using @arg00 ;
+s
+Hollow Dogs
+prepare stmt1 from ' SELECT s FROM t2
+where match (s) against (concat(?,''digger'')) ';
+set @arg00='Grave' ;
+execute stmt1 using @arg00 ;
+s
+Gravedigger
+drop table t2 ;
+test_sequence
+------ delete tests ------
+delete from t1 ;
+insert into t1 values (1,'one');
+insert into t1 values (2,'two');
+insert into t1 values (3,'three');
+insert into t1 values (4,'four');
+commit ;
+delete from t9 ;
+insert into t9
+set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1,
+c10= 1, c11= 1, c12 = 1,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=true, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday';
+insert into t9
+set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9,
+c10= 9, c11= 9, c12 = 9,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=false, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday';
+commit ;
+prepare stmt1 from 'delete from t1 where a=2' ;
+execute stmt1;
+select a,b from t1 where a=2;
+a b
+execute stmt1;
+insert into t1 values(0,NULL);
+set @arg00=NULL;
+prepare stmt1 from 'delete from t1 where b=?' ;
+execute stmt1 using @arg00;
+select a,b from t1 where b is NULL ;
+a b
+0 NULL
+set @arg00='one';
+execute stmt1 using @arg00;
+select a,b from t1 where b=@arg00;
+a b
+prepare stmt1 from 'truncate table t1' ;
+test_sequence
+------ update tests ------
+delete from t1 ;
+insert into t1 values (1,'one');
+insert into t1 values (2,'two');
+insert into t1 values (3,'three');
+insert into t1 values (4,'four');
+commit ;
+delete from t9 ;
+insert into t9
+set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1,
+c10= 1, c11= 1, c12 = 1,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=true, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday';
+insert into t9
+set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9,
+c10= 9, c11= 9, c12 = 9,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=false, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday';
+commit ;
+prepare stmt1 from 'update t1 set b=''a=two'' where a=2' ;
+execute stmt1;
+select a,b from t1 where a=2;
+a b
+2 a=two
+execute stmt1;
+select a,b from t1 where a=2;
+a b
+2 a=two
+set @arg00=NULL;
+prepare stmt1 from 'update t1 set b=? where a=2' ;
+execute stmt1 using @arg00;
+select a,b from t1 where a=2;
+a b
+2 NULL
+set @arg00='two';
+execute stmt1 using @arg00;
+select a,b from t1 where a=2;
+a b
+2 two
+set @arg00=2;
+prepare stmt1 from 'update t1 set b=NULL where a=?' ;
+execute stmt1 using @arg00;
+select a,b from t1 where a=@arg00;
+a b
+2 NULL
+update t1 set b='two' where a=@arg00;
+set @arg00=2000;
+execute stmt1 using @arg00;
+select a,b from t1 where a=@arg00;
+a b
+set @arg00=2;
+set @arg01=22;
+prepare stmt1 from 'update t1 set a=? where a=?' ;
+execute stmt1 using @arg00, @arg00;
+select a,b from t1 where a=@arg00;
+a b
+2 two
+execute stmt1 using @arg01, @arg00;
+select a,b from t1 where a=@arg01;
+a b
+22 two
+execute stmt1 using @arg00, @arg01;
+select a,b from t1 where a=@arg00;
+a b
+2 two
+set @arg00=NULL;
+set @arg01=2;
+execute stmt1 using @arg00, @arg01;
+Warnings:
+Warning 1048 Column 'a' cannot be null
+select a,b from t1 order by a;
+a b
+0 two
+1 one
+3 three
+4 four
+set @arg00=0;
+execute stmt1 using @arg01, @arg00;
+select a,b from t1 order by a;
+a b
+1 one
+2 two
+3 three
+4 four
+set @arg00=23;
+set @arg01='two';
+set @arg02=2;
+set @arg03='two';
+set @arg04=2;
+drop table if exists t2;
+create table t2 as select a,b from t1 ;
+prepare stmt1 from 'update t1 set a=? where b=?
+ and a in (select ? from t2
+ where b = ? or a = ?)';
+execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04 ;
+affected rows: 1
+info: Rows matched: 1 Changed: 1 Warnings: 0
+select a,b from t1 where a = @arg00 ;
+a b
+23 two
+prepare stmt1 from 'update t1 set a=? where b=?
+ and a not in (select ? from t2
+ where b = ? or a = ?)';
+execute stmt1 using @arg04, @arg01, @arg02, @arg03, @arg00 ;
+affected rows: 1
+info: Rows matched: 1 Changed: 1 Warnings: 0
+select a,b from t1 order by a ;
+a b
+1 one
+2 two
+3 three
+4 four
+drop table t2 ;
+create table t2
+(
+a int, b varchar(30),
+primary key(a)
+) engine = 'MARIA' ;
+insert into t2(a,b) select a, b from t1 ;
+prepare stmt1 from 'update t1 set a=? where b=?
+ and a in (select ? from t2
+ where b = ? or a = ?)';
+execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04 ;
+affected rows: 1
+info: Rows matched: 1 Changed: 1 Warnings: 0
+select a,b from t1 where a = @arg00 ;
+a b
+23 two
+prepare stmt1 from 'update t1 set a=? where b=?
+ and a not in (select ? from t2
+ where b = ? or a = ?)';
+execute stmt1 using @arg04, @arg01, @arg02, @arg03, @arg00 ;
+affected rows: 1
+info: Rows matched: 1 Changed: 1 Warnings: 0
+select a,b from t1 order by a ;
+a b
+1 one
+2 two
+3 three
+4 four
+drop table t2 ;
+set @arg00=1;
+prepare stmt1 from 'update t1 set b=''bla''
+where a=2
+limit 1';
+execute stmt1 ;
+select a,b from t1 where b = 'bla' ;
+a b
+2 bla
+prepare stmt1 from 'update t1 set b=''bla'' where a=2 limit ?';
+execute stmt1 using @arg00;
+test_sequence
+------ insert tests ------
+delete from t1 ;
+insert into t1 values (1,'one');
+insert into t1 values (2,'two');
+insert into t1 values (3,'three');
+insert into t1 values (4,'four');
+commit ;
+delete from t9 ;
+insert into t9
+set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1,
+c10= 1, c11= 1, c12 = 1,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=true, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday';
+insert into t9
+set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9,
+c10= 9, c11= 9, c12 = 9,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=false, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday';
+commit ;
+prepare stmt1 from 'insert into t1 values(5, ''five'' )';
+execute stmt1;
+select a,b from t1 where a = 5;
+a b
+5 five
+set @arg00='six' ;
+prepare stmt1 from 'insert into t1 values(6, ? )';
+execute stmt1 using @arg00;
+select a,b from t1 where b = @arg00;
+a b
+6 six
+execute stmt1 using @arg00;
+ERROR 23000: Duplicate entry '6' for key 'PRIMARY'
+set @arg00=NULL ;
+prepare stmt1 from 'insert into t1 values(0, ? )';
+execute stmt1 using @arg00;
+select a,b from t1 where b is NULL;
+a b
+0 NULL
+set @arg00=8 ;
+set @arg01='eight' ;
+prepare stmt1 from 'insert into t1 values(?, ? )';
+execute stmt1 using @arg00, @arg01 ;
+select a,b from t1 where b = @arg01;
+a b
+8 eight
+set @NULL= null ;
+set @arg00= 'abc' ;
+execute stmt1 using @NULL, @NULL ;
+ERROR 23000: Column 'a' cannot be null
+execute stmt1 using @NULL, @NULL ;
+ERROR 23000: Column 'a' cannot be null
+execute stmt1 using @NULL, @arg00 ;
+ERROR 23000: Column 'a' cannot be null
+execute stmt1 using @NULL, @arg00 ;
+ERROR 23000: Column 'a' cannot be null
+set @arg01= 10000 + 2 ;
+execute stmt1 using @arg01, @arg00 ;
+set @arg01= 10000 + 1 ;
+execute stmt1 using @arg01, @arg00 ;
+select * from t1 where a > 10000 order by a ;
+a b
+10001 abc
+10002 abc
+delete from t1 where a > 10000 ;
+set @arg01= 10000 + 2 ;
+execute stmt1 using @arg01, @NULL ;
+set @arg01= 10000 + 1 ;
+execute stmt1 using @arg01, @NULL ;
+select * from t1 where a > 10000 order by a ;
+a b
+10001 NULL
+10002 NULL
+delete from t1 where a > 10000 ;
+set @arg01= 10000 + 10 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 9 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 8 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 7 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 6 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 5 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 4 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 3 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 2 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 1 ;
+execute stmt1 using @arg01, @arg01 ;
+select * from t1 where a > 10000 order by a ;
+a b
+10001 10001
+10002 10002
+10003 10003
+10004 10004
+10005 10005
+10006 10006
+10007 10007
+10008 10008
+10009 10009
+10010 10010
+delete from t1 where a > 10000 ;
+set @arg00=81 ;
+set @arg01='8-1' ;
+set @arg02=82 ;
+set @arg03='8-2' ;
+prepare stmt1 from 'insert into t1 values(?,?),(?,?)';
+execute stmt1 using @arg00, @arg01, @arg02, @arg03 ;
+select a,b from t1 where a in (@arg00,@arg02) ;
+a b
+81 8-1
+82 8-2
+set @arg00=9 ;
+set @arg01='nine' ;
+prepare stmt1 from 'insert into t1 set a=?, b=? ';
+execute stmt1 using @arg00, @arg01 ;
+select a,b from t1 where a = @arg00 ;
+a b
+9 nine
+set @arg00=6 ;
+set @arg01=1 ;
+prepare stmt1 from 'insert into t1 set a=?, b=''sechs''
+ on duplicate key update a=a + ?, b=concat(b,''modified'') ';
+execute stmt1 using @arg00, @arg01;
+select * from t1 order by a;
+a b
+0 NULL
+1 one
+2 two
+3 three
+4 four
+5 five
+7 sixmodified
+8 eight
+9 nine
+81 8-1
+82 8-2
+set @arg00=81 ;
+set @arg01=1 ;
+execute stmt1 using @arg00, @arg01;
+ERROR 23000: Duplicate entry '82' for key 'PRIMARY'
+drop table if exists t2 ;
+create table t2 (id int auto_increment primary key)
+ENGINE= 'MARIA' ;
+prepare stmt1 from ' select last_insert_id() ' ;
+insert into t2 values (NULL) ;
+execute stmt1 ;
+last_insert_id()
+1
+insert into t2 values (NULL) ;
+execute stmt1 ;
+last_insert_id()
+2
+drop table t2 ;
+set @1000=1000 ;
+set @x1000_2="x1000_2" ;
+set @x1000_3="x1000_3" ;
+set @x1000="x1000" ;
+set @1100=1100 ;
+set @x1100="x1100" ;
+set @100=100 ;
+set @updated="updated" ;
+insert into t1 values(1000,'x1000_1') ;
+insert into t1 values(@1000,@x1000_2),(@1000,@x1000_3)
+on duplicate key update a = a + @100, b = concat(b,@updated) ;
+select a,b from t1 where a >= 1000 order by a ;
+a b
+1000 x1000_3
+1100 x1000_1updated
+delete from t1 where a >= 1000 ;
+insert into t1 values(1000,'x1000_1') ;
+prepare stmt1 from ' insert into t1 values(?,?),(?,?)
+ on duplicate key update a = a + ?, b = concat(b,?) ';
+execute stmt1 using @1000, @x1000_2, @1000, @x1000_3, @100, @updated ;
+select a,b from t1 where a >= 1000 order by a ;
+a b
+1000 x1000_3
+1100 x1000_1updated
+delete from t1 where a >= 1000 ;
+insert into t1 values(1000,'x1000_1') ;
+execute stmt1 using @1000, @x1000_2, @1100, @x1000_3, @100, @updated ;
+select a,b from t1 where a >= 1000 order by a ;
+a b
+1200 x1000_1updatedupdated
+delete from t1 where a >= 1000 ;
+prepare stmt1 from ' replace into t1 (a,b) select 100, ''hundred'' ';
+execute stmt1;
+execute stmt1;
+execute stmt1;
+test_sequence
+------ multi table tests ------
+delete from t1 ;
+delete from t9 ;
+insert into t1(a,b) values (1, 'one'), (2, 'two'), (3, 'three') ;
+insert into t9 (c1,c21)
+values (1, 'one'), (2, 'two'), (3, 'three') ;
+prepare stmt_delete from " delete t1, t9
+ from t1, t9 where t1.a=t9.c1 and t1.b='updated' ";
+prepare stmt_update from " update t1, t9
+ set t1.b='updated', t9.c21='updated'
+ where t1.a=t9.c1 and t1.a=? ";
+prepare stmt_select1 from " select a, b from t1 order by a" ;
+prepare stmt_select2 from " select c1, c21 from t9 order by c1" ;
+set @arg00= 1 ;
+execute stmt_update using @arg00 ;
+execute stmt_delete ;
+execute stmt_select1 ;
+a b
+2 two
+3 three
+execute stmt_select2 ;
+c1 c21
+2 two
+3 three
+set @arg00= @arg00 + 1 ;
+execute stmt_update using @arg00 ;
+execute stmt_delete ;
+execute stmt_select1 ;
+a b
+3 three
+execute stmt_select2 ;
+c1 c21
+3 three
+set @arg00= @arg00 + 1 ;
+execute stmt_update using @arg00 ;
+execute stmt_delete ;
+execute stmt_select1 ;
+a b
+execute stmt_select2 ;
+c1 c21
+set @arg00= @arg00 + 1 ;
+delete from t1 ;
+insert into t1 values (1,'one');
+insert into t1 values (2,'two');
+insert into t1 values (3,'three');
+insert into t1 values (4,'four');
+commit ;
+delete from t9 ;
+insert into t9
+set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1,
+c10= 1, c11= 1, c12 = 1,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=true, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday';
+insert into t9
+set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9,
+c10= 9, c11= 9, c12 = 9,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=false, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday';
+commit ;
+insert into t1 values(0,NULL) ;
+set @duplicate='duplicate ' ;
+set @1000=1000 ;
+set @5=5 ;
+select a,b from t1 where a < 5 order by a ;
+a b
+0 NULL
+1 one
+2 two
+3 three
+4 four
+insert into t1 select a + @1000, concat(@duplicate,b) from t1
+where a < @5 ;
+affected rows: 5
+info: Records: 5 Duplicates: 0 Warnings: 0
+select a,b from t1 where a >= 1000 order by a ;
+a b
+1000 NULL
+1001 duplicate one
+1002 duplicate two
+1003 duplicate three
+1004 duplicate four
+delete from t1 where a >= 1000 ;
+prepare stmt1 from ' insert into t1 select a + ?, concat(?,b) from t1
+where a < ? ' ;
+execute stmt1 using @1000, @duplicate, @5;
+affected rows: 5
+info: Records: 5 Duplicates: 0 Warnings: 0
+select a,b from t1 where a >= 1000 order by a ;
+a b
+1000 NULL
+1001 duplicate one
+1002 duplicate two
+1003 duplicate three
+1004 duplicate four
+delete from t1 where a >= 1000 ;
+set @1=1 ;
+set @2=2 ;
+set @100=100 ;
+set @float=1.00;
+set @five='five' ;
+drop table if exists t2;
+create table t2 like t1 ;
+insert into t2 (b,a)
+select @duplicate, sum(first.a) from t1 first, t1 second
+where first.a <> @5 and second.b = first.b
+and second.b <> @five
+group by second.b
+having sum(second.a) > @2
+union
+select b, a + @100 from t1
+where (a,b) in ( select sqrt(a+@1)+CAST(@float AS signed),b
+from t1);
+affected rows: 3
+info: Records: 3 Duplicates: 0 Warnings: 0
+select a,b from t2 order by a ;
+a b
+3 duplicate
+4 duplicate
+103 three
+delete from t2 ;
+prepare stmt1 from ' insert into t2 (b,a)
+select ?, sum(first.a)
+ from t1 first, t1 second
+ where first.a <> ? and second.b = first.b and second.b <> ?
+ group by second.b
+ having sum(second.a) > ?
+union
+select b, a + ? from t1
+ where (a,b) in ( select sqrt(a+?)+CAST(? AS signed),b
+ from t1 ) ' ;
+execute stmt1 using @duplicate, @5, @five, @2, @100, @1, @float ;
+affected rows: 3
+info: Records: 3 Duplicates: 0 Warnings: 0
+select a,b from t2 order by a ;
+a b
+3 duplicate
+4 duplicate
+103 three
+drop table t2;
+drop table if exists t5 ;
+set @arg01= 8;
+set @arg02= 8.0;
+set @arg03= 80.00000000000e-1;
+set @arg04= 'abc' ;
+set @arg05= CAST('abc' as binary) ;
+set @arg06= '1991-08-05' ;
+set @arg07= CAST('1991-08-05' as date);
+set @arg08= '1991-08-05 01:01:01' ;
+set @arg09= CAST('1991-08-05 01:01:01' as datetime) ;
+set @arg10= unix_timestamp('1991-01-01 01:01:01');
+set @arg11= YEAR('1991-01-01 01:01:01');
+set @arg12= 8 ;
+set @arg12= NULL ;
+set @arg13= 8.0 ;
+set @arg13= NULL ;
+set @arg14= 'abc';
+set @arg14= NULL ;
+set @arg15= CAST('abc' as binary) ;
+set @arg15= NULL ;
+create table t5 engine = MyISAM as select
+8 as const01, @arg01 as param01,
+8.0 as const02, @arg02 as param02,
+80.00000000000e-1 as const03, @arg03 as param03,
+'abc' as const04, @arg04 as param04,
+CAST('abc' as binary) as const05, @arg05 as param05,
+'1991-08-05' as const06, @arg06 as param06,
+CAST('1991-08-05' as date) as const07, @arg07 as param07,
+'1991-08-05 01:01:01' as const08, @arg08 as param08,
+CAST('1991-08-05 01:01:01' as datetime) as const09, @arg09 as param09,
+unix_timestamp('1991-01-01 01:01:01') as const10, @arg10 as param10,
+YEAR('1991-01-01 01:01:01') as const11, @arg11 as param11,
+NULL as const12, @arg12 as param12,
+@arg13 as param13,
+@arg14 as param14,
+@arg15 as param15;
+show create table t5 ;
+Table Create Table
+t5 CREATE TABLE `t5` (
+ `const01` int(1) NOT NULL DEFAULT '0',
+ `param01` bigint(20) DEFAULT NULL,
+ `const02` decimal(2,1) NOT NULL DEFAULT '0.0',
+ `param02` decimal(65,30) DEFAULT NULL,
+ `const03` double NOT NULL DEFAULT '0',
+ `param03` double DEFAULT NULL,
+ `const04` varchar(3) NOT NULL DEFAULT '',
+ `param04` longtext,
+ `const05` varbinary(3) NOT NULL DEFAULT '',
+ `param05` longblob,
+ `const06` varchar(10) NOT NULL DEFAULT '',
+ `param06` longtext,
+ `const07` date DEFAULT NULL,
+ `param07` longblob,
+ `const08` varchar(19) NOT NULL DEFAULT '',
+ `param08` longtext,
+ `const09` datetime DEFAULT NULL,
+ `param09` longblob,
+ `const10` int(10) NOT NULL DEFAULT '0',
+ `param10` bigint(20) DEFAULT NULL,
+ `const11` int(4) DEFAULT NULL,
+ `param11` bigint(20) DEFAULT NULL,
+ `const12` binary(0) DEFAULT NULL,
+ `param12` bigint(20) DEFAULT NULL,
+ `param13` decimal(65,30) DEFAULT NULL,
+ `param14` longtext,
+ `param15` longblob
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+select * from t5 ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def test t5 t5 const01 const01 3 1 1 N 32769 0 63
+def test t5 t5 param01 param01 8 20 1 Y 32768 0 63
+def test t5 t5 const02 const02 246 4 3 N 1 1 63
+def test t5 t5 param02 param02 246 67 32 Y 0 30 63
+def test t5 t5 const03 const03 5 17 1 N 32769 31 63
+def test t5 t5 param03 param03 5 23 1 Y 32768 31 63
+def test t5 t5 const04 const04 253 3 3 N 1 0 8
+def test t5 t5 param04 param04 252 4294967295 3 Y 16 0 8
+def test t5 t5 const05 const05 253 3 3 N 129 0 63
+def test t5 t5 param05 param05 252 4294967295 3 Y 144 0 63
+def test t5 t5 const06 const06 253 10 10 N 1 0 8
+def test t5 t5 param06 param06 252 4294967295 10 Y 16 0 8
+def test t5 t5 const07 const07 10 10 10 Y 128 0 63
+def test t5 t5 param07 param07 252 4294967295 10 Y 144 0 63
+def test t5 t5 const08 const08 253 19 19 N 1 0 8
+def test t5 t5 param08 param08 252 4294967295 19 Y 16 0 8
+def test t5 t5 const09 const09 12 19 19 Y 128 0 63
+def test t5 t5 param09 param09 252 4294967295 19 Y 144 0 63
+def test t5 t5 const10 const10 3 10 9 N 32769 0 63
+def test t5 t5 param10 param10 8 20 9 Y 32768 0 63
+def test t5 t5 const11 const11 3 4 4 Y 32768 0 63
+def test t5 t5 param11 param11 8 20 4 Y 32768 0 63
+def test t5 t5 const12 const12 254 0 0 Y 128 0 63
+def test t5 t5 param12 param12 8 20 0 Y 32768 0 63
+def test t5 t5 param13 param13 246 67 0 Y 0 30 63
+def test t5 t5 param14 param14 252 4294967295 0 Y 16 0 8
+def test t5 t5 param15 param15 252 4294967295 0 Y 144 0 63
+const01 8
+param01 8
+const02 8.0
+param02 8.000000000000000000000000000000
+const03 8
+param03 8
+const04 abc
+param04 abc
+const05 abc
+param05 abc
+const06 1991-08-05
+param06 1991-08-05
+const07 1991-08-05
+param07 1991-08-05
+const08 1991-08-05 01:01:01
+param08 1991-08-05 01:01:01
+const09 1991-08-05 01:01:01
+param09 1991-08-05 01:01:01
+const10 662680861
+param10 662680861
+const11 1991
+param11 1991
+const12 NULL
+param12 NULL
+param13 NULL
+param14 NULL
+param15 NULL
+drop table t5 ;
+test_sequence
+------ data type conversion tests ------
+delete from t1 ;
+insert into t1 values (1,'one');
+insert into t1 values (2,'two');
+insert into t1 values (3,'three');
+insert into t1 values (4,'four');
+commit ;
+delete from t9 ;
+insert into t9
+set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1,
+c10= 1, c11= 1, c12 = 1,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=true, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday';
+insert into t9
+set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9,
+c10= 9, c11= 9, c12 = 9,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=false, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday';
+commit ;
+insert into t9 set c1= 0, c15= '1991-01-01 01:01:01' ;
+select * from t9 order by c1 ;
+c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32
+0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+9 9 9 9 9 9 9 9 9 9 9.0000 9.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 0 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext two tuesday
+test_sequence
+------ select @parameter:= column ------
+prepare full_info from "select @arg01, @arg02, @arg03, @arg04,
+ @arg05, @arg06, @arg07, @arg08,
+ @arg09, @arg10, @arg11, @arg12,
+ @arg13, @arg14, @arg15, @arg16,
+ @arg17, @arg18, @arg19, @arg20,
+ @arg21, @arg22, @arg23, @arg24,
+ @arg25, @arg26, @arg27, @arg28,
+ @arg29, @arg30, @arg31, @arg32" ;
+select @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4,
+@arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8,
+@arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12,
+@arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16,
+@arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20,
+@arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24,
+@arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28,
+@arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32
+from t9 where c1= 1 ;
+@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 8 20 1 Y 32896 0 63
+def @arg02 8 20 1 Y 32896 0 63
+def @arg03 8 20 1 Y 32896 0 63
+def @arg04 8 20 1 Y 32896 0 63
+def @arg05 8 20 1 Y 32896 0 63
+def @arg06 8 20 1 Y 32896 0 63
+def @arg07 5 23 1 Y 32896 31 63
+def @arg08 5 23 1 Y 32896 31 63
+def @arg09 5 23 1 Y 32896 31 63
+def @arg10 5 23 1 Y 32896 31 63
+def @arg11 246 83 6 Y 128 30 63
+def @arg12 246 83 6 Y 128 30 63
+def @arg13 251 16777216 10 Y 128 31 63
+def @arg14 251 16777216 19 Y 128 31 63
+def @arg15 251 16777216 19 Y 128 31 63
+def @arg16 251 16777216 8 Y 128 31 63
+def @arg17 8 20 4 Y 32928 0 63
+def @arg18 8 20 1 Y 32896 0 63
+def @arg19 8 20 1 Y 32896 0 63
+def @arg20 251 16777216 1 Y 0 31 8
+def @arg21 251 16777216 10 Y 0 31 8
+def @arg22 251 16777216 30 Y 0 31 8
+def @arg23 251 16777216 8 Y 128 31 63
+def @arg24 251 16777216 8 Y 0 31 8
+def @arg25 251 16777216 4 Y 128 31 63
+def @arg26 251 16777216 4 Y 0 31 8
+def @arg27 251 16777216 10 Y 128 31 63
+def @arg28 251 16777216 10 Y 0 31 8
+def @arg29 251 16777216 8 Y 128 31 63
+def @arg30 251 16777216 8 Y 0 31 8
+def @arg31 251 16777216 3 Y 0 31 8
+def @arg32 251 16777216 6 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+select @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4,
+@arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8,
+@arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12,
+@arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16,
+@arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20,
+@arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24,
+@arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28,
+@arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32
+from t9 where c1= 0 ;
+@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32
+0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 8 20 1 Y 32896 0 63
+def @arg02 8 20 0 Y 32896 0 63
+def @arg03 8 20 0 Y 32896 0 63
+def @arg04 8 20 0 Y 32896 0 63
+def @arg05 8 20 0 Y 32896 0 63
+def @arg06 8 20 0 Y 32896 0 63
+def @arg07 5 23 0 Y 32896 31 63
+def @arg08 5 23 0 Y 32896 31 63
+def @arg09 5 23 0 Y 32896 31 63
+def @arg10 5 23 0 Y 32896 31 63
+def @arg11 246 83 0 Y 128 30 63
+def @arg12 246 83 0 Y 128 30 63
+def @arg13 251 16777216 0 Y 128 31 63
+def @arg14 251 16777216 0 Y 128 31 63
+def @arg15 251 16777216 19 Y 128 31 63
+def @arg16 251 16777216 0 Y 128 31 63
+def @arg17 8 20 0 Y 32928 0 63
+def @arg18 8 20 0 Y 32896 0 63
+def @arg19 8 20 0 Y 32896 0 63
+def @arg20 251 16777216 0 Y 0 31 8
+def @arg21 251 16777216 0 Y 0 31 8
+def @arg22 251 16777216 0 Y 0 31 8
+def @arg23 251 16777216 0 Y 128 31 63
+def @arg24 251 16777216 0 Y 0 31 8
+def @arg25 251 16777216 0 Y 128 31 63
+def @arg26 251 16777216 0 Y 0 31 8
+def @arg27 251 16777216 0 Y 128 31 63
+def @arg28 251 16777216 0 Y 0 31 8
+def @arg29 251 16777216 0 Y 128 31 63
+def @arg30 251 16777216 0 Y 0 31 8
+def @arg31 251 16777216 0 Y 0 31 8
+def @arg32 251 16777216 0 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+prepare stmt1 from "select
+ @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4,
+ @arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8,
+ @arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12,
+ @arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16,
+ @arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20,
+ @arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24,
+ @arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28,
+ @arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32
+from t9 where c1= ?" ;
+set @my_key= 1 ;
+execute stmt1 using @my_key ;
+@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 8 20 1 Y 32896 0 63
+def @arg02 8 20 1 Y 32896 0 63
+def @arg03 8 20 1 Y 32896 0 63
+def @arg04 8 20 1 Y 32896 0 63
+def @arg05 8 20 1 Y 32896 0 63
+def @arg06 8 20 1 Y 32896 0 63
+def @arg07 5 23 1 Y 32896 31 63
+def @arg08 5 23 1 Y 32896 31 63
+def @arg09 5 23 1 Y 32896 31 63
+def @arg10 5 23 1 Y 32896 31 63
+def @arg11 246 83 6 Y 128 30 63
+def @arg12 246 83 6 Y 128 30 63
+def @arg13 251 16777216 10 Y 128 31 63
+def @arg14 251 16777216 19 Y 128 31 63
+def @arg15 251 16777216 19 Y 128 31 63
+def @arg16 251 16777216 8 Y 128 31 63
+def @arg17 8 20 4 Y 32928 0 63
+def @arg18 8 20 1 Y 32896 0 63
+def @arg19 8 20 1 Y 32896 0 63
+def @arg20 251 16777216 1 Y 0 31 8
+def @arg21 251 16777216 10 Y 0 31 8
+def @arg22 251 16777216 30 Y 0 31 8
+def @arg23 251 16777216 8 Y 128 31 63
+def @arg24 251 16777216 8 Y 0 31 8
+def @arg25 251 16777216 4 Y 128 31 63
+def @arg26 251 16777216 4 Y 0 31 8
+def @arg27 251 16777216 10 Y 128 31 63
+def @arg28 251 16777216 10 Y 0 31 8
+def @arg29 251 16777216 8 Y 128 31 63
+def @arg30 251 16777216 8 Y 0 31 8
+def @arg31 251 16777216 3 Y 0 31 8
+def @arg32 251 16777216 6 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+set @my_key= 0 ;
+execute stmt1 using @my_key ;
+@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32
+0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 8 20 1 Y 32896 0 63
+def @arg02 8 20 0 Y 32896 0 63
+def @arg03 8 20 0 Y 32896 0 63
+def @arg04 8 20 0 Y 32896 0 63
+def @arg05 8 20 0 Y 32896 0 63
+def @arg06 8 20 0 Y 32896 0 63
+def @arg07 5 23 0 Y 32896 31 63
+def @arg08 5 23 0 Y 32896 31 63
+def @arg09 5 23 0 Y 32896 31 63
+def @arg10 5 23 0 Y 32896 31 63
+def @arg11 246 83 0 Y 128 30 63
+def @arg12 246 83 0 Y 128 30 63
+def @arg13 251 16777216 0 Y 128 31 63
+def @arg14 251 16777216 0 Y 128 31 63
+def @arg15 251 16777216 19 Y 128 31 63
+def @arg16 251 16777216 0 Y 128 31 63
+def @arg17 8 20 0 Y 32928 0 63
+def @arg18 8 20 0 Y 32896 0 63
+def @arg19 8 20 0 Y 32896 0 63
+def @arg20 251 16777216 0 Y 0 31 8
+def @arg21 251 16777216 0 Y 0 31 8
+def @arg22 251 16777216 0 Y 0 31 8
+def @arg23 251 16777216 0 Y 128 31 63
+def @arg24 251 16777216 0 Y 0 31 8
+def @arg25 251 16777216 0 Y 128 31 63
+def @arg26 251 16777216 0 Y 0 31 8
+def @arg27 251 16777216 0 Y 128 31 63
+def @arg28 251 16777216 0 Y 0 31 8
+def @arg29 251 16777216 0 Y 128 31 63
+def @arg30 251 16777216 0 Y 0 31 8
+def @arg31 251 16777216 0 Y 0 31 8
+def @arg32 251 16777216 0 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+prepare stmt1 from "select ? := c1 from t9 where c1= 1" ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ':= c1 from t9 where c1= 1' at line 1
+test_sequence
+------ select column, .. into @parm,.. ------
+select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
+c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24,
+c25, c26, c27, c28, c29, c30, c31, c32
+into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08,
+@arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16,
+@arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24,
+@arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32
+from t9 where c1= 1 ;
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 8 20 1 Y 32896 0 63
+def @arg02 8 20 1 Y 32896 0 63
+def @arg03 8 20 1 Y 32896 0 63
+def @arg04 8 20 1 Y 32896 0 63
+def @arg05 8 20 1 Y 32896 0 63
+def @arg06 8 20 1 Y 32896 0 63
+def @arg07 5 23 1 Y 32896 31 63
+def @arg08 5 23 1 Y 32896 31 63
+def @arg09 5 23 1 Y 32896 31 63
+def @arg10 5 23 1 Y 32896 31 63
+def @arg11 246 83 6 Y 128 30 63
+def @arg12 246 83 6 Y 128 30 63
+def @arg13 251 16777216 10 Y 128 31 63
+def @arg14 251 16777216 19 Y 128 31 63
+def @arg15 251 16777216 19 Y 128 31 63
+def @arg16 251 16777216 8 Y 128 31 63
+def @arg17 8 20 4 Y 32928 0 63
+def @arg18 8 20 1 Y 32896 0 63
+def @arg19 8 20 1 Y 32896 0 63
+def @arg20 251 16777216 1 Y 0 31 8
+def @arg21 251 16777216 10 Y 0 31 8
+def @arg22 251 16777216 30 Y 0 31 8
+def @arg23 251 16777216 8 Y 128 31 63
+def @arg24 251 16777216 8 Y 0 31 8
+def @arg25 251 16777216 4 Y 128 31 63
+def @arg26 251 16777216 4 Y 0 31 8
+def @arg27 251 16777216 10 Y 128 31 63
+def @arg28 251 16777216 10 Y 0 31 8
+def @arg29 251 16777216 8 Y 128 31 63
+def @arg30 251 16777216 8 Y 0 31 8
+def @arg31 251 16777216 3 Y 0 31 8
+def @arg32 251 16777216 6 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
+c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24,
+c25, c26, c27, c28, c29, c30, c31, c32
+into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08,
+@arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16,
+@arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24,
+@arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32
+from t9 where c1= 0 ;
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 8 20 1 Y 32896 0 63
+def @arg02 8 20 0 Y 32896 0 63
+def @arg03 8 20 0 Y 32896 0 63
+def @arg04 8 20 0 Y 32896 0 63
+def @arg05 8 20 0 Y 32896 0 63
+def @arg06 8 20 0 Y 32896 0 63
+def @arg07 5 23 0 Y 32896 31 63
+def @arg08 5 23 0 Y 32896 31 63
+def @arg09 5 23 0 Y 32896 31 63
+def @arg10 5 23 0 Y 32896 31 63
+def @arg11 246 83 0 Y 128 30 63
+def @arg12 246 83 0 Y 128 30 63
+def @arg13 251 16777216 0 Y 128 31 63
+def @arg14 251 16777216 0 Y 128 31 63
+def @arg15 251 16777216 19 Y 128 31 63
+def @arg16 251 16777216 0 Y 128 31 63
+def @arg17 8 20 0 Y 32928 0 63
+def @arg18 8 20 0 Y 32896 0 63
+def @arg19 8 20 0 Y 32896 0 63
+def @arg20 251 16777216 0 Y 0 31 8
+def @arg21 251 16777216 0 Y 0 31 8
+def @arg22 251 16777216 0 Y 0 31 8
+def @arg23 251 16777216 0 Y 128 31 63
+def @arg24 251 16777216 0 Y 0 31 8
+def @arg25 251 16777216 0 Y 128 31 63
+def @arg26 251 16777216 0 Y 0 31 8
+def @arg27 251 16777216 0 Y 128 31 63
+def @arg28 251 16777216 0 Y 0 31 8
+def @arg29 251 16777216 0 Y 128 31 63
+def @arg30 251 16777216 0 Y 0 31 8
+def @arg31 251 16777216 0 Y 0 31 8
+def @arg32 251 16777216 0 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+prepare stmt1 from "select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
+ c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24,
+ c25, c26, c27, c28, c29, c30, c31, c32
+into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08,
+ @arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16,
+ @arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24,
+ @arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32
+from t9 where c1= ?" ;
+set @my_key= 1 ;
+execute stmt1 using @my_key ;
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 8 20 1 Y 32896 0 63
+def @arg02 8 20 1 Y 32896 0 63
+def @arg03 8 20 1 Y 32896 0 63
+def @arg04 8 20 1 Y 32896 0 63
+def @arg05 8 20 1 Y 32896 0 63
+def @arg06 8 20 1 Y 32896 0 63
+def @arg07 5 23 1 Y 32896 31 63
+def @arg08 5 23 1 Y 32896 31 63
+def @arg09 5 23 1 Y 32896 31 63
+def @arg10 5 23 1 Y 32896 31 63
+def @arg11 246 83 6 Y 128 30 63
+def @arg12 246 83 6 Y 128 30 63
+def @arg13 251 16777216 10 Y 128 31 63
+def @arg14 251 16777216 19 Y 128 31 63
+def @arg15 251 16777216 19 Y 128 31 63
+def @arg16 251 16777216 8 Y 128 31 63
+def @arg17 8 20 4 Y 32928 0 63
+def @arg18 8 20 1 Y 32896 0 63
+def @arg19 8 20 1 Y 32896 0 63
+def @arg20 251 16777216 1 Y 0 31 8
+def @arg21 251 16777216 10 Y 0 31 8
+def @arg22 251 16777216 30 Y 0 31 8
+def @arg23 251 16777216 8 Y 128 31 63
+def @arg24 251 16777216 8 Y 0 31 8
+def @arg25 251 16777216 4 Y 128 31 63
+def @arg26 251 16777216 4 Y 0 31 8
+def @arg27 251 16777216 10 Y 128 31 63
+def @arg28 251 16777216 10 Y 0 31 8
+def @arg29 251 16777216 8 Y 128 31 63
+def @arg30 251 16777216 8 Y 0 31 8
+def @arg31 251 16777216 3 Y 0 31 8
+def @arg32 251 16777216 6 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+set @my_key= 0 ;
+execute stmt1 using @my_key ;
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 8 20 1 Y 32896 0 63
+def @arg02 8 20 0 Y 32896 0 63
+def @arg03 8 20 0 Y 32896 0 63
+def @arg04 8 20 0 Y 32896 0 63
+def @arg05 8 20 0 Y 32896 0 63
+def @arg06 8 20 0 Y 32896 0 63
+def @arg07 5 23 0 Y 32896 31 63
+def @arg08 5 23 0 Y 32896 31 63
+def @arg09 5 23 0 Y 32896 31 63
+def @arg10 5 23 0 Y 32896 31 63
+def @arg11 246 83 0 Y 128 30 63
+def @arg12 246 83 0 Y 128 30 63
+def @arg13 251 16777216 0 Y 128 31 63
+def @arg14 251 16777216 0 Y 128 31 63
+def @arg15 251 16777216 19 Y 128 31 63
+def @arg16 251 16777216 0 Y 128 31 63
+def @arg17 8 20 0 Y 32928 0 63
+def @arg18 8 20 0 Y 32896 0 63
+def @arg19 8 20 0 Y 32896 0 63
+def @arg20 251 16777216 0 Y 0 31 8
+def @arg21 251 16777216 0 Y 0 31 8
+def @arg22 251 16777216 0 Y 0 31 8
+def @arg23 251 16777216 0 Y 128 31 63
+def @arg24 251 16777216 0 Y 0 31 8
+def @arg25 251 16777216 0 Y 128 31 63
+def @arg26 251 16777216 0 Y 0 31 8
+def @arg27 251 16777216 0 Y 128 31 63
+def @arg28 251 16777216 0 Y 0 31 8
+def @arg29 251 16777216 0 Y 128 31 63
+def @arg30 251 16777216 0 Y 0 31 8
+def @arg31 251 16777216 0 Y 0 31 8
+def @arg32 251 16777216 0 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+prepare stmt1 from "select c1 into ? from t9 where c1= 1" ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? from t9 where c1= 1' at line 1
+test_sequence
+-- insert into numeric columns --
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 ) ;
+set @arg00= 21 ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ) ;
+prepare stmt1 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22 )" ;
+execute stmt1 ;
+set @arg00= 23;
+prepare stmt2 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0,
+30.0, 30.0, 30.0 ) ;
+set @arg00= 31.0 ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ) ;
+prepare stmt1 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0,
+ 32.0, 32.0, 32.0 )" ;
+execute stmt1 ;
+set @arg00= 33.0;
+prepare stmt2 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( '40', '40', '40', '40', '40', '40', '40', '40',
+'40', '40', '40' ) ;
+set @arg00= '41' ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ) ;
+prepare stmt1 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( '42', '42', '42', '42', '42', '42', '42', '42',
+ '42', '42', '42' )" ;
+execute stmt1 ;
+set @arg00= '43';
+prepare stmt2 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( CAST('50' as binary), CAST('50' as binary),
+CAST('50' as binary), CAST('50' as binary), CAST('50' as binary),
+CAST('50' as binary), CAST('50' as binary), CAST('50' as binary),
+CAST('50' as binary), CAST('50' as binary), CAST('50' as binary) ) ;
+set @arg00= CAST('51' as binary) ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ) ;
+prepare stmt1 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( CAST('52' as binary), CAST('52' as binary),
+ CAST('52' as binary), CAST('52' as binary), CAST('52' as binary),
+ CAST('52' as binary), CAST('52' as binary), CAST('52' as binary),
+ CAST('52' as binary), CAST('52' as binary), CAST('52' as binary) )" ;
+execute stmt1 ;
+set @arg00= CAST('53' as binary) ;
+prepare stmt2 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+set @arg00= 2 ;
+set @arg00= NULL ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+NULL, NULL, NULL ) ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( 61, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ) ;
+prepare stmt1 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( 62, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL )" ;
+execute stmt1 ;
+prepare stmt2 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( 63, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+set @arg00= 8.0 ;
+set @arg00= NULL ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( 71, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ) ;
+prepare stmt2 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( 73, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+set @arg00= 'abc' ;
+set @arg00= NULL ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( 81, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ) ;
+prepare stmt2 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( 83, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12
+from t9 where c1 >= 20
+order by c1 ;
+c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c12
+20 20 20 20 20 20 20 20 20 20 20.0000
+21 21 21 21 21 21 21 21 21 21 21.0000
+22 22 22 22 22 22 22 22 22 22 22.0000
+23 23 23 23 23 23 23 23 23 23 23.0000
+30 30 30 30 30 30 30 30 30 30 30.0000
+31 31 31 31 31 31 31 31 31 31 31.0000
+32 32 32 32 32 32 32 32 32 32 32.0000
+33 33 33 33 33 33 33 33 33 33 33.0000
+40 40 40 40 40 40 40 40 40 40 40.0000
+41 41 41 41 41 41 41 41 41 41 41.0000
+42 42 42 42 42 42 42 42 42 42 42.0000
+43 43 43 43 43 43 43 43 43 43 43.0000
+50 50 50 50 50 50 50 50 50 50 50.0000
+51 51 51 51 51 51 51 51 51 51 51.0000
+52 52 52 52 52 52 52 52 52 52 52.0000
+53 53 53 53 53 53 53 53 53 53 53.0000
+60 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+61 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+62 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+63 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+71 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+73 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+81 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+83 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+test_sequence
+-- select .. where numeric column = .. --
+set @arg00= 20;
+select 'true' as found from t9
+where c1= 20 and c2= 20 and c3= 20 and c4= 20 and c5= 20 and c6= 20 and c7= 20
+and c8= 20 and c9= 20 and c10= 20 and c12= 20;
+found
+true
+select 'true' as found from t9
+where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00
+and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00
+and c12= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and c2= 20 and c3= 20 and c4= 20 and c5= 20 and c6= 20 and c7= 20
+ and c8= 20 and c9= 20 and c10= 20 and c12= 20 ";
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= ? and c2= ? and c3= ? and c4= ? and c5= ?
+ and c6= ? and c7= ? and c8= ? and c9= ? and c10= ?
+ and c12= ? ";
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+found
+true
+set @arg00= 20.0;
+select 'true' as found from t9
+where c1= 20.0 and c2= 20.0 and c3= 20.0 and c4= 20.0 and c5= 20.0 and c6= 20.0
+and c7= 20.0 and c8= 20.0 and c9= 20.0 and c10= 20.0 and c12= 20.0;
+found
+true
+select 'true' as found from t9
+where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00
+and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00
+and c12= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20.0 and c2= 20.0 and c3= 20.0 and c4= 20.0 and c5= 20.0 and c6= 20.0
+ and c7= 20.0 and c8= 20.0 and c9= 20.0 and c10= 20.0 and c12= 20.0 ";
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= ? and c2= ? and c3= ? and c4= ? and c5= ?
+ and c6= ? and c7= ? and c8= ? and c9= ? and c10= ?
+ and c12= ? ";
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+found
+true
+select 'true' as found from t9
+where c1= '20' and c2= '20' and c3= '20' and c4= '20' and c5= '20' and c6= '20'
+ and c7= '20' and c8= '20' and c9= '20' and c10= '20' and c12= '20';
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= '20' and c2= '20' and c3= '20' and c4= '20' and c5= '20' and c6= '20'
+ and c7= '20' and c8= '20' and c9= '20' and c10= '20' and c12= '20' ";
+execute stmt1 ;
+found
+true
+set @arg00= '20';
+select 'true' as found from t9
+where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00
+and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00
+and c12= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= ? and c2= ? and c3= ? and c4= ? and c5= ?
+ and c6= ? and c7= ? and c8= ? and c9= ? and c10= ?
+ and c12= ? ";
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+found
+true
+select 'true' as found from t9
+where c1= CAST('20' as binary) and c2= CAST('20' as binary) and
+c3= CAST('20' as binary) and c4= CAST('20' as binary) and
+c5= CAST('20' as binary) and c6= CAST('20' as binary) and
+c7= CAST('20' as binary) and c8= CAST('20' as binary) and
+c9= CAST('20' as binary) and c10= CAST('20' as binary) and
+c12= CAST('20' as binary);
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= CAST('20' as binary) and c2= CAST('20' as binary) and
+ c3= CAST('20' as binary) and c4= CAST('20' as binary) and
+ c5= CAST('20' as binary) and c6= CAST('20' as binary) and
+ c7= CAST('20' as binary) and c8= CAST('20' as binary) and
+ c9= CAST('20' as binary) and c10= CAST('20' as binary) and
+ c12= CAST('20' as binary) ";
+execute stmt1 ;
+found
+true
+set @arg00= CAST('20' as binary) ;
+select 'true' as found from t9
+where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00
+and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00
+and c12= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= ? and c2= ? and c3= ? and c4= ? and c5= ?
+ and c6= ? and c7= ? and c8= ? and c9= ? and c10= ?
+ and c12= ? ";
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+found
+true
+delete from t9 ;
+test_sequence
+-- some numeric overflow experiments --
+prepare my_insert from "insert into t9
+ ( c21, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( 'O', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+prepare my_select from "select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12
+from t9 where c21 = 'O' ";
+prepare my_delete from "delete from t9 where c21 = 'O' ";
+set @arg00= 9223372036854775807 ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 127
+c2 32767
+c3 8388607
+c4 2147483647
+c5 2147483647
+c6 9223372036854775807
+c7 9.22337e+18
+c8 9.22337203685478e+18
+c9 9.22337203685478e+18
+c10 9.22337203685478e+18
+c12 9999.9999
+execute my_delete ;
+set @arg00= '9223372036854775807' ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 127
+c2 32767
+c3 8388607
+c4 2147483647
+c5 2147483647
+c6 9223372036854775807
+c7 9.22337e+18
+c8 9.22337203685478e+18
+c9 9.22337203685478e+18
+c10 9.22337203685478e+18
+c12 9999.9999
+execute my_delete ;
+set @arg00= -9223372036854775808 ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 -128
+c2 -32768
+c3 -8388608
+c4 -2147483648
+c5 -2147483648
+c6 -9223372036854775808
+c7 -9.22337e+18
+c8 -9.22337203685478e+18
+c9 -9.22337203685478e+18
+c10 -9.22337203685478e+18
+c12 -9999.9999
+execute my_delete ;
+set @arg00= '-9223372036854775808' ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 -128
+c2 -32768
+c3 -8388608
+c4 -2147483648
+c5 -2147483648
+c6 -9223372036854775808
+c7 -9.22337e+18
+c8 -9.22337203685478e+18
+c9 -9.22337203685478e+18
+c10 -9.22337203685478e+18
+c12 -9999.9999
+execute my_delete ;
+set @arg00= 1.11111111111111111111e+50 ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c6' at row 1
+Warning 1264 Out of range value for column 'c7' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 127
+c2 32767
+c3 8388607
+c4 2147483647
+c5 2147483647
+c6 9223372036854775807
+c7 3.40282e+38
+c8 1.11111111111111e+50
+c9 1.11111111111111e+50
+c10 1.11111111111111e+50
+c12 9999.9999
+execute my_delete ;
+set @arg00= '1.11111111111111111111e+50' ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c6' at row 1
+Warning 1264 Out of range value for column 'c7' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 127
+c2 32767
+c3 8388607
+c4 2147483647
+c5 2147483647
+c6 9223372036854775807
+c7 3.40282e+38
+c8 1.11111111111111e+50
+c9 1.11111111111111e+50
+c10 1.11111111111111e+50
+c12 9999.9999
+execute my_delete ;
+set @arg00= -1.11111111111111111111e+50 ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c6' at row 1
+Warning 1264 Out of range value for column 'c7' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 -128
+c2 -32768
+c3 -8388608
+c4 -2147483648
+c5 -2147483648
+c6 -9223372036854775808
+c7 -3.40282e+38
+c8 -1.11111111111111e+50
+c9 -1.11111111111111e+50
+c10 -1.11111111111111e+50
+c12 -9999.9999
+execute my_delete ;
+set @arg00= '-1.11111111111111111111e+50' ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c6' at row 1
+Warning 1264 Out of range value for column 'c7' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 -128
+c2 -32768
+c3 -8388608
+c4 -2147483648
+c5 -2147483648
+c6 -9223372036854775808
+c7 -3.40282e+38
+c8 -1.11111111111111e+50
+c9 -1.11111111111111e+50
+c10 -1.11111111111111e+50
+c12 -9999.9999
+execute my_delete ;
+test_sequence
+-- insert into string columns --
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+select c1, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30
+from t9 where c1 >= 20
+order by c1 ;
+c1 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30
+20 2 20 20 20 20 20 20 20 20 20 20
+21 2 21 21 21 21 21 21 21 21 21 21
+22 2 22 22 22 22 22 22 22 22 22 22
+23 2 23 23 23 23 23 23 23 23 23 23
+30 3 30 30 30 30 30 30 30 30 30 30
+31 3 31 31 31 31 31 31 31 31 31 31
+32 3 32 32 32 32 32 32 32 32 32 32
+33 3 33 33 33 33 33 33 33 33 33 33
+40 4 40 40 40 40 40 40 40 40 40 40
+41 4 41 41 41 41 41 41 41 41 41 41
+42 4 42 42 42 42 42 42 42 42 42 42
+43 4 43 43 43 43 43 43 43 43 43 43
+50 5 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0
+51 5 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0
+52 5 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0
+53 5 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0
+54 5 54 54 54.00 54.00 54.00 54.00 54.00 54.00 54.00 54.00
+55 5 55 55 55 55 55 55 55 55 55 55
+56 6 56 56 56.00 56.00 56.00 56.00 56.00 56.00 56.00 56.00
+57 6 57 57 57.00 57.00 57.00 57.00 57.00 57.00 57.00 57.00
+60 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+61 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+62 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+63 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+71 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+73 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+81 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+83 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+test_sequence
+-- select .. where string column = .. --
+set @arg00= '20';
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr('20',1+length(c20)))= '20' and c21= '20' and
+c22= '20' and c23= '20' and c24= '20' and c25= '20' and c26= '20' and
+c27= '20' and c28= '20' and c29= '20' and c30= '20' ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and
+c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and
+c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr('20',1+length(c20)))= '20' and c21= '20' and
+ c22= '20' and c23= '20' and c24= '20' and c25= '20' and c26= '20' and
+ c27= '20' and c28= '20' and c29= '20' and c30= '20'" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and
+ c21= ? and c22= ? and c23= ? and c25= ? and
+ c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" ;
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+found
+true
+set @arg00= CAST('20' as binary);
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr(CAST('20' as binary),1+length(c20)))
+= CAST('20' as binary) and c21= CAST('20' as binary)
+and c22= CAST('20' as binary) and c23= CAST('20' as binary) and
+c24= CAST('20' as binary) and c25= CAST('20' as binary) and
+c26= CAST('20' as binary) and c27= CAST('20' as binary) and
+c28= CAST('20' as binary) and c29= CAST('20' as binary) and
+c30= CAST('20' as binary) ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr(@arg00,1+length(c20))) = @arg00 and
+c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and
+c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and
+c30= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr(CAST('20' as binary),1+length(c20)))
+ = CAST('20' as binary) and c21= CAST('20' as binary)
+ and c22= CAST('20' as binary) and c23= CAST('20' as binary) and
+ c24= CAST('20' as binary) and c25= CAST('20' as binary) and
+ c26= CAST('20' as binary) and c27= CAST('20' as binary) and
+ c28= CAST('20' as binary) and c29= CAST('20' as binary) and
+ c30= CAST('20' as binary)" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr(?,1+length(c20))) = ? and c21= ? and
+ c22= ? and c23= ? and c25= ? and c26= ? and c27= ? and c28= ? and
+ c29= ? and c30= ?";
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+found
+true
+set @arg00= 20;
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr(20,1+length(c20)))= 20 and c21= 20 and
+c22= 20 and c23= 20 and c24= 20 and c25= 20 and c26= 20 and
+c27= 20 and c28= 20 and c29= 20 and c30= 20 ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and
+c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and
+c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr(20,1+length(c20)))= 20 and c21= 20 and
+ c22= 20 and c23= 20 and c24= 20 and c25= 20 and c26= 20 and
+ c27= 20 and c28= 20 and c29= 20 and c30= 20" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and
+ c21= ? and c22= ? and c23= ? and c25= ? and
+ c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" ;
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+found
+true
+set @arg00= 20.0;
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr(20.0,1+length(c20)))= 20.0 and c21= 20.0 and
+c22= 20.0 and c23= 20.0 and c24= 20.0 and c25= 20.0 and c26= 20.0 and
+c27= 20.0 and c28= 20.0 and c29= 20.0 and c30= 20.0 ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and
+c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and
+c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr(20.0,1+length(c20)))= 20.0 and c21= 20.0 and
+ c22= 20.0 and c23= 20.0 and c24= 20.0 and c25= 20.0 and c26= 20.0 and
+ c27= 20.0 and c28= 20.0 and c29= 20.0 and c30= 20.0" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and
+ c21= ? and c22= ? and c23= ? and c25= ? and
+ c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" ;
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+found
+true
+delete from t9 ;
+test_sequence
+-- insert into date/time columns --
+Warnings:
+Note 1265 Data truncated for column 'c13' at row 1
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Note 1265 Data truncated for column 'c13' at row 1
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Note 1265 Data truncated for column 'c13' at row 1
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Note 1265 Data truncated for column 'c13' at row 1
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Note 1265 Data truncated for column 'c13' at row 1
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Note 1265 Data truncated for column 'c13' at row 1
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Note 1265 Data truncated for column 'c13' at row 1
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Note 1265 Data truncated for column 'c13' at row 1
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Warning 1264 Out of range value for column 'c13' at row 1
+Warning 1264 Out of range value for column 'c14' at row 1
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+Warnings:
+Warning 1264 Out of range value for column 'c13' at row 1
+Warning 1264 Out of range value for column 'c14' at row 1
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+Warnings:
+Warning 1264 Out of range value for column 'c13' at row 1
+Warning 1264 Out of range value for column 'c14' at row 1
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+Warnings:
+Warning 1264 Out of range value for column 'c13' at row 1
+Warning 1264 Out of range value for column 'c14' at row 1
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+select c1, c13, c14, c15, c16, c17 from t9 order by c1 ;
+c1 c13 c14 c15 c16 c17
+20 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+21 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+22 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+23 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+30 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+31 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+32 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+33 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+40 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+41 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+42 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+43 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+50 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+51 2010-00-00 2010-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+52 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+53 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+60 NULL NULL 1991-01-01 01:01:01 NULL NULL
+61 NULL NULL 1991-01-01 01:01:01 NULL NULL
+62 NULL NULL 1991-01-01 01:01:01 NULL NULL
+63 NULL NULL 1991-01-01 01:01:01 NULL NULL
+71 NULL NULL 1991-01-01 01:01:01 NULL NULL
+73 NULL NULL 1991-01-01 01:01:01 NULL NULL
+81 NULL NULL 1991-01-01 01:01:01 NULL NULL
+83 NULL NULL 1991-01-01 01:01:01 NULL NULL
+test_sequence
+-- select .. where date/time column = .. --
+set @arg00= '1991-01-01 01:01:01' ;
+select 'true' as found from t9
+where c1= 20 and c13= CAST('1991-01-01 01:01:01' AS DATE) and c14= '1991-01-01 01:01:01' and
+c15= '1991-01-01 01:01:01' and c16= '1991-01-01 01:01:01' and
+c17= '1991-01-01 01:01:01' ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and c13= CAST(@arg00 AS DATE) and c14= @arg00 and c15= @arg00 and c16= @arg00
+and c17= @arg00 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and c13= CAST('1991-01-01 01:01:01' AS DATE) and c14= '1991-01-01 01:01:01' and
+ c15= '1991-01-01 01:01:01' and c16= '1991-01-01 01:01:01' and
+ c17= '1991-01-01 01:01:01'" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and c13= CAST(? AS DATE) and c14= ? and c15= ? and c16= ? and c17= ?" ;
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00 ;
+found
+true
+set @arg00= CAST('1991-01-01 01:01:01' as datetime) ;
+select 'true' as found from t9
+where c1= 20 and c13= CAST('1991-01-01 00:00:00' as datetime) and
+c14= CAST('1991-01-01 01:01:01' as datetime) and
+c15= CAST('1991-01-01 01:01:01' as datetime) and
+c16= CAST('1991-01-01 01:01:01' as datetime) and
+c17= CAST('1991-01-01 01:01:01' as datetime) ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and c13= CAST(@arg00 AS DATE) and c14= @arg00 and c15= @arg00 and c16= @arg00
+and c17= @arg00 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and c13= CAST('1991-01-01 00:00:00' as datetime) and
+ c14= CAST('1991-01-01 01:01:01' as datetime) and
+ c15= CAST('1991-01-01 01:01:01' as datetime) and
+ c16= CAST('1991-01-01 01:01:01' as datetime) and
+ c17= CAST('1991-01-01 01:01:01' as datetime)" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and c13= CAST(? AS DATE) and c14= ? and c15= ? and c16= ? and c17= ?" ;
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00 ;
+found
+true
+set @arg00= 1991 ;
+select 'true' as found from t9
+where c1= 20 and c17= 1991 ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and c17= @arg00 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and c17= 1991" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and c17= ?" ;
+execute stmt1 using @arg00 ;
+found
+true
+set @arg00= 1.991e+3 ;
+select 'true' as found from t9
+where c1= 20 and abs(c17 - 1.991e+3) < 0.01 ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and abs(c17 - @arg00) < 0.01 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and abs(c17 - 1.991e+3) < 0.01" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and abs(c17 - ?) < 0.01" ;
+execute stmt1 using @arg00 ;
+found
+true
+drop table t1, t9;
diff --git a/mysql-test/r/query_cache.result b/mysql-test/r/query_cache.result
index 1a728354c7b..ce5ae57c125 100644
--- a/mysql-test/r/query_cache.result
+++ b/mysql-test/r/query_cache.result
@@ -42,9 +42,9 @@ drop table t1;
show status like "Qcache_queries_in_cache";
Variable_name Value
Qcache_queries_in_cache 0
-create table t1 (a int not null);
+create table t1 (a int not null) ENGINE=MyISAM;
insert into t1 values (1),(2),(3);
-create table t2 (a int not null);
+create table t2 (a int not null) ENGINE=MyISAM;
insert into t2 values (4),(5),(6);
create table t3 (a int not null) engine=MERGE UNION=(t1,t2) INSERT_METHOD=FIRST;
select * from t3;
@@ -460,7 +460,7 @@ Qcache_queries_in_cache 2
drop table t1;
flush query cache;
reset query cache;
-create table t1 (a int not null);
+create table t1 (a int not null) ENGINE=MyISAM;
insert into t1 values (1),(2),(3);
select * from t1;
a
diff --git a/mysql-test/r/subselect.result b/mysql-test/r/subselect.result
index d1173fed7f4..209f67e5386 100644
--- a/mysql-test/r/subselect.result
+++ b/mysql-test/r/subselect.result
@@ -626,8 +626,8 @@ a b
33 10
22 11
drop table t11, t12, t2;
-CREATE TABLE t1 (x int);
-create table t2 (a int);
+CREATE TABLE t1 (x int) ENGINE=MyISAM;
+create table t2 (a int) ENGINE=MyISAM;
create table t3 (b int);
insert into t2 values (1);
insert into t3 values (1),(2);
@@ -674,7 +674,7 @@ x
11
2
drop table t1, t2, t3;
-CREATE TABLE t1 (x int not null, y int, primary key (x));
+CREATE TABLE t1 (x int not null, y int, primary key (x)) ENGINE=MyISAM;
create table t2 (a int);
create table t3 (a int);
insert into t2 values (1);
diff --git a/mysql-test/suite/ndb/r/ps_7ndb.result b/mysql-test/suite/ndb/r/ps_7ndb.result
index 6e2e61bbc5e..70d511d5398 100644
--- a/mysql-test/suite/ndb/r/ps_7ndb.result
+++ b/mysql-test/suite/ndb/r/ps_7ndb.result
@@ -1739,7 +1739,7 @@ set @arg14= 'abc';
set @arg14= NULL ;
set @arg15= CAST('abc' as binary) ;
set @arg15= NULL ;
-create table t5 as select
+create table t5 engine = MyISAM as select
8 as const01, @arg01 as param01,
8.0 as const02, @arg02 as param02,
80.00000000000e-1 as const03, @arg03 as param03,
diff --git a/mysql-test/suite/rpl/r/rpl_insert.result b/mysql-test/suite/rpl/r/rpl_insert.result
index b6a97926f73..6080d18c5aa 100644
--- a/mysql-test/suite/rpl/r/rpl_insert.result
+++ b/mysql-test/suite/rpl/r/rpl_insert.result
@@ -9,7 +9,7 @@ drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
CREATE SCHEMA IF NOT EXISTS mysqlslap;
USE mysqlslap;
-CREATE TABLE t1 (id INT, name VARCHAR(64));
+CREATE TABLE t1 (id INT, name VARCHAR(64)) ENGINE=MyISAM;
SELECT COUNT(*) FROM mysqlslap.t1;
COUNT(*)
5000
diff --git a/mysql-test/suite/rpl/r/rpl_row_flsh_tbls.result b/mysql-test/suite/rpl/r/rpl_row_flsh_tbls.result
index 072f15cbbd3..319888fa083 100644
--- a/mysql-test/suite/rpl/r/rpl_row_flsh_tbls.result
+++ b/mysql-test/suite/rpl/r/rpl_row_flsh_tbls.result
@@ -4,21 +4,21 @@ reset master;
reset slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
-create table t1 (a int);
+create table t1 (a int) ENGINE=MyISAM;
insert into t1 values (10);
-create table t2 (a int);
+create table t2 (a int) ENGINE=MyISAM;
create table t3 (a int) engine=merge union(t1);
create table t4 (a int);
insert into t4 select * from t3;
rename table t1 to t5, t2 to t1;
flush no_write_to_binlog tables;
-SHOW BINLOG EVENTS FROM 623 ;
+SHOW BINLOG EVENTS FROM 651 ;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Query 1 # use `test`; rename table t1 to t5, t2 to t1
select * from t3;
a
flush tables;
-SHOW BINLOG EVENTS FROM 623 ;
+SHOW BINLOG EVENTS FROM 651 ;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Query 1 # use `test`; rename table t1 to t5, t2 to t1
master-bin.000001 # Query 1 # use `test`; flush tables
diff --git a/mysql-test/suite/rpl/r/rpl_row_insert_delayed.result b/mysql-test/suite/rpl/r/rpl_row_insert_delayed.result
index 1551d83266d..fa6c8cf9982 100644
--- a/mysql-test/suite/rpl/r/rpl_row_insert_delayed.result
+++ b/mysql-test/suite/rpl/r/rpl_row_insert_delayed.result
@@ -11,7 +11,7 @@ USE mysqlslap;
select @@global.binlog_format;
@@global.binlog_format
ROW
-CREATE TABLE t1 (id INT primary key auto_increment, name VARCHAR(64));
+CREATE TABLE t1 (id INT primary key auto_increment, name VARCHAR(64)) ENGINE=MyISAM;
FLUSH TABLE t1;
SELECT COUNT(*) FROM t1;
COUNT(*)
diff --git a/mysql-test/suite/rpl/r/rpl_stm_flsh_tbls.result b/mysql-test/suite/rpl/r/rpl_stm_flsh_tbls.result
index 1c6b5615b6e..bc14c443f48 100644
--- a/mysql-test/suite/rpl/r/rpl_stm_flsh_tbls.result
+++ b/mysql-test/suite/rpl/r/rpl_stm_flsh_tbls.result
@@ -4,21 +4,21 @@ reset master;
reset slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
-create table t1 (a int);
+create table t1 (a int) ENGINE=MyISAM;
insert into t1 values (10);
-create table t2 (a int);
+create table t2 (a int) ENGINE=MyISAM;
create table t3 (a int) engine=merge union(t1);
create table t4 (a int);
insert into t4 select * from t3;
rename table t1 to t5, t2 to t1;
flush no_write_to_binlog tables;
-SHOW BINLOG EVENTS FROM 656 ;
+SHOW BINLOG EVENTS FROM 684 ;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Query 1 # use `test`; rename table t1 to t5, t2 to t1
select * from t3;
a
flush tables;
-SHOW BINLOG EVENTS FROM 656 ;
+SHOW BINLOG EVENTS FROM 684 ;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Query 1 # use `test`; rename table t1 to t5, t2 to t1
master-bin.000001 # Query 1 # use `test`; flush tables
diff --git a/mysql-test/suite/rpl/r/rpl_stm_insert_delayed.result b/mysql-test/suite/rpl/r/rpl_stm_insert_delayed.result
index 5ca0ea2b780..70aeb733769 100644
--- a/mysql-test/suite/rpl/r/rpl_stm_insert_delayed.result
+++ b/mysql-test/suite/rpl/r/rpl_stm_insert_delayed.result
@@ -11,7 +11,7 @@ USE mysqlslap;
select @@global.binlog_format;
@@global.binlog_format
STATEMENT
-CREATE TABLE t1 (id INT primary key auto_increment, name VARCHAR(64));
+CREATE TABLE t1 (id INT primary key auto_increment, name VARCHAR(64)) ENGINE=MyISAM;
FLUSH TABLE t1;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -77,7 +77,7 @@ USE mysqlslap;
select @@global.binlog_format;
@@global.binlog_format
MIXED
-CREATE TABLE t1 (id INT primary key auto_increment, name VARCHAR(64));
+CREATE TABLE t1 (id INT primary key auto_increment, name VARCHAR(64)) ENGINE=MyISAM;
FLUSH TABLE t1;
SELECT COUNT(*) FROM t1;
COUNT(*)
diff --git a/mysql-test/suite/rpl/r/rpl_switch_stm_row_mixed.result b/mysql-test/suite/rpl/r/rpl_switch_stm_row_mixed.result
index 8ed9ff5dc2f..75eb3b09c57 100644
--- a/mysql-test/suite/rpl/r/rpl_switch_stm_row_mixed.result
+++ b/mysql-test/suite/rpl/r/rpl_switch_stm_row_mixed.result
@@ -135,7 +135,7 @@ execute stmt1 using @string;
deallocate prepare stmt1;
insert into t1 values(concat("for_23_",UUID()));
insert into t1 select "yesterday_24_";
-create table t2 select rpad(UUID(),100,' ');
+create table t2 ENGINE=MyISAM select rpad(UUID(),100,' ');
create table t3 select 1 union select UUID();
create table t4 select * from t1 where 3 in (select 1 union select 2 union select UUID() union select 3);
create table t5 select * from t1 where 3 in (select 1 union select 2 union select curdate() union select 3);
@@ -473,7 +473,7 @@ master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000001 # Query # # use `mysqltest1`; insert into t1 select "yesterday_24_"
master-bin.000001 # Query # # use `mysqltest1`; CREATE TABLE `t2` (
`rpad(UUID(),100,' ')` varchar(100) CHARACTER SET utf8 NOT NULL DEFAULT ''
-)
+) ENGINE=MyISAM
master-bin.000001 # Table_map # # table_id: # (mysqltest1.t2)
master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000001 # Query # # use `mysqltest1`; CREATE TABLE `t3` (
@@ -801,7 +801,7 @@ master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000001 # Query # # use `mysqltest1`; insert into t1 select "yesterday_24_"
master-bin.000001 # Query # # use `mysqltest1`; CREATE TABLE `t2` (
`rpad(UUID(),100,' ')` varchar(100) CHARACTER SET utf8 NOT NULL DEFAULT ''
-)
+) ENGINE=MyISAM
master-bin.000001 # Table_map # # table_id: # (mysqltest1.t2)
master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000001 # Query # # use `mysqltest1`; CREATE TABLE `t3` (
diff --git a/mysql-test/suite/rpl/t/rpl_innodb_bug28430.test b/mysql-test/suite/rpl/t/rpl_innodb_bug28430.test
index eb828f07415..a2e6a7a5f98 100644
--- a/mysql-test/suite/rpl/t/rpl_innodb_bug28430.test
+++ b/mysql-test/suite/rpl/t/rpl_innodb_bug28430.test
@@ -1,3 +1,4 @@
+--source include/big_test.inc
--source include/have_innodb.inc
--source include/have_partition.inc
--source include/have_binlog_format_mixed_or_row.inc
diff --git a/mysql-test/suite/rpl/t/rpl_insert.test b/mysql-test/suite/rpl/t/rpl_insert.test
index 0d471a0e0a9..77847dd24f2 100644
--- a/mysql-test/suite/rpl/t/rpl_insert.test
+++ b/mysql-test/suite/rpl/t/rpl_insert.test
@@ -11,7 +11,7 @@ CREATE SCHEMA IF NOT EXISTS mysqlslap;
USE mysqlslap;
--enable_warnings
-CREATE TABLE t1 (id INT, name VARCHAR(64));
+CREATE TABLE t1 (id INT, name VARCHAR(64)) ENGINE=MyISAM;
let $query = "INSERT DELAYED INTO t1 VALUES (1, 'Dr. No'), (2, 'From Russia With Love'), (3, 'Goldfinger'), (4, 'Thunderball'), (5, 'You Only Live Twice')";
--exec $MYSQL_SLAP --silent --concurrency=5 --iterations=200 --query=$query --delimiter=";"
diff --git a/mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test b/mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test
index bfa356fbfb4..c8a4d5d89a6 100644
--- a/mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test
+++ b/mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test
@@ -1,7 +1,7 @@
# depends on the binlog output
-- source include/have_binlog_format_row.inc
-let $rename_event_pos= 623;
+let $rename_event_pos= 651;
# Bug#18326: Do not lock table for writing during prepare of statement
# The use of the ps protocol causes extra table maps in the binlog, so
diff --git a/mysql-test/suite/rpl/t/rpl_stm_flsh_tbls.test b/mysql-test/suite/rpl/t/rpl_stm_flsh_tbls.test
index a8a33d05e8b..dad61002be4 100644
--- a/mysql-test/suite/rpl/t/rpl_stm_flsh_tbls.test
+++ b/mysql-test/suite/rpl/t/rpl_stm_flsh_tbls.test
@@ -1,7 +1,7 @@
# depends on the binlog output
--source include/have_binlog_format_mixed_or_statement.inc
-let $rename_event_pos= 656;
+let $rename_event_pos= 684;
-- source extra/rpl_tests/rpl_flsh_tbls.test
# End of 4.1 tests
diff --git a/mysql-test/suite/rpl/t/rpl_switch_stm_row_mixed.test b/mysql-test/suite/rpl/t/rpl_switch_stm_row_mixed.test
index 05dcb91ca28..67dbde5e89d 100644
--- a/mysql-test/suite/rpl/t/rpl_switch_stm_row_mixed.test
+++ b/mysql-test/suite/rpl/t/rpl_switch_stm_row_mixed.test
@@ -140,7 +140,7 @@ insert into t1 select "yesterday_24_";
# Test of CREATE TABLE SELECT
-create table t2 select rpad(UUID(),100,' ');
+create table t2 ENGINE=MyISAM select rpad(UUID(),100,' ');
create table t3 select 1 union select UUID();
create table t4 select * from t1 where 3 in (select 1 union select 2 union select UUID() union select 3);
create table t5 select * from t1 where 3 in (select 1 union select 2 union select curdate() union select 3);
diff --git a/mysql-test/t/alter_table.test b/mysql-test/t/alter_table.test
index 99c9ae23801..f6444d08af0 100644
--- a/mysql-test/t/alter_table.test
+++ b/mysql-test/t/alter_table.test
@@ -159,7 +159,7 @@ drop table t1;
# Test of ALTER TABLE DELAYED
#
-CREATE TABLE t1 (i int(10), index(i) );
+CREATE TABLE t1 (i int(10), index(i) ) ENGINE=MyISAM;
ALTER TABLE t1 DISABLE KEYS;
INSERT DELAYED INTO t1 VALUES(1),(2),(3);
ALTER TABLE t1 ENABLE KEYS;
diff --git a/mysql-test/t/binlog_unsafe.test b/mysql-test/t/binlog_unsafe.test
index f34c22dc5f7..209e16e0cc0 100644
--- a/mysql-test/t/binlog_unsafe.test
+++ b/mysql-test/t/binlog_unsafe.test
@@ -14,5 +14,4 @@ INSERT INTO t1 SELECT UUID();
query_vertical SHOW WARNINGS;
DROP TABLE t1,t2,t3;
-
-
+DROP VIEW v1;
diff --git a/mysql-test/t/delayed.test b/mysql-test/t/delayed.test
index ce57645bd4b..9c7e825bc30 100644
--- a/mysql-test/t/delayed.test
+++ b/mysql-test/t/delayed.test
@@ -5,6 +5,15 @@
# (Can't be tested with purify :( )
#
+# limit the test to engines which support INSERT DELAYED
+disable_query_log;
+--require r/true.require
+select @@global.storage_engine in
+("memory","myisam","archive","blackhole") and
+@@session.storage_engine in
+("memory","myisam","archive","blackhole") as `TRUE`;
+enable_query_log;
+
--disable_warnings
drop table if exists t1;
--enable_warnings
diff --git a/mysql-test/t/disabled.def b/mysql-test/t/disabled.def
index 3f9ec52ca36..7c0c994bf9d 100644
--- a/mysql-test/t/disabled.def
+++ b/mysql-test/t/disabled.def
@@ -12,6 +12,7 @@
user_limits : Bug#23921 random failure of user_limits.test
concurrent_innodb : BUG#21579 2006-08-11 mleich innodb_concurrent random failures with varying differences
+
ctype_big5 : BUG#26711 2007-06-21 Lars Test has never worked on Double Whopper
federated_transactions : Bug#29523 Transactions do not work
diff --git a/mysql-test/t/events_logs_tests.test b/mysql-test/t/events_logs_tests.test
index 3240dccbc76..1c296ee302b 100644
--- a/mysql-test/t/events_logs_tests.test
+++ b/mysql-test/t/events_logs_tests.test
@@ -75,12 +75,10 @@ create event ev_log_general on schedule at now() on completion not preserve
--replace_column 1 USER_HOST
select user_host, db, sql_text from mysql.slow_log
where sql_text like 'select \'events_logs_test\'%';
-
drop database events_test;
set global event_scheduler=off;
set @@global.long_query_time=default;
set @@session.long_query_time=default;
-
#
# Safety
#
diff --git a/mysql-test/t/maria-big.test b/mysql-test/t/maria-big.test
new file mode 100644
index 00000000000..168bf800ea6
--- /dev/null
+++ b/mysql-test/t/maria-big.test
@@ -0,0 +1,28 @@
+# Test of scenarios potentially too big for --valgrind or --mem
+-- source include/have_maria.inc
+enable_info;
+set storage_engine=maria;
+set global maria_log_file_size=4294967295;
+disable_warnings;
+drop table if exists t1, t2;
+enable_warnings;
+create table t1(a char(3));
+insert into t1 values("abc");
+insert into t1 select "def" from t1;
+insert into t1 select "ghi" from t1;
+insert into t1 select "jkl" from t1;
+insert into t1 select "mno" from t1;
+insert into t1 select "pqr" from t1;
+insert into t1 select "stu" from t1;
+insert into t1 select "vwx" from t1;
+insert into t1 select "yza" from t1;
+insert into t1 select "ceg" from t1;
+insert into t1 select "ikm" from t1;
+insert into t1 select "oqs" from t1;
+select count(*) from t1;
+insert into t1 select "uwy" from t1;
+create table t2 select * from t1;
+select count(*) from t1;
+select count(*) from t2;
+drop table t1, t2;
+disable_info;
diff --git a/mysql-test/t/maria-connect.test b/mysql-test/t/maria-connect.test
new file mode 100644
index 00000000000..35a1518ca5a
--- /dev/null
+++ b/mysql-test/t/maria-connect.test
@@ -0,0 +1,42 @@
+#
+# Test that can't be run with --extern
+#
+
+-- source include/have_maria.inc
+-- source include/have_log_bin.inc
+
+let $default=`select @@global.storage_engine`;
+set global storage_engine=maria;
+set session storage_engine=maria;
+
+set global maria_log_file_size=4294967295;
+
+# Initialise
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+SET SQL_WARNINGS=1;
+
+#
+# UNIQUE key test
+#
+# as long as maria cannot rollback, binlog should contain both inserts
+#
+RESET MASTER;
+set binlog_format=statement;
+CREATE TABLE t1 (a int primary key);
+insert t1 values (1),(2),(3);
+--error ER_DUP_ENTRY
+insert t1 values (4),(2),(5);
+select * from t1;
+SHOW BINLOG EVENTS FROM 106;
+drop table t1;
+set binlog_format=default;
+
+# End of 5.2 tests
+
+--disable_result_log
+--disable_query_log
+eval set global storage_engine=$default;
+--enable_result_log
+--enable_query_log
diff --git a/mysql-test/t/maria-purge.test b/mysql-test/t/maria-purge.test
new file mode 100644
index 00000000000..6b815b80050
--- /dev/null
+++ b/mysql-test/t/maria-purge.test
@@ -0,0 +1,104 @@
+-- source include/have_maria.inc
+-- source include/big_test.inc
+let $default=`select @@global.storage_engine`;
+set global storage_engine=maria;
+set session storage_engine=maria;
+let $def_logsize=`select @@global.maria_log_file_size`;
+let $def_checkinterval=`select @@global.maria_checkpoint_interval`;
+
+set global maria_log_file_size=4294967296;
+# Initialise
+--disable_warnings
+drop table if exists t1,t2;
+--enable_warnings
+SET SQL_WARNINGS=1;
+
+CREATE TABLE t1 (
+ STRING_DATA char(255) default NULL
+);
+CREATE TABLE t2 (
+ STRING_DATA char(255) default NULL
+);
+
+
+INSERT INTO t1 VALUES ('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA');
+INSERT INTO t1 VALUES ('DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD');
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+
+set global maria_log_file_size=16777216;
+eval set global maria_checkpoint_interval=$def_checkinterval;
+--replace_regex /Size +[0-9]+ ; .+master-data/master-data/
+SHOW ENGINE maria logs;
+
+insert into t2 select * from t1;
+insert into t1 select * from t2;
+
+
+
+eval set global maria_checkpoint_interval=$def_checkinterval;
+--replace_regex /Size +[0-9]+ ; .+master-data/master-data/
+SHOW ENGINE maria logs;
+set global maria_log_file_size=16777216;
+select @@global.maria_log_file_size;
+eval set global maria_checkpoint_interval=$def_checkinterval;
+--replace_regex /Size +[0-9]+ ; .+master-data/master-data/
+SHOW ENGINE maria logs;
+set global maria_log_file_size=8388608;
+select @@global.maria_log_file_size;
+
+set global maria_log_purge_type=at_flush;
+insert into t1 select * from t2;
+eval set global maria_checkpoint_interval=$def_checkinterval;
+--replace_regex /Size +[0-9]+ ; .+master-data/master-data/
+SHOW ENGINE maria logs;
+flush logs;
+--replace_regex /Size +[0-9]+ ; .+master-data/master-data/
+SHOW ENGINE maria logs;
+
+set global maria_log_file_size=16777216;
+set global maria_log_purge_type=external;
+insert into t1 select * from t2;
+eval set global maria_checkpoint_interval=$def_checkinterval;
+--replace_regex /Size +[0-9]+ ; .+master-data/master-data/
+SHOW ENGINE maria logs;
+flush logs;
+--replace_regex /Size +[0-9]+ ; .+master-data/master-data/
+SHOW ENGINE maria logs;
+
+set global maria_log_purge_type=immediate;
+insert into t1 select * from t2;
+eval set global maria_checkpoint_interval=$def_checkinterval;
+--replace_regex /Size +[0-9]+ ; .+master-data/master-data/
+SHOW ENGINE maria logs;
+
+drop table t1, t2;
+
+--disable_result_log
+--disable_query_log
+set global maria_log_purge_type=immediate;
+eval set global storage_engine=$default;
+eval set global maria_log_file_size=$def_logsize;
+eval set global maria_checkpoint_interval=$def_checkinterval;
+--enable_result_log
+--enable_query_log
diff --git a/mysql-test/t/maria-recovery-bitmap-master.opt b/mysql-test/t/maria-recovery-bitmap-master.opt
new file mode 100644
index 00000000000..a745693594e
--- /dev/null
+++ b/mysql-test/t/maria-recovery-bitmap-master.opt
@@ -0,0 +1,2 @@
+--skip-stack-trace --skip-core-file
+
diff --git a/mysql-test/t/maria-recovery-bitmap.test b/mysql-test/t/maria-recovery-bitmap.test
new file mode 100644
index 00000000000..28d122ed6f7
--- /dev/null
+++ b/mysql-test/t/maria-recovery-bitmap.test
@@ -0,0 +1,79 @@
+# Tests of Maria's recovery of the bitmap pages
+
+--source include/not_embedded.inc
+# Don't test this under valgrind, memory leaks will occur as we crash
+--source include/not_valgrind.inc
+# Binary must be compiled with debug for crash to occur
+--source include/have_debug.inc
+--source include/have_maria.inc
+
+--disable_warnings
+drop database if exists mysqltest;
+--enable_warnings
+create database mysqltest;
+
+# Include scripts can perform SQL. For it to not influence the main test
+# they use a separate connection. This way if they use a DDL it would
+# not autocommit in the main test.
+connect (admin, 127.0.0.1, root,,mysqltest,,);
+--enable_reconnect
+
+connection default;
+use mysqltest;
+--enable_reconnect
+
+-- source include/maria_empty_logs.inc
+let $mms_tables=1;
+create table t1 (a varchar(10000)) engine=maria;
+
+# we want recovery to use the tables as they were at time of crash
+let $mvr_restore_old_snapshot=0;
+# UNDO phase prevents physical comparison, normally,
+# so we'll only use checksums to compare.
+let $mms_compare_physically=0;
+let $mvr_crash_statement= set global maria_checkpoint_interval=1;
+
+--echo * TEST of over-allocated bitmap not flushed by checkpoint
+let $mvr_debug_option="+d,maria_crash";
+insert into t1 values ("bbbbbbb");
+-- source include/maria_make_snapshot_for_comparison.inc
+# make_snapshot_for_comparison closed the table, which lost its id.
+# So we make a null operation just to give a short id to the table so
+# that checkpoint includes table in checkpoint (otherwise nothing to
+# test).
+insert into t1 values ("bbbbbbb");
+delete from t1 limit 1;
+set session debug="+d,info,enter,exit,maria_over_alloc_bitmap";
+send insert into t1 values ("aaaaaaaaa");
+connection admin;
+# Leave time for INSERT to block after modifying bitmap;
+# in the future we should not use sleep but something like
+# debug_sync_point().
+sleep 5;
+# force a checkpoint, which could, if buggy, flush over-allocated
+# bitmap page; as REDO-UNDO was not written, bitmap and data page
+# would be inconsistent. Correct checkpoint will wait until UNDO is
+# written.
+set global maria_checkpoint_interval=1;
+-- source include/maria_verify_recovery.inc
+
+# disabled until pagecache callback framework is coded at which point
+# we can add a get_lsn() callback for bitmaps, fixing the below bug.
+if (0)
+{
+--echo * TEST of bitmap flushed without REDO-UNDO in the log (WAL violation)
+# before crashing we'll flush the bitmap page
+let $mvr_debug_option="+d,maria_flush_bitmap,maria_crash";
+-- source include/maria_make_snapshot_for_comparison.inc
+lock tables t1 write;
+insert into t1 values (REPEAT('a', 6000));
+# bitmap of after-INSERT will be on disk, but data pages will not; if
+# log is not flushed the bitmap is inconsistent with the data.
+-- source include/maria_verify_recovery.inc
+drop table t1;
+}
+
+# clean up everything
+let $mms_purpose=comparison;
+eval drop database mysqltest_for_$mms_purpose;
+drop database mysqltest;
diff --git a/mysql-test/t/maria-recovery-master.opt b/mysql-test/t/maria-recovery-master.opt
new file mode 100644
index 00000000000..a745693594e
--- /dev/null
+++ b/mysql-test/t/maria-recovery-master.opt
@@ -0,0 +1,2 @@
+--skip-stack-trace --skip-core-file
+
diff --git a/mysql-test/t/maria-recovery.test b/mysql-test/t/maria-recovery.test
new file mode 100644
index 00000000000..5a33a196434
--- /dev/null
+++ b/mysql-test/t/maria-recovery.test
@@ -0,0 +1,187 @@
+--source include/not_embedded.inc
+# Don't test this under valgrind, memory leaks will occur as we crash
+--source include/not_valgrind.inc
+# Binary must be compiled with debug for crash to occur
+--source include/have_debug.inc
+--source include/have_maria.inc
+
+set global maria_log_file_size=4294967295;
+
+--disable_warnings
+drop database if exists mysqltest;
+--enable_warnings
+create database mysqltest;
+
+# Include scripts can perform SQL. For it to not influence the main test
+# they use a separate connection. This way if they use a DDL it would
+# not autocommit in the main test.
+connect (admin, 127.0.0.1, root,,mysqltest,,);
+--enable_reconnect
+
+connection default;
+use mysqltest;
+--enable_reconnect
+
+# A sample test
+-- source include/maria_empty_logs.inc
+let $mms_tables=1;
+create table t1 (a varchar(1000)) engine=maria;
+
+--echo * TEST of REDO: see if recovery can reconstruct if we give it an old table
+
+-- source include/maria_make_snapshot_for_feeding_recovery.inc
+# Your committed statements here, which we expect to
+# be reconstructed from the log
+insert into t1 values ("00000000");
+-- source include/maria_make_snapshot_for_comparison.inc
+# we want recovery to run on the first snapshot made above
+let $mvr_restore_old_snapshot=1;
+# As we did only committed work, we test REDO applying, which could
+# produce a physically identical table.
+let $mms_compare_physically=1;
+let $mvr_debug_option="+d,maria_flush_whole_log,maria_crash";
+let $mvr_crash_statement= set global maria_checkpoint_interval=1;
+# the script below will trigger recovery and compare checksums
+-- source include/maria_verify_recovery.inc
+let $mms_compare_physically=0;
+# so a SELECT like this is pure visual effect, brings nothing.
+select * from t1;
+
+--echo * TEST of REDO+UNDO: normal recovery test (no moving tables under its feet)
+
+# different types of crash => a loop; here are loop control variables
+let $crash_no_flush=1;
+let $crash_flush_whole_page_cache=0;
+let $crash_flush_states=0;
+let $crash_flush_whole_log=0;
+let $crash_loop=1;
+
+# we want recovery to use the tables as they were at time of crash
+let $mvr_restore_old_snapshot=0;
+# UNDO phase prevents physical comparison, normally,
+# so we'll only use checksums to compare.
+let $mms_compare_physically=0;
+let $mvr_crash_statement= set global maria_checkpoint_interval=1;
+
+# Note that we don't remove logs between iterations. Test is
+# cumulative (each new recovery processes more log records than the previous).
+
+while ($crash_loop)
+{
+ if ($crash_flush_whole_log)
+ {
+ let $mvr_debug_option="+d,maria_flush_whole_log,maria_crash";
+ # set up what next iteration should do:
+ let $crash_flush_whole_log=0;
+ let $crash_loop=0;
+ }
+ if ($crash_flush_states)
+ {
+ let $mvr_debug_option="+d,maria_flush_states,maria_flush_whole_log,maria_crash";
+ let $crash_flush_states=0;
+ let $crash_flush_whole_log=1;
+ }
+ if ($crash_flush_whole_page_cache)
+ {
+ let $mvr_debug_option="+d,maria_flush_whole_page_cache,maria_crash";
+ let $crash_flush_whole_page_cache=0;
+ let $crash_flush_states=1;
+ }
+ if ($crash_no_flush)
+ {
+ let $mvr_debug_option="+d,maria_crash";
+ let $crash_no_flush=0;
+ let $crash_flush_whole_page_cache=1;
+ }
+ # Your committed statements here
+ insert into t1 values ("00000000");
+ -- source include/maria_make_snapshot_for_comparison.inc
+ # Your statements which we expect to be rolled back
+ lock tables t1 write;
+ insert into t1 values ("aaaaaaaaa");
+ -- source include/maria_verify_recovery.inc
+ select * from t1;
+}
+
+drop table t1;
+
+# what did we compare above:
+# - checksum: tells that the tables contain the same amount of rows
+# and same data in rows
+# - index: no, neither state nor pages were compared
+# - bitmap pages: the REPAIR QUICK done above very probably checks
+# that bitmap reflects page occupation; do we need to do physical
+# compare?
+# - page LSN: not compared; we should compare that page's LSN in new
+# table is >= page's LSN in old table (it can be >, due to UNDO phase)
+# we had a bug where new page's LSN was 0... todo.
+
+#
+# Test for this bug: an UPDATE purges and rewrites a tail page, and
+# recovery applied the purge, stamped page with UNDO's LSN, thus
+# the rewrite was ignored.
+#
+
+--echo * TEST of two REDOs for same page in one REDO group
+-- source include/maria_empty_logs.inc
+let $mms_tables=1;
+CREATE TABLE t1 (
+ i int,
+ b blob default NULL,
+ c varchar(6000) default NULL
+) ENGINE=MARIA CHECKSUM=1;
+-- source include/maria_make_snapshot_for_feeding_recovery.inc
+INSERT INTO t1 VALUES (1, REPEAT('a', 5000), REPEAT('b', 5000));
+UPDATE t1 SET i=3, b=CONCAT(b,'c') WHERE i=1;
+SELECT LENGTH(b) FROM t1 WHERE i=3;
+-- source include/maria_make_snapshot_for_comparison.inc
+# we want recovery to run on the first snapshot made above
+let $mvr_restore_old_snapshot=1;
+let $mms_compare_physically=0;
+let $mvr_debug_option="+d,maria_flush_whole_log,maria_crash";
+let $mvr_crash_statement= set global maria_checkpoint_interval=1;
+-- source include/maria_verify_recovery.inc
+SELECT LENGTH(b) FROM t1 WHERE i=3;
+drop table t1;
+
+# Test that INSERT's effect on auto-increment is recovered
+--echo * TEST of INSERT vs state.auto_increment
+-- source include/maria_empty_logs.inc
+let $mms_tables=1;
+CREATE TABLE t1 (
+ i int auto_increment primary key,
+ c varchar(6),
+ key(c)
+) ENGINE=MARIA;
+insert into t1 values(null,"b");
+-- source include/maria_make_snapshot_for_feeding_recovery.inc
+insert into t1 values(null,"a"), (null,"c"), (null,"d");
+# With this DELETE we also verify that Recovery cares only about INSERTs
+delete from t1 where c="d";
+-- source include/maria_make_snapshot_for_comparison.inc
+let $mvr_restore_old_snapshot=1;
+let $mms_compare_physically=0;
+let $mvr_debug_option="+d,maria_flush_whole_log,maria_crash";
+let $mvr_crash_statement= set global maria_checkpoint_interval=1;
+-- source include/maria_verify_recovery.inc
+show create table t1;
+
+# Test that UPDATE's effect on auto-increment is recovered
+--echo * TEST of UPDATE vs state.auto_increment
+-- source include/maria_make_snapshot_for_feeding_recovery.inc
+update t1 set i=15 where c="a";
+-- source include/maria_make_snapshot_for_comparison.inc
+let $mvr_restore_old_snapshot=1;
+let $mms_compare_physically=0;
+let $mvr_debug_option="+d,maria_flush_whole_log,maria_crash";
+let $mvr_crash_statement= set global maria_checkpoint_interval=1;
+-- source include/maria_verify_recovery.inc
+show create table t1;
+drop table t1;
+
+# clean up everything
+let $mms_purpose=feeding_recovery;
+eval drop database mysqltest_for_$mms_purpose;
+let $mms_purpose=comparison;
+eval drop database mysqltest_for_$mms_purpose;
+drop database mysqltest;
diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test
new file mode 100644
index 00000000000..e0195adfdb3
--- /dev/null
+++ b/mysql-test/t/maria.test
@@ -0,0 +1,1334 @@
+#
+# Testing of potential probelms in Maria
+# This code was initially taken from myisam.test
+#
+
+-- source include/have_maria.inc
+
+let $default=`select @@global.storage_engine`;
+set global storage_engine=maria;
+set session storage_engine=maria;
+
+set global maria_log_file_size=4294967295;
+
+# Initialise
+--disable_warnings
+drop table if exists t1,t2;
+--enable_warnings
+SET SQL_WARNINGS=1;
+
+#
+# Test problem with CHECK TABLE;
+#
+
+CREATE TABLE t1 (
+ STRING_DATA char(255) default NULL,
+ KEY string_data (STRING_DATA)
+);
+
+INSERT INTO t1 VALUES ('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA');
+INSERT INTO t1 VALUES ('DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD');
+INSERT INTO t1 VALUES ('FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF');
+INSERT INTO t1 VALUES ('FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG');
+INSERT INTO t1 VALUES ('HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH');
+INSERT INTO t1 VALUES ('WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW');
+CHECK TABLE t1;
+drop table t1;
+
+#
+# Test problem with rows that are 65517-65520 bytes long
+#
+
+create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a));
+
+let $1=100;
+disable_query_log;
+--disable_warnings
+SET SQL_WARNINGS=0;
+while ($1)
+{
+ eval insert into t1 (b) values(repeat(char(65+$1),65550-$1));
+ dec $1;
+}
+SET SQL_WARNINGS=1;
+--enable_warnings
+--enable_query_log
+check table t1;
+repair table t1;
+delete from t1 where (a & 1);
+check table t1;
+repair table t1;
+check table t1;
+
+# FLUSH + REPAIR used to cause assertion failure in page cache
+flush table t1;
+repair table t1;
+drop table t1;
+
+#
+# Test bug: Two optimize in a row reset index cardinality
+#
+
+create table t1 (a int not null auto_increment, b int not null, primary key (a), index(b));
+insert into t1 (b) values (1),(2),(2),(2),(2);
+optimize table t1;
+show index from t1;
+optimize table t1;
+show index from t1;
+drop table t1;
+
+#
+# Test of how ORDER BY works when doing it on the whole table
+#
+
+create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b));
+insert into t1 values (3,3,3),(1,1,1),(2,2,2),(4,4,4);
+explain select * from t1 order by a;
+explain select * from t1 order by b;
+explain select * from t1 order by c;
+explain select a from t1 order by a;
+explain select b from t1 order by b;
+explain select a,b from t1 order by b;
+explain select a,b from t1;
+explain select a,b,c from t1;
+drop table t1;
+
+#
+# Test of OPTIMIZE of locked and modified tables
+#
+set autocommit=0;
+begin;
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1), (2), (3);
+LOCK TABLES t1 WRITE;
+INSERT INTO t1 VALUES (1), (2), (3);
+commit;
+set autocommit=1;
+UNLOCK TABLES;
+OPTIMIZE TABLE t1;
+DROP TABLE t1;
+
+#
+# Test of optimize, when only mi_sort_index (but not mi_repair*) is done
+# in ha_maria::repair, and index size is changed (decreased).
+#
+
+create table t1 ( t1 char(255), key(t1(250)));
+insert t1 values ('137513751375137513751375137513751375137569516951695169516951695169516951695169');
+insert t1 values ('178417841784178417841784178417841784178403420342034203420342034203420342034203');
+insert t1 values ('213872387238723872387238723872387238723867376737673767376737673767376737673767');
+insert t1 values ('242624262426242624262426242624262426242607890789078907890789078907890789078907');
+insert t1 values ('256025602560256025602560256025602560256011701170117011701170117011701170117011');
+insert t1 values ('276027602760276027602760276027602760276001610161016101610161016101610161016101');
+insert t1 values ('281528152815281528152815281528152815281564956495649564956495649564956495649564');
+insert t1 values ('292129212921292129212921292129212921292102100210021002100210021002100210021002');
+insert t1 values ('380638063806380638063806380638063806380634483448344834483448344834483448344834');
+insert t1 values ('411641164116411641164116411641164116411616301630163016301630163016301630163016');
+insert t1 values ('420842084208420842084208420842084208420899889988998899889988998899889988998899');
+insert t1 values ('438443844384438443844384438443844384438482448244824482448244824482448244824482');
+insert t1 values ('443244324432443244324432443244324432443239613961396139613961396139613961396139');
+insert t1 values ('485448544854485448544854485448544854485477847784778477847784778477847784778477');
+insert t1 values ('494549454945494549454945494549454945494555275527552755275527552755275527552755');
+insert t1 values ('538647864786478647864786478647864786478688918891889188918891889188918891889188');
+insert t1 values ('565556555655565556555655565556555655565554845484548454845484548454845484548454');
+insert t1 values ('607860786078607860786078607860786078607856665666566656665666566656665666566656');
+insert t1 values ('640164016401640164016401640164016401640141274127412741274127412741274127412741');
+insert t1 values ('719471947194719471947194719471947194719478717871787178717871787178717871787178');
+insert t1 values ('742574257425742574257425742574257425742549604960496049604960496049604960496049');
+insert t1 values ('887088708870887088708870887088708870887035963596359635963596359635963596359635');
+insert t1 values ('917791779177917791779177917791779177917773857385738573857385738573857385738573');
+insert t1 values ('933293329332933293329332933293329332933278987898789878987898789878987898789878');
+insert t1 values ('963896389638963896389638963896389638963877807780778077807780778077807780778077');
+delete from t1 where t1>'2';
+insert t1 values ('70'), ('84'), ('60'), ('20'), ('76'), ('89'), ('49'), ('50'),
+('88'), ('61'), ('42'), ('98'), ('39'), ('30'), ('25'), ('66'), ('61'), ('48'),
+('80'), ('84'), ('98'), ('19'), ('91'), ('42'), ('47');
+optimize table t1;
+check table t1;
+drop table t1;
+
+#
+# test of maria with huge number of packed fields
+#
+
+create table t1 (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8
+int, i9 int, i10 int, i11 int, i12 int, i13 int, i14 int, i15 int, i16 int, i17
+int, i18 int, i19 int, i20 int, i21 int, i22 int, i23 int, i24 int, i25 int,
+i26 int, i27 int, i28 int, i29 int, i30 int, i31 int, i32 int, i33 int, i34
+int, i35 int, i36 int, i37 int, i38 int, i39 int, i40 int, i41 int, i42 int,
+i43 int, i44 int, i45 int, i46 int, i47 int, i48 int, i49 int, i50 int, i51
+int, i52 int, i53 int, i54 int, i55 int, i56 int, i57 int, i58 int, i59 int,
+i60 int, i61 int, i62 int, i63 int, i64 int, i65 int, i66 int, i67 int, i68
+int, i69 int, i70 int, i71 int, i72 int, i73 int, i74 int, i75 int, i76 int,
+i77 int, i78 int, i79 int, i80 int, i81 int, i82 int, i83 int, i84 int, i85
+int, i86 int, i87 int, i88 int, i89 int, i90 int, i91 int, i92 int, i93 int,
+i94 int, i95 int, i96 int, i97 int, i98 int, i99 int, i100 int, i101 int, i102
+int, i103 int, i104 int, i105 int, i106 int, i107 int, i108 int, i109 int, i110
+int, i111 int, i112 int, i113 int, i114 int, i115 int, i116 int, i117 int, i118
+int, i119 int, i120 int, i121 int, i122 int, i123 int, i124 int, i125 int, i126
+int, i127 int, i128 int, i129 int, i130 int, i131 int, i132 int, i133 int, i134
+int, i135 int, i136 int, i137 int, i138 int, i139 int, i140 int, i141 int, i142
+int, i143 int, i144 int, i145 int, i146 int, i147 int, i148 int, i149 int, i150
+int, i151 int, i152 int, i153 int, i154 int, i155 int, i156 int, i157 int, i158
+int, i159 int, i160 int, i161 int, i162 int, i163 int, i164 int, i165 int, i166
+int, i167 int, i168 int, i169 int, i170 int, i171 int, i172 int, i173 int, i174
+int, i175 int, i176 int, i177 int, i178 int, i179 int, i180 int, i181 int, i182
+int, i183 int, i184 int, i185 int, i186 int, i187 int, i188 int, i189 int, i190
+int, i191 int, i192 int, i193 int, i194 int, i195 int, i196 int, i197 int, i198
+int, i199 int, i200 int, i201 int, i202 int, i203 int, i204 int, i205 int, i206
+int, i207 int, i208 int, i209 int, i210 int, i211 int, i212 int, i213 int, i214
+int, i215 int, i216 int, i217 int, i218 int, i219 int, i220 int, i221 int, i222
+int, i223 int, i224 int, i225 int, i226 int, i227 int, i228 int, i229 int, i230
+int, i231 int, i232 int, i233 int, i234 int, i235 int, i236 int, i237 int, i238
+int, i239 int, i240 int, i241 int, i242 int, i243 int, i244 int, i245 int, i246
+int, i247 int, i248 int, i249 int, i250 int, i251 int, i252 int, i253 int, i254
+int, i255 int, i256 int, i257 int, i258 int, i259 int, i260 int, i261 int, i262
+int, i263 int, i264 int, i265 int, i266 int, i267 int, i268 int, i269 int, i270
+int, i271 int, i272 int, i273 int, i274 int, i275 int, i276 int, i277 int, i278
+int, i279 int, i280 int, i281 int, i282 int, i283 int, i284 int, i285 int, i286
+int, i287 int, i288 int, i289 int, i290 int, i291 int, i292 int, i293 int, i294
+int, i295 int, i296 int, i297 int, i298 int, i299 int, i300 int, i301 int, i302
+int, i303 int, i304 int, i305 int, i306 int, i307 int, i308 int, i309 int, i310
+int, i311 int, i312 int, i313 int, i314 int, i315 int, i316 int, i317 int, i318
+int, i319 int, i320 int, i321 int, i322 int, i323 int, i324 int, i325 int, i326
+int, i327 int, i328 int, i329 int, i330 int, i331 int, i332 int, i333 int, i334
+int, i335 int, i336 int, i337 int, i338 int, i339 int, i340 int, i341 int, i342
+int, i343 int, i344 int, i345 int, i346 int, i347 int, i348 int, i349 int, i350
+int, i351 int, i352 int, i353 int, i354 int, i355 int, i356 int, i357 int, i358
+int, i359 int, i360 int, i361 int, i362 int, i363 int, i364 int, i365 int, i366
+int, i367 int, i368 int, i369 int, i370 int, i371 int, i372 int, i373 int, i374
+int, i375 int, i376 int, i377 int, i378 int, i379 int, i380 int, i381 int, i382
+int, i383 int, i384 int, i385 int, i386 int, i387 int, i388 int, i389 int, i390
+int, i391 int, i392 int, i393 int, i394 int, i395 int, i396 int, i397 int, i398
+int, i399 int, i400 int, i401 int, i402 int, i403 int, i404 int, i405 int, i406
+int, i407 int, i408 int, i409 int, i410 int, i411 int, i412 int, i413 int, i414
+int, i415 int, i416 int, i417 int, i418 int, i419 int, i420 int, i421 int, i422
+int, i423 int, i424 int, i425 int, i426 int, i427 int, i428 int, i429 int, i430
+int, i431 int, i432 int, i433 int, i434 int, i435 int, i436 int, i437 int, i438
+int, i439 int, i440 int, i441 int, i442 int, i443 int, i444 int, i445 int, i446
+int, i447 int, i448 int, i449 int, i450 int, i451 int, i452 int, i453 int, i454
+int, i455 int, i456 int, i457 int, i458 int, i459 int, i460 int, i461 int, i462
+int, i463 int, i464 int, i465 int, i466 int, i467 int, i468 int, i469 int, i470
+int, i471 int, i472 int, i473 int, i474 int, i475 int, i476 int, i477 int, i478
+int, i479 int, i480 int, i481 int, i482 int, i483 int, i484 int, i485 int, i486
+int, i487 int, i488 int, i489 int, i490 int, i491 int, i492 int, i493 int, i494
+int, i495 int, i496 int, i497 int, i498 int, i499 int, i500 int, i501 int, i502
+int, i503 int, i504 int, i505 int, i506 int, i507 int, i508 int, i509 int, i510
+int, i511 int, i512 int, i513 int, i514 int, i515 int, i516 int, i517 int, i518
+int, i519 int, i520 int, i521 int, i522 int, i523 int, i524 int, i525 int, i526
+int, i527 int, i528 int, i529 int, i530 int, i531 int, i532 int, i533 int, i534
+int, i535 int, i536 int, i537 int, i538 int, i539 int, i540 int, i541 int, i542
+int, i543 int, i544 int, i545 int, i546 int, i547 int, i548 int, i549 int, i550
+int, i551 int, i552 int, i553 int, i554 int, i555 int, i556 int, i557 int, i558
+int, i559 int, i560 int, i561 int, i562 int, i563 int, i564 int, i565 int, i566
+int, i567 int, i568 int, i569 int, i570 int, i571 int, i572 int, i573 int, i574
+int, i575 int, i576 int, i577 int, i578 int, i579 int, i580 int, i581 int, i582
+int, i583 int, i584 int, i585 int, i586 int, i587 int, i588 int, i589 int, i590
+int, i591 int, i592 int, i593 int, i594 int, i595 int, i596 int, i597 int, i598
+int, i599 int, i600 int, i601 int, i602 int, i603 int, i604 int, i605 int, i606
+int, i607 int, i608 int, i609 int, i610 int, i611 int, i612 int, i613 int, i614
+int, i615 int, i616 int, i617 int, i618 int, i619 int, i620 int, i621 int, i622
+int, i623 int, i624 int, i625 int, i626 int, i627 int, i628 int, i629 int, i630
+int, i631 int, i632 int, i633 int, i634 int, i635 int, i636 int, i637 int, i638
+int, i639 int, i640 int, i641 int, i642 int, i643 int, i644 int, i645 int, i646
+int, i647 int, i648 int, i649 int, i650 int, i651 int, i652 int, i653 int, i654
+int, i655 int, i656 int, i657 int, i658 int, i659 int, i660 int, i661 int, i662
+int, i663 int, i664 int, i665 int, i666 int, i667 int, i668 int, i669 int, i670
+int, i671 int, i672 int, i673 int, i674 int, i675 int, i676 int, i677 int, i678
+int, i679 int, i680 int, i681 int, i682 int, i683 int, i684 int, i685 int, i686
+int, i687 int, i688 int, i689 int, i690 int, i691 int, i692 int, i693 int, i694
+int, i695 int, i696 int, i697 int, i698 int, i699 int, i700 int, i701 int, i702
+int, i703 int, i704 int, i705 int, i706 int, i707 int, i708 int, i709 int, i710
+int, i711 int, i712 int, i713 int, i714 int, i715 int, i716 int, i717 int, i718
+int, i719 int, i720 int, i721 int, i722 int, i723 int, i724 int, i725 int, i726
+int, i727 int, i728 int, i729 int, i730 int, i731 int, i732 int, i733 int, i734
+int, i735 int, i736 int, i737 int, i738 int, i739 int, i740 int, i741 int, i742
+int, i743 int, i744 int, i745 int, i746 int, i747 int, i748 int, i749 int, i750
+int, i751 int, i752 int, i753 int, i754 int, i755 int, i756 int, i757 int, i758
+int, i759 int, i760 int, i761 int, i762 int, i763 int, i764 int, i765 int, i766
+int, i767 int, i768 int, i769 int, i770 int, i771 int, i772 int, i773 int, i774
+int, i775 int, i776 int, i777 int, i778 int, i779 int, i780 int, i781 int, i782
+int, i783 int, i784 int, i785 int, i786 int, i787 int, i788 int, i789 int, i790
+int, i791 int, i792 int, i793 int, i794 int, i795 int, i796 int, i797 int, i798
+int, i799 int, i800 int, i801 int, i802 int, i803 int, i804 int, i805 int, i806
+int, i807 int, i808 int, i809 int, i810 int, i811 int, i812 int, i813 int, i814
+int, i815 int, i816 int, i817 int, i818 int, i819 int, i820 int, i821 int, i822
+int, i823 int, i824 int, i825 int, i826 int, i827 int, i828 int, i829 int, i830
+int, i831 int, i832 int, i833 int, i834 int, i835 int, i836 int, i837 int, i838
+int, i839 int, i840 int, i841 int, i842 int, i843 int, i844 int, i845 int, i846
+int, i847 int, i848 int, i849 int, i850 int, i851 int, i852 int, i853 int, i854
+int, i855 int, i856 int, i857 int, i858 int, i859 int, i860 int, i861 int, i862
+int, i863 int, i864 int, i865 int, i866 int, i867 int, i868 int, i869 int, i870
+int, i871 int, i872 int, i873 int, i874 int, i875 int, i876 int, i877 int, i878
+int, i879 int, i880 int, i881 int, i882 int, i883 int, i884 int, i885 int, i886
+int, i887 int, i888 int, i889 int, i890 int, i891 int, i892 int, i893 int, i894
+int, i895 int, i896 int, i897 int, i898 int, i899 int, i900 int, i901 int, i902
+int, i903 int, i904 int, i905 int, i906 int, i907 int, i908 int, i909 int, i910
+int, i911 int, i912 int, i913 int, i914 int, i915 int, i916 int, i917 int, i918
+int, i919 int, i920 int, i921 int, i922 int, i923 int, i924 int, i925 int, i926
+int, i927 int, i928 int, i929 int, i930 int, i931 int, i932 int, i933 int, i934
+int, i935 int, i936 int, i937 int, i938 int, i939 int, i940 int, i941 int, i942
+int, i943 int, i944 int, i945 int, i946 int, i947 int, i948 int, i949 int, i950
+int, i951 int, i952 int, i953 int, i954 int, i955 int, i956 int, i957 int, i958
+int, i959 int, i960 int, i961 int, i962 int, i963 int, i964 int, i965 int, i966
+int, i967 int, i968 int, i969 int, i970 int, i971 int, i972 int, i973 int, i974
+int, i975 int, i976 int, i977 int, i978 int, i979 int, i980 int, i981 int, i982
+int, i983 int, i984 int, i985 int, i986 int, i987 int, i988 int, i989 int, i990
+int, i991 int, i992 int, i993 int, i994 int, i995 int, i996 int, i997 int, i998
+int, i999 int, i1000 int, b blob) row_format=dynamic;
+insert into t1 values (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "Sergei");
+update t1 set b=repeat('a',256);
+update t1 set i1=0, i2=0, i3=0, i4=0, i5=0, i6=0, i7=0;
+check table t1;
+delete from t1 where i8=1;
+select i1,i2 from t1;
+check table t1;
+drop table t1;
+
+#
+# Test of REPAIR that once failed
+#
+CREATE TABLE `t1` (
+ `post_id` mediumint(8) unsigned NOT NULL auto_increment,
+ `topic_id` mediumint(8) unsigned NOT NULL default '0',
+ `post_time` datetime NOT NULL default '0000-00-00 00:00:00',
+ `post_text` text NOT NULL,
+ `icon_url` varchar(10) NOT NULL default '',
+ `sign` tinyint(1) unsigned NOT NULL default '0',
+ `post_edit` varchar(150) NOT NULL default '',
+ `poster_login` varchar(35) NOT NULL default '',
+ `ip` varchar(15) NOT NULL default '',
+ PRIMARY KEY (`post_id`),
+ KEY `post_time` (`post_time`),
+ KEY `ip` (`ip`),
+ KEY `poster_login` (`poster_login`),
+ KEY `topic_id` (`topic_id`),
+ FULLTEXT KEY `post_text` (`post_text`)
+) TRANSACTIONAL=0;
+
+INSERT INTO t1 (post_text) VALUES ('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test');
+
+REPAIR TABLE t1;
+CHECK TABLE t1;
+drop table t1;
+
+#
+# Test of creating table with too long key
+#
+
+--error 1071
+CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255), KEY t1 (a, b, c, d, e));
+--error 1071
+CREATE TABLE t1 (a varchar(32000), unique key(a));
+--error 1070
+CREATE TABLE t1 (a varchar(1), b varchar(1), key (a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b));
+CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255));
+--error 1071
+ALTER TABLE t1 ADD INDEX t1 (a, b, c, d, e);
+DROP TABLE t1;
+
+#
+# Test of cardinality of keys with NULL
+#
+
+CREATE TABLE t1 (a int not null, b int, c int, key(b), key(c), key(a,b), key(c,a));
+INSERT into t1 values (0, null, 0), (0, null, 1), (0, null, 2), (0, null,3), (1,1,4);
+create table t2 (a int not null, b int, c int, key(b), key(c), key(a));
+INSERT into t2 values (1,1,1), (2,2,2);
+optimize table t1;
+show index from t1;
+explain select * from t1,t2 where t1.a=t2.a;
+explain select * from t1,t2 force index(a) where t1.a=t2.a;
+explain select * from t1 force index(a),t2 force index(a) where t1.a=t2.a;
+explain select * from t1,t2 where t1.b=t2.b;
+explain select * from t1,t2 force index(c) where t1.a=t2.a;
+explain select * from t1 where a=0 or a=2;
+explain select * from t1 force index (a) where a=0 or a=2;
+explain select * from t1 where c=1;
+explain select * from t1 use index() where c=1;
+drop table t1,t2;
+
+#
+# Test bug when updating a split dynamic row where keys are not changed
+#
+
+create table t1 (a int not null auto_increment primary key, b varchar(255));
+insert into t1 (b) values (repeat('a',100)),(repeat('b',100)),(repeat('c',100));
+update t1 set b=repeat(left(b,1),200) where a=1;
+delete from t1 where (a & 1)= 0;
+update t1 set b=repeat('e',200) where a=1;
+flush tables;
+check table t1;
+
+#
+# check updating with keys
+#
+
+disable_query_log;
+let $1 = 100;
+while ($1)
+{
+ eval insert into t1 (b) values (repeat(char(($1 & 32)+65), $1));
+ dec $1;
+}
+enable_query_log;
+update t1 set b=repeat(left(b,1),255) where a between 1 and 5;
+update t1 set b=repeat(left(b,1),10) where a between 32 and 43;
+update t1 set b=repeat(left(b,1),2) where a between 64 and 66;
+update t1 set b=repeat(left(b,1),65) where a between 67 and 70;
+check table t1;
+insert into t1 (b) values (repeat('z',100));
+update t1 set b="test" where left(b,1) > 'n';
+check table t1;
+drop table t1;
+
+#
+# Test space-stripping features
+#
+create table t1 ( a text not null, key a (a(20)));
+insert into t1 values ('aaa '),('aaa'),('aa');
+check table t1;
+repair table t1;
+select concat(a,'.') from t1 where a='aaa';
+select concat(a,'.') from t1 where binary a='aaa';
+update t1 set a='bbb' where a='aaa';
+select concat(a,'.') from t1;
+drop table t1;
+
+#
+# Test again but with dynamic format
+#
+create table t1 ( a text not null, key a (a(20))) row_format=dynamic;
+insert into t1 values ('aaa '),('aaa'),('aa');
+check table t1;
+repair table t1;
+select concat(a,'.') from t1 where a='aaa';
+select concat(a,'.') from t1 where binary a='aaa';
+update t1 set a='bbb' where a='aaa';
+select concat(a,'.') from t1;
+drop table t1;
+
+#
+# More space testing
+#
+
+create table t1(a text not null, b text not null, c text not null, index (a(10),b(10),c(10)));
+insert into t1 values('807780', '477', '165');
+insert into t1 values('807780', '477', '162');
+insert into t1 values('807780', '472', '162');
+select * from t1 where a='807780' and b='477' and c='165';
+drop table t1;
+
+#
+# Space-stripping in prefix_search
+#
+
+CREATE TABLE t1 (a varchar(150) NOT NULL, KEY (a));
+INSERT t1 VALUES ("can \tcan");
+INSERT t1 VALUES ("can can");
+INSERT t1 VALUES ("can");
+SELECT * FROM t1;
+CHECK TABLE t1;
+DROP TABLE t1;
+
+#
+# Verify blob handling
+#
+
+create table t1 (a blob);
+insert into t1 values('a '),('a');
+select concat(a,'.') from t1 where a='a';
+select concat(a,'.') from t1 where a='a ';
+alter table t1 add key(a(2));
+select concat(a,'.') from t1 where a='a';
+select concat(a,'.') from t1 where a='a ';
+drop table t1;
+
+#
+# Test text and unique
+#
+create table t1 (a int not null auto_increment primary key, b text not null, unique b (b(20)));
+insert into t1 (b) values ('a'),('b'),('c');
+select concat(b,'.') from t1;
+update t1 set b='b ' where a=2;
+--error ER_DUP_ENTRY
+update t1 set b='b ' where a > 1;
+--error ER_DUP_ENTRY
+insert into t1 (b) values ('b');
+select * from t1;
+delete from t1 where b='b';
+select a,concat(b,'.') from t1;
+drop table t1;
+
+#
+# Test keys with 0 segments
+#
+create table t1 (a int not null);
+create table t2 (a int not null, primary key (a));
+insert into t1 values (1);
+insert into t2 values (1),(2);
+select sql_big_result distinct t1.a from t1,t2 order by t2.a;
+select distinct t1.a from t1,t2 order by t2.a;
+select sql_big_result distinct t1.a from t1,t2;
+explain select sql_big_result distinct t1.a from t1,t2 order by t2.a;
+explain select distinct t1.a from t1,t2 order by t2.a;
+drop table t1,t2;
+
+#
+# Test freshly imported table and LIMIT
+#
+create table t1 (
+ c1 varchar(32),
+ key (c1)
+);
+alter table t1 disable keys;
+insert into t1 values ('a'), ('b');
+select c1 from t1 order by c1 limit 1;
+drop table t1;
+
+#
+# Test join that could miss concurrently inserted row
+# Note that for the moment Maria only supports multiple writers if we have
+# static or dynamic row format
+#
+# Partial key.
+create table t1 (a int not null, primary key(a)) ROW_FORMAT=FIXED;
+create table t2 (a int not null, b int not null, primary key(a,b)) ROW_FORMAT=FIXED;
+insert into t1 values (1),(2),(3),(4),(5),(6);
+insert into t2 values (1,1),(2,1);
+set autocommit=0;
+begin;
+lock tables t1 read local, t2 read local;
+select straight_join * from t1,t2 force index (primary) where t1.a=t2.a;
+connect (root,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
+insert into t2 values(2,0);
+commit;
+disconnect root;
+connection default;
+select straight_join * from t1,t2 force index (primary) where t1.a=t2.a;
+drop table t1,t2;
+#
+# Full key.
+CREATE TABLE t1 (c1 varchar(250) NOT NULL) ROW_FORMAT=DYNAMIC;
+CREATE TABLE t2 (c1 varchar(250) NOT NULL, PRIMARY KEY (c1)) ROW_FORMAT=DYNAMIC;
+INSERT INTO t1 VALUES ('test000001'), ('test000002'), ('test000003');
+INSERT INTO t2 VALUES ('test000002'), ('test000003'), ('test000004');
+LOCK TABLES t1 READ LOCAL, t2 READ LOCAL;
+SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2
+ WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1;
+connect (con1,localhost,root,,);
+connection con1;
+INSERT INTO t2 VALUES ('test000001'), ('test000005');
+disconnect con1;
+connection default;
+SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2
+ WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1;
+DROP TABLE t1,t2;
+
+#
+# Test RTREE index
+#
+--error 1235, 1289
+CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`));
+# INSERT INTO t1 VALUES (1,1),(1,1);
+# DELETE FROM rt WHERE a<1;
+# DROP TABLE IF EXISTS t1;
+
+create table t1 (a int, b varchar(200), c text not null) checksum=1;
+create table t2 (a int, b varchar(200), c text not null) checksum=0;
+insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, "");
+insert t2 select * from t1;
+checksum table t1, t2, t3 quick;
+checksum table t1, t2, t3;
+checksum table t1, t2, t3 extended;
+#show table status;
+drop table t1,t2;
+
+create table t1 (a int, key (a));
+show keys from t1;
+alter table t1 disable keys;
+show keys from t1;
+create table t2 (a int);
+let $i=1000;
+set @@rand_seed1=31415926,@@rand_seed2=2718281828;
+--disable_query_log
+while ($i)
+{
+ dec $i;
+ insert t2 values (rand()*100000);
+}
+--enable_query_log
+insert t1 select * from t2;
+show keys from t1;
+alter table t1 enable keys;
+show keys from t1;
+alter table t1 engine=heap;
+alter table t1 disable keys;
+show keys from t1;
+drop table t1,t2;
+
+#
+# Index search for NULL in blob
+#
+create table t1 ( a tinytext, b char(1), index idx (a(1),b) );
+insert into t1 values (null,''), (null,'');
+explain select count(*) from t1 where a is null;
+select count(*) from t1 where a is null;
+drop table t1;
+
+#
+# Test corruption Can't open file: 'table.MYI' (errno: 145)
+#
+create table t1 (c1 int, c2 varchar(4) not null default '',
+ key(c2(3))) default charset=utf8;
+insert into t1 values (1,'A'), (2, 'B'), (3, 'A');
+update t1 set c2='A B' where c1=2;
+check table t1;
+drop table t1;
+
+#
+# Test CHECKSUM TABLE
+#
+
+create table t1 (c1 int);
+insert into t1 values (1),(2),(3),(4);
+checksum table t1;
+delete from t1 where c1 = 1;
+create table t2 as select * from t1;
+# The following returns 0 with the bug in place.
+checksum table t1;
+# The above should give the same number as the following.
+checksum table t2;
+drop table t1, t2;
+
+CREATE TABLE t1 (
+ twenty int(4),
+ hundred int(4) NOT NULL
+) CHECKSUM=1;
+INSERT INTO t1 VALUES (11,91);
+check table t1 extended;
+checksum table t1;
+checksum table t1 extended;
+alter table t1 row_format=fixed;
+checksum table t1;
+alter table t1 row_format=dynamic;
+checksum table t1;
+alter table t1 engine=myisam;
+checksum table t1;
+drop table t1;
+
+#
+# maria_stats_method variable.
+#
+
+show variables like 'maria_stats_method';
+
+create table t1 (a int, key(a));
+insert into t1 values (0),(1),(2),(3),(4);
+insert into t1 select NULL from t1;
+
+# default: NULLs considered inequal
+analyze table t1;
+show index from t1;
+insert into t1 values (11);
+delete from t1 where a=11;
+check table t1;
+show index from t1;
+
+# Set nulls to be equal:
+set maria_stats_method=nulls_equal;
+show variables like 'maria_stats_method';
+insert into t1 values (11);
+delete from t1 where a=11;
+
+analyze table t1;
+show index from t1;
+
+insert into t1 values (11);
+delete from t1 where a=11;
+
+check table t1;
+show index from t1;
+
+# Set nulls back to be equal
+set maria_stats_method=DEFAULT;
+show variables like 'maria_stats_method';
+insert into t1 values (11);
+delete from t1 where a=11;
+
+analyze table t1;
+show index from t1;
+
+insert into t1 values (11);
+delete from t1 where a=11;
+
+check table t1;
+show index from t1;
+
+drop table t1;
+
+# WL#2609, CSC#XXXX: MARIA
+set maria_stats_method=nulls_ignored;
+show variables like 'maria_stats_method';
+
+create table t1 (
+ a char(3), b char(4), c char(5), d char(6),
+ key(a,b,c,d)
+);
+insert into t1 values ('bcd','def1', NULL, 'zz');
+insert into t1 values ('bcd','def2', NULL, 'zz');
+insert into t1 values ('bce','def1', 'yuu', NULL);
+insert into t1 values ('bce','def2', NULL, 'quux');
+analyze table t1;
+show index from t1;
+delete from t1;
+# This will give you different messages depending on if we are using
+# row base or stmt based replication as stmt base replication will use
+# truncate and row based will delete things row by row.
+--replace_result "Table is already up to date" "OK"
+analyze table t1;
+show index from t1;
+
+set maria_stats_method=DEFAULT;
+
+drop table t1;
+
+#
+# Test key value packing for TINYBLOBs
+#
+
+create table t1(
+ cip INT NOT NULL,
+ time TIME NOT NULL,
+ score INT NOT NULL DEFAULT 0,
+ bob TINYBLOB
+);
+
+insert into t1 (cip, time) VALUES (1, '00:01'), (2, '00:02'), (3,'00:03');
+insert into t1 (cip, bob, time) VALUES (4, 'a', '00:04'), (5, 'b', '00:05'),
+ (6, 'c', '00:06');
+select * from t1 where bob is null and cip=1;
+create index bug on t1 (bob(22), cip, time);
+select * from t1 where bob is null and cip=1;
+drop table t1;
+
+#
+# Test COUNT(*) table with different INDEX
+#
+
+create table t1 (
+ id1 int not null auto_increment,
+ id2 int not null default '0',
+ t text not null,
+ primary key (id1),
+ key x (id2, t(32))
+) engine=maria; # engine clause is redundant but it's to test its parsing
+insert into t1 (id2, t) values
+(10, 'abc'), (10, 'abc'), (10, 'abc'),
+(20, 'abc'), (20, 'abc'), (20, 'def'),
+(10, 'abc'), (10, 'abc');
+select count(*) from t1 where id2 = 10;
+select count(id1) from t1 where id2 = 10;
+drop table t1;
+
+#
+# Test MIN and MAX functions in queries
+#
+
+CREATE TABLE t1(a TINYINT, KEY(a));
+INSERT INTO t1 VALUES(1);
+SELECT MAX(a) FROM t1 IGNORE INDEX(a);
+ALTER TABLE t1 DISABLE KEYS;
+SELECT MAX(a) FROM t1;
+SELECT MAX(a) FROM t1 IGNORE INDEX(a);
+DROP TABLE t1;
+
+#
+# Test update of table joined to self
+#
+CREATE TABLE t1(a CHAR(9), b VARCHAR(7));
+INSERT INTO t1(a) VALUES('xxxxxxxxx'),('xxxxxxxxx');
+UPDATE t1 AS ta1,t1 AS ta2 SET ta1.b='aaaaaa',ta2.b='bbbbbb';
+SELECT * FROM t1;
+DROP TABLE t1;
+
+#
+# OPTIMIZE TABLE with multiple threads
+#
+SET @@maria_repair_threads=2;
+SHOW VARIABLES LIKE 'maria_repair%';
+#
+# Test OPTIMIZE. This creates a new data file.
+CREATE TABLE t1 (
+ `_id` int(11) NOT NULL default '0',
+ `url` text,
+ `email` text,
+ `description` text,
+ `loverlap` int(11) default NULL,
+ `roverlap` int(11) default NULL,
+ `lneighbor_id` int(11) default NULL,
+ `rneighbor_id` int(11) default NULL,
+ `length_` int(11) default NULL,
+ `sequence` mediumtext,
+ `name` text,
+ `_obj_class` text NOT NULL,
+ PRIMARY KEY (`_id`),
+ UNIQUE KEY `sequence_name_index` (`name`(50)),
+ KEY (`length_`)
+) DEFAULT CHARSET=latin1;
+#
+INSERT INTO t1 VALUES
+ (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''),
+ (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''),
+ (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''),
+ (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''),
+ (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''),
+ (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''),
+ (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''),
+ (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''),
+ (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9','');
+#
+SELECT _id FROM t1;
+DELETE FROM t1 WHERE _id < 8;
+--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 #
+SHOW TABLE STATUS LIKE 't1';
+CHECK TABLE t1 EXTENDED;
+OPTIMIZE TABLE t1;
+CHECK TABLE t1 EXTENDED;
+--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 #
+SHOW TABLE STATUS LIKE 't1';
+SELECT _id FROM t1;
+DROP TABLE t1;
+#
+# Test REPAIR QUICK. This retains the old data file.
+CREATE TABLE t1 (
+ `_id` int(11) NOT NULL default '0',
+ `url` text,
+ `email` text,
+ `description` text,
+ `loverlap` int(11) default NULL,
+ `roverlap` int(11) default NULL,
+ `lneighbor_id` int(11) default NULL,
+ `rneighbor_id` int(11) default NULL,
+ `length_` int(11) default NULL,
+ `sequence` mediumtext,
+ `name` text,
+ `_obj_class` text NOT NULL,
+ PRIMARY KEY (`_id`),
+ UNIQUE KEY `sequence_name_index` (`name`(50)),
+ KEY (`length_`)
+) DEFAULT CHARSET=latin1;
+#
+INSERT INTO t1 VALUES
+ (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''),
+ (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''),
+ (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''),
+ (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''),
+ (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''),
+ (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''),
+ (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''),
+ (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''),
+ (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9','');
+#
+SELECT _id FROM t1;
+DELETE FROM t1 WHERE _id < 8;
+--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 #
+SHOW TABLE STATUS LIKE 't1';
+CHECK TABLE t1 EXTENDED;
+REPAIR TABLE t1 QUICK;
+CHECK TABLE t1 EXTENDED;
+--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 #
+SHOW TABLE STATUS LIKE 't1';
+SELECT _id FROM t1;
+DROP TABLE t1;
+#
+SET @@maria_repair_threads=1;
+SHOW VARIABLES LIKE 'maria_repair%';
+
+#
+# Test varchar
+#
+
+source include/varchar.inc;
+
+#
+# Some errors/warnings on create
+#
+
+create table t1 (v varchar(65530), key(v));
+drop table if exists t1;
+create table t1 (v varchar(65536));
+show create table t1;
+drop table t1;
+create table t1 (v varchar(65530) character set utf8);
+show create table t1;
+drop table t1;
+
+# MARIA specific varchar tests
+--error 1118
+create table t1 (v varchar(65535));
+
+#
+# Test concurrent insert
+# First with static record length
+#
+set @save_concurrent_insert=@@concurrent_insert;
+set global concurrent_insert=1;
+create table t1 (a int) ROW_FORMAT=FIXED;
+insert into t1 values (1),(2),(3),(4),(5);
+lock table t1 read local;
+connect (con1,localhost,root,,);
+connection con1;
+# Insert in table without hole
+insert into t1 values(6),(7);
+connection default;
+unlock tables;
+delete from t1 where a>=3 and a<=4;
+lock table t1 read local;
+connection con1;
+set global concurrent_insert=2;
+# Insert in table with hole -> Should insert at end
+insert into t1 values (8),(9);
+connection default;
+unlock tables;
+# Insert into hole
+insert into t1 values (10),(11),(12);
+select * from t1;
+check table t1;
+drop table t1;
+disconnect con1;
+
+# Same test with dynamic record length
+create table t1 (a int, b varchar(30) default "hello") ROW_FORMAT=DYNAMIC;
+insert into t1 (a) values (1),(2),(3),(4),(5);
+lock table t1 read local;
+connect (con1,localhost,root,,);
+connection con1;
+# Insert in table without hole
+insert into t1 (a) values(6),(7);
+connection default;
+unlock tables;
+delete from t1 where a>=3 and a<=4;
+lock table t1 read local;
+connection con1;
+set global concurrent_insert=2;
+# Insert in table with hole -> Should insert at end
+insert into t1 (a) values (8),(9);
+connection default;
+unlock tables;
+# Insert into hole
+insert into t1 (a) values (10),(11),(12);
+select a from t1;
+check table t1;
+drop table t1;
+disconnect con1;
+set global concurrent_insert=@save_concurrent_insert;
+
+#
+# ANALYZE TABLE and ALTER TABLE .. ENABLE INDEX
+#
+
+create table t1 (a int, key(a));
+
+insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+analyze table t1;
+show keys from t1;
+
+alter table t1 disable keys;
+alter table t1 enable keys;
+show keys from t1;
+
+drop table t1;
+
+#
+# Test temporary table with data directory option
+#
+connect (session1,localhost,root,,);
+connect (session2,localhost,root,,);
+
+connection session1;
+disable_query_log;
+eval create temporary table t1 (a int) data directory="$MYSQLTEST_VARDIR/tmp" select 9 a;
+enable_query_log;
+disable_result_log;
+show create table t1;
+enable_result_log;
+
+connection session2;
+disable_query_log;
+eval create temporary table t1 (a int) data directory="$MYSQLTEST_VARDIR/tmp" select 99 a;
+enable_query_log;
+disable_result_log;
+show create table t1;
+enable_result_log;
+
+connection default;
+create table t1 (a int) select 42 a;
+
+connection session1;
+select * from t1;
+disconnect session1;
+connection session2;
+select * from t1;
+disconnect session2;
+connection default;
+select * from t1;
+drop table t1;
+
+--echo End of 4.1 tests
+
+#
+# Test if PACK_KEYS option takes values greater than 1 while creating table
+#
+create table t1 (c1 int) pack_keys=0;
+create table t2 (c1 int) pack_keys=1;
+create table t3 (c1 int) pack_keys=default;
+--error 1064
+create table t4 (c1 int) pack_keys=2;
+drop table t1, t2, t3;
+
+#
+# Test of key_block_size
+#
+
+create table t1 (a int not null, key `a` (a) key_block_size=1024);
+show create table t1;
+drop table t1;
+
+create table t1 (a int not null, key `a` (a) key_block_size=2048);
+show create table t1;
+drop table t1;
+
+create table t1 (a varchar(2048), key `a` (a));
+show create table t1;
+drop table t1;
+
+create table t1 (a varchar(2048), key `a` (a) key_block_size=1024);
+show create table t1;
+drop table t1;
+
+create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=1024;
+show create table t1;
+alter table t1 key_block_size=2048;
+show create table t1;
+alter table t1 add c int, add key (c);
+show create table t1;
+alter table t1 key_block_size=0;
+alter table t1 add d int, add key (d);
+show create table t1;
+drop table t1;
+
+create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=8192;
+show create table t1;
+drop table t1;
+
+create table t1 (a int not null, b varchar(2048), key (a) key_block_size=1024, key(b)) key_block_size=8192;
+show create table t1;
+drop table t1;
+
+create table t1 (a int not null, b int, key (a) key_block_size=1024, key(b) key_block_size=8192) key_block_size=16384;
+show create table t1;
+drop table t1;
+
+
+# Test limits and errors of key_block_size
+
+create table t1 (a int not null, key `a` (a) key_block_size=512);
+show create table t1;
+drop table t1;
+
+create table t1 (a varchar(2048), key `a` (a) key_block_size=1000000000000000000);
+show create table t1;
+drop table t1;
+
+create table t1 (a int not null, key `a` (a) key_block_size=1025);
+show create table t1;
+drop table t1;
+
+--error 1064
+create table t1 (a int not null, key key_block_size=1024 (a));
+--error 1064
+create table t1 (a int not null, key `a` key_block_size=1024 (a));
+
+#
+# Test of changing MI_KEY_BLOCK_LENGTH
+#
+
+CREATE TABLE t1 (
+ c1 INT,
+ c2 VARCHAR(300),
+ KEY (c1) KEY_BLOCK_SIZE 1024,
+ KEY (c2) KEY_BLOCK_SIZE 8192
+ );
+INSERT INTO t1 VALUES (10, REPEAT('a', CEIL(RAND(10) * 300))),
+ (11, REPEAT('b', CEIL(RAND() * 300))),
+ (12, REPEAT('c', CEIL(RAND() * 300))),
+ (13, REPEAT('d', CEIL(RAND() * 300))),
+ (14, REPEAT('e', CEIL(RAND() * 300))),
+ (15, REPEAT('f', CEIL(RAND() * 300))),
+ (16, REPEAT('g', CEIL(RAND() * 300))),
+ (17, REPEAT('h', CEIL(RAND() * 300))),
+ (18, REPEAT('i', CEIL(RAND() * 300))),
+ (19, REPEAT('j', CEIL(RAND() * 300))),
+ (20, REPEAT('k', CEIL(RAND() * 300))),
+ (21, REPEAT('l', CEIL(RAND() * 300))),
+ (22, REPEAT('m', CEIL(RAND() * 300))),
+ (23, REPEAT('n', CEIL(RAND() * 300))),
+ (24, REPEAT('o', CEIL(RAND() * 300))),
+ (25, REPEAT('p', CEIL(RAND() * 300))),
+ (26, REPEAT('q', CEIL(RAND() * 300))),
+ (27, REPEAT('r', CEIL(RAND() * 300))),
+ (28, REPEAT('s', CEIL(RAND() * 300))),
+ (29, REPEAT('t', CEIL(RAND() * 300))),
+ (30, REPEAT('u', CEIL(RAND() * 300))),
+ (31, REPEAT('v', CEIL(RAND() * 300))),
+ (32, REPEAT('w', CEIL(RAND() * 300))),
+ (33, REPEAT('x', CEIL(RAND() * 300))),
+ (34, REPEAT('y', CEIL(RAND() * 300))),
+ (35, REPEAT('z', CEIL(RAND() * 300)));
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+CHECK TABLE t1;
+REPAIR TABLE t1;
+DELETE FROM t1 WHERE c1 >= 10;
+CHECK TABLE t1;
+DROP TABLE t1;
+
+#
+# Test that TRANSACTIONAL is preserved
+#
+
+create table t1 (a int) transactional=0;
+show create table t1;
+drop table t1;
+create table t1 (a int) row_format=dynamic transactional=0;
+show create table t1;
+drop table t1;
+create table t1 (a int) row_format=dynamic transactional=1;
+show create table t1;
+alter table t1 row_format=PAGE;
+show create table t1;
+alter table t1 row_format=DYNAMIC;
+show create table t1;
+alter table t1 transactional=0;
+show create table t1;
+alter table t1 row_format=DYNAMIC;
+show create table t1;
+drop table t1;
+create table t1 (a int) row_format=PAGE;
+show create table t1;
+drop table t1;
+create table t1 (a int) row_format=PAGE TRANSACTIONAL=DEFAULT;
+show create table t1;
+alter table t1 row_format=DYNAMIC;
+show create table t1;
+drop table t1;
+
+# Verify that INSERT DELAYED is disabled only for transactional tables
+create table t1 (a int) row_format=page;
+--error ER_ILLEGAL_HA
+insert delayed into t1 values(1);
+drop table t1;
+create table t1 (a int) row_format=page transactional=0;
+insert delayed into t1 values(1);
+flush table t1;
+select * from t1;
+select count(*) from t1;
+drop table t1;
+create table t1 (a int) row_format=dynamic;
+insert delayed into t1 values(1);
+flush table t1;
+select * from t1;
+select count(*) from t1;
+drop table t1;
+
+# CHECK TABLE was reporting
+# "Size of datafile is: 0 Should be: 16384"
+#
+
+create table `t1` (
+ t1_name varchar(255) default null,
+ t1_id int(10) unsigned not null auto_increment,
+ key (t1_name),
+ primary key (t1_id)
+) engine=maria auto_increment = 1000 default charset=latin1;
+lock tables t1 write;
+INSERT INTO `t1` VALUES ('bla',1000),('bla',1001),('bla',1002);
+check table t1;
+unlock tables;
+
+#
+# Check that an empty table uses fast recreate of index when we fill it
+# with insert ... select.
+
+create table t2 like t1;
+insert into t2 select * from t1;
+
+# This should say that the table is already up to date
+analyze table t2;
+delete from t2;
+insert into t2 select * from t1;
+analyze table t2;
+
+drop table t1,t2;
+
+#
+# Test when expanding a row so that it doesn't fit into the same page
+#
+
+create table t1 (a bigint auto_increment, primary key(a), b char(255), c varchar(20000));
+
+let $1=1000;
+--disable_query_log
+--disable_warnings
+while ($1)
+{
+ insert into t1 () values();
+ dec $1;
+}
+--enable_query_log
+update t1 set b=repeat('a',100) where a between 1 and 100;
+check table t1;
+update t1 set c=repeat('a',8192*2) where a between 200 and 202;
+check table t1;
+drop table t1;
+
+#
+# Test where we shrink varchar
+#
+
+CREATE TABLE t1 (a int, b int, v varchar(60000)) checksum=1 engine=maria;
+insert into t1 values (1,1,"aaa"),(1,2,null);
+checksum table t1;
+lock table t1 write;
+insert into t1 values (1,3,repeat('c',30000)),(4,4,repeat('a',30000));
+update t1 set v="row5" where b=4;
+delete from t1 where b=3;
+select a, b, length(v) from t1;
+drop table t1;
+
+#
+# Test tail pages for blobs
+#
+
+CREATE TABLE t1 (
+ auto int(5) unsigned NOT NULL auto_increment,
+ string char(10) default "hello",
+ tiny tinyint(4) DEFAULT '0' NOT NULL ,
+ short smallint(6) DEFAULT '1' NOT NULL ,
+ medium mediumint(8) DEFAULT '0' NOT NULL,
+ long_int int(11) DEFAULT '0' NOT NULL,
+ longlong bigint(13) DEFAULT '0' NOT NULL,
+ real_float float(13,1) DEFAULT 0.0 NOT NULL,
+ real_double double(16,4),
+ utiny tinyint(3) unsigned DEFAULT '0' NOT NULL,
+ ushort smallint(5) unsigned zerofill DEFAULT '00000' NOT NULL,
+ umedium mediumint(8) unsigned DEFAULT '0' NOT NULL,
+ ulong int(11) unsigned DEFAULT '0' NOT NULL,
+ ulonglong bigint(13) unsigned DEFAULT '0' NOT NULL,
+ time_stamp timestamp,
+ date_field date,
+ time_field time,
+ date_time datetime,
+ blob_col blob,
+ tinyblob_col tinyblob,
+ mediumblob_col mediumblob not null default '',
+ longblob_col longblob not null default '',
+ options enum('one','two','tree') not null ,
+ flags set('one','two','tree') not null default '',
+ PRIMARY KEY (auto),
+ KEY (utiny),
+ KEY (tiny),
+ KEY (short),
+ KEY any_name (medium),
+ KEY (longlong),
+ KEY (real_float),
+ KEY (ushort),
+ KEY (umedium),
+ KEY (ulong),
+ KEY (ulonglong,ulong),
+ KEY (options,flags)
+) engine=maria;
+insert into t1 values (10,1,1,1,1,1,1,1,1,1,1,1,1,1,NULL,0,0,0,1,1,1,1,'one','one');
+create table t2 (primary key (auto)) engine=maria row_format=page select auto+1 as auto,1 as t1, 'a' as t2, repeat('a',256) as t3, binary repeat('b',256) as t4, repeat('a',4096) as t5, binary repeat('b',4096) as t6, '' as t7, binary '' as t8 from t1;
+check table t1,t2;
+select t1,t2,length(t3),length(t4),length(t5),length(t6),t7,t8 from t2;
+drop table t1,t2;
+
+# Test UPDATE with small BLOB which fits on head page
+
+CREATE TABLE t1 (seq int, s1 int, s2 blob);
+insert into t1 values (1, 1, MD5(1));
+update t1 set s1=2 where seq=1;
+check table t1 extended;
+drop table t1;
+
+show variables like 'maria%';
+--replace_column 2 #
+show status like 'maria%';
+
+#
+# Show that we can't yet create fulltext or spatial index with Maria
+#
+--error 138
+create table t1 (s varchar(25), fulltext(s)) TRANSACTIONAL= 1;
+drop table if exists t1;
+
+--error 138
+create table t1 ( fid int not null auto_increment primary key,
+g geometry not null, spatial key(g));
+drop table if exists t1;
+
+#
+# Test warning on log file size truncates
+#
+
+--enable_warnings
+set global maria_log_file_size=4294967296;
+
+# End of 5.2 tests
+
+--disable_result_log
+--disable_query_log
+eval set global storage_engine=$default;
+--enable_result_log
+--enable_query_log
diff --git a/mysql-test/t/merge.test b/mysql-test/t/merge.test
index e49297dd06c..679a60ab8e1 100644
--- a/mysql-test/t/merge.test
+++ b/mysql-test/t/merge.test
@@ -2,6 +2,11 @@
# Test of MERGE TABLES
#
+# MERGE tables require MyISAM tables
+let $default=`select @@global.storage_engine`;
+set global storage_engine=myisam;
+set session storage_engine=myisam;
+
--disable_warnings
drop table if exists t1,t2,t3,t4,t5,t6;
drop database if exists mysqltest;
@@ -599,9 +604,9 @@ SELECT * FROM t3;
DROP TABLE t1, t2, t3;
-
--echo End of 5.0 tests
+
#
# Bug #8306: TRUNCATE leads to index corruption
#
@@ -738,7 +743,7 @@ DROP TABLE t1, t2;
# *after* the administration task. It was terminated by UNLOCK TABLES only.
#
# This is the same test case as for
-# Bug#26867 - LOCK TABLES + REPAIR + merge table result in memory/cpu hogging
+# Bug#26867 - LOCK TABLES REPAIR + merge table result in memory/cpu hogging
#
#
CREATE TABLE t1 (c1 INT) ENGINE= MyISAM;
@@ -1381,3 +1386,8 @@ FLUSH TABLES m1, t1;
UNLOCK TABLES;
DROP TABLE t1, m1;
+--disable_result_log
+--disable_query_log
+eval set global storage_engine=$default;
+--enable_result_log
+--enable_query_log
diff --git a/mysql-test/t/myisam.test b/mysql-test/t/myisam.test
index fbd0a5ac4e7..89ff1482b60 100644
--- a/mysql-test/t/myisam.test
+++ b/mysql-test/t/myisam.test
@@ -4,7 +4,7 @@
# Initialise
--disable_warnings
-drop table if exists t1,t2;
+drop table if exists t1,t2,t3;
--enable_warnings
SET SQL_WARNINGS=1;
diff --git a/mysql-test/t/mysqldump.test b/mysql-test/t/mysqldump.test
index 0e4e9989ffa..eb08737dfcc 100644
--- a/mysql-test/t/mysqldump.test
+++ b/mysql-test/t/mysqldump.test
@@ -199,7 +199,7 @@ DROP TABLE t1;
--echo # Test for --insert-ignore
--echo #
-CREATE TABLE t1 (a int);
+CREATE TABLE t1 (a int) ENGINE=MyISAM;
INSERT INTO t1 VALUES (1),(2),(3);
INSERT INTO t1 VALUES (4),(5),(6);
--exec $MYSQL_DUMP --skip-comments --insert-ignore test t1
@@ -1463,8 +1463,8 @@ DROP TABLE t1;
# Bug #25993: crashe with a merge table and -c
#
-CREATE TABLE t2 (a int);
-CREATE TABLE t3 (a int);
+CREATE TABLE t2 (a int) ENGINE=MyISAM;
+CREATE TABLE t3 (a int) ENGINE=MyISAM;
CREATE TABLE t1 (a int) ENGINE=merge UNION=(t2, t3);
--exec $MYSQL_DUMP --skip-comments -c test
DROP TABLE t1, t2, t3;
diff --git a/mysql-test/t/old-mode-master.opt b/mysql-test/t/old-mode-master.opt
new file mode 100644
index 00000000000..840ee0dedcf
--- /dev/null
+++ b/mysql-test/t/old-mode-master.opt
@@ -0,0 +1 @@
+--old=1
diff --git a/mysql-test/t/old-mode.test b/mysql-test/t/old-mode.test
new file mode 100644
index 00000000000..4fa21f761ca
--- /dev/null
+++ b/mysql-test/t/old-mode.test
@@ -0,0 +1,16 @@
+#
+# Test 'old' mode
+#
+
+# Initialise
+--disable_warnings
+drop table if exists t1,t2;
+--enable_warnings
+
+create table t1 (a int, b varchar(200), c text not null) checksum=1;
+create table t2 (a int, b varchar(200), c text not null) checksum=0;
+insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, "");
+insert t2 select * from t1;
+checksum table t1, t2;
+checksum table t1, t2 extended;
+drop table t1,t2;
diff --git a/mysql-test/t/ps_maria.test b/mysql-test/t/ps_maria.test
new file mode 100644
index 00000000000..d5a756c261e
--- /dev/null
+++ b/mysql-test/t/ps_maria.test
@@ -0,0 +1,47 @@
+###############################################
+# #
+# Prepared Statements test on MARIA tables #
+# #
+###############################################
+
+#
+# NOTE: PLEASE SEE ps_1general.test (bottom)
+# BEFORE ADDING NEW TEST CASES HERE !!!
+
+-- source include/have_maria.inc
+set global maria_log_file_size=4294967295;
+
+use test;
+
+let $type= 'MARIA' ;
+-- source include/ps_create.inc
+-- source include/ps_renew.inc
+
+-- source include/ps_query.inc
+
+# parameter in SELECT ... MATCH/AGAINST
+# case derived from client_test.c: test_bug1500()
+--disable_warnings
+drop table if exists t2 ;
+--enable_warnings
+eval create table t2 (s varchar(25), fulltext(s)) TRANSACTIONAL= 0
+ENGINE = $type ;
+insert into t2 values ('Gravedigger'), ('Greed'),('Hollow Dogs') ;
+commit ;
+
+prepare stmt1 from ' select s from t2 where match (s) against (?) ' ;
+set @arg00='Dogs' ;
+execute stmt1 using @arg00 ;
+prepare stmt1 from ' SELECT s FROM t2
+where match (s) against (concat(?,''digger'')) ';
+set @arg00='Grave' ;
+execute stmt1 using @arg00 ;
+drop table t2 ;
+
+-- source include/ps_modify.inc
+-- source include/ps_modify1.inc
+-- source include/ps_conv.inc
+
+drop table t1, t9;
+
+# End of 4.1 tests
diff --git a/mysql-test/t/query_cache.test b/mysql-test/t/query_cache.test
index 9e250372d51..890999cdc08 100644
--- a/mysql-test/t/query_cache.test
+++ b/mysql-test/t/query_cache.test
@@ -45,9 +45,9 @@ show status like "Qcache_queries_in_cache";
#
# MERGE TABLES with INSERT/UPDATE and DELETE
#
-create table t1 (a int not null);
+create table t1 (a int not null) ENGINE=MyISAM;
insert into t1 values (1),(2),(3);
-create table t2 (a int not null);
+create table t2 (a int not null) ENGINE=MyISAM;
insert into t2 values (4),(5),(6);
create table t3 (a int not null) engine=MERGE UNION=(t1,t2) INSERT_METHOD=FIRST;
# insert
@@ -294,7 +294,7 @@ drop table t1;
flush query cache;
reset query cache;
-create table t1 (a int not null);
+create table t1 (a int not null) ENGINE=MyISAM;
insert into t1 values (1),(2),(3);
select * from t1;
select * from t1;
diff --git a/mysql-test/t/query_cache_merge.test b/mysql-test/t/query_cache_merge.test
index 36b8662f088..03a14344664 100644
--- a/mysql-test/t/query_cache_merge.test
+++ b/mysql-test/t/query_cache_merge.test
@@ -19,7 +19,7 @@ let $1 = 257;
while ($1)
{
eval drop table if exists t$1;
- eval create table t$1(a int);
+ eval create table t$1(a int) ENGINE=MyISAM;
eval insert into t$1 values (1),(2);
dec $1;
}
diff --git a/mysql-test/t/subselect.test b/mysql-test/t/subselect.test
index 077e00a4c6e..6d6490d0ebc 100644
--- a/mysql-test/t/subselect.test
+++ b/mysql-test/t/subselect.test
@@ -337,8 +337,8 @@ select * from t12;
drop table t11, t12, t2;
#insert with subselects
-CREATE TABLE t1 (x int);
-create table t2 (a int);
+CREATE TABLE t1 (x int) ENGINE=MyISAM;
+create table t2 (a int) ENGINE=MyISAM;
create table t3 (b int);
insert into t2 values (1);
insert into t3 values (1),(2);
@@ -369,7 +369,7 @@ select * from t1;
drop table t1, t2, t3;
#replace with subselects
-CREATE TABLE t1 (x int not null, y int, primary key (x));
+CREATE TABLE t1 (x int not null, y int, primary key (x)) ENGINE=MyISAM;
create table t2 (a int);
create table t3 (a int);
insert into t2 values (1);
diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt
index 60e75c96b75..5b024056fda 100755
--- a/mysys/CMakeLists.txt
+++ b/mysys/CMakeLists.txt
@@ -27,7 +27,7 @@ INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/zlib ${CMAKE_SOURCE_DIR}/include ${CMAKE
SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c default.c default_modify.c
errors.c hash.c list.c md5.c mf_brkhant.c mf_cache.c mf_dirname.c mf_fn_ext.c
- mf_format.c mf_getdate.c mf_iocache.c mf_iocache2.c mf_keycache.c
+ mf_format.c mf_getdate.c mf_iocache.c mf_iocache2.c mf_keycache.c my_safehash.c
mf_keycaches.c mf_loadpath.c mf_pack.c mf_path.c mf_qsort.c mf_qsort2.c
mf_radix.c mf_same.c mf_sort.c mf_soundex.c mf_strip.c mf_arr_appstr.c mf_tempdir.c
mf_tempfile.c mf_unixpath.c mf_wcomp.c mf_wfile.c mulalloc.c my_access.c
@@ -39,7 +39,7 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c default.c default_
my_mkdir.c my_mmap.c my_net.c my_once.c my_open.c my_pread.c my_pthread.c
my_quick.c my_read.c my_realloc.c my_redel.c my_rename.c my_seek.c my_sleep.c
my_static.c my_symlink.c my_symlink2.c my_sync.c my_thr_init.c my_wincond.c
- my_windac.c my_winthread.c my_write.c ptr_cmp.c queues.c
+ my_windac.c my_winthread.c my_write.c ptr_cmp.c queues.c
rijndael.c safemalloc.c sha1.c string.c thr_alarm.c thr_lock.c thr_mutex.c
thr_rwlock.c tree.c typelib.c my_vle.c base64.c my_memmem.c my_getpagesize.c)
diff --git a/mysys/Makefile.am b/mysys/Makefile.am
index f06d81da849..10200fde8be 100644
--- a/mysys/Makefile.am
+++ b/mysys/Makefile.am
@@ -20,18 +20,21 @@ INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \
-I$(top_srcdir)/include -I$(srcdir)
pkglib_LIBRARIES = libmysys.a
LDADD = libmysys.a $(top_builddir)/strings/libmystrings.a $(top_builddir)/dbug/libdbug.a
-noinst_HEADERS = mysys_priv.h my_static.h
+noinst_HEADERS = mysys_priv.h my_static.h my_safehash.h
libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \
mf_path.c mf_loadpath.c my_file.c \
my_open.c my_create.c my_dup.c my_seek.c my_read.c \
my_pread.c my_write.c my_getpagesize.c \
+ my_safehash.c \
mf_keycache.c mf_keycaches.c my_crc32.c \
mf_iocache.c mf_iocache2.c mf_cache.c mf_tempfile.c \
mf_tempdir.c my_lock.c mf_brkhant.c my_alarm.c \
my_malloc.c my_realloc.c my_once.c mulalloc.c \
my_alloc.c safemalloc.c my_new.cc \
- my_vle.c my_atomic.c \
+ my_vle.c my_atomic.c lf_hash.c \
+ lf_dynarray.c lf_alloc-pin.c \
my_fopen.c my_fstream.c my_getsystime.c \
+ my_rnd.c my_uuid.c \
my_error.c errors.c my_div.c my_messnc.c \
mf_format.c mf_same.c mf_dirname.c mf_fn_ext.c \
my_symlink.c my_symlink2.c \
@@ -52,7 +55,8 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \
my_gethostbyname.c rijndael.c my_aes.c sha1.c \
my_handler.c my_netware.c my_largepage.c \
my_memmem.c \
- my_windac.c my_access.c base64.c my_libwrap.c
+ my_windac.c my_access.c base64.c my_libwrap.c \
+ wqueue.c
EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \
thr_mutex.c thr_rwlock.c \
CMakeLists.txt mf_soundex.c \
@@ -126,5 +130,6 @@ test_base64$(EXEEXT): base64.c $(LIBRARIES)
$(LINK) $(FLAGS) -DMAIN ./test_base64.c $(LDADD) $(LIBS)
$(RM) -f ./test_base64.c
+
# Don't update the files from bitkeeper
%::SCCS/s.%
diff --git a/mysys/array.c b/mysys/array.c
index 8a539f18a20..9ff35791dde 100644
--- a/mysys/array.c
+++ b/mysys/array.c
@@ -30,8 +30,8 @@
alloc_increment Increment for adding new elements
DESCRIPTION
- init_dynamic_array() initiates array and allocate space for
- init_alloc eilements.
+ init_dynamic_array() initiates array and allocate space for
+ init_alloc eilements.
Array is usable even if space allocation failed.
Static buffers must begin immediately after the array structure.
@@ -41,7 +41,7 @@
*/
my_bool init_dynamic_array2(DYNAMIC_ARRAY *array, uint element_size,
- void *init_buffer, uint init_alloc,
+ void *init_buffer, uint init_alloc,
uint alloc_increment CALLER_INFO_PROTO)
{
DBUG_ENTER("init_dynamic_array");
@@ -63,20 +63,20 @@ my_bool init_dynamic_array2(DYNAMIC_ARRAY *array, uint element_size,
array->size_of_element=element_size;
if ((array->buffer= init_buffer))
DBUG_RETURN(FALSE);
- if (!(array->buffer=(uchar*) my_malloc_ci(element_size*init_alloc,MYF(MY_WME))))
+ if (!(array->buffer=(uchar*) my_malloc_ci(element_size*init_alloc, MYF(MY_WME))))
{
array->max_element=0;
DBUG_RETURN(TRUE);
}
DBUG_RETURN(FALSE);
-}
+}
my_bool init_dynamic_array(DYNAMIC_ARRAY *array, uint element_size,
- uint init_alloc,
+ uint init_alloc,
uint alloc_increment CALLER_INFO_PROTO)
{
/* placeholder to preserve ABI */
- return my_init_dynamic_array_ci(array, element_size, init_alloc,
+ return my_init_dynamic_array_ci(array, element_size, init_alloc,
alloc_increment);
}
/*
@@ -111,7 +111,7 @@ my_bool insert_dynamic(DYNAMIC_ARRAY *array, uchar* element)
/*
- Alloc space for next element(s)
+ Alloc space for next element(s)
SYNOPSIS
alloc_dynamic()
@@ -129,6 +129,7 @@ my_bool insert_dynamic(DYNAMIC_ARRAY *array, uchar* element)
uchar *alloc_dynamic(DYNAMIC_ARRAY *array)
{
+ DBUG_ENTER("alloc_dynamic");
if (array->elements == array->max_element)
{
char *new_ptr;
@@ -142,20 +143,20 @@ uchar *alloc_dynamic(DYNAMIC_ARRAY *array)
array->alloc_increment) *
array->size_of_element,
MYF(MY_WME))))
- return 0;
- memcpy(new_ptr, array->buffer,
+ DBUG_RETURN(0);
+ memcpy(new_ptr, array->buffer,
array->elements * array->size_of_element);
}
- else
- if (!(new_ptr=(char*) my_realloc(array->buffer,(array->max_element+
- array->alloc_increment)*
- array->size_of_element,
- MYF(MY_WME | MY_ALLOW_ZERO_PTR))))
- return 0;
+ else if (!(new_ptr=(char*)
+ my_realloc(array->buffer,(array->max_element+
+ array->alloc_increment)*
+ array->size_of_element,
+ MYF(MY_WME | MY_ALLOW_ZERO_PTR))))
+ DBUG_RETURN(0);
array->buffer= (uchar*) new_ptr;
array->max_element+=array->alloc_increment;
}
- return array->buffer+(array->elements++ * array->size_of_element);
+ DBUG_RETURN(array->buffer+(array->elements++ * array->size_of_element));
}
@@ -165,8 +166,8 @@ uchar *alloc_dynamic(DYNAMIC_ARRAY *array)
SYNOPSIS
pop_dynamic()
array
-
- RETURN VALUE
+
+ RETURN VALUE
pointer Ok
0 Array is empty
*/
@@ -179,7 +180,7 @@ uchar *pop_dynamic(DYNAMIC_ARRAY *array)
}
/*
- Replace elemnent in array with given element and index
+ Replace element in array with given element and index
SYNOPSIS
set_dynamic()
@@ -188,9 +189,9 @@ uchar *pop_dynamic(DYNAMIC_ARRAY *array)
idx Index where element is to be inserted
DESCRIPTION
- set_dynamic() replaces element in array.
- If idx > max_element insert new element. Allocate memory if needed.
-
+ set_dynamic() replaces element in array.
+ If idx > max_element insert new element. Allocate memory if needed.
+
RETURN VALUE
TRUE Idx was out of range and allocation of new memory failed
FALSE Ok
@@ -200,50 +201,76 @@ my_bool set_dynamic(DYNAMIC_ARRAY *array, uchar* element, uint idx)
{
if (idx >= array->elements)
{
- if (idx >= array->max_element)
- {
- uint size;
- char *new_ptr;
- size=(idx+array->alloc_increment)/array->alloc_increment;
- size*= array->alloc_increment;
- if (array->buffer == (uchar *)(array + 1))
- {
- /*
- In this senerio, the buffer is statically preallocated,
- so we have to create an all-new malloc since we overflowed
- */
- if (!(new_ptr= (char *) my_malloc(size *
- array->size_of_element,
- MYF(MY_WME))))
- return 0;
- memcpy(new_ptr, array->buffer,
- array->elements * array->size_of_element);
- }
- else
- if (!(new_ptr=(char*) my_realloc(array->buffer,size*
- array->size_of_element,
- MYF(MY_WME | MY_ALLOW_ZERO_PTR))))
- return TRUE;
- array->buffer= (uchar*) new_ptr;
- array->max_element=size;
- }
+ if (idx >= array->max_element && allocate_dynamic(array, idx))
+ return TRUE;
bzero((uchar*) (array->buffer+array->elements*array->size_of_element),
- (idx - array->elements)*array->size_of_element);
+ (idx - array->elements)*array->size_of_element);
array->elements=idx+1;
}
memcpy(array->buffer+(idx * array->size_of_element),element,
- (size_t) array->size_of_element);
+ (size_t) array->size_of_element);
return FALSE;
}
+
+/*
+ Ensure that dynamic array has enough elements
+
+ SYNOPSIS
+ allocate_dynamic()
+ array
+ max_elements Numbers of elements that is needed
+
+ NOTES
+ Any new allocated element are NOT initialized
+
+ RETURN VALUE
+ FALSE Ok
+ TRUE Allocation of new memory failed
+*/
+
+my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements)
+{
+ DBUG_ENTER("allocate_dynamic");
+
+ if (max_elements >= array->max_element)
+ {
+ uint size;
+ char *new_ptr;
+ size= (max_elements + array->alloc_increment)/array->alloc_increment;
+ size*= array->alloc_increment;
+ if (array->buffer == (uchar *)(array + 1))
+ {
+ /*
+ In this senerio, the buffer is statically preallocated,
+ so we have to create an all-new malloc since we overflowed
+ */
+ if (!(new_ptr= (char *) my_malloc(size *
+ array->size_of_element,
+ MYF(MY_WME))))
+ DBUG_RETURN(0);
+ memcpy(new_ptr, array->buffer,
+ array->elements * array->size_of_element);
+ }
+ else if (!(new_ptr= (char*) my_realloc(array->buffer,size*
+ array->size_of_element,
+ MYF(MY_WME | MY_ALLOW_ZERO_PTR))))
+ DBUG_RETURN(TRUE);
+ array->buffer= new_ptr;
+ array->max_element= size;
+ }
+ DBUG_RETURN(FALSE);
+}
+
+
/*
Get an element from array by given index
SYNOPSIS
get_dynamic()
- array
+ array
uchar* Element to be returned. If idx > elements contain zeroes.
- idx Index of element wanted.
+ idx Index of element wanted.
*/
void get_dynamic(DYNAMIC_ARRAY *array, uchar* element, uint idx)
@@ -320,7 +347,7 @@ void freeze_size(DYNAMIC_ARRAY *array)
*/
if (array->buffer == (uchar *)(array + 1))
return;
-
+
if (array->buffer && array->max_element != elements)
{
array->buffer=(uchar*) my_realloc(array->buffer,
@@ -337,7 +364,7 @@ void freeze_size(DYNAMIC_ARRAY *array)
SYNOPSIS
get_index_dynamic()
array Array
- element Whose element index
+ element Whose element index
*/
diff --git a/mysys/checksum.c b/mysys/checksum.c
index 4f86f6845f0..0cc9801c2b1 100644
--- a/mysys/checksum.c
+++ b/mysys/checksum.c
@@ -18,6 +18,8 @@
#include <my_sys.h>
#include <zlib.h>
+ha_checksum my_crc_dbug_check= 1; /* Unlikely number */
+
/*
Calculate a long checksum for a memoryblock.
@@ -34,9 +36,13 @@ ha_checksum my_checksum(ha_checksum crc, const uchar *pos, size_t length)
const uchar *end=pos+length;
for ( ; pos != end ; pos++)
crc=((crc << 8) + *((uchar*) pos)) + (crc >> (8*sizeof(ha_checksum)-8));
- return crc;
#else
- return (ha_checksum)crc32((uint)crc, pos, length);
+ crc= (ha_checksum) crc32((uint)crc, pos, length);
+#endif /* NOT_USED */
+ DBUG_PRINT("info", ("crc: %lu", (ulong) crc));
+#ifndef DBUG_OFF
+ if (crc == my_crc_dbug_check)
+ my_debug_put_break_here();
#endif
+ return crc;
}
-
diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c
new file mode 100644
index 00000000000..ff9c5a42f81
--- /dev/null
+++ b/mysys/lf_alloc-pin.c
@@ -0,0 +1,529 @@
+/* QQ: TODO multi-pinbox */
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ wait-free concurrent allocator based on pinning addresses
+
+ It works as follows: every thread (strictly speaking - every CPU, but
+ it's too difficult to do) has a small array of pointers. They're called
+ "pins". Before using an object its address must be stored in this array
+ (pinned). When an object is no longer necessary its address must be
+ removed from this array (unpinned). When a thread wants to free() an
+ object it scans all pins of all threads to see if somebody has this
+ object pinned. If yes - the object is not freed (but stored in a
+ "purgatory"). To reduce the cost of a single free() pins are not scanned
+ on every free() but only added to (thread-local) purgatory. On every
+ LF_PURGATORY_SIZE free() purgatory is scanned and all unpinned objects
+ are freed.
+
+ Pins are used to solve ABA problem. To use pins one must obey
+ a pinning protocol:
+
+ 1. Let's assume that PTR is a shared pointer to an object. Shared means
+ that any thread may modify it anytime to point to a different object
+ and free the old object. Later the freed object may be potentially
+ allocated by another thread. If we're unlucky that other thread may
+ set PTR to point to this object again. This is ABA problem.
+ 2. Create a local pointer LOCAL_PTR.
+ 3. Pin the PTR in a loop:
+ do
+ {
+ LOCAL_PTR= PTR;
+ pin(PTR, PIN_NUMBER);
+ } while (LOCAL_PTR != PTR)
+ 4. It is guaranteed that after the loop has ended, LOCAL_PTR
+ points to an object (or NULL, if PTR may be NULL), that
+ will never be freed. It is not guaranteed though
+ that LOCAL_PTR == PTR (as PTR can change any time)
+ 5. When done working with the object, remove the pin:
+ unpin(PIN_NUMBER)
+ 6. When copying pins (as in the list traversing loop:
+ pin(CUR, 1);
+ while ()
+ {
+ do // standard
+ { // pinning
+ NEXT=CUR->next; // loop
+ pin(NEXT, 0); // see #3
+ } while (NEXT != CUR->next); // above
+ ...
+ ...
+ CUR=NEXT;
+ pin(CUR, 1); // copy pin[0] to pin[1]
+ }
+ which keeps CUR address constantly pinned), note than pins may be
+ copied only upwards (!!!), that is pin[N] to pin[M], M > N.
+ 7. Don't keep the object pinned longer than necessary - the number of
+ pins you have is limited (and small), keeping an object pinned
+ prevents its reuse and cause unnecessary mallocs.
+
+ Explanations:
+
+ 3. The loop is important. The following can occur:
+ thread1> LOCAL_PTR= PTR
+ thread2> free(PTR); PTR=0;
+ thread1> pin(PTR, PIN_NUMBER);
+ now thread1 cannot access LOCAL_PTR, even if it's pinned,
+ because it points to a freed memory. That is, it *must*
+ verify that it has indeed pinned PTR, the shared pointer.
+
+ 6. When a thread wants to free some LOCAL_PTR, and it scans
+ all lists of pins to see whether it's pinned, it does it
+ upwards, from low pin numbers to high. Thus another thread
+ must copy an address from one pin to another in the same
+ direction - upwards, otherwise the scanning thread may
+ miss it.
+
+ Implementation details:
+
+ Pins are given away from a "pinbox". Pinbox is stack-based allocator.
+ It used dynarray for storing pins, new elements are allocated by dynarray
+ as necessary, old are pushed in the stack for reuse. ABA is solved by
+ versioning a pointer - because we use an array, a pointer to pins is 16 bit,
+ upper 16 bits are used for a version.
+
+ It is assumed that pins belong to a thread and are not transferable
+ between threads (LF_PINS::stack_ends_here being a primary reason
+ for this limitation).
+*/
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <lf.h>
+
+#define LF_PINBOX_MAX_PINS 65536
+
+static void _lf_pinbox_real_free(LF_PINS *pins);
+
+/*
+ Initialize a pinbox. Normally called from lf_alloc_init.
+ See the latter for details.
+*/
+void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
+ lf_pinbox_free_func *free_func, void *free_func_arg)
+{
+ DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0);
+ compile_time_assert(sizeof(LF_PINS) == 128);
+ lf_dynarray_init(&pinbox->pinarray, sizeof(LF_PINS));
+ pinbox->pinstack_top_ver= 0;
+ pinbox->pins_in_array= 0;
+ pinbox->free_ptr_offset= free_ptr_offset;
+ pinbox->free_func= free_func;
+ pinbox->free_func_arg= free_func_arg;
+}
+
+void lf_pinbox_destroy(LF_PINBOX *pinbox)
+{
+ lf_dynarray_destroy(&pinbox->pinarray);
+}
+
+/*
+ Get pins from a pinbox. Usually called via lf_alloc_get_pins() or
+ lf_hash_get_pins().
+
+ SYNOPSYS
+ pinbox -
+ stack_end - a pointer to the end (top/bottom, depending on the
+ STACK_DIRECTION) of stack. Used for safe alloca. There's
+ no safety margin deducted, a caller should take care of it,
+ if necessary.
+
+ DESCRIPTION
+ get a new LF_PINS structure from a stack of unused pins,
+ or allocate a new one out of dynarray.
+
+ NOTE
+ It is assumed that pins belong to a thread and are not transferable
+ between threads.
+*/
+LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox, void *stack_end)
+{
+ uint32 pins, next, top_ver;
+ LF_PINS *el;
+ /*
+ We have an array of max. 64k elements.
+ The highest index currently allocated is pinbox->pins_in_array.
+ Freed elements are in a lifo stack, pinstack_top_ver.
+ pinstack_top_ver is 32 bits; 16 low bits are the index in the
+ array, to the first element of the list. 16 high bits are a version
+ (every time the 16 low bits are updated, the 16 high bits are
+ incremented). Versioniong prevents the ABA problem.
+ */
+ top_ver= pinbox->pinstack_top_ver;
+ do
+ {
+ if (!(pins= top_ver % LF_PINBOX_MAX_PINS))
+ {
+ /* the stack of free elements is empty */
+ pins= my_atomic_add32(&pinbox->pins_in_array, 1)+1;
+ if (unlikely(pins >= LF_PINBOX_MAX_PINS))
+ return 0;
+ /*
+ note that the first allocated element has index 1 (pins==1).
+ index 0 is reserved to mean "NULL pointer"
+ */
+ el= (LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinarray, pins);
+ if (unlikely(!el))
+ return 0;
+ break;
+ }
+ el= (LF_PINS *)_lf_dynarray_value(&pinbox->pinarray, pins);
+ next= el->link;
+ } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver,
+ top_ver-pins+next+LF_PINBOX_MAX_PINS));
+ /*
+ set el->link to the index of el in the dynarray (el->link has two usages:
+ - if element is allocated, it's its own index
+ - if element is free, it's its next element in the free stack
+ */
+ el->link= pins;
+ el->purgatory_count= 0;
+ el->pinbox= pinbox;
+ el->stack_ends_here= stack_end;
+ return el;
+}
+
+/*
+ Put pins back to a pinbox. Usually called via lf_alloc_put_pins() or
+ lf_hash_put_pins().
+
+ DESCRIPTION
+ empty the purgatory (XXX deadlock warning below!),
+ push LF_PINS structure to a stack
+*/
+void _lf_pinbox_put_pins(LF_PINS *pins)
+{
+ LF_PINBOX *pinbox= pins->pinbox;
+ uint32 top_ver, nr;
+ nr= pins->link;
+#ifdef MY_LF_EXTRA_DEBUG
+ {
+ int i;
+ for (i= 0; i < LF_PINBOX_PINS; i++)
+ DBUG_ASSERT(pins->pin[i] == 0);
+ }
+#endif
+ /*
+ XXX this will deadlock if other threads will wait for
+ the caller to do something after _lf_pinbox_put_pins(),
+ and they would have pinned addresses that the caller wants to free.
+ Thus: only free pins when all work is done and nobody can wait for you!!!
+ */
+ while (pins->purgatory_count)
+ {
+ _lf_pinbox_real_free(pins);
+ if (pins->purgatory_count)
+ {
+ my_atomic_rwlock_wrunlock(&pins->pinbox->pinarray.lock);
+ pthread_yield();
+ my_atomic_rwlock_wrlock(&pins->pinbox->pinarray.lock);
+ }
+ }
+ top_ver= pinbox->pinstack_top_ver;
+ do
+ {
+ pins->link= top_ver % LF_PINBOX_MAX_PINS;
+ } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver,
+ top_ver-pins->link+nr+LF_PINBOX_MAX_PINS));
+ return;
+}
+
+static int ptr_cmp(void **a, void **b)
+{
+ return *a < *b ? -1 : *a == *b ? 0 : 1;
+}
+
+#define add_to_purgatory(PINS, ADDR) \
+ do \
+ { \
+ *(void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset)= \
+ (PINS)->purgatory; \
+ (PINS)->purgatory= (ADDR); \
+ (PINS)->purgatory_count++; \
+ } while (0)
+
+/*
+ Free an object allocated via pinbox allocator
+
+ DESCRIPTION
+ add an object to purgatory. if necessary, call _lf_pinbox_real_free()
+ to actually free something.
+*/
+void _lf_pinbox_free(LF_PINS *pins, void *addr)
+{
+ add_to_purgatory(pins, addr);
+ if (pins->purgatory_count % LF_PURGATORY_SIZE)
+ _lf_pinbox_real_free(pins);
+}
+
+struct st_harvester {
+ void **granary;
+ int npins;
+};
+
+/*
+ callback for _lf_dynarray_iterate:
+ scan all pins of all threads and accumulate all pins
+*/
+static int harvest_pins(LF_PINS *el, struct st_harvester *hv)
+{
+ int i;
+ LF_PINS *el_end= el+min(hv->npins, LF_DYNARRAY_LEVEL_LENGTH);
+ for (; el < el_end; el++)
+ {
+ for (i= 0; i < LF_PINBOX_PINS; i++)
+ {
+ void *p= el->pin[i];
+ if (p)
+ *hv->granary++= p;
+ }
+ }
+ /*
+ hv->npins may become negative below, but it means that
+ we're on the last dynarray page and harvest_pins() won't be
+ called again. We don't bother to make hv->npins() correct
+ (that is 0) in this case.
+ */
+ hv->npins-= LF_DYNARRAY_LEVEL_LENGTH;
+ return 0;
+}
+
+/*
+ callback for _lf_dynarray_iterate:
+ scan all pins of all threads and see if addr is present there
+*/
+static int match_pins(LF_PINS *el, void *addr)
+{
+ int i;
+ LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH;
+ for (; el < el_end; el++)
+ for (i= 0; i < LF_PINBOX_PINS; i++)
+ if (el->pin[i] == addr)
+ return 1;
+ return 0;
+}
+
+#if STACK_DIRECTION < 0
+#define available_stack_size(CUR,END) (long) ((char*)(CUR) - (char*)(END))
+#else
+#define available_stack_size(CUR,END) (long) ((char*)(END) - (char*)(CUR))
+#endif
+
+/*
+ Scan the purgatory and free everything that can be freed
+*/
+static void _lf_pinbox_real_free(LF_PINS *pins)
+{
+ int npins, alloca_size;
+ void *list, **addr;
+ struct st_lf_alloc_node *first, *last= NULL;
+ LF_PINBOX *pinbox= pins->pinbox;
+
+ LINT_INIT(first);
+ npins= pinbox->pins_in_array+1;
+
+#ifdef HAVE_ALLOCA
+ alloca_size= sizeof(void *)*LF_PINBOX_PINS*npins;
+ /* create a sorted list of pinned addresses, to speed up searches */
+ if (available_stack_size(&pinbox, pins->stack_ends_here) > alloca_size)
+ {
+ struct st_harvester hv;
+ addr= (void **) alloca(alloca_size);
+ hv.granary= addr;
+ hv.npins= npins;
+ /* scan the dynarray and accumulate all pinned addresses */
+ _lf_dynarray_iterate(&pinbox->pinarray,
+ (lf_dynarray_func)harvest_pins, &hv);
+
+ npins= hv.granary-addr;
+ /* and sort them */
+ if (npins)
+ qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp);
+ }
+ else
+#endif
+ addr= 0;
+
+ list= pins->purgatory;
+ pins->purgatory= 0;
+ pins->purgatory_count= 0;
+ while (list)
+ {
+ void *cur= list;
+ list= *(void **)((char *)cur+pinbox->free_ptr_offset);
+ if (npins)
+ {
+ if (addr) /* use binary search */
+ {
+ void **a, **b, **c;
+ for (a= addr, b= addr+npins-1, c= a+(b-a)/2; (b-a) > 1; c= a+(b-a)/2)
+ if (cur == *c)
+ a= b= c;
+ else if (cur > *c)
+ a= c;
+ else
+ b= c;
+ if (cur == *a || cur == *b)
+ goto found;
+ }
+ else /* no alloca - no cookie. linear search here */
+ {
+ if (_lf_dynarray_iterate(&pinbox->pinarray,
+ (lf_dynarray_func)match_pins, cur))
+ goto found;
+ }
+ }
+ /* not pinned - freeing */
+ if (last)
+ last= last->next= (struct st_lf_alloc_node *)cur;
+ else
+ first= last= (struct st_lf_alloc_node *)cur;
+ continue;
+found:
+ /* pinned - keeping */
+ add_to_purgatory(pins, cur);
+ }
+ if (last)
+ pinbox->free_func(first, last, pinbox->free_func_arg);
+}
+
+/* lock-free memory allocator for fixed-size objects */
+
+LF_REQUIRE_PINS(1);
+
+/*
+ callback for _lf_pinbox_real_free to free a list of unpinned objects -
+ add it back to the allocator stack
+
+ DESCRIPTION
+ 'first' and 'last' are the ends of the linked list of st_lf_alloc_node's:
+ first->el->el->....->el->last. Use first==last to free only one element.
+*/
+static void alloc_free(struct st_lf_alloc_node *first,
+ struct st_lf_alloc_node volatile *last,
+ LF_ALLOCATOR *allocator)
+{
+ /*
+ we need a union here to access type-punned pointer reliably.
+ otherwise gcc -fstrict-aliasing will not see 'tmp' changed in the loop
+ */
+ union { struct st_lf_alloc_node * node; void *ptr; } tmp;
+ tmp.node= allocator->top;
+ do
+ {
+ last->next= tmp.node;
+ } while (!my_atomic_casptr((void **)(char *)&allocator->top,
+ (void **)&tmp.ptr, first) && LF_BACKOFF);
+}
+
+/*
+ initialize lock-free allocator
+
+ SYNOPSYS
+ allocator -
+ size a size of an object to allocate
+ free_ptr_offset an offset inside the object to a sizeof(void *)
+ memory that is guaranteed to be unused after
+ the object is put in the purgatory. Unused by ANY
+ thread, not only the purgatory owner.
+ This memory will be used to link waiting-to-be-freed
+ objects in a purgatory list.
+*/
+void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset)
+{
+ lf_pinbox_init(&allocator->pinbox, free_ptr_offset,
+ (lf_pinbox_free_func *)alloc_free, allocator);
+ allocator->top= 0;
+ allocator->mallocs= 0;
+ allocator->element_size= size;
+ DBUG_ASSERT(size >= sizeof(void*) + free_ptr_offset);
+}
+
+/*
+ destroy the allocator, free everything that's in it
+
+ NOTE
+ As every other init/destroy function here and elsewhere it
+ is not thread safe. No, this function is no different, ensure
+ that no thread needs the allocator before destroying it.
+ We are not responsible for any damage that may be caused by
+ accessing the allocator when it is being or has been destroyed.
+ Oh yes, and don't put your cat in a microwave.
+*/
+void lf_alloc_destroy(LF_ALLOCATOR *allocator)
+{
+ struct st_lf_alloc_node *node= allocator->top;
+ while (node)
+ {
+ struct st_lf_alloc_node *tmp= node->next;
+ my_free((void *)node, MYF(0));
+ node= tmp;
+ }
+ lf_pinbox_destroy(&allocator->pinbox);
+ allocator->top= 0;
+}
+
+/*
+ Allocate and return an new object.
+
+ DESCRIPTION
+ Pop an unused object from the stack or malloc it is the stack is empty.
+ pin[0] is used, it's removed on return.
+*/
+void *_lf_alloc_new(LF_PINS *pins)
+{
+ LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg);
+ struct st_lf_alloc_node *node;
+ for (;;)
+ {
+ do
+ {
+ node= allocator->top;
+ _lf_pin(pins, 0, node);
+ } while (node != allocator->top && LF_BACKOFF);
+ if (!node)
+ {
+ node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
+#ifdef MY_LF_EXTRA_DEBUG
+ if (likely(node != 0))
+ my_atomic_add32(&allocator->mallocs, 1);
+#endif
+ break;
+ }
+ if (my_atomic_casptr((void **)(char *)&allocator->top,
+ (void *)&node, node->next))
+ break;
+ }
+ _lf_unpin(pins, 0);
+ return node;
+}
+
+/*
+ count the number of objects in a pool.
+
+ NOTE
+ This is NOT thread-safe !!!
+*/
+uint lf_alloc_pool_count(LF_ALLOCATOR *allocator)
+{
+ uint i;
+ struct st_lf_alloc_node *node;
+ for (node= allocator->top, i= 0; node; node= node->next, i++)
+ /* no op */;
+ return i;
+}
+
diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c
new file mode 100644
index 00000000000..770b1f9342b
--- /dev/null
+++ b/mysys/lf_dynarray.c
@@ -0,0 +1,208 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Analog of DYNAMIC_ARRAY that never reallocs
+ (so no pointer into the array may ever become invalid).
+
+ Memory is allocated in non-contiguous chunks.
+ This data structure is not space efficient for sparse arrays.
+
+ Every element is aligned to sizeof(element) boundary
+ (to avoid false sharing if element is big enough).
+
+ LF_DYNARRAY is a recursive structure. On the zero level
+ LF_DYNARRAY::level[0] it's an array of LF_DYNARRAY_LEVEL_LENGTH elements,
+ on the first level it's an array of LF_DYNARRAY_LEVEL_LENGTH pointers
+ to arrays of elements, on the second level it's an array of pointers
+ to arrays of pointers to arrays of elements. And so on.
+
+ With four levels the number of elements is limited to 4311810304
+ (but as in all functions index is uint, the real limit is 2^32-1)
+
+ Actually, it's wait-free, not lock-free ;-)
+*/
+
+#include <my_global.h>
+#include <strings.h>
+#include <my_sys.h>
+#include <lf.h>
+
+void lf_dynarray_init(LF_DYNARRAY *array, uint element_size)
+{
+ bzero(array, sizeof(*array));
+ array->size_of_element= element_size;
+ my_atomic_rwlock_init(&array->lock);
+}
+
+static void recursive_free(void **alloc, int level)
+{
+ if (!alloc)
+ return;
+
+ if (level)
+ {
+ int i;
+ for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++)
+ recursive_free(alloc[i], level-1);
+ my_free((void *)alloc, MYF(0));
+ }
+ else
+ my_free(alloc[-1], MYF(0));
+}
+
+void lf_dynarray_destroy(LF_DYNARRAY *array)
+{
+ int i;
+ for (i= 0; i < LF_DYNARRAY_LEVELS; i++)
+ recursive_free(array->level[i], i);
+ my_atomic_rwlock_destroy(&array->lock);
+}
+
+static const ulong dynarray_idxes_in_prev_levels[LF_DYNARRAY_LEVELS]=
+{
+ 0, /* +1 here to to avoid -1's below */
+ LF_DYNARRAY_LEVEL_LENGTH,
+ LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH +
+ LF_DYNARRAY_LEVEL_LENGTH,
+ LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH *
+ LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH *
+ LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH
+};
+
+static const ulong dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]=
+{
+ 0, /* +1 here to to avoid -1's below */
+ LF_DYNARRAY_LEVEL_LENGTH,
+ LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH,
+ LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH *
+ LF_DYNARRAY_LEVEL_LENGTH,
+};
+
+/*
+ Returns a valid lvalue pointer to the element number 'idx'.
+ Allocates memory if necessary.
+*/
+void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx)
+{
+ void * ptr, * volatile * ptr_ptr= 0;
+ int i;
+
+ for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--)
+ /* no-op */;
+ ptr_ptr= &array->level[i];
+ idx-= dynarray_idxes_in_prev_levels[i];
+ for (; i > 0; i--)
+ {
+ if (!(ptr= *ptr_ptr))
+ {
+ void *alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *),
+ MYF(MY_WME|MY_ZEROFILL));
+ if (unlikely(!alloc))
+ return(NULL);
+ if (my_atomic_casptr(ptr_ptr, &ptr, alloc))
+ ptr= alloc;
+ else
+ my_free(alloc, MYF(0));
+ }
+ ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i];
+ idx%= dynarray_idxes_in_prev_level[i];
+ }
+ if (!(ptr= *ptr_ptr))
+ {
+ void *alloc, *data;
+ alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element +
+ max(array->size_of_element, sizeof(void *)),
+ MYF(MY_WME|MY_ZEROFILL));
+ if (unlikely(!alloc))
+ return(NULL);
+ /* reserve the space for free() address */
+ data= alloc + sizeof(void *);
+ { /* alignment */
+ intptr mod= ((intptr)data) % array->size_of_element;
+ if (mod)
+ data+= array->size_of_element - mod;
+ }
+ ((void **)data)[-1]= alloc; /* free() will need the original pointer */
+ if (my_atomic_casptr(ptr_ptr, &ptr, data))
+ ptr= data;
+ else
+ my_free(alloc, MYF(0));
+ }
+ return ptr + array->size_of_element * idx;
+}
+
+/*
+ Returns a pointer to the element number 'idx'
+ or NULL if an element does not exists
+*/
+void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx)
+{
+ void * ptr, * volatile * ptr_ptr= 0;
+ int i;
+
+ for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--)
+ /* no-op */;
+ ptr_ptr= &array->level[i];
+ idx-= dynarray_idxes_in_prev_levels[i];
+ for (; i > 0; i--)
+ {
+ if (!(ptr= *ptr_ptr))
+ return(NULL);
+ ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i];
+ idx %= dynarray_idxes_in_prev_level[i];
+ }
+ if (!(ptr= *ptr_ptr))
+ return(NULL);
+ return ptr + array->size_of_element * idx;
+}
+
+static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level,
+ lf_dynarray_func func, void *arg)
+{
+ int res, i;
+ if (!ptr)
+ return 0;
+ if (!level)
+ return func(ptr, arg);
+ for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++)
+ if ((res= recursive_iterate(array, ((void **)ptr)[i], level-1, func, arg)))
+ return res;
+ return 0;
+}
+
+/*
+ Calls func(array, arg) on every array of LF_DYNARRAY_LEVEL_LENGTH elements
+ in lf_dynarray.
+
+ DESCRIPTION
+ lf_dynarray consists of a set of arrays, LF_DYNARRAY_LEVEL_LENGTH elements
+ each. _lf_dynarray_iterate() calls user-supplied function on every array
+ from the set. It is the fastest way to scan the array, faster than
+ for (i=0; i < N; i++) { func(_lf_dynarray_value(dynarray, i)); }
+
+ NOTE
+ if func() returns non-zero, the scan is aborted
+*/
+int _lf_dynarray_iterate(LF_DYNARRAY *array, lf_dynarray_func func, void *arg)
+{
+ int i, res;
+ for (i= 0; i < LF_DYNARRAY_LEVELS; i++)
+ if ((res= recursive_iterate(array, array->level[i], i, func, arg)))
+ return res;
+ return 0;
+}
+
diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c
new file mode 100644
index 00000000000..322f04cdc41
--- /dev/null
+++ b/mysys/lf_hash.c
@@ -0,0 +1,493 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ extensible hash
+
+ TODO
+ try to get rid of dummy nodes ?
+ for non-unique hash, count only _distinct_ values
+ (but how to do it in lf_hash_delete ?)
+*/
+#include <my_global.h>
+#include <m_string.h>
+#include <my_sys.h>
+#include <my_bit.h>
+#include <lf.h>
+
+LF_REQUIRE_PINS(3);
+
+/* An element of the list */
+typedef struct {
+ intptr volatile link; /* a pointer to the next element in a listand a flag */
+ uint32 hashnr; /* reversed hash number, for sorting */
+ const uchar *key;
+ size_t keylen;
+ /*
+ data is stored here, directly after the keylen.
+ thus the pointer to data is (void*)(slist_element_ptr+1)
+ */
+} LF_SLIST;
+
+/*
+ a structure to pass the context (pointers two the three successive elements
+ in a list) from lfind to linsert/ldelete
+*/
+typedef struct {
+ intptr volatile *prev;
+ LF_SLIST *curr, *next;
+} CURSOR;
+
+/*
+ the last bit in LF_SLIST::link is a "deleted" flag.
+ the helper macros below convert it to a pure pointer or a pure flag
+*/
+#define PTR(V) (LF_SLIST *)((V) & (~(intptr)1))
+#define DELETED(V) ((V) & 1)
+
+/*
+ DESCRIPTION
+ Search for hashnr/key/keylen in the list starting from 'head' and
+ position the cursor. The list is ORDER BY hashnr, key
+
+ RETURN
+ 0 - not found
+ 1 - found
+
+ NOTE
+ cursor is positioned in either case
+ pins[0..2] are used, they are NOT removed on return
+*/
+static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
+ const uchar *key, uint keylen, CURSOR *cursor, LF_PINS *pins)
+{
+ uint32 cur_hashnr;
+ const uchar *cur_key;
+ uint cur_keylen;
+ intptr link;
+
+retry:
+ cursor->prev= (intptr *)head;
+ do { /* PTR() isn't necessary below, head is a dummy node */
+ cursor->curr= (LF_SLIST *)(*cursor->prev);
+ _lf_pin(pins, 1, cursor->curr);
+ } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF);
+ for (;;)
+ {
+ if (unlikely(!cursor->curr))
+ return 0; /* end of the list */
+ do {
+ /* QQ: XXX or goto retry ? */
+ link= cursor->curr->link;
+ cursor->next= PTR(link);
+ _lf_pin(pins, 0, cursor->next);
+ } while (link != cursor->curr->link && LF_BACKOFF);
+ cur_hashnr= cursor->curr->hashnr;
+ cur_key= cursor->curr->key;
+ cur_keylen= cursor->curr->keylen;
+ if (*cursor->prev != (intptr)cursor->curr)
+ {
+ (void)LF_BACKOFF;
+ goto retry;
+ }
+ if (!DELETED(link))
+ {
+ if (cur_hashnr >= hashnr)
+ {
+ int r= 1;
+ if (cur_hashnr > hashnr ||
+ (r= my_strnncoll(cs, (uchar*) cur_key, cur_keylen, (uchar*) key,
+ keylen)) >= 0)
+ return !r;
+ }
+ cursor->prev= &(cursor->curr->link);
+ _lf_pin(pins, 2, cursor->curr);
+ }
+ else
+ {
+ /*
+ we found a deleted node - be nice, help the other thread
+ and remove this deleted node
+ */
+ if (my_atomic_casptr((void **)cursor->prev,
+ (void **)&cursor->curr, cursor->next))
+ _lf_alloc_free(pins, cursor->curr);
+ else
+ {
+ (void)LF_BACKOFF;
+ goto retry;
+ }
+ }
+ cursor->curr= cursor->next;
+ _lf_pin(pins, 1, cursor->curr);
+ }
+}
+
+/*
+ DESCRIPTION
+ insert a 'node' in the list that starts from 'head' in the correct
+ position (as found by lfind)
+
+ RETURN
+ 0 - inserted
+ not 0 - a pointer to a duplicate (not pinned and thus unusable)
+
+ NOTE
+ it uses pins[0..2], on return all pins are removed.
+ if there're nodes with the same key value, a new node is added before them.
+*/
+static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs,
+ LF_SLIST *node, LF_PINS *pins, uint flags)
+{
+ CURSOR cursor;
+ int res;
+
+ for (;;)
+ {
+ if (lfind(head, cs, node->hashnr, node->key, node->keylen,
+ &cursor, pins) &&
+ (flags & LF_HASH_UNIQUE))
+ {
+ res= 0; /* duplicate found */
+ break;
+ }
+ else
+ {
+ node->link= (intptr)cursor.curr;
+ DBUG_ASSERT(node->link != (intptr)node); /* no circular references */
+ DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */
+ if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node))
+ {
+ res= 1; /* inserted ok */
+ break;
+ }
+ }
+ }
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ _lf_unpin(pins, 2);
+ /*
+ Note that cursor.curr is not pinned here and the pointer is unreliable,
+ the object may dissapear anytime. But if it points to a dummy node, the
+ pointer is safe, because dummy nodes are never freed - initialize_bucket()
+ uses this fact.
+ */
+ return res ? 0 : cursor.curr;
+}
+
+/*
+ DESCRIPTION
+ deletes a node as identified by hashnr/keey/keylen from the list
+ that starts from 'head'
+
+ RETURN
+ 0 - ok
+ 1 - not found
+
+ NOTE
+ it uses pins[0..2], on return all pins are removed.
+*/
+static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
+ const uchar *key, uint keylen, LF_PINS *pins)
+{
+ CURSOR cursor;
+ int res;
+
+ for (;;)
+ {
+ if (!lfind(head, cs, hashnr, key, keylen, &cursor, pins))
+ {
+ res= 1; /* not found */
+ break;
+ }
+ else
+ {
+ /* mark the node deleted */
+ if (my_atomic_casptr((void **)&(cursor.curr->link),
+ (void **)&cursor.next,
+ (void *)(((intptr)cursor.next) | 1)))
+ {
+ /* and remove it from the list */
+ if (my_atomic_casptr((void **)cursor.prev,
+ (void **)&cursor.curr, cursor.next))
+ _lf_alloc_free(pins, cursor.curr);
+ else
+ {
+ /*
+ somebody already "helped" us and removed the node ?
+ Let's check if we need to help that someone too!
+ (to ensure the number of "set DELETED flag" actions
+ is equal to the number of "remove from the list" actions)
+ */
+ lfind(head, cs, hashnr, key, keylen, &cursor, pins);
+ }
+ res= 0;
+ break;
+ }
+ }
+ }
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ _lf_unpin(pins, 2);
+ return res;
+}
+
+/*
+ DESCRIPTION
+ searches for a node as identified by hashnr/keey/keylen in the list
+ that starts from 'head'
+
+ RETURN
+ 0 - not found
+ node - found
+
+ NOTE
+ it uses pins[0..2], on return the pin[2] keeps the node found
+ all other pins are removed.
+*/
+static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs,
+ uint32 hashnr, const uchar *key, uint keylen,
+ LF_PINS *pins)
+{
+ CURSOR cursor;
+ int res= lfind(head, cs, hashnr, key, keylen, &cursor, pins);
+ if (res)
+ _lf_pin(pins, 2, cursor.curr);
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ return res ? cursor.curr : 0;
+}
+
+static inline const uchar* hash_key(const LF_HASH *hash,
+ const uchar *record, size_t *length)
+{
+ if (hash->get_key)
+ return (*hash->get_key)(record, length, 0);
+ *length= hash->key_length;
+ return record + hash->key_offset;
+}
+
+/*
+ compute the hash key value from the raw key.
+ note, that the hash value is limited to 2^31, because we need one
+ bit to distinguish between normal and dummy nodes.
+*/
+static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen)
+{
+ ulong nr1= 1, nr2= 4;
+ hash->charset->coll->hash_sort(hash->charset, (uchar*) key, keylen,
+ &nr1, &nr2);
+ return nr1 & INT_MAX32;
+}
+
+#define MAX_LOAD 1.0 /* average number of elements in a bucket */
+
+static int initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *);
+
+/*
+ Initializes lf_hash, the arguments are compatible with hash_init
+*/
+void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
+ uint key_offset, uint key_length, hash_get_key get_key,
+ CHARSET_INFO *charset)
+{
+ lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size,
+ offsetof(LF_SLIST, key));
+ lf_dynarray_init(&hash->array, sizeof(LF_SLIST *));
+ hash->size= 1;
+ hash->count= 0;
+ hash->element_size= element_size;
+ hash->flags= flags;
+ hash->charset= charset ? charset : &my_charset_bin;
+ hash->key_offset= key_offset;
+ hash->key_length= key_length;
+ hash->get_key= get_key;
+ DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length);
+}
+
+void lf_hash_destroy(LF_HASH *hash)
+{
+ LF_SLIST *el, **head= (LF_SLIST **)_lf_dynarray_value(&hash->array, 0);
+
+ if (unlikely(!head))
+ return;
+ el= *head;
+
+ while (el)
+ {
+ intptr next= el->link;
+ if (el->hashnr & 1)
+ lf_alloc_direct_free(&hash->alloc, el); /* normal node */
+ else
+ my_free((void *)el, MYF(0)); /* dummy node */
+ el= (LF_SLIST *)next;
+ }
+ lf_alloc_destroy(&hash->alloc);
+ lf_dynarray_destroy(&hash->array);
+}
+
+/*
+ DESCRIPTION
+ inserts a new element to a hash. it will have a _copy_ of
+ data, not a pointer to it.
+
+ RETURN
+ 0 - inserted
+ 1 - didn't (unique key conflict)
+ -1 - out of memory
+
+ NOTE
+ see linsert() for pin usage notes
+*/
+int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data)
+{
+ int csize, bucket, hashnr;
+ LF_SLIST *node, * volatile *el;
+
+ lf_rwlock_by_pins(pins);
+ node= (LF_SLIST *)_lf_alloc_new(pins);
+ if (unlikely(!node))
+ return -1;
+ memcpy(node+1, data, hash->element_size);
+ node->key= hash_key(hash, (uchar *)(node+1), &node->keylen);
+ hashnr= calc_hash(hash, node->key, node->keylen);
+ bucket= hashnr % hash->size;
+ el= _lf_dynarray_lvalue(&hash->array, bucket);
+ if (unlikely(!el))
+ return -1;
+ if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
+ return -1;
+ node->hashnr= my_reverse_bits(hashnr) | 1; /* normal node */
+ if (linsert(el, hash->charset, node, pins, hash->flags))
+ {
+ _lf_alloc_free(pins, node);
+ lf_rwunlock_by_pins(pins);
+ return 1;
+ }
+ csize= hash->size;
+ if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD)
+ my_atomic_cas32(&hash->size, &csize, csize*2);
+ lf_rwunlock_by_pins(pins);
+ return 0;
+}
+
+/*
+ DESCRIPTION
+ deletes an element with the given key from the hash (if a hash is
+ not unique and there're many elements with this key - the "first"
+ matching element is deleted)
+ RETURN
+ 0 - deleted
+ 1 - didn't (not found)
+ -1 - out of memory
+ NOTE
+ see ldelete() for pin usage notes
+*/
+int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
+{
+ LF_SLIST * volatile *el;
+ uint bucket, hashnr= calc_hash(hash, (uchar *)key, keylen);
+
+ bucket= hashnr % hash->size;
+ lf_rwlock_by_pins(pins);
+ el= _lf_dynarray_lvalue(&hash->array, bucket);
+ if (unlikely(!el))
+ return -1;
+ /*
+ note that we still need to initialize_bucket here,
+ we cannot return "node not found", because an old bucket of that
+ node may've been split and the node was assigned to a new bucket
+ that was never accessed before and thus is not initialized.
+ */
+ if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
+ return -1;
+ if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1,
+ (uchar *)key, keylen, pins))
+ {
+ lf_rwunlock_by_pins(pins);
+ return 1;
+ }
+ my_atomic_add32(&hash->count, -1);
+ lf_rwunlock_by_pins(pins);
+ return 0;
+}
+
+/*
+ RETURN
+ a pointer to an element with the given key (if a hash is not unique and
+ there're many elements with this key - the "first" matching element)
+ NULL if nothing is found
+ MY_ERRPTR if OOM
+
+ NOTE
+ see lsearch() for pin usage notes
+*/
+void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
+{
+ LF_SLIST * volatile *el, *found;
+ uint bucket, hashnr= calc_hash(hash, (uchar *)key, keylen);
+
+ bucket= hashnr % hash->size;
+ lf_rwlock_by_pins(pins);
+ el= _lf_dynarray_lvalue(&hash->array, bucket);
+ if (unlikely(!el))
+ return MY_ERRPTR;
+ if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
+ return MY_ERRPTR;
+ found= lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1,
+ (uchar *)key, keylen, pins);
+ lf_rwunlock_by_pins(pins);
+ return found ? found+1 : 0;
+}
+
+static const uchar *dummy_key= "";
+
+/*
+ RETURN
+ 0 - ok
+ -1 - out of memory
+*/
+static int initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node,
+ uint bucket, LF_PINS *pins)
+{
+ uint parent= my_clear_highest_bit(bucket);
+ LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME));
+ LF_SLIST **tmp= 0, *cur;
+ LF_SLIST * volatile *el= _lf_dynarray_lvalue(&hash->array, parent);
+ if (unlikely(!el || !dummy))
+ return -1;
+ if (*el == NULL && bucket &&
+ unlikely(initialize_bucket(hash, el, parent, pins)))
+ return -1;
+ dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */
+ dummy->key= (char*) dummy_key;
+ dummy->keylen= 0;
+ if ((cur= linsert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE)))
+ {
+ my_free((void *)dummy, MYF(0));
+ dummy= cur;
+ }
+ my_atomic_casptr((void **)node, (void **)&tmp, dummy);
+ /*
+ note that if the CAS above failed (after linsert() succeeded),
+ it would mean that some other thread has executed linsert() for
+ the same dummy node, its linsert() failed, it picked up our
+ dummy node (in "dummy= cur") and executed the same CAS as above.
+ Which means that even if CAS above failed we don't need to retry,
+ and we should not free(dummy) - there's no memory leak here
+ */
+ return 0;
+}
diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c
index 58650733490..1124ebceb2c 100644
--- a/mysys/mf_iocache.c
+++ b/mysys/mf_iocache.c
@@ -1701,6 +1701,7 @@ int my_b_flush_io_cache(IO_CACHE *info, int need_append_buffer_lock)
my_bool append_cache;
my_off_t pos_in_file;
DBUG_ENTER("my_b_flush_io_cache");
+ DBUG_PRINT("enter", ("cache: 0x%lx", (long) info));
if (!(append_cache = (info->type == SEQ_READ_APPEND)))
need_append_buffer_lock=0;
@@ -1833,6 +1834,9 @@ int end_io_cache(IO_CACHE *info)
pthread_mutex_destroy(&info->append_buffer_lock);
#endif
}
+#ifdef THREAD
+ info->share= 0;
+#endif
DBUG_RETURN(error);
} /* end_io_cache */
diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c
index a03d71f32d8..2fcf397766d 100644
--- a/mysys/mf_keycache.c
+++ b/mysys/mf_keycache.c
@@ -105,6 +105,7 @@
#include <keycache.h>
#include "my_static.h"
#include <m_string.h>
+#include <my_bit.h>
#include <errno.h>
#include <stdarg.h>
@@ -1262,12 +1263,12 @@ static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
KEYCACHE_THREAD_TRACE("unlink_block");
#if defined(KEYCACHE_DEBUG)
+ KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0);
keycache->blocks_available--;
KEYCACHE_DBUG_PRINT("unlink_block",
("unlinked block %u status=%x #requests=%u #available=%u",
BLOCK_NUMBER(block), block->status,
block->requests, keycache->blocks_available));
- KEYCACHE_DBUG_ASSERT(keycache->blocks_available >= 0);
#endif
}
@@ -2360,9 +2361,9 @@ restart:
(block->hash_link->diskpos == filepos)));
*page_st=page_status;
KEYCACHE_DBUG_PRINT("find_key_block",
- ("fd: %d pos: %lu block->status: %u page_status: %u",
+ ("fd: %d pos: %lu block->status: %u page_status: %d",
file, (ulong) filepos, block->status,
- (uint) page_status));
+ page_status));
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
DBUG_EXECUTE("check_keycache2",
@@ -2513,10 +2514,10 @@ static void read_block(KEY_CACHE *keycache,
*/
uchar *key_cache_read(KEY_CACHE *keycache,
- File file, my_off_t filepos, int level,
- uchar *buff, uint length,
- uint block_length __attribute__((unused)),
- int return_buffer __attribute__((unused)))
+ File file, my_off_t filepos, int level,
+ uchar *buff, uint length,
+ uint block_length __attribute__((unused)),
+ int return_buffer __attribute__((unused)))
{
my_bool locked_and_incremented= FALSE;
int error=0;
@@ -2534,12 +2535,13 @@ uchar *key_cache_read(KEY_CACHE *keycache,
uint status;
int page_st;
- /*
+
+ /*
When the key cache is once initialized, we use the cache_lock to
reliably distinguish the cases of normal operation, resizing, and
disabled cache. We always increment and decrement
'cnt_for_resize_op' so that a resizer can wait for pending I/O.
- */
+ */
keycache_pthread_mutex_lock(&keycache->cache_lock);
/*
Cache resizing has two phases: Flushing and re-initializing. In
@@ -2566,7 +2568,7 @@ uchar *key_cache_read(KEY_CACHE *keycache,
do
{
/* Cache could be disabled in a later iteration. */
-
+
if (!keycache->can_be_used)
goto no_key_cache;
/* Start reading at the beginning of the cache block. */
@@ -2976,9 +2978,10 @@ int key_cache_write(KEY_CACHE *keycache,
int error=0;
DBUG_ENTER("key_cache_write");
DBUG_PRINT("enter",
- ("fd: %u pos: %lu length: %u block_length: %u key_block_length: %u",
- (uint) file, (ulong) filepos, length, block_length,
- keycache ? keycache->key_cache_block_size : 0));
+ ("fd: %u pos: %lu length: %u block_length: %u"
+ " key_block_length: %u",
+ (uint) file, (ulong) filepos, length, block_length,
+ keycache ? keycache->key_cache_block_size : 0));
if (!dont_write)
{
@@ -3170,7 +3173,7 @@ int key_cache_write(KEY_CACHE *keycache,
if (!dont_write)
{
- /* Not used in the server. buff has been written to disk at start. */
+ /* Not used in the server. buff has been written to disk at start. */
if ((block->status & BLOCK_CHANGED) &&
(!offset && read_length >= keycache->key_cache_block_size))
link_to_file_list(keycache, block, block->hash_link->file, 1);
@@ -3184,7 +3187,6 @@ int key_cache_write(KEY_CACHE *keycache,
a flush.
*/
block->status&= ~BLOCK_FOR_UPDATE;
-
set_if_smaller(block->offset, offset);
set_if_bigger(block->length, read_length+offset);
@@ -3555,10 +3557,11 @@ static int flush_key_blocks_int(KEY_CACHE *keycache,
file, keycache->blocks_used, keycache->blocks_changed));
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
- DBUG_EXECUTE("check_keycache",
- test_key_cache(keycache, "start of flush_key_blocks", 0););
+ DBUG_EXECUTE("check_keycache",
+ test_key_cache(keycache, "start of flush_key_blocks", 0););
#endif
+ DBUG_ASSERT(type != FLUSH_KEEP_LAZY);
cache= cache_buff;
if (keycache->disk_blocks > 0 &&
(!my_disable_flush_key_blocks || type != FLUSH_KEEP))
diff --git a/mysys/mf_keycaches.c b/mysys/mf_keycaches.c
index 6227a05ce06..9ea5678da9a 100644
--- a/mysys/mf_keycaches.c
+++ b/mysys/mf_keycaches.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003 MySQL AB
+/* Copyright (C) 2003-2007 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -25,269 +25,7 @@
#include <keycache.h>
#include <hash.h>
#include <m_string.h>
-
-/*****************************************************************************
- General functions to handle SAFE_HASH objects.
-
- A SAFE_HASH object is used to store the hash, the mutex and default value
- needed by the rest of the key cache code.
- This is a separate struct to make it easy to later reuse the code for other
- purposes
-
- All entries are linked in a list to allow us to traverse all elements
- and delete selected ones. (HASH doesn't allow any easy ways to do this).
-*****************************************************************************/
-
-/*
- Struct to store a key and pointer to object
-*/
-
-typedef struct st_safe_hash_entry
-{
- uchar *key;
- uint length;
- uchar *data;
- struct st_safe_hash_entry *next, **prev;
-} SAFE_HASH_ENTRY;
-
-
-typedef struct st_safe_hash_with_default
-{
-#ifdef THREAD
- rw_lock_t mutex;
-#endif
- HASH hash;
- uchar *default_value;
- SAFE_HASH_ENTRY *root;
-} SAFE_HASH;
-
-
-/*
- Free a SAFE_HASH_ENTRY
-
- This function is called by the hash object on delete
-*/
-
-static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry)
-{
- DBUG_ENTER("free_assign_entry");
- my_free((uchar*) entry, MYF(0));
- DBUG_VOID_RETURN;
-}
-
-
-/* Get key and length for a SAFE_HASH_ENTRY */
-
-static uchar *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, size_t *length,
- my_bool not_used __attribute__((unused)))
-{
- *length=entry->length;
- return (uchar*) entry->key;
-}
-
-
-/*
- Init a SAFE_HASH object
-
- SYNOPSIS
- safe_hash_init()
- hash safe_hash handler
- elements Expected max number of elements
- default_value default value
-
- NOTES
- In case of error we set hash->default_value to 0 to allow one to call
- safe_hash_free on an object that couldn't be initialized.
-
- RETURN
- 0 ok
- 1 error
-*/
-
-static my_bool safe_hash_init(SAFE_HASH *hash, uint elements,
- uchar *default_value)
-{
- DBUG_ENTER("safe_hash");
- if (hash_init(&hash->hash, &my_charset_bin, elements,
- 0, 0, (hash_get_key) safe_hash_entry_get,
- (void (*)(void*)) safe_hash_entry_free, 0))
- {
- hash->default_value= 0;
- DBUG_RETURN(1);
- }
- my_rwlock_init(&hash->mutex, 0);
- hash->default_value= default_value;
- hash->root= 0;
- DBUG_RETURN(0);
-}
-
-
-/*
- Free a SAFE_HASH object
-
- NOTES
- This is safe to call on any object that has been sent to safe_hash_init()
-*/
-
-static void safe_hash_free(SAFE_HASH *hash)
-{
- /*
- Test if safe_hash_init succeeded. This will also guard us against multiple
- free calls.
- */
- if (hash->default_value)
- {
- hash_free(&hash->hash);
- rwlock_destroy(&hash->mutex);
- hash->default_value=0;
- }
-}
-
-/*
- Return the value stored for a key or default value if no key
-*/
-
-static uchar *safe_hash_search(SAFE_HASH *hash, const uchar *key, uint length)
-{
- uchar *result;
- DBUG_ENTER("safe_hash_search");
- rw_rdlock(&hash->mutex);
- result= hash_search(&hash->hash, key, length);
- rw_unlock(&hash->mutex);
- if (!result)
- result= hash->default_value;
- else
- result= ((SAFE_HASH_ENTRY*) result)->data;
- DBUG_PRINT("exit",("data: 0x%lx", (long) result));
- DBUG_RETURN(result);
-}
-
-
-/*
- Associate a key with some data
-
- SYONOPSIS
- safe_hash_set()
- hash Hash handle
- key key (path to table etc..)
- length Length of key
- data data to to associate with the data
-
- NOTES
- This can be used both to insert a new entry and change an existing
- entry.
- If one associates a key with the default key cache, the key is deleted
-
- RETURN
- 0 ok
- 1 error (Can only be EOM). In this case my_message() is called.
-*/
-
-static my_bool safe_hash_set(SAFE_HASH *hash, const uchar *key, uint length,
- uchar *data)
-{
- SAFE_HASH_ENTRY *entry;
- my_bool error= 0;
- DBUG_ENTER("safe_hash_set");
- DBUG_PRINT("enter",("key: %.*s data: 0x%lx", length, key, (long) data));
-
- rw_wrlock(&hash->mutex);
- entry= (SAFE_HASH_ENTRY*) hash_search(&hash->hash, key, length);
-
- if (data == hash->default_value)
- {
- /*
- The key is to be associated with the default entry. In this case
- we can just delete the entry (if it existed) from the hash as a
- search will return the default entry
- */
- if (!entry) /* nothing to do */
- goto end;
- /* unlink entry from list */
- if ((*entry->prev= entry->next))
- entry->next->prev= entry->prev;
- hash_delete(&hash->hash, (uchar*) entry);
- goto end;
- }
- if (entry)
- {
- /* Entry existed; Just change the pointer to point at the new data */
- entry->data= data;
- }
- else
- {
- if (!(entry= (SAFE_HASH_ENTRY *) my_malloc(sizeof(*entry) + length,
- MYF(MY_WME))))
- {
- error= 1;
- goto end;
- }
- entry->key= (uchar*) (entry +1);
- memcpy((char*) entry->key, (char*) key, length);
- entry->length= length;
- entry->data= data;
- /* Link entry to list */
- if ((entry->next= hash->root))
- entry->next->prev= &entry->next;
- entry->prev= &hash->root;
- hash->root= entry;
- if (my_hash_insert(&hash->hash, (uchar*) entry))
- {
- /* This can only happen if hash got out of memory */
- my_free((char*) entry, MYF(0));
- error= 1;
- goto end;
- }
- }
-
-end:
- rw_unlock(&hash->mutex);
- DBUG_RETURN(error);
-}
-
-
-/*
- Change all entres with one data value to another data value
-
- SYONOPSIS
- safe_hash_change()
- hash Hash handle
- old_data Old data
- new_data Change all 'old_data' to this
-
- NOTES
- We use the linked list to traverse all elements in the hash as
- this allows us to delete elements in the case where 'new_data' is the
- default value.
-*/
-
-static void safe_hash_change(SAFE_HASH *hash, uchar *old_data, uchar *new_data)
-{
- SAFE_HASH_ENTRY *entry, *next;
- DBUG_ENTER("safe_hash_set");
-
- rw_wrlock(&hash->mutex);
-
- for (entry= hash->root ; entry ; entry= next)
- {
- next= entry->next;
- if (entry->data == old_data)
- {
- if (new_data == hash->default_value)
- {
- if ((*entry->prev= entry->next))
- entry->next->prev= entry->prev;
- hash_delete(&hash->hash, (uchar*) entry);
- }
- else
- entry->data= new_data;
- }
- }
-
- rw_unlock(&hash->mutex);
- DBUG_VOID_RETURN;
-}
-
+#include "my_safehash.h"
/*****************************************************************************
Functions to handle the key cache objects
@@ -315,6 +53,7 @@ void multi_keycache_free(void)
multi_key_cache_search()
key key to find (usually table path)
uint length Length of key.
+ def Default value if no key cache
NOTES
This function is coded in such a way that we will return the
@@ -325,11 +64,13 @@ void multi_keycache_free(void)
key cache to use
*/
-KEY_CACHE *multi_key_cache_search(uchar *key, uint length)
+KEY_CACHE *multi_key_cache_search(uchar *key, uint length,
+ KEY_CACHE *def)
{
if (!key_cache_hash.hash.records)
- return dflt_key_cache;
- return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length);
+ return def;
+ return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length,
+ (void*) def);
}
@@ -361,3 +102,5 @@ void multi_key_cache_change(KEY_CACHE *old_data,
{
safe_hash_change(&key_cache_hash, (uchar*) old_data, (uchar*) new_data);
}
+
+
diff --git a/mysys/mf_tempfile.c b/mysys/mf_tempfile.c
index 9460f27b104..40016210de4 100644
--- a/mysys/mf_tempfile.c
+++ b/mysys/mf_tempfile.c
@@ -136,6 +136,7 @@ File create_temp_file(char *to, const char *dir, const char *prefix,
if (org_file >= 0 && file < 0)
{
int tmp=my_errno;
+ close(org_file);
(void) my_delete(to, MYF(MY_WME | ME_NOINPUT));
my_errno=tmp;
}
diff --git a/mysys/my_atomic.c b/mysys/my_atomic.c
index 6a30267eb80..aa04d55f624 100644
--- a/mysys/my_atomic.c
+++ b/mysys/my_atomic.c
@@ -17,11 +17,10 @@
#include <my_pthread.h>
#ifndef HAVE_INLINE
-/*
- the following will cause all inline functions to be instantiated
-*/
+/* the following will cause all inline functions to be instantiated */
#define HAVE_INLINE
-#define static extern
+#undef STATIC_INLINE
+#define STATIC_INLINE extern
#endif
#include <my_atomic.h>
@@ -35,7 +34,7 @@
*/
int my_atomic_initialize()
{
- DBUG_ASSERT(sizeof(intptr) == sizeof(void *));
+ compile_time_assert(sizeof(intptr) == sizeof(void *));
/* currently the only thing worth checking is SMP/UP issue */
#ifdef MY_ATOMIC_MODE_DUMMY
return my_getncpus() == 1 ? MY_ATOMIC_OK : MY_ATOMIC_NOT_1CPU;
diff --git a/mysys/my_bit.c b/mysys/my_bit.c
index 5a9b1187c83..2881eb1ebd2 100644
--- a/mysys/my_bit.c
+++ b/mysys/my_bit.c
@@ -13,23 +13,18 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-/* Some useful bit functions */
+#include <my_global.h>
-#include "mysys_priv.h"
-
-/*
- Find smallest X in 2^X >= value
- This can be used to divide a number with value by doing a shift instead
-*/
+#ifndef HAVE_INLINE
+/* the following will cause all inline functions to be instantiated */
+#define HAVE_INLINE
+#undef STATIC_INLINE
+#define STATIC_INLINE extern
+#endif
-uint my_bit_log2(ulong value)
-{
- uint bit;
- for (bit=0 ; value > 1 ; value>>=1, bit++) ;
- return bit;
-}
+#include <my_bit.h>
-static char nbits[256] = {
+const char _my_bits_nbits[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
@@ -48,60 +43,29 @@ static char nbits[256] = {
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
-uint my_count_bits(ulonglong v)
-{
-#if SIZEOF_LONG_LONG > 4
- /* The following code is a bit faster on 16 bit machines than if we would
- only shift v */
- ulong v2=(ulong) (v >> 32);
- return (uint) (uchar) (nbits[(uchar) v] +
- nbits[(uchar) (v >> 8)] +
- nbits[(uchar) (v >> 16)] +
- nbits[(uchar) (v >> 24)] +
- nbits[(uchar) (v2)] +
- nbits[(uchar) (v2 >> 8)] +
- nbits[(uchar) (v2 >> 16)] +
- nbits[(uchar) (v2 >> 24)]);
-#else
- return (uint) (uchar) (nbits[(uchar) v] +
- nbits[(uchar) (v >> 8)] +
- nbits[(uchar) (v >> 16)] +
- nbits[(uchar) (v >> 24)]);
-#endif
-}
-
-uint my_count_bits_ushort(ushort v)
-{
- return nbits[v];
-}
-
-
/*
- Next highest power of two
-
- SYNOPSIS
- my_round_up_to_next_power()
- v Value to check
-
- RETURN
- Next or equal power of 2
- Note: 0 will return 0
-
- NOTES
- Algorithm by Sean Anderson, according to:
- http://graphics.stanford.edu/~seander/bithacks.html
- (Orignal code public domain)
-
- Comments shows how this works with 01100000000000000000000000001011
+ perl -e 'print map{", 0x".unpack H2,pack B8,unpack b8,chr$_}(0..255)'
*/
+const uchar _my_bits_reverse_table[256]={
+0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30,
+0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98,
+0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64,
+0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC,
+0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02,
+0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2,
+0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A,
+0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
+0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E,
+0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81,
+0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71,
+0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9,
+0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15,
+0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD,
+0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43,
+0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
+0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B,
+0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97,
+0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F,
+0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
+};
-uint32 my_round_up_to_next_power(uint32 v)
-{
- v--; /* 01100000000000000000000000001010 */
- v|= v >> 1; /* 01110000000000000000000000001111 */
- v|= v >> 2; /* 01111100000000000000000000001111 */
- v|= v >> 4; /* 01111111110000000000000000001111 */
- v|= v >> 8; /* 01111111111111111100000000001111 */
- v|= v >> 16; /* 01111111111111111111111111111111 */
- return v+1; /* 10000000000000000000000000000000 */
-}
diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c
index 10eff40b9ed..e127b2584ae 100644
--- a/mysys/my_bitmap.c
+++ b/mysys/my_bitmap.c
@@ -38,6 +38,7 @@
#include "mysys_priv.h"
#include <my_bitmap.h>
#include <m_string.h>
+#include <my_bit.h>
void create_last_word_mask(MY_BITMAP *map)
{
diff --git a/mysys/my_compress.c b/mysys/my_compress.c
index bc9f8317487..ab17b10e72c 100644
--- a/mysys/my_compress.c
+++ b/mysys/my_compress.c
@@ -181,8 +181,9 @@ int packfrm(uchar *data, size_t len,
if (my_compress((uchar*)data, &org_len, &comp_len))
goto err;
- DBUG_PRINT("info", ("org_len: %lu comp_len: %lu", (ulong) org_len, (ulong) comp_len));
- DBUG_DUMP("compressed", data, org_len);
+ DBUG_PRINT("info", ("org_len: %lu comp_len: %lu", (ulong) org_len,
+ (ulong) comp_len));
+ DBUG_DUMP("compressed", (char*)data, org_len);
error= 2;
blob_len= BLOB_HEADER + org_len;
diff --git a/mysys/my_create.c b/mysys/my_create.c
index 55878318ead..454ccf6ab7d 100644
--- a/mysys/my_create.c
+++ b/mysys/my_create.c
@@ -52,6 +52,13 @@ File my_create(const char *FileName, int CreateFlags, int access_flags,
fd = open(FileName, access_flags);
#endif
+ if ((MyFlags & MY_SYNC_DIR) && (fd >=0) &&
+ my_sync_dir_by_file(FileName, MyFlags))
+ {
+ my_close(fd, MyFlags);
+ fd= -1;
+ }
+
DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_CREATE,
EE_CANTCREATEFILE, MyFlags));
} /* my_create */
diff --git a/mysys/my_delete.c b/mysys/my_delete.c
index cff00bf7e08..4d1115410cb 100644
--- a/mysys/my_delete.c
+++ b/mysys/my_delete.c
@@ -29,6 +29,9 @@ int my_delete(const char *name, myf MyFlags)
my_error(EE_DELETE,MYF(ME_BELL+ME_WAITTANG+(MyFlags & ME_NOINPUT)),
name,errno);
}
+ else if ((MyFlags & MY_SYNC_DIR) &&
+ my_sync_dir_by_file(name, MyFlags))
+ err= -1;
DBUG_RETURN(err);
} /* my_delete */
diff --git a/mysys/my_error.c b/mysys/my_error.c
index e8fd8b938ee..d26c3d8cfde 100644
--- a/mysys/my_error.c
+++ b/mysys/my_error.c
@@ -84,19 +84,14 @@ int my_error(int nr, myf MyFlags, ...)
if (nr <= meh_p->meh_last)
break;
-#ifdef SHARED_LIBRARY
- if ((meh_p == &my_errmsgs_globerrs) && ! globerrs[0])
- init_glob_errs();
-#endif
-
/* get the error message string. Default, if NULL or empty string (""). */
if (! (format= (meh_p && (nr >= meh_p->meh_first)) ?
meh_p->meh_errmsgs[nr - meh_p->meh_first] : NULL) || ! *format)
- (void) my_snprintf (ebuff, sizeof(ebuff), "Unknown error %d", nr);
+ (void) my_snprintf(ebuff, sizeof(ebuff), "Unknown error %d", nr);
else
{
va_start(args,MyFlags);
- (void) my_vsnprintf (ebuff, sizeof(ebuff), format, args);
+ (void) my_vsnprintf(ebuff, sizeof(ebuff), format, args);
va_end(args);
}
DBUG_RETURN((*error_handler_hook)(nr, ebuff, MyFlags));
diff --git a/mysys/my_fopen.c b/mysys/my_fopen.c
index 44156da6ae3..351851cca76 100644
--- a/mysys/my_fopen.c
+++ b/mysys/my_fopen.c
@@ -134,7 +134,7 @@ FILE *my_fdopen(File Filedes, const char *name, int Flags, myf MyFlags)
FILE *fd;
char type[5];
DBUG_ENTER("my_fdopen");
- DBUG_PRINT("my",("Fd: %d Flags: %d MyFlags: %d",
+ DBUG_PRINT("my",("fd: %d Flags: %d MyFlags: %d",
Filedes, Flags, MyFlags));
make_ftype(type,Flags);
diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c
index 5132ac820b8..63ef57300fa 100644
--- a/mysys/my_getopt.c
+++ b/mysys/my_getopt.c
@@ -28,14 +28,14 @@ static void default_reporter(enum loglevel level, const char *format, ...);
my_error_reporter my_getopt_error_reporter= &default_reporter;
static int findopt(char *optpat, uint length,
- const struct my_option **opt_res,
- char **ffname);
+ const struct my_option **opt_res,
+ char **ffname);
my_bool getopt_compare_strings(const char *s,
- const char *t,
- uint length);
+ const char *t,
+ uint length);
static longlong getopt_ll(char *arg, const struct my_option *optp, int *err);
static ulonglong getopt_ull(char *arg, const struct my_option *optp,
- int *err);
+ int *err);
static double getopt_double(char *arg, const struct my_option *optp, int *err);
static void init_variables(const struct my_option *options,
init_func_p init_one_value);
@@ -43,8 +43,8 @@ static void init_one_value(const struct my_option *option, uchar* *variable,
longlong value);
static void fini_one_value(const struct my_option *option, uchar* *variable,
longlong value);
-static int setval(const struct my_option *opts, uchar* *value, char *argument,
- my_bool set_maximum_value);
+static int setval(const struct my_option *opts, uchar **value, char *argument,
+ my_bool set_maximum_value);
static char *check_struct_option(char *cur_arg, char *key_name);
/*
@@ -770,7 +770,7 @@ static longlong eval_num_suffix(char *argument, int *error, char *option_name)
return num;
}
-/*
+/*
function: getopt_ll
Evaluates and returns the value that user gave as an argument
@@ -867,7 +867,7 @@ ulonglong getopt_ull_limit_value(ulonglong num, const struct my_option *optp,
bool *fix)
{
bool adjusted= FALSE;
- ulonglong old= num;
+ ulonglong old= num, mod;
char buf1[255], buf2[255];
if ((ulonglong) num > (ulonglong) optp->max_value &&
@@ -892,6 +892,8 @@ ulonglong getopt_ull_limit_value(ulonglong num, const struct my_option *optp,
num= ((ulonglong) ULONG_MAX);
adjusted= TRUE;
}
+#else
+ num= min(num, LONG_MAX);
#endif
break;
default:
@@ -917,7 +919,6 @@ ulonglong getopt_ull_limit_value(ulonglong num, const struct my_option *optp,
my_getopt_error_reporter(WARNING_LEVEL,
"option '%s': unsigned value %s adjusted to %s",
optp->name, ullstr(old, buf1), ullstr(num, buf2));
-
return num;
}
@@ -958,8 +959,8 @@ static double getopt_double(char *arg, const struct my_option *optp, int *err)
SYNOPSIS
init_one_value()
- option Option to initialize
- value Pointer to variable
+ option Option to initialize
+ value Pointer to variable
*/
static void init_one_value(const struct my_option *option, uchar* *variable,
@@ -973,7 +974,7 @@ static void init_one_value(const struct my_option *option, uchar* *variable,
case GET_INT:
*((int*) variable)= (int) value;
break;
- case GET_UINT:
+ case GET_UINT: /* Fall through */
case GET_ENUM:
*((uint*) variable)= (uint) value;
break;
@@ -986,7 +987,7 @@ static void init_one_value(const struct my_option *option, uchar* *variable,
case GET_LL:
*((longlong*) variable)= (longlong) value;
break;
- case GET_ULL:
+ case GET_ULL: /* Fall through */
case GET_SET:
*((ulonglong*) variable)= (ulonglong) value;
break;
@@ -1054,7 +1055,7 @@ void my_cleanup_options(const struct my_option *options)
}
-/*
+/*
initialize all variables to their default values
SYNOPSIS
diff --git a/mysys/my_handler.c b/mysys/my_handler.c
index 1c3bb20426e..312227891c5 100644
--- a/mysys/my_handler.c
+++ b/mysys/my_handler.c
@@ -16,25 +16,30 @@
MA 02111-1307, USA */
#include <my_global.h>
-#include "my_handler.h"
+#include <m_ctype.h>
+#include <my_base.h>
+#include <my_handler.h>
+#include <my_sys.h>
-int mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length,
- uchar *b, uint b_length, my_bool part_key,
+int ha_compare_text(CHARSET_INFO *charset_info, const uchar *a, uint a_length,
+ const uchar *b, uint b_length, my_bool part_key,
my_bool skip_end_space)
{
if (!part_key)
return charset_info->coll->strnncollsp(charset_info, a, a_length,
- b, b_length, (my_bool)!skip_end_space);
+ b, b_length,
+ (my_bool)!skip_end_space);
return charset_info->coll->strnncoll(charset_info, a, a_length,
b, b_length, part_key);
}
-static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length,
+static int compare_bin(const uchar *a, uint a_length,
+ const uchar *b, uint b_length,
my_bool part_key, my_bool skip_end_space)
{
uint length= min(a_length,b_length);
- uchar *end= a+ length;
+ const uchar *end= a+ length;
int flag;
while (a < end)
@@ -113,8 +118,8 @@ static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length,
#define FCMP(A,B) ((int) (A) - (int) (B))
-int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
- register uchar *b, uint key_length, uint nextflag,
+int ha_key_cmp(register HA_KEYSEG *keyseg, register const uchar *a,
+ register const uchar *b, uint key_length, uint nextflag,
uint *diff_pos)
{
int flag;
@@ -124,12 +129,12 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
float f_1,f_2;
double d_1,d_2;
uint next_key_length;
- uchar *orig_b= b;
+ const uchar *orig_b= b;
*diff_pos=0;
for ( ; (int) key_length >0 ; key_length=next_key_length, keyseg++)
{
- uchar *end;
+ const uchar *end;
uint piks=! (keyseg->flag & HA_NO_SORT);
(*diff_pos)++;
diff_pos[1]= (uint)(b - orig_b);
@@ -174,7 +179,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
next_key_length=key_length-b_length-pack_length;
if (piks &&
- (flag=mi_compare_text(keyseg->charset,a,a_length,b,b_length,
+ (flag=ha_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0),
(my_bool)!(nextflag & SEARCH_PREFIX))))
@@ -187,7 +192,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
{
uint length=(uint) (end-a), a_length=length, b_length=length;
if (piks &&
- (flag= mi_compare_text(keyseg->charset, a, a_length, b, b_length,
+ (flag= ha_compare_text(keyseg->charset, a, a_length, b, b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0),
(my_bool)!(nextflag & SEARCH_PREFIX))))
@@ -235,7 +240,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
next_key_length=key_length-b_length-pack_length;
if (piks &&
- (flag= mi_compare_text(keyseg->charset,a,a_length,b,b_length,
+ (flag= ha_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0),
(my_bool) ((nextflag & (SEARCH_FIND |
@@ -361,7 +366,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
if (keyseg->flag & HA_REVERSE_SORT)
{
- swap_variables(uchar*, a, b);
+ swap_variables(const uchar*, a, b);
swap_flag=1; /* Remember swap of a & b */
end= a+ (int) (end-b);
}
@@ -386,7 +391,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
if (*b != '-')
return -1;
a++; b++;
- swap_variables(uchar*, a, b);
+ swap_variables(const uchar*, a, b);
swap_variables(int, alength, blength);
swap_flag=1-swap_flag;
alength--; blength--;
@@ -415,7 +420,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
}
if (swap_flag) /* Restore pointers */
- swap_variables(uchar*, a, b);
+ swap_variables(const uchar*, a, b);
break;
}
#ifdef HAVE_LONG_LONG
@@ -482,12 +487,15 @@ end:
DESCRIPTION
Find the first NULL value in index-suffix values tuple.
- TODO Consider optimizing this fuction or its use so we don't search for
- NULL values in completely NOT NULL index suffixes.
+
+ TODO
+ Consider optimizing this function or its use so we don't search for
+ NULL values in completely NOT NULL index suffixes.
RETURN
- First key part that has NULL as value in values tuple, or the last key part
- (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain NULLs.
+ First key part that has NULL as value in values tuple, or the last key
+ part (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain
+ NULLs.
*/
HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a)
@@ -557,3 +565,91 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a)
}
return keyseg;
}
+
+
+/*
+ Errors a handler can give you
+*/
+
+static const char *handler_error_messages[]=
+{
+ "Didn't find key on read or update",
+ "Duplicate key on write or update",
+ "Internal (unspecified) error in handler",
+ "Someone has changed the row since it was read (while the table was locked to prevent it)",
+ "Wrong index given to function",
+ "Undefined handler error 125",
+ "Index file is crashed",
+ "Record file is crashed",
+ "Out of memory in engine",
+ "Undefined handler error 129",
+ "Incorrect file format",
+ "Command not supported by database",
+ "Old database file",
+ "No record read before update",
+ "Record was already deleted (or record file crashed)",
+ "No more room in record file",
+ "No more room in index file",
+ "No more records (read after end of file)",
+ "Unsupported extension used for table",
+ "Too big row",
+ "Wrong create options",
+ "Duplicate unique key or constraint on write or update",
+ "Unknown character set used in table",
+ "Conflicting table definitions in sub-tables of MERGE table",
+ "Table is crashed and last repair failed",
+ "Table was marked as crashed and should be repaired",
+ "Lock timed out; Retry transaction",
+ "Lock table is full; Restart program with a larger locktable",
+ "Updates are not allowed under a read only transactions",
+ "Lock deadlock; Retry transaction",
+ "Foreign key constraint is incorrectly formed",
+ "Cannot add a child row",
+ "Cannot delete a parent row",
+ "No savepoint with that name",
+ "Non unique key block size",
+ "The table does not exist in engine",
+ "The table already existed in storage engine",
+ "Could not connect to storage engine",
+ "Unexpected null pointer found when using spatial index",
+ "The table changed in storage engine",
+ "There's no partition in table for the given value",
+ "Row-based binlogging of row failed",
+ "Index needed in foreign key constraint",
+ "Upholding foreign key constraints would lead to a duplicate key error in "
+ "some other table",
+ "Table needs to be upgraded before it can be used",
+ "Table is read only",
+ "Failed to get next auto increment value",
+ "Failed to set row auto increment value",
+ "Unknown (generic) error from engine",
+ "Record is the same",
+ "It is not possible to log this statement",
+ "The table is of a new format not supported by this version",
+ "Got a fatal error during initialzaction of handler",
+ "File to short; Expected more data in file",
+ "Read page with wrong checksum"
+};
+
+
+/*
+ Register handler error messages for usage with my_error()
+
+ NOTES
+ This is safe to call multiple times as my_error_register()
+ will ignore calls to register already registered error numbers.
+*/
+
+
+void my_handler_error_register(void)
+{
+ my_error_register(handler_error_messages, HA_ERR_FIRST,
+ HA_ERR_FIRST+ array_elements(handler_error_messages)-1);
+}
+
+
+void my_handler_error_unregister(void)
+{
+ my_error_unregister(HA_ERR_FIRST,
+ HA_ERR_FIRST+ array_elements(handler_error_messages)-1);
+}
diff --git a/mysys/my_init.c b/mysys/my_init.c
index 6d1b9ec04be..145a435b4b6 100644
--- a/mysys/my_init.c
+++ b/mysys/my_init.c
@@ -43,6 +43,7 @@ static void netware_init();
my_bool my_init_done= 0;
uint mysys_usage_id= 0; /* Incremented for each my_init() */
+ulong my_thread_stack_size= 65536;
static ulong atoi_octal(const char *str)
{
@@ -76,6 +77,11 @@ my_bool my_init(void)
mysys_usage_id++;
my_umask= 0660; /* Default umask for new files */
my_umask_dir= 0700; /* Default umask for new directories */
+ init_glob_errs();
+ my_progname_short= "unknown";
+ if (my_progname)
+ my_progname_short= my_progname + dirname_length(my_progname);
+
#if defined(THREAD) && defined(SAFE_MUTEX)
safe_mutex_global_init(); /* Must be called early */
#endif
@@ -230,6 +236,13 @@ Voluntary context switches %ld, Involuntary context switches %ld\n",
my_init_done=0;
} /* my_end */
+#ifndef DBUG_OFF
+/* Dummy tag function for debugging */
+
+void my_debug_put_break_here(void)
+{
+}
+#endif
#ifdef __WIN__
diff --git a/mysys/my_lock.c b/mysys/my_lock.c
index c0522ee849d..200ee7188c9 100644
--- a/mysys/my_lock.c
+++ b/mysys/my_lock.c
@@ -49,12 +49,12 @@ int my_lock(File fd, int locktype, my_off_t start, my_off_t length,
int nxErrno;
#endif
DBUG_ENTER("my_lock");
- DBUG_PRINT("my",("Fd: %d Op: %d start: %ld Length: %ld MyFlags: %d",
+ DBUG_PRINT("my",("fd: %d Op: %d start: %ld Length: %ld MyFlags: %d",
fd,locktype,(long) start,(long) length,MyFlags));
#ifdef VMS
DBUG_RETURN(0);
#else
- if (my_disable_locking)
+ if (my_disable_locking && ! (MyFlags & MY_FORCE_LOCK))
DBUG_RETURN(0);
#if defined(__NETWARE__)
@@ -131,10 +131,16 @@ int my_lock(File fd, int locktype, my_off_t start, my_off_t length,
lock.l_start= (off_t) start;
lock.l_len= (off_t) length;
- if (MyFlags & MY_DONT_WAIT)
+ if (MyFlags & (MY_NO_WAIT | MY_SHORT_WAIT))
{
if (fcntl(fd,F_SETLK,&lock) != -1) /* Check if we can lock */
- DBUG_RETURN(0); /* Ok, file locked */
+ DBUG_RETURN(0); /* Ok, file locked */
+ if (MyFlags & MY_NO_WAIT)
+ {
+ my_errno= (errno == EACCES) ? EAGAIN : errno ? errno : -1;
+ DBUG_RETURN(-1);
+ }
+
DBUG_PRINT("info",("Was locked, trying with alarm"));
ALARM_INIT;
while ((value=fcntl(fd,F_SETLKW,&lock)) && ! ALARM_TEST &&
diff --git a/mysys/my_open.c b/mysys/my_open.c
index 938dbc5dde2..fe7f65c450b 100644
--- a/mysys/my_open.c
+++ b/mysys/my_open.c
@@ -71,6 +71,7 @@ File my_open(const char *FileName, int Flags, myf MyFlags)
#else
fd = open((char *) FileName, Flags);
#endif
+
DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_OPEN,
EE_FILENOTFOUND, MyFlags));
} /* my_open */
@@ -124,61 +125,66 @@ int my_close(File fd, myf MyFlags)
SYNOPSIS
my_register_filename()
- fd
- FileName
- type_file_type
+ fd File number opened, -1 if error on open
+ FileName File name
+ type_file_type How file was created
+ error_message_number Error message number if caller got error (fd == -1)
+ MyFlags Flags for my_close()
+
+ RETURN
+ -1 error
+ # Filenumber
+
*/
File my_register_filename(File fd, const char *FileName, enum file_type
type_of_file, uint error_message_number, myf MyFlags)
{
+ DBUG_ENTER("my_register_filename");
if ((int) fd >= 0)
{
if ((uint) fd >= my_file_limit)
{
#if defined(THREAD) && !defined(HAVE_PREAD)
- (void) my_close(fd,MyFlags);
- my_errno=EMFILE;
- if (MyFlags & (MY_FFNF | MY_FAE | MY_WME))
- my_error(EE_OUT_OF_FILERESOURCES, MYF(ME_BELL+ME_WAITTANG),
- FileName, my_errno);
- return(-1);
-#endif
+ my_errno= EMFILE;
+#else
thread_safe_increment(my_file_opened,&THR_LOCK_open);
- return(fd); /* safeguard */
+ DBUG_RETURN(fd); /* safeguard */
+#endif
}
- pthread_mutex_lock(&THR_LOCK_open);
- if ((my_file_info[fd].name = (char*) my_strdup(FileName,MyFlags)))
+ else
{
- my_file_opened++;
- my_file_total_opened++;
- my_file_info[fd].type = type_of_file;
+ pthread_mutex_lock(&THR_LOCK_open);
+ if ((my_file_info[fd].name = (char*) my_strdup(FileName,MyFlags)))
+ {
+ my_file_opened++;
+ my_file_total_opened++;
+ my_file_info[fd].type = type_of_file;
#if defined(THREAD) && !defined(HAVE_PREAD)
- pthread_mutex_init(&my_file_info[fd].mutex,MY_MUTEX_INIT_FAST);
+ pthread_mutex_init(&my_file_info[fd].mutex,MY_MUTEX_INIT_FAST);
#endif
+ pthread_mutex_unlock(&THR_LOCK_open);
+ DBUG_PRINT("exit",("fd: %d",fd));
+ DBUG_RETURN(fd);
+ }
pthread_mutex_unlock(&THR_LOCK_open);
- DBUG_PRINT("exit",("fd: %d",fd));
- return(fd);
+ my_errno= ENOMEM;
}
- pthread_mutex_unlock(&THR_LOCK_open);
(void) my_close(fd, MyFlags);
- my_errno=ENOMEM;
}
else
- my_errno=errno;
- DBUG_PRINT("error",("Got error %d on open",my_errno));
- if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) {
- if (my_errno == EMFILE) {
- DBUG_PRINT("error",("print err: %d",EE_OUT_OF_FILERESOURCES));
- my_error(EE_OUT_OF_FILERESOURCES, MYF(ME_BELL+ME_WAITTANG),
- FileName, my_errno);
- } else {
- DBUG_PRINT("error",("print err: %d",error_message_number));
- my_error(error_message_number, MYF(ME_BELL+ME_WAITTANG),
- FileName, my_errno);
- }
+ my_errno= errno;
+
+ DBUG_PRINT("error",("Got error %d on open", my_errno));
+ if (MyFlags & (MY_FFNF | MY_FAE | MY_WME))
+ {
+ if (my_errno == EMFILE)
+ error_message_number= EE_OUT_OF_FILERESOURCES;
+ DBUG_PRINT("error",("print err: %d",error_message_number));
+ my_error(error_message_number, MYF(ME_BELL+ME_WAITTANG),
+ FileName, my_errno);
}
- return(fd);
+ DBUG_RETURN(-1);
}
#ifdef __WIN__
diff --git a/mysys/my_pread.c b/mysys/my_pread.c
index 6e98132db73..e0218cd1f1f 100644
--- a/mysys/my_pread.c
+++ b/mysys/my_pread.c
@@ -15,6 +15,7 @@
#include "mysys_priv.h"
#include "mysys_err.h"
+#include "my_base.h"
#include <errno.h>
#ifdef HAVE_PREAD
#include <unistd.h>
@@ -47,7 +48,7 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset,
size_t readbytes;
int error= 0;
DBUG_ENTER("my_pread");
- DBUG_PRINT("my",("Fd: %d Seek: %lu Buffer: 0x%lx Count: %u MyFlags: %d",
+ DBUG_PRINT("my",("fd: %d Seek: %lu Buffer: 0x%lx Count: %u MyFlags: %d",
Filedes, (ulong) offset, (long) Buffer, (uint) Count,
MyFlags));
for (;;)
@@ -63,12 +64,16 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset,
pthread_mutex_unlock(&my_file_info[Filedes].mutex);
#else
if ((error= ((readbytes= pread(Filedes, Buffer, Count, offset)) != Count)))
+ {
my_errno= errno;
+ if (errno == 0 || (errno == -1 && (MyFlags & (MY_NABP | MY_FNABP))))
+ my_errno= HA_ERR_FILE_TOO_SHORT;
+ }
#endif
if (error || readbytes != Count)
{
DBUG_PRINT("warning",("Read only %d bytes off %u from %d, errno: %d",
- (int) readbytes, (uint) Count,Filedes,my_errno));
+ (int) readbytes, (uint) Count,Filedes,my_errno));
#ifdef THREAD
if ((readbytes == 0 || readbytes == (size_t) -1) && errno == EINTR)
{
@@ -115,7 +120,7 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset,
RETURN
(size_t) -1 Error
# Number of bytes read
- */
+*/
size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count,
my_off_t offset, myf MyFlags)
@@ -123,7 +128,7 @@ size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count,
size_t writenbytes, written;
uint errors;
DBUG_ENTER("my_pwrite");
- DBUG_PRINT("my",("Fd: %d Seek: %lu Buffer: 0x%lx Count: %u MyFlags: %d",
+ DBUG_PRINT("my",("fd: %d Seek: %lu Buffer: 0x%lx Count: %u MyFlags: %d",
Filedes, (ulong) offset, (long) Buffer, (uint) Count,
MyFlags));
errors= 0;
diff --git a/mysys/my_read.c b/mysys/my_read.c
index f3e8a4b300e..63f1d4fdebd 100644
--- a/mysys/my_read.c
+++ b/mysys/my_read.c
@@ -15,9 +15,9 @@
#include "mysys_priv.h"
#include "mysys_err.h"
+#include <my_base.h>
#include <errno.h>
-
/*
Read a chunk of bytes from a file with retry's if needed
@@ -37,7 +37,7 @@ size_t my_read(File Filedes, uchar *Buffer, size_t Count, myf MyFlags)
{
size_t readbytes, save_count;
DBUG_ENTER("my_read");
- DBUG_PRINT("my",("Fd: %d Buffer: 0x%lx Count: %lu MyFlags: %d",
+ DBUG_PRINT("my",("fd: %d Buffer: 0x%lx Count: %lu MyFlags: %d",
Filedes, (long) Buffer, (ulong) Count, MyFlags));
save_count= Count;
@@ -46,7 +46,9 @@ size_t my_read(File Filedes, uchar *Buffer, size_t Count, myf MyFlags)
errno= 0; /* Linux doesn't reset this */
if ((readbytes= read(Filedes, Buffer, Count)) != Count)
{
- my_errno= errno ? errno : -1;
+ my_errno= errno;
+ if (errno == 0 || (errno == -1 && (MyFlags & (MY_NABP | MY_FNABP))))
+ my_errno= HA_ERR_FILE_TOO_SHORT;
DBUG_PRINT("warning",("Read only %d bytes off %lu from %d, errno: %d",
(int) readbytes, (ulong) Count, Filedes,
my_errno));
diff --git a/mysys/my_realloc.c b/mysys/my_realloc.c
index c7cf1323cd4..828890a0dc2 100644
--- a/mysys/my_realloc.c
+++ b/mysys/my_realloc.c
@@ -22,6 +22,17 @@
/* My memory re allocator */
+/**
+ @brief wrapper around realloc()
+
+ @param oldpoint pointer to currently allocated area
+ @param size new size requested, must be >0
+ @param my_flags flags
+
+ @note if size==0 realloc() may return NULL; my_realloc() treats this as an
+ error which is not the intention of realloc()
+*/
+
void* my_realloc(void* oldpoint, size_t size, myf my_flags)
{
void *point;
@@ -29,6 +40,7 @@ void* my_realloc(void* oldpoint, size_t size, myf my_flags)
DBUG_PRINT("my",("ptr: 0x%lx size: %lu my_flags: %d", (long) oldpoint,
(ulong) size, my_flags));
+ DBUG_ASSERT(size > 0);
if (!oldpoint && (my_flags & MY_ALLOW_ZERO_PTR))
DBUG_RETURN(my_malloc(size,my_flags));
#ifdef USE_HALLOC
diff --git a/mysys/my_rename.c b/mysys/my_rename.c
index 6a6aa6a5796..39e6056a9e4 100644
--- a/mysys/my_rename.c
+++ b/mysys/my_rename.c
@@ -16,8 +16,9 @@
#include "mysys_priv.h"
#include <my_dir.h>
#include "mysys_err.h"
-
+#include "m_string.h"
#undef my_rename
+
/* On unix rename deletes to file if it exists */
int my_rename(const char *from, const char *to, myf MyFlags)
@@ -60,5 +61,19 @@ int my_rename(const char *from, const char *to, myf MyFlags)
if (MyFlags & (MY_FAE+MY_WME))
my_error(EE_LINK, MYF(ME_BELL+ME_WAITTANG),from,to,my_errno);
}
+ else if (MyFlags & MY_SYNC_DIR)
+ {
+#ifdef NEED_EXPLICIT_SYNC_DIR
+ /* do only the needed amount of syncs: */
+ char dir_from[FN_REFLEN], dir_to[FN_REFLEN];
+ size_t dir_from_length, dir_to_length;
+ dirname_part(dir_from, from, &dir_from_length);
+ dirname_part(dir_to, to, &dir_to_length);
+ if (my_sync_dir(dir_from, MyFlags) ||
+ (strcmp(dir_from, dir_to) &&
+ my_sync_dir(dir_to, MyFlags)))
+ error= -1;
+#endif
+ }
DBUG_RETURN(error);
} /* my_rename */
diff --git a/mysys/my_rnd.c b/mysys/my_rnd.c
new file mode 100644
index 00000000000..b7dca0f2afd
--- /dev/null
+++ b/mysys/my_rnd.c
@@ -0,0 +1,55 @@
+/* Copyright (C) 2007 MySQL AB & Michael Widenius
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "mysys_priv.h"
+#include <m_string.h>
+
+/*
+ Initialize random generator
+
+ NOTES
+ MySQL's password checks depends on this, so don't do any changes
+ that changes the random numbers that are generated!
+*/
+
+void my_rnd_init(struct my_rnd_struct *rand_st, ulong seed1, ulong seed2)
+{
+#ifdef HAVE_purify
+ bzero((char*) rand_st,sizeof(*rand_st)); /* Avoid UMC varnings */
+#endif
+ rand_st->max_value= 0x3FFFFFFFL;
+ rand_st->max_value_dbl=(double) rand_st->max_value;
+ rand_st->seed1=seed1%rand_st->max_value ;
+ rand_st->seed2=seed2%rand_st->max_value;
+}
+
+
+/*
+ Generate random number.
+
+ SYNOPSIS
+ my_rnd()
+ rand_st INOUT Structure used for number generation
+
+ RETURN VALUE
+ generated pseudo random number
+*/
+
+double my_rnd(struct my_rnd_struct *rand_st)
+{
+ rand_st->seed1=(rand_st->seed1*3+rand_st->seed2) % rand_st->max_value;
+ rand_st->seed2=(rand_st->seed1+rand_st->seed2+33) % rand_st->max_value;
+ return (((double) rand_st->seed1)/rand_st->max_value_dbl);
+}
diff --git a/mysys/my_safehash.c b/mysys/my_safehash.c
new file mode 100644
index 00000000000..b3d6439793c
--- /dev/null
+++ b/mysys/my_safehash.c
@@ -0,0 +1,297 @@
+/* Copyright (C) 2003-2007 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Handling of multiple key caches
+
+ The idea is to have a thread safe hash on the table name,
+ with a default key cache value that is returned if the table name is not in
+ the cache.
+*/
+
+#include "mysys_priv.h"
+#include <m_string.h>
+#include "my_safehash.h"
+
+/*****************************************************************************
+ General functions to handle SAFE_HASH objects.
+
+ A SAFE_HASH object is used to store the hash, the mutex and default value
+ needed by the rest of the key cache code.
+ This is a separate struct to make it easy to later reuse the code for other
+ purposes
+
+ All entries are linked in a list to allow us to traverse all elements
+ and delete selected ones. (HASH doesn't allow any easy ways to do this).
+*****************************************************************************/
+
+
+/*
+ Free a SAFE_HASH_ENTRY
+
+ SYNOPSIS
+ safe_hash_entry_free()
+ entry The entry which should be freed
+
+ NOTE
+ This function is called by the hash object on delete
+*/
+
+static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry)
+{
+ DBUG_ENTER("safe_hash_entry_free");
+ my_free((uchar*) entry, MYF(0));
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Get key and length for a SAFE_HASH_ENTRY
+
+ SYNOPSIS
+ safe_hash_entry_get()
+ entry The entry for which the key should be returned
+ length Length of the key
+
+ RETURN
+ # reference on the key
+*/
+
+static uchar *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, size_t *length,
+ my_bool not_used __attribute__((unused)))
+{
+ *length= entry->length;
+ return (uchar*) entry->key;
+}
+
+
+/*
+ Init a SAFE_HASH object
+
+ SYNOPSIS
+ safe_hash_init()
+ hash safe_hash handler
+ elements Expected max number of elements
+ default_value default value
+
+ NOTES
+ In case of error we set hash->default_value to 0 to allow one to call
+ safe_hash_free on an object that couldn't be initialized.
+
+ RETURN
+ 0 OK
+ 1 error
+*/
+
+my_bool safe_hash_init(SAFE_HASH *hash, uint elements,
+ uchar *default_value)
+{
+ DBUG_ENTER("safe_hash_init");
+ if (hash_init(&hash->hash, &my_charset_bin, elements,
+ 0, 0, (hash_get_key) safe_hash_entry_get,
+ (void (*)(void*)) safe_hash_entry_free, 0))
+ {
+ hash->default_value= 0;
+ DBUG_RETURN(1);
+ }
+ my_rwlock_init(&hash->mutex, 0);
+ hash->default_value= default_value;
+ hash->root= 0;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Free a SAFE_HASH object
+
+ SYNOPSIS
+ safe_hash_free()
+ hash Hash handle
+
+ NOTES
+ This is safe to call on any object that has been sent to safe_hash_init()
+*/
+
+void safe_hash_free(SAFE_HASH *hash)
+{
+ /*
+ Test if safe_hash_init succeeded. This will also guard us against multiple
+ free calls.
+ */
+ if (hash->default_value)
+ {
+ hash_free(&hash->hash);
+ rwlock_destroy(&hash->mutex);
+ hash->default_value=0;
+ }
+}
+
+
+/*
+ Return the value stored for a key or default value if no key
+
+ SYNOPSIS
+ safe_hash_search()
+ hash Hash handle
+ key key (path to table etc..)
+ length Length of key
+ def Default value of data
+
+ RETURN
+ # data associated with the key of default value if data was not found
+*/
+
+uchar *safe_hash_search(SAFE_HASH *hash, const uchar *key, uint length,
+ uchar *def)
+{
+ uchar *result;
+ DBUG_ENTER("safe_hash_search");
+ rw_rdlock(&hash->mutex);
+ result= hash_search(&hash->hash, key, length);
+ rw_unlock(&hash->mutex);
+ if (!result)
+ result= def;
+ else
+ result= ((SAFE_HASH_ENTRY*) result)->data;
+ DBUG_PRINT("exit",("data: 0x%lx", (long) result));
+ DBUG_RETURN(result);
+}
+
+
+/*
+ Associate a key with some data
+
+ SYNOPSIS
+ safe_hash_set()
+ hash Hash handle
+ key key (path to table etc..)
+ length Length of key
+ data data to to associate with the data
+
+ NOTES
+ This can be used both to insert a new entry and change an existing
+ entry.
+ If one associates a key with the default key cache, the key is deleted
+
+ RETURN
+ 0 OK
+ 1 error (Can only be EOM). In this case my_message() is called.
+*/
+
+my_bool safe_hash_set(SAFE_HASH *hash, const uchar *key, uint length,
+ uchar *data)
+{
+ SAFE_HASH_ENTRY *entry;
+ my_bool error= 0;
+ DBUG_ENTER("safe_hash_set");
+ DBUG_PRINT("enter",("key: %.*s data: 0x%lx", length, key, (long) data));
+
+ rw_wrlock(&hash->mutex);
+ entry= (SAFE_HASH_ENTRY*) hash_search(&hash->hash, key, length);
+
+ if (data == hash->default_value)
+ {
+ /*
+ The key is to be associated with the default entry. In this case
+ we can just delete the entry (if it existed) from the hash as a
+ search will return the default entry
+ */
+ if (!entry) /* nothing to do */
+ goto end;
+ /* unlink entry from list */
+ if ((*entry->prev= entry->next))
+ entry->next->prev= entry->prev;
+ hash_delete(&hash->hash, (uchar*) entry);
+ goto end;
+ }
+ if (entry)
+ {
+ /* Entry existed; Just change the pointer to point at the new data */
+ entry->data= data;
+ }
+ else
+ {
+ if (!(entry= (SAFE_HASH_ENTRY *) my_malloc(sizeof(*entry) + length,
+ MYF(MY_WME))))
+ {
+ error= 1;
+ goto end;
+ }
+ entry->key= (uchar*) (entry +1);
+ memcpy((char*) entry->key, (char*) key, length);
+ entry->length= length;
+ entry->data= data;
+ /* Link entry to list */
+ if ((entry->next= hash->root))
+ entry->next->prev= &entry->next;
+ entry->prev= &hash->root;
+ hash->root= entry;
+ if (my_hash_insert(&hash->hash, (uchar*) entry))
+ {
+ /* This can only happen if hash got out of memory */
+ my_free((char*) entry, MYF(0));
+ error= 1;
+ goto end;
+ }
+ }
+
+end:
+ rw_unlock(&hash->mutex);
+ DBUG_RETURN(error);
+}
+
+
+/*
+ Change all entries with one data value to another data value
+
+ SYNOPSIS
+ safe_hash_change()
+ hash Hash handle
+ old_data Old data
+ new_data Change all 'old_data' to this
+
+ NOTES
+ We use the linked list to traverse all elements in the hash as
+ this allows us to delete elements in the case where 'new_data' is the
+ default value.
+*/
+
+void safe_hash_change(SAFE_HASH *hash, uchar *old_data, uchar *new_data)
+{
+ SAFE_HASH_ENTRY *entry, *next;
+ DBUG_ENTER("safe_hash_change");
+
+ rw_wrlock(&hash->mutex);
+
+ for (entry= hash->root ; entry ; entry= next)
+ {
+ next= entry->next;
+ if (entry->data == old_data)
+ {
+ if (new_data == hash->default_value)
+ {
+ if ((*entry->prev= entry->next))
+ entry->next->prev= entry->prev;
+ hash_delete(&hash->hash, (uchar*) entry);
+ }
+ else
+ entry->data= new_data;
+ }
+ }
+
+ rw_unlock(&hash->mutex);
+ DBUG_VOID_RETURN;
+}
diff --git a/mysys/my_safehash.h b/mysys/my_safehash.h
new file mode 100644
index 00000000000..8a5856b6763
--- /dev/null
+++ b/mysys/my_safehash.h
@@ -0,0 +1,58 @@
+/* Copyright (C) 2003 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Handling of multiple key caches
+
+ The idea is to have a thread safe hash on the table name,
+ with a default key cache value that is returned if the table name is not in
+ the cache.
+*/
+
+#include <hash.h>
+
+/*
+ Struct to store a key and pointer to object
+*/
+
+typedef struct st_safe_hash_entry
+{
+ uchar *key;
+ uint length;
+ uchar *data;
+ struct st_safe_hash_entry *next, **prev;
+} SAFE_HASH_ENTRY;
+
+
+typedef struct st_safe_hash_with_default
+{
+#ifdef THREAD
+ rw_lock_t mutex;
+#endif
+ HASH hash;
+ uchar *default_value;
+ SAFE_HASH_ENTRY *root;
+} SAFE_HASH;
+
+
+my_bool safe_hash_init(SAFE_HASH *hash, uint elements,
+ uchar *default_value);
+void safe_hash_free(SAFE_HASH *hash);
+uchar *safe_hash_search(SAFE_HASH *hash, const uchar *key, uint length,
+ uchar *def);
+my_bool safe_hash_set(SAFE_HASH *hash, const uchar *key, uint length,
+ uchar *data);
+void safe_hash_change(SAFE_HASH *hash, uchar *old_data, uchar *new_data);
diff --git a/mysys/my_seek.c b/mysys/my_seek.c
index 2c661baeff7..4e18b510a1e 100644
--- a/mysys/my_seek.c
+++ b/mysys/my_seek.c
@@ -47,7 +47,7 @@ my_off_t my_seek(File fd, my_off_t pos, int whence,
{
reg1 os_off_t newpos= -1;
DBUG_ENTER("my_seek");
- DBUG_PRINT("my",("Fd: %d Hpos: %lu Pos: %lu Whence: %d MyFlags: %d",
+ DBUG_PRINT("my",("fd: %d Hpos: %lu Pos: %lu Whence: %d MyFlags: %d",
fd, (ulong) (((ulonglong) pos) >> 32), (ulong) pos,
whence, MyFlags));
DBUG_ASSERT(pos != MY_FILEPOS_ERROR); /* safety check */
@@ -87,7 +87,7 @@ my_off_t my_tell(File fd, myf MyFlags __attribute__((unused)))
{
os_off_t pos;
DBUG_ENTER("my_tell");
- DBUG_PRINT("my",("Fd: %d MyFlags: %d",fd, MyFlags));
+ DBUG_PRINT("my",("fd: %d MyFlags: %d",fd, MyFlags));
DBUG_ASSERT(fd >= 0);
#ifdef HAVE_TELL
pos=tell(fd);
diff --git a/mysys/my_static.c b/mysys/my_static.c
index cb482b19b57..ef25a89bad9 100644
--- a/mysys/my_static.c
+++ b/mysys/my_static.c
@@ -26,7 +26,7 @@ my_bool timed_mutexes= 0;
/* from my_init */
char * home_dir=0;
-const char *my_progname=0;
+const char *my_progname= NULL, *my_progname_short= NULL;
char NEAR curr_dir[FN_REFLEN]= {0},
NEAR home_dir_buff[FN_REFLEN]= {0};
ulong my_stream_opened=0,my_file_opened=0, my_tmp_file_created=0;
diff --git a/mysys/my_symlink.c b/mysys/my_symlink.c
index 810c0c72632..98059ccd508 100644
--- a/mysys/my_symlink.c
+++ b/mysys/my_symlink.c
@@ -84,6 +84,8 @@ int my_symlink(const char *content, const char *linkname, myf MyFlags)
if (MyFlags & MY_WME)
my_error(EE_CANT_SYMLINK, MYF(0), linkname, content, errno);
}
+ else if ((MyFlags & MY_SYNC_DIR) && my_sync_dir_by_file(linkname, MyFlags))
+ result= -1;
DBUG_RETURN(result);
#endif /* HAVE_READLINK */
}
diff --git a/mysys/my_sync.c b/mysys/my_sync.c
index 64fce3aac21..1b8420c034e 100644
--- a/mysys/my_sync.c
+++ b/mysys/my_sync.c
@@ -44,10 +44,20 @@ int my_sync(File fd, myf my_flags)
{
int res;
DBUG_ENTER("my_sync");
- DBUG_PRINT("my",("Fd: %d my_flags: %d", fd, my_flags));
+ DBUG_PRINT("my",("fd: %d my_flags: %d", fd, my_flags));
do
{
+#if defined(F_FULLFSYNC)
+ /*
+ In Mac OS X >= 10.3 this call is safer than fsync() (it forces the
+ disk's cache and guarantees ordered writes).
+ */
+ if (!(res= fcntl(fd, F_FULLFSYNC, 0)))
+ break; /* ok */
+ /* Some file systems don't support F_FULLFSYNC and fail above: */
+ DBUG_PRINT("info",("fcntl(F_FULLFSYNC) failed, falling back"));
+#endif
#if defined(HAVE_FDATASYNC)
res= fdatasync(fd);
#elif defined(HAVE_FSYNC)
@@ -55,6 +65,7 @@ int my_sync(File fd, myf my_flags)
#elif defined(__WIN__)
res= _commit(fd);
#else
+#error Cannot find a way to sync a file, durability in danger
res= 0; /* No sync (strange OS) */
#endif
} while (res == -1 && errno == EINTR);
@@ -66,10 +77,79 @@ int my_sync(File fd, myf my_flags)
my_errno= -1; /* Unknown error */
if ((my_flags & MY_IGNORE_BADFD) &&
(er == EBADF || er == EINVAL || er == EROFS))
+ {
+ DBUG_PRINT("info", ("ignoring errno %d", er));
res= 0;
+ }
else if (my_flags & MY_WME)
my_error(EE_SYNC, MYF(ME_BELL+ME_WAITTANG), my_filename(fd), my_errno);
}
DBUG_RETURN(res);
} /* my_sync */
+
+static const char cur_dir_name[]= {FN_CURLIB, 0};
+/*
+ Force directory information to disk.
+
+ SYNOPSIS
+ my_sync_dir()
+ dir_name the name of the directory
+ my_flags flags (MY_WME etc)
+
+ RETURN
+ 0 if ok, !=0 if error
+*/
+int my_sync_dir(const char *dir_name, myf my_flags)
+{
+#ifdef NEED_EXPLICIT_SYNC_DIR
+ DBUG_ENTER("my_sync_dir");
+ DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags));
+ File dir_fd;
+ int res= 0;
+ const char *correct_dir_name;
+ /* Sometimes the path does not contain an explicit directory */
+ correct_dir_name= (dir_name[0] == 0) ? cur_dir_name : dir_name;
+ /*
+ Syncing a dir may give EINVAL on tmpfs on Linux, which is ok.
+ EIO on the other hand is very important. Hence MY_IGNORE_BADFD.
+ */
+ if ((dir_fd= my_open(correct_dir_name, O_RDONLY, MYF(my_flags))) >= 0)
+ {
+ if (my_sync(dir_fd, MYF(my_flags | MY_IGNORE_BADFD)))
+ res= 2;
+ if (my_close(dir_fd, MYF(my_flags)))
+ res= 3;
+ }
+ else
+ res= 1;
+ DBUG_RETURN(res);
+#else
+ return 0;
+#endif
+}
+
+
+/*
+ Force directory information to disk.
+
+ SYNOPSIS
+ my_sync_dir_by_file()
+ file_name the name of a file in the directory
+ my_flags flags (MY_WME etc)
+
+ RETURN
+ 0 if ok, !=0 if error
+*/
+int my_sync_dir_by_file(const char *file_name, myf my_flags)
+{
+#ifdef NEED_EXPLICIT_SYNC_DIR
+ char dir_name[FN_REFLEN];
+ size_t dir_name_length;
+ dirname_part(dir_name, file_name, &dir_name_length);
+ return my_sync_dir(dir_name, my_flags);
+#else
+ return 0;
+#endif
+}
+
diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c
index 1ba6e5ac92d..aadb86d39ed 100644
--- a/mysys/my_thr_init.c
+++ b/mysys/my_thr_init.c
@@ -289,12 +289,12 @@ my_bool my_thread_init(void)
#endif
pthread_mutex_init(&tmp->mutex,MY_MUTEX_INIT_FAST);
pthread_cond_init(&tmp->suspend, NULL);
- tmp->init= 1;
pthread_mutex_lock(&THR_LOCK_threads);
tmp->id= ++thread_id;
++THR_thread_count;
pthread_mutex_unlock(&THR_LOCK_threads);
+ tmp->init= 1;
#ifndef DBUG_OFF
/* Generate unique name for thread */
(void) my_thread_name();
@@ -347,6 +347,9 @@ void my_thread_end(void)
tmp->init= 0;
#endif
+#if !defined(__WIN__) || defined(USE_TLS)
+ pthread_setspecific(THR_KEY_mysys,0);
+#endif
/*
Decrement counter for number of running threads. We are using this
in my_thread_global_end() to wait until all threads have called
@@ -359,10 +362,12 @@ void my_thread_end(void)
pthread_cond_signal(&THR_COND_threads);
pthread_mutex_unlock(&THR_LOCK_threads);
}
- /* The following free has to be done, even if my_thread_var() is 0 */
+ else
+ {
#if !defined(__WIN__) || defined(USE_TLS)
- pthread_setspecific(THR_KEY_mysys,0);
+ pthread_setspecific(THR_KEY_mysys,0);
#endif
+ }
}
struct st_my_thread_var *_my_thread_var(void)
@@ -380,6 +385,16 @@ struct st_my_thread_var *_my_thread_var(void)
return tmp;
}
+#ifndef DBUG_OFF
+/* Return pointer to DBUG for holding current state */
+
+extern void **my_thread_var_dbug()
+{
+ struct st_my_thread_var *tmp=
+ my_pthread_getspecific(struct st_my_thread_var*,THR_KEY_mysys);
+ return tmp && tmp->init ? &tmp->dbug : 0;
+}
+#endif
/****************************************************************************
Get name of current thread.
diff --git a/mysys/my_uuid.c b/mysys/my_uuid.c
new file mode 100644
index 00000000000..d97aaf604fa
--- /dev/null
+++ b/mysys/my_uuid.c
@@ -0,0 +1,178 @@
+/* Copyright (C) 2007 MySQL AB, Sergei Golubchik & Michael Widenius
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ implements Universal Unique Identifiers (UUIDs), as in
+ DCE 1.1: Remote Procedure Call,
+ Open Group Technical Standard Document Number C706, October 1997,
+ (supersedes C309 DCE: Remote Procedure Call 8/1994,
+ which was basis for ISO/IEC 11578:1996 specification)
+
+ A UUID has the following structure:
+
+ Field NDR Data Type Octet # Note
+ time_low unsigned long 0-3 The low field of the
+ timestamp.
+ time_mid unsigned short 4-5 The middle field of
+ the timestamp.
+ time_hi_and_version unsigned short 6-7 The high field of the
+ timestamp multiplexed
+ with the version number.
+ clock_seq_hi_and_reserved unsigned small 8 The high field of the
+ clock sequence multi-
+ plexed with the variant.
+ clock_seq_low unsigned small 9 The low field of the
+ clock sequence.
+ node character 10-15 The spatially unique node
+ identifier.
+*/
+
+#include "mysys_priv.h"
+#include <m_string.h>
+
+static my_bool my_uuid_inited= 0;
+static struct my_rnd_struct uuid_rand;
+static uint nanoseq;
+static ulonglong uuid_time= 0;
+static uchar uuid_suffix[2+6]; /* clock_seq and node */
+
+#ifdef THREAD
+pthread_mutex_t LOCK_uuid_generator;
+#endif
+
+/*
+ Number of 100-nanosecond intervals between
+ 1582-10-15 00:00:00.00 and 1970-01-01 00:00:00.00
+*/
+
+#define UUID_TIME_OFFSET ((ulonglong) 141427 * 24 * 60 * 60 * 1000 * 10)
+#define UUID_VERSION 0x1000
+#define UUID_VARIANT 0x8000
+
+
+/* Helper function */
+
+static void set_clock_seq()
+{
+ uint16 clock_seq= ((uint)(my_rnd(&uuid_rand)*16383)) | UUID_VARIANT;
+ int2store(uuid_suffix, clock_seq);
+}
+
+
+/**
+ Init structures needed for my_uuid
+
+ @func my_uuid_init()
+ @param seed1 Seed for random generator
+ @param seed2 Seed for random generator
+
+ @note
+ Seed1 & seed2 should NOT depend on clock. This is to be able to
+ generate a random mac address according to UUID specs.
+*/
+
+void my_uuid_init(ulong seed1, ulong seed2)
+{
+ uchar *mac= uuid_suffix+2;
+ ulonglong now;
+
+ if (my_uuid_inited)
+ return;
+ my_uuid_inited= 1;
+ now= my_getsystime();
+ nanoseq= 0;
+
+ if (my_gethwaddr(mac))
+ {
+ uint i;
+ /*
+ Generating random "hardware addr"
+
+ Specs explicitly specify that node identifier should NOT
+ correlate with a clock_seq value, so we use a separate
+ randominit() here.
+ */
+ /* purecov: begin inspected */
+ my_rnd_init(&uuid_rand, (ulong) (seed2+ now/2), now+random());
+ for (i=0; i < sizeof(mac); i++)
+ mac[i]= (uchar)(my_rnd(&uuid_rand)*255);
+ /* purecov: end */
+ }
+ my_rnd_init(&uuid_rand, (ulong) (seed1 + now), (ulong) (now/2+ getpid()));
+ set_clock_seq();
+ pthread_mutex_init(&LOCK_uuid_generator, MY_MUTEX_INIT_FAST);
+}
+
+
+/**
+ Create a global unique identifier (uuid)
+
+ @func my_uuid()
+ @param to Store uuid here. Must be of size MY_uuid_SIZE (16)
+*/
+
+void my_uuid(uchar *to)
+{
+ ulonglong tv;
+ uint32 time_low;
+ uint16 time_mid, time_hi_and_version;
+
+ DBUG_ASSERT(my_uuid_inited);
+
+ pthread_mutex_lock(&LOCK_uuid_generator);
+ tv= my_getsystime() + UUID_TIME_OFFSET + nanoseq;
+ if (unlikely(tv < uuid_time))
+ set_clock_seq();
+ else if (unlikely(tv == uuid_time))
+ {
+ /* special protection for low-res system clocks */
+ nanoseq++;
+ tv++;
+ }
+ else
+ {
+ if (nanoseq && likely(tv-nanoseq >= uuid_time))
+ {
+ tv-=nanoseq;
+ nanoseq=0;
+ }
+ }
+ uuid_time=tv;
+ pthread_mutex_unlock(&LOCK_uuid_generator);
+
+ time_low= (uint32) (tv & 0xFFFFFFFF);
+ time_mid= (uint16) ((tv >> 32) & 0xFFFF);
+ time_hi_and_version= (uint16) ((tv >> 48) | UUID_VERSION);
+
+ /*
+ Note, that the standard does NOT specify byte ordering in
+ multi-byte fields. it's implementation defined (but must be
+ the same for all fields).
+ */
+ int4store(to, time_low);
+ int2store(to+4, time_mid);
+ int2store(to+6, time_hi_and_version);
+ bmove(to+8, uuid_suffix, sizeof(uuid_suffix));
+}
+
+
+void my_uuid_end()
+{
+ if (my_uuid_inited)
+ {
+ my_uuid_inited= 0;
+ pthread_mutex_destroy(&LOCK_uuid_generator);
+ }
+}
diff --git a/mysys/my_write.c b/mysys/my_write.c
index c67b1d8f3f2..7f8b85c241e 100644
--- a/mysys/my_write.c
+++ b/mysys/my_write.c
@@ -25,7 +25,7 @@ size_t my_write(int Filedes, const uchar *Buffer, size_t Count, myf MyFlags)
size_t writenbytes, written;
uint errors;
DBUG_ENTER("my_write");
- DBUG_PRINT("my",("Fd: %d Buffer: 0x%lx Count: %lu MyFlags: %d",
+ DBUG_PRINT("my",("fd: %d Buffer: 0x%lx Count: %lu MyFlags: %d",
Filedes, (long) Buffer, (ulong) Count, MyFlags));
errors=0; written=0;
diff --git a/mysys/safemalloc.c b/mysys/safemalloc.c
index 7a2f448b2dc..6c8a080fbf3 100644
--- a/mysys/safemalloc.c
+++ b/mysys/safemalloc.c
@@ -430,6 +430,28 @@ void TERMINATE(FILE *file, uint flag)
}
+/*
+ Report where a piece of memory was allocated
+
+ This is usefull to call from withing a debugger
+*/
+
+void sf_malloc_report_allocated(void *memory)
+{
+ struct st_irem *irem;
+ for (irem= sf_malloc_root ; irem ; irem=irem->next)
+ {
+ char *data= (((char*) irem) + ALIGN_SIZE(sizeof(struct st_irem)) +
+ sf_malloc_prehunc);
+ if (data <= (char*) memory && (char*) memory <= data + irem->datasize)
+ {
+ printf("%u bytes at 0x%lx, allocated at line %u in '%s'\n",
+ irem->datasize, (long) data, irem->linenum, irem->filename);
+ break;
+ }
+ }
+}
+
/* Returns 0 if chunk is ok */
static int _checkchunk(register struct st_irem *irem, const char *filename,
diff --git a/mysys/thr_lock.c b/mysys/thr_lock.c
index 7f7be4835a5..ff70faaefea 100644
--- a/mysys/thr_lock.c
+++ b/mysys/thr_lock.c
@@ -24,7 +24,7 @@ Locks are prioritized according to:
The current lock types are:
-TL_READ # Low priority read
+TL_READ # Low priority read
TL_READ_WITH_SHARED_LOCKS
TL_READ_HIGH_PRIORITY # High priority read
TL_READ_NO_INSERT # Read without concurrent inserts
@@ -57,8 +57,12 @@ check_status:
In MyISAM this is a simple check if the insert can be done
at the end of the datafile.
update_status:
- Before a write lock is released, this function is called.
- In MyISAM this functions updates the count and length of the datafile
+ in thr_reschedule_write_lock(), when an insert delayed thread
+ downgrades TL_WRITE lock to TL_WRITE_DELAYED, to allow SELECT
+ threads to proceed.
+ A storage engine should also call update_status internally
+ in the ::external_lock(F_UNLCK) method.
+ In MyISAM and CSV this functions updates the length of the datafile.
get_status:
When one gets a lock this functions is called.
In MyISAM this stores the number of rows and size of the datafile
@@ -783,16 +787,6 @@ void thr_unlock(THR_LOCK_DATA *data)
}
else
lock->write.last=data->prev;
- if (lock_type >= TL_WRITE_CONCURRENT_INSERT)
- {
- if (lock->update_status)
- (*lock->update_status)(data->status_param);
- }
- else
- {
- if (lock->restore_status)
- (*lock->restore_status)(data->status_param);
- }
if (lock_type == TL_READ_NO_INSERT)
lock->read_no_write_count--;
data->type=TL_UNLOCK; /* Mark unlocked */
diff --git a/mysys/thr_mutex.c b/mysys/thr_mutex.c
index 49003553f0b..aa46021a938 100644
--- a/mysys/thr_mutex.c
+++ b/mysys/thr_mutex.c
@@ -54,7 +54,7 @@ void safe_mutex_global_init(void)
int safe_mutex_init(safe_mutex_t *mp,
const pthread_mutexattr_t *attr __attribute__((unused)),
const char *file,
- uint line)
+ uint line, const char *name)
{
bzero((char*) mp,sizeof(*mp));
pthread_mutex_init(&mp->global,MY_MUTEX_INIT_ERRCHK);
@@ -62,6 +62,8 @@ int safe_mutex_init(safe_mutex_t *mp,
/* Mark that mutex is initialized */
mp->file= file;
mp->line= line;
+ /* Skip the very common '&' prefix from the autogenerated name */
+ mp->name= name[0] == '&' ? name + 1 : name;
#ifdef SAFE_MUTEX_DETECT_DESTROY
/*
@@ -94,6 +96,8 @@ int safe_mutex_init(safe_mutex_t *mp,
int safe_mutex_lock(safe_mutex_t *mp, my_bool try_lock, const char *file, uint line)
{
int error;
+ DBUG_PRINT("mutex", ("%s (0x%lx) locking", mp->name ? mp->name : "Null",
+ (ulong) mp));
if (!mp->file)
{
fprintf(stderr,
@@ -150,22 +154,23 @@ int safe_mutex_lock(safe_mutex_t *mp, my_bool try_lock, const char *file, uint l
if (error || (error=pthread_mutex_lock(&mp->global)))
{
- fprintf(stderr,"Got error %d when trying to lock mutex at %s, line %d\n",
- error, file, line);
+ fprintf(stderr,"Got error %d when trying to lock mutex %s at %s, line %d\n",
+ error, mp->name, file, line);
fflush(stderr);
abort();
}
mp->thread= pthread_self();
if (mp->count++)
{
- fprintf(stderr,"safe_mutex: Error in thread libray: Got mutex at %s, \
-line %d more than 1 time\n", file,line);
+ fprintf(stderr,"safe_mutex: Error in thread libray: Got mutex %s at %s, "
+ "line %d more than 1 time\n", mp->name, file,line);
fflush(stderr);
abort();
}
mp->file= file;
- mp->line=line;
+ mp->line= line;
pthread_mutex_unlock(&mp->global);
+ DBUG_PRINT("mutex", ("%s (0x%lx) locked", mp->name, (ulong) mp));
return error;
}
@@ -173,18 +178,22 @@ line %d more than 1 time\n", file,line);
int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line)
{
int error;
+ DBUG_PRINT("mutex", ("%s (0x%lx) unlocking", mp->name, (ulong) mp));
pthread_mutex_lock(&mp->global);
if (mp->count == 0)
{
- fprintf(stderr,"safe_mutex: Trying to unlock mutex that wasn't locked at %s, line %d\n Last used at %s, line: %d\n",
- file,line,mp->file ? mp->file : "",mp->line);
+ fprintf(stderr,"safe_mutex: Trying to unlock mutex %s that wasn't locked at %s, line %d\n"
+ "Last used at %s, line: %d\n",
+ mp->name ? mp->name : "Null", file, line,
+ mp->file ? mp->file : "Null", mp->line);
fflush(stderr);
abort();
}
if (!pthread_equal(pthread_self(),mp->thread))
{
- fprintf(stderr,"safe_mutex: Trying to unlock mutex at %s, line %d that was locked by another thread at: %s, line: %d\n",
- file,line,mp->file,mp->line);
+ fprintf(stderr,"safe_mutex: Trying to unlock mutex %s at %s, line %d that was locked by "
+ "another thread at: %s, line: %d\n",
+ mp->name, file, line, mp->file, mp->line);
fflush(stderr);
abort();
}
@@ -197,7 +206,8 @@ int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line)
error=pthread_mutex_unlock(&mp->mutex);
if (error)
{
- fprintf(stderr,"safe_mutex: Got error: %d (%d) when trying to unlock mutex at %s, line %d\n", error, errno, file, line);
+ fprintf(stderr,"safe_mutex: Got error: %d (%d) when trying to unlock mutex %s at %s, "
+ "line %d\n", error, errno, mp->name, file, line);
fflush(stderr);
abort();
}
@@ -214,22 +224,24 @@ int safe_cond_wait(pthread_cond_t *cond, safe_mutex_t *mp, const char *file,
pthread_mutex_lock(&mp->global);
if (mp->count == 0)
{
- fprintf(stderr,"safe_mutex: Trying to cond_wait on a unlocked mutex at %s, line %d\n",file,line);
+ fprintf(stderr,"safe_mutex: Trying to cond_wait on a unlocked mutex %s at %s, line %d\n",
+ mp->name ? mp->name : "Null", file, line);
fflush(stderr);
abort();
}
if (!pthread_equal(pthread_self(),mp->thread))
{
- fprintf(stderr,"safe_mutex: Trying to cond_wait on a mutex at %s, line %d that was locked by another thread at: %s, line: %d\n",
- file,line,mp->file,mp->line);
+ fprintf(stderr,"safe_mutex: Trying to cond_wait on a mutex %s at %s, line %d that was "
+ "locked by another thread at: %s, line: %d\n",
+ mp->name, file, line, mp->file, mp->line);
fflush(stderr);
abort();
}
if (mp->count-- != 1)
{
- fprintf(stderr,"safe_mutex: Count was %d on locked mutex at %s, line %d\n",
- mp->count+1, file, line);
+ fprintf(stderr,"safe_mutex: Count was %d on locked mutex %s at %s, line %d\n",
+ mp->count+1, mp->name, file, line);
fflush(stderr);
abort();
}
@@ -238,7 +250,8 @@ int safe_cond_wait(pthread_cond_t *cond, safe_mutex_t *mp, const char *file,
pthread_mutex_lock(&mp->global);
if (error)
{
- fprintf(stderr,"safe_mutex: Got error: %d (%d) when doing a safe_mutex_wait at %s, line %d\n", error, errno, file, line);
+ fprintf(stderr,"safe_mutex: Got error: %d (%d) when doing a safe_mutex_wait on %s at %s, "
+ "line %d\n", error, errno, mp->name, file, line);
fflush(stderr);
abort();
}
@@ -246,8 +259,8 @@ int safe_cond_wait(pthread_cond_t *cond, safe_mutex_t *mp, const char *file,
if (mp->count++)
{
fprintf(stderr,
- "safe_mutex: Count was %d in thread 0x%lx when locking mutex at %s, line %d\n",
- mp->count-1, my_thread_dbug_id(), file, line);
+ "safe_mutex: Count was %d in thread 0x%lx when locking mutex %s at %s, line %d\n",
+ mp->count-1, my_thread_dbug_id(), mp->name, file, line);
fflush(stderr);
abort();
}
@@ -266,7 +279,8 @@ int safe_cond_timedwait(pthread_cond_t *cond, safe_mutex_t *mp,
pthread_mutex_lock(&mp->global);
if (mp->count != 1 || !pthread_equal(pthread_self(),mp->thread))
{
- fprintf(stderr,"safe_mutex: Trying to cond_wait at %s, line %d on a not hold mutex\n",file,line);
+ fprintf(stderr,"safe_mutex: Trying to cond_wait at %s, line %d on a not hold mutex %s\n",
+ file, line, mp->name ? mp->name : "Null");
fflush(stderr);
abort();
}
@@ -276,7 +290,10 @@ int safe_cond_timedwait(pthread_cond_t *cond, safe_mutex_t *mp,
#ifdef EXTRA_DEBUG
if (error && (error != EINTR && error != ETIMEDOUT && error != ETIME))
{
- fprintf(stderr,"safe_mutex: Got error: %d (%d) when doing a safe_mutex_timedwait at %s, line %d\n", error, errno, file, line);
+ fprintf(stderr,
+ "safe_mutex: Got error: %d (%d) when doing a safe_mutex_timedwait on %s at %s, "
+ "line %d\n",
+ error, errno, mp->name, file, line);
}
#endif
pthread_mutex_lock(&mp->global);
@@ -284,8 +301,10 @@ int safe_cond_timedwait(pthread_cond_t *cond, safe_mutex_t *mp,
if (mp->count++)
{
fprintf(stderr,
- "safe_mutex: Count was %d in thread 0x%lx when locking mutex at %s, line %d (error: %d (%d))\n",
- mp->count-1, my_thread_dbug_id(), file, line, error, error);
+ "safe_mutex: Count was %d in thread 0x%lx when locking mutex %s at %s, line %d "
+ "(error: %d (%d))\n",
+ mp->count-1, my_thread_dbug_id(), mp->name, file, line,
+ error, error);
fflush(stderr);
abort();
}
@@ -309,8 +328,9 @@ int safe_mutex_destroy(safe_mutex_t *mp, const char *file, uint line)
}
if (mp->count != 0)
{
- fprintf(stderr,"safe_mutex: Trying to destroy a mutex that was locked at %s, line %d at %s, line %d\n",
- mp->file,mp->line, file, line);
+ fprintf(stderr,"safe_mutex: Trying to destroy a mutex %s that was locked at %s, "
+ "line %d at %s, line %d\n",
+ mp->name, mp->file, mp->line, file, line);
fflush(stderr);
abort();
}
@@ -382,8 +402,8 @@ void safe_mutex_end(FILE *file __attribute__((unused)))
struct st_safe_mutex_info_t *ptr;
for (ptr= safe_mutex_root ; ptr ; ptr= ptr->next)
{
- fprintf(file, "\tMutex initiated at line %4u in '%s'\n",
- ptr->init_line, ptr->init_file);
+ fprintf(file, "\tMutex %s initiated at line %4u in '%s'\n",
+ ptr->name, ptr->init_line, ptr->init_file);
(void) fflush(file);
}
}
diff --git a/mysys/wqueue.c b/mysys/wqueue.c
new file mode 100644
index 00000000000..bfe9cba1235
--- /dev/null
+++ b/mysys/wqueue.c
@@ -0,0 +1,169 @@
+
+#include <wqueue.h>
+
+#define STRUCT_PTR(TYPE, MEMBER, a) \
+ (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))
+/*
+ Link a thread into double-linked queue of waiting threads.
+
+ SYNOPSIS
+ wqueue_link_into_queue()
+ wqueue pointer to the queue structure
+ thread pointer to the thread to be added to the queue
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ Queue is represented by a circular list of the thread structures
+ The list is double-linked of the type (**prev,*next), accessed by
+ a pointer to the last element.
+*/
+
+void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+ struct st_my_thread_var *last;
+ if (!(last= wqueue->last_thread))
+ {
+ /* Queue is empty */
+ thread->next= thread;
+ thread->prev= &thread->next;
+ }
+ else
+ {
+ thread->prev= last->next->prev;
+ last->next->prev= &thread->next;
+ thread->next= last->next;
+ last->next= thread;
+ }
+ wqueue->last_thread= thread;
+}
+
+
+/*
+ Add a thread to single-linked queue of waiting threads
+
+ SYNOPSIS
+ wqueue_add_to_queue()
+ wqueue pointer to the queue structure
+ thread pointer to the thread to be added to the queue
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ Queue is represented by a circular list of the thread structures
+ The list is single-linked of the type (*next), accessed by a pointer
+ to the last element.
+*/
+
+void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+ struct st_my_thread_var *last;
+ if (!(last= wqueue->last_thread))
+ thread->next= thread;
+ else
+ {
+ thread->next= last->next;
+ last->next= thread;
+ }
+ wqueue->last_thread= thread;
+}
+
+/*
+ Unlink a thread from double-linked queue of waiting threads
+
+ SYNOPSIS
+ wqueue_unlink_from_queue()
+ wqueue pointer to the queue structure
+ thread pointer to the thread to be removed from the queue
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ See NOTES for link_into_queue
+*/
+
+void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+ if (thread->next == thread)
+ /* The queue contains only one member */
+ wqueue->last_thread= NULL;
+ else
+ {
+ thread->next->prev= thread->prev;
+ *thread->prev= thread->next;
+ if (wqueue->last_thread == thread)
+ wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
+ thread->prev);
+ }
+ thread->next= NULL;
+}
+
+
+/*
+ Remove all threads from queue signaling them to proceed
+
+ SYNOPSIS
+ wqueue_realease_queue()
+ wqueue pointer to the queue structure
+ thread pointer to the thread to be added to the queue
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ See notes for add_to_queue
+ When removed from the queue each thread is signaled via condition
+ variable thread->suspend.
+*/
+
+void wqueue_release_queue(WQUEUE *wqueue)
+{
+ struct st_my_thread_var *last= wqueue->last_thread;
+ struct st_my_thread_var *next= last->next;
+ struct st_my_thread_var *thread;
+ do
+ {
+ thread= next;
+ pthread_cond_signal(&thread->suspend);
+ next= thread->next;
+ thread->next= NULL;
+ }
+ while (thread != last);
+ wqueue->last_thread= NULL;
+}
+
+
+/*
+ Add thread and wait
+
+ SYNOPSYS
+ wqueue_add_and_wait()
+ wqueue queue to add to
+ thread thread which is waiting
+ lock mutex need for the operation
+*/
+
+void wqueue_add_and_wait(WQUEUE *wqueue,
+ struct st_my_thread_var *thread,
+ pthread_mutex_t *lock)
+{
+ DBUG_ENTER("wqueue_add_and_wait");
+ DBUG_PRINT("enter",
+ ("thread: 0x%lx cond: 0x%lx mutex: 0x%lx",
+ (ulong) thread, (ulong) &thread->suspend, (ulong) lock));
+ wqueue_add_to_queue(wqueue, thread);
+ do
+ {
+ DBUG_PRINT("info", ("wait... cond: 0x%lx mutex: 0x%lx",
+ (ulong) &thread->suspend, (ulong) lock));
+ pthread_cond_wait(&thread->suspend, lock);
+ DBUG_PRINT("info", ("wait done cond: 0x%lx mutex: 0x%lx next: 0x%lx",
+ (ulong) &thread->suspend, (ulong) lock,
+ (ulong) thread->next));
+ }
+ while (thread->next);
+ DBUG_VOID_RETURN;
+}
diff --git a/plugin/daemon_example/daemon_example.cc b/plugin/daemon_example/daemon_example.cc
index af585bb4302..e683bf7ab7a 100644
--- a/plugin/daemon_example/daemon_example.cc
+++ b/plugin/daemon_example/daemon_example.cc
@@ -81,7 +81,7 @@ pthread_handler_t mysql_heartbeat(void *p)
1 failure (cannot happen)
*/
-static int daemon_example_plugin_init(void *p)
+static int daemon_example_plugin_init(void *p __attribute__ ((unused)))
{
DBUG_ENTER("daemon_example_plugin_init");
@@ -147,7 +147,7 @@ static int daemon_example_plugin_init(void *p)
*/
-static int daemon_example_plugin_deinit(void *p)
+static int daemon_example_plugin_deinit(void *p __attribute__ ((unused)))
{
DBUG_ENTER("daemon_example_plugin_deinit");
char buffer[HEART_STRING_BUFFER];
diff --git a/server-tools/instance-manager/CMakeLists.txt b/server-tools/instance-manager/CMakeLists.txt
index 2b9bce56ff7..4b9c386afe1 100755
--- a/server-tools/instance-manager/CMakeLists.txt
+++ b/server-tools/instance-manager/CMakeLists.txt
@@ -25,7 +25,7 @@ ADD_EXECUTABLE(mysqlmanager buffer.cc command.cc commands.cc guardian.cc instanc
instance_options.cc listener.cc log.cc manager.cc messages.cc mysql_connection.cc
mysqlmanager.cc options.cc parse.cc parse_output.cc priv.cc protocol.cc
thread_registry.cc user_map.cc IMService.cpp WindowsService.cpp
- user_management_commands.cc
+ user_management_commands.cc ../../mysys/my_rnd.c
../../sql/net_serv.cc ../../sql-common/pack.c ../../sql/password.c
../../sql/sql_state.c ../../sql-common/client.c ../../libmysql/get_password.c
../../libmysql/errmsg.c)
diff --git a/server-tools/instance-manager/listener.cc b/server-tools/instance-manager/listener.cc
index 4d8a33e7db1..4c32d10ab09 100644
--- a/server-tools/instance-manager/listener.cc
+++ b/server-tools/instance-manager/listener.cc
@@ -17,12 +17,12 @@
#pragma implementation
#endif
-#include "listener.h"
-
#include <my_global.h>
#include <mysql.h>
-#include <violite.h>
+#include <my_sys.h>
+#include "listener.h"
+#include <violite.h>
#include <sys/stat.h>
#ifndef __WIN__
#include <sys/un.h>
diff --git a/server-tools/instance-manager/mysql_connection.cc b/server-tools/instance-manager/mysql_connection.cc
index 3233b7513a1..5da6c09e0b1 100644
--- a/server-tools/instance-manager/mysql_connection.cc
+++ b/server-tools/instance-manager/mysql_connection.cc
@@ -17,13 +17,12 @@
#pragma implementation
#endif
-#include "mysql_connection.h"
-
-#include <m_string.h>
-#include <m_string.h>
#include <my_global.h>
#include <mysql.h>
#include <my_sys.h>
+#include "mysql_connection.h"
+
+#include <m_string.h>
#include <violite.h>
#include "command.h"
@@ -88,7 +87,7 @@ bool Mysql_connection::init()
{
ulong seed1= (ulong) &rand_st + rand();
ulong seed2= (ulong) rand() + (ulong) time(0);
- randominit(&rand_st, seed1, seed2);
+ my_rnd_init(&rand_st, seed1, seed2);
}
/* Fill scramble - server's random message used for handshake */
diff --git a/server-tools/instance-manager/mysql_connection.h b/server-tools/instance-manager/mysql_connection.h
index 56bbf76e146..933dd820372 100644
--- a/server-tools/instance-manager/mysql_connection.h
+++ b/server-tools/instance-manager/mysql_connection.h
@@ -17,7 +17,6 @@
#define INCLUDES_MYSQL_INSTANCE_MANAGER_MYSQL_CONNECTION_H
#include "thread_registry.h"
-#include <mysql_com.h>
#if defined(__GNUC__) && defined(USE_PRAGMA_INTERFACE)
#pragma interface
@@ -55,7 +54,7 @@ private:
Thread_registry *thread_registry;
User_map *user_map;
NET net;
- struct rand_struct rand_st;
+ struct my_rnd_struct rand_st;
char scramble[SCRAMBLE_LENGTH + 1];
uint status;
ulong client_capabilities;
diff --git a/sql-bench/example b/sql-bench/example
index df2a9b8be69..cb39fad819e 100644
--- a/sql-bench/example
+++ b/sql-bench/example
@@ -6,15 +6,14 @@ machine="Linux-x64"
# InnoDB tests
-./run-all-tests --suffix=-innodb --comments="Engine=InnoDB --innodb_log_file_size=100M" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --log
-
-./run-all-tests --suffix=_fast-innodb --comments="Engine=InnoDB --innodb_log_file_size=100M" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log
+./run-all-tests --suffix=-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_additional_mem_pool_size=20M --innodb_log_file_size=1000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1 --innodb_flush_method=O_DIRECT --innodb_log_files_in_group=2 --skip-innodb-doblewrite" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --log
+./run-all-tests --suffix=_fast-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_additional_mem_pool_size=20M --innodb_log_file_size=1000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1 --innodb_flush_method=O_DIRECT --innodb_log_files_in_group=2 --skip-innodb-doblewrite" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log
# MyISAM tests
-./run-all-tests --suffix=-myisam --comments="Engine=MyISAM key_buffer_size=16M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --log
+./run-all-tests --suffix=-myisam --comments="Engine=MyISAM key_buffer_size=256M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --log
-./run-all-tests --suffix=_fast-myisam --comments="Engine=MyISAM key_buffer_size=16M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log
+./run-all-tests --suffix=_fast-myisam --comments="Engine=MyISAM key_buffer_size=256M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log
compare-results --relative output/RUN-mysql-myisam-* output/RUN-mysql_fast-myisam* output/RUN-mysql*
diff --git a/sql/field.h b/sql/field.h
index 9f76deb18bc..53788560e68 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -237,11 +237,11 @@ public:
return test(record[(uint) (null_ptr -table->record[0])] &
null_bit);
}
- inline bool is_null_in_record_with_offset(my_ptrdiff_t offset)
+ inline bool is_null_in_record_with_offset(my_ptrdiff_t col_offset)
{
if (!null_ptr)
return 0;
- return test(null_ptr[offset] & null_bit);
+ return test(null_ptr[col_offset] & null_bit);
}
inline void set_null(my_ptrdiff_t row_offset= 0)
{ if (null_ptr) null_ptr[row_offset]|= null_bit; }
@@ -337,7 +337,7 @@ public:
Number of copied bytes (excluding padded zero bytes -- see above).
*/
- virtual uint get_key_image(uchar *buff, uint length, imagetype type)
+ virtual uint get_key_image(uchar *buff, uint length, imagetype type_arg)
{
get_image(buff, length, &my_charset_bin);
return length;
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 61c8ca3b256..1c1a3b8d9a3 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -461,7 +461,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
ref_pos= ref_buff;
quick_select=select && select->quick;
record=0;
- flag= ((!indexfile && file->ha_table_flags() & HA_REC_NOT_IN_SEQ)
+ flag= ((!indexfile && (file->ha_table_flags() & HA_REC_NOT_IN_SEQ))
|| quick_select);
if (indexfile || flag)
ref_pos= &file->ref[0];
@@ -1161,7 +1161,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
offset= rec_length-res_length;
maxcount= (ulong) (param->keys/((uint) (Tb-Fb) +1));
to_start_filepos= my_b_tell(to_file);
- strpos= (uchar*) sort_buffer;
+ strpos= sort_buffer;
org_max_rows=max_rows= param->max_rows;
/* The following will fire if there is not enough space in sort_buffer */
@@ -1185,7 +1185,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
buffpek->base= strpos;
buffpek->max_keys= maxcount;
strpos+= (uint) (error= (int) read_to_buffer(from_file, buffpek,
- rec_length));
+ rec_length));
if (error == -1)
goto err; /* purecov: inspected */
buffpek->max_keys= buffpek->mem_count; // If less data in buffers than expected
@@ -1274,7 +1274,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
}
}
buffpek= (BUFFPEK*) queue_top(&queue);
- buffpek->base= sort_buffer;
+ buffpek->base= (uchar*) sort_buffer;
buffpek->max_keys= param->keys;
/*
@@ -1314,7 +1314,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
strpos != end ;
strpos+= rec_length)
{
- if (my_b_write(to_file, (uchar *) strpos, res_length))
+ if (my_b_write(to_file, strpos, res_length))
{
error=1; goto err;
}
diff --git a/sql/gen_lex_hash.cc b/sql/gen_lex_hash.cc
index c9c1813a429..7b2395673eb 100644
--- a/sql/gen_lex_hash.cc
+++ b/sql/gen_lex_hash.cc
@@ -486,8 +486,8 @@ int main(int argc,char **argv)
printf("\nstatic unsigned int symbols_max_len=%d;\n\n", max_len2);
printf("\
-static inline SYMBOL *get_hash_symbol(const char *s,\n\
- unsigned int len,bool function)\n\
+static SYMBOL *get_hash_symbol(const char *s,\n\
+ unsigned int len,bool function)\n\
{\n\
register uchar *hash_map;\n\
register const char *cur_str= s;\n\
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
index 3f1634a6ad1..bd0004f92cb 100644
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@@ -4759,11 +4759,14 @@ void ha_partition::get_dynamic_partition_info(PARTITION_INFO *stat_info,
about this call). We pass this along to all underlying MyISAM handlers
and ignore it for the rest.
- HA_EXTRA_PREPARE_FOR_DELETE:
+ HA_EXTRA_PREPARE_FOR_DROP:
Only used by MyISAM, called in preparation for a DROP TABLE.
It's used mostly by Windows that cannot handle dropping an open file.
On other platforms it has the same effect as HA_EXTRA_FORCE_REOPEN.
+ HA_EXTRA_PREPARE_FOR_RENAME:
+ Informs the handler we are about to attempt a rename of the table.
+
HA_EXTRA_READCHECK:
HA_EXTRA_NO_READCHECK:
Only one call to HA_EXTRA_NO_READCHECK from ha_open where it says that
@@ -4898,14 +4901,15 @@ int ha_partition::extra(enum ha_extra_function operation)
}
/* Category 3), used by MyISAM handlers */
- case HA_EXTRA_PREPARE_FOR_DELETE:
- DBUG_RETURN(prepare_for_delete());
+ case HA_EXTRA_PREPARE_FOR_RENAME:
+ DBUG_RETURN(prepare_for_rename());
break;
case HA_EXTRA_NORMAL:
case HA_EXTRA_QUICK:
case HA_EXTRA_NO_READCHECK:
case HA_EXTRA_PREPARE_FOR_UPDATE:
case HA_EXTRA_FORCE_REOPEN:
+ case HA_EXTRA_PREPARE_FOR_DROP:
case HA_EXTRA_FLUSH_CACHE:
{
if (m_myisam)
@@ -5064,24 +5068,24 @@ void ha_partition::prepare_extra_cache(uint cachesize)
0 Success
*/
-int ha_partition::prepare_for_delete()
+int ha_partition::prepare_for_rename()
{
int result= 0, tmp;
handler **file;
- DBUG_ENTER("ha_partition::prepare_for_delete()");
+ DBUG_ENTER("ha_partition::prepare_for_rename()");
if (m_new_file != NULL)
{
for (file= m_new_file; *file; file++)
- if ((tmp= (*file)->extra(HA_EXTRA_PREPARE_FOR_DELETE)))
+ if ((tmp= (*file)->extra(HA_EXTRA_PREPARE_FOR_RENAME)))
result= tmp;
for (file= m_reorged_file; *file; file++)
- if ((tmp= (*file)->extra(HA_EXTRA_PREPARE_FOR_DELETE)))
+ if ((tmp= (*file)->extra(HA_EXTRA_PREPARE_FOR_RENAME)))
result= tmp;
DBUG_RETURN(result);
}
- DBUG_RETURN(loop_extra(HA_EXTRA_PREPARE_FOR_DELETE));
+ DBUG_RETURN(loop_extra(HA_EXTRA_PREPARE_FOR_RENAME));
}
/*
diff --git a/sql/ha_partition.h b/sql/ha_partition.h
index 9d10aea2b6f..ac00581fae0 100644
--- a/sql/ha_partition.h
+++ b/sql/ha_partition.h
@@ -211,7 +211,7 @@ public:
}
virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share);
private:
- int prepare_for_delete();
+ int prepare_for_rename();
int copy_partitions(ulonglong *copied, ulonglong *deleted);
void cleanup_new_partition(uint part_count);
int prepare_new_partition(TABLE *table, HA_CREATE_INFO *create_info,
diff --git a/sql/handler.cc b/sql/handler.cc
index 0715ef3cc9c..d70eca512ef 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -26,7 +26,7 @@
#include "mysql_priv.h"
#include "rpl_filter.h"
#include <myisampack.h>
-#include <errno.h>
+#include "myisam.h"
#ifdef WITH_PARTITION_STORAGE_ENGINE
#include "ha_partition.h"
@@ -288,7 +288,8 @@ handler *get_ha_partition(partition_info *part_info)
@retval
!=0 Error
*/
-static int ha_init_errors(void)
+
+int ha_init_errors(void)
{
#define SETMSG(nr, msg) errmsgs[(nr) - HA_ERR_FIRST]= (msg)
const char **errmsgs;
@@ -501,9 +502,6 @@ int ha_init()
int error= 0;
DBUG_ENTER("ha_init");
- if (ha_init_errors())
- DBUG_RETURN(1);
-
DBUG_ASSERT(total_ha < MAX_HA);
/*
Check if there is a transaction-capable storage engine besides the
@@ -2528,10 +2526,10 @@ int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
int ha_enable_transaction(THD *thd, bool on)
{
int error=0;
-
DBUG_ENTER("ha_enable_transaction");
- thd->transaction.on= on;
- if (on)
+ DBUG_PRINT("info", ("on: %d", (int) on));
+
+ if ((thd->transaction.on= on))
{
/*
Now all storage engines should have transaction handling enabled.
@@ -2834,6 +2832,7 @@ int ha_change_key_cache(KEY_CACHE *old_key_cache,
}
+
/**
Try to discover one table from handler(s).
diff --git a/sql/handler.h b/sql/handler.h
index c5b867e315f..d7183449ad5 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -20,6 +20,7 @@
#pragma interface /* gcc class implementation */
#endif
+#include <my_handler.h>
#include <ft_global.h>
#include <keycache.h>
@@ -236,8 +237,6 @@
#define HA_LEX_CREATE_TMP_TABLE 1
#define HA_LEX_CREATE_IF_NOT_EXISTS 2
#define HA_LEX_CREATE_TABLE_LIKE 4
-#define HA_OPTION_NO_CHECKSUM (1L << 17)
-#define HA_OPTION_NO_DELAY_KEY_WRITE (1L << 18)
#define HA_MAX_REC_LENGTH 65535
/* Table caching type */
@@ -322,6 +321,7 @@ enum enum_binlog_command {
#define HA_CREATE_USED_CONNECTION (1L << 18)
#define HA_CREATE_USED_KEY_BLOCK_SIZE (1L << 19)
#define HA_CREATE_USED_TRANSACTIONAL (1L << 20)
+#define HA_CREATE_USED_PAGE_CHECKSUM (1L << 21)
typedef ulonglong my_xid; // this line is the same as in log_event.h
#define MYSQL_XID_PREFIX "MySQLXid"
@@ -818,6 +818,7 @@ typedef struct st_ha_create_information
bool frm_only; /* 1 if no ha_create_table() */
bool varchar; /* 1 if table has a VARCHAR */
enum ha_storage_media storage_media; /* DEFAULT, DISK or MEMORY */
+ enum ha_choice page_checksum; /* If we have page_checksums */
} HA_CREATE_INFO;
@@ -892,7 +893,7 @@ typedef struct st_ha_check_opt
ulong sort_buffer_size;
uint flags; /* isam layer flags (e.g. for myisamchk) */
uint sql_flags; /* sql layer flags - for something myisamchk cannot do */
- KEY_CACHE *key_cache; /* new key cache when changing key cache */
+ KEY_CACHE *key_cache; /* new key cache when changing key cache */
void init();
} HA_CHECK_OPT;
@@ -1325,14 +1326,18 @@ public:
}
virtual int read_first_row(uchar *buf, uint primary_key);
/**
- The following function is only needed for tables that may be temporary
- tables during joins.
+ The following 3 function is only needed for tables that may be
+ internal temporary tables during joins.
*/
- virtual int restart_rnd_next(uchar *buf, uchar *pos)
+ virtual int remember_rnd_pos()
+ { return HA_ERR_WRONG_COMMAND; }
+ virtual int restart_rnd_next(uchar *buf)
{ return HA_ERR_WRONG_COMMAND; }
virtual int rnd_same(uchar *buf, uint inx)
{ return HA_ERR_WRONG_COMMAND; }
- virtual ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key)
+
+ virtual ha_rows records_in_range(uint inx, key_range *min_key,
+ key_range *max_key)
{ return (ha_rows) 10; }
virtual void position(const uchar *record)=0;
virtual int info(uint)=0; // see my_base.h for full description
@@ -1477,7 +1482,8 @@ public:
*/
virtual const char **bas_ext() const =0;
- virtual int get_default_no_partitions(HA_CREATE_INFO *info) { return 1;}
+ virtual int get_default_no_partitions(HA_CREATE_INFO *create_info)
+ { return 1;}
virtual void set_auto_partitions(partition_info *part_info) { return; }
virtual bool get_no_parts(const char *name,
uint *no_parts)
@@ -1537,7 +1543,8 @@ public:
#define CHF_INDEX_FLAG 3
virtual int create_handler_files(const char *name, const char *old_name,
- int action_flag, HA_CREATE_INFO *info)
+ int action_flag,
+ HA_CREATE_INFO *create_info)
{ return FALSE; }
virtual int change_partitions(HA_CREATE_INFO *create_info,
@@ -1827,6 +1834,7 @@ static inline bool ha_storage_engine_is_enabled(const handlerton *db_type)
}
/* basic stuff */
+int ha_init_errors(void);
int ha_init(void);
int ha_end(void);
int ha_initialize_handlerton(st_plugin_int *plugin);
diff --git a/sql/item.h b/sql/item.h
index b98389bc8d4..4ac3b2959b3 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -1945,9 +1945,9 @@ public:
class Item_partition_func_safe_string: public Item_string
{
public:
- Item_partition_func_safe_string(const char *name, uint length,
+ Item_partition_func_safe_string(const char *name_arg, uint length,
CHARSET_INFO *cs= NULL):
- Item_string(name, length, cs)
+ Item_string(name_arg, length, cs)
{}
};
@@ -1967,8 +1967,8 @@ public:
class Item_blob :public Item_partition_func_safe_string
{
public:
- Item_blob(const char *name, uint length) :
- Item_partition_func_safe_string(name, length, &my_charset_bin)
+ Item_blob(const char *name_arg, uint length) :
+ Item_partition_func_safe_string(name_arg, length, &my_charset_bin)
{ max_length= length; }
enum Type type() const { return TYPE_HOLDER; }
enum_field_types field_type() const { return MYSQL_TYPE_BLOB; }
@@ -1996,8 +1996,8 @@ class Item_return_int :public Item_int
enum_field_types int_field_type;
public:
Item_return_int(const char *name_arg, uint length,
- enum_field_types field_type_arg, longlong value= 0)
- :Item_int(name_arg, value, length), int_field_type(field_type_arg)
+ enum_field_types field_type_arg, longlong value_arg= 0)
+ :Item_int(name_arg, value_arg, length), int_field_type(field_type_arg)
{
unsigned_flag=1;
}
diff --git a/sql/item_func.cc b/sql/item_func.cc
index be8966598d6..007853b564e 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -32,6 +32,7 @@
#include <hash.h>
#include <time.h>
#include <ft_global.h>
+#include <my_bit.h>
#include "sp_head.h"
#include "sp_rcontext.h"
@@ -2116,7 +2117,7 @@ void Item_func_rand::seed_random(Item *arg)
args[0] is a constant.
*/
uint32 tmp= (uint32) arg->val_int();
- randominit(rand, (uint32) (tmp*0x10001L+55555555L),
+ my_rnd_init(rand, (uint32) (tmp*0x10001L+55555555L),
(uint32) (tmp*0x10000001L));
}
@@ -2136,7 +2137,7 @@ bool Item_func_rand::fix_fields(THD *thd,Item **ref)
No need to send a Rand log event if seed was given eg: RAND(seed),
as it will be replicated in the query as such.
*/
- if (!rand && !(rand= (struct rand_struct*)
+ if (!rand && !(rand= (struct my_rnd_struct*)
thd->stmt_arena->alloc(sizeof(*rand))))
return TRUE;
diff --git a/sql/item_func.h b/sql/item_func.h
index e09b584de95..4981d1694e0 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -684,7 +684,7 @@ public:
class Item_func_rand :public Item_real_func
{
- struct rand_struct *rand;
+ struct my_rnd_struct *rand;
public:
Item_func_rand(Item *a) :Item_real_func(a), rand(0) {}
Item_func_rand() :Item_real_func() {}
@@ -1431,7 +1431,6 @@ public:
/* for fulltext search */
-#include <ft_global.h>
class Item_func_match :public Item_real_func
{
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 42314872997..bdcc488175e 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -3355,7 +3355,7 @@ err:
which was basis for ISO/IEC 11578:1996 specification)
*/
-static struct rand_struct uuid_rand;
+static struct my_rnd_struct uuid_rand;
static uint nanoseq;
static ulonglong uuid_time=0;
static char clock_seq_and_node_str[]="-0000-000000000000";
@@ -3405,9 +3405,9 @@ String *Item_func_uuid::val_str(String *str)
generating random "hardware addr"
and because specs explicitly specify that it should NOT correlate
with a clock_seq value (initialized random below), we use a separate
- randominit() here
+ my_rnd_init() here
*/
- randominit(&uuid_rand, tmp + (ulong) thd, tmp + (ulong)global_query_id);
+ my_rnd_init(&uuid_rand, tmp + (ulong) thd, tmp + (ulong)global_query_id);
for (i=0; i < (int)sizeof(mac); i++)
mac[i]=(uchar)(my_rnd(&uuid_rand)*255);
/* purecov: end */
@@ -3418,7 +3418,7 @@ String *Item_func_uuid::val_str(String *str)
*--s=_dig_vec_lower[mac[i] & 15];
*--s=_dig_vec_lower[mac[i] >> 4];
}
- randominit(&uuid_rand, tmp + (ulong) server_start_time,
+ my_rnd_init(&uuid_rand, tmp + (ulong) server_start_time,
tmp + (ulong) thd->status_var.bytes_sent);
set_clock_seq_str();
}
diff --git a/sql/lex.h b/sql/lex.h
index 0b601de772a..d97f0718913 100644
--- a/sql/lex.h
+++ b/sql/lex.h
@@ -386,6 +386,8 @@ static SYMBOL symbols[] = {
{ "OWNER", SYM(OWNER_SYM)},
{ "PACK_KEYS", SYM(PACK_KEYS_SYM)},
{ "PARSER", SYM(PARSER_SYM)},
+ { "PAGE", SYM(PAGE_SYM)},
+ { "PAGE_CHECKSUM", SYM(PAGE_CHECKSUM_SYM)},
{ "PARTIAL", SYM(PARTIAL)},
{ "PAGE", SYM(PAGE_SYM)},
{ "PARTITION", SYM(PARTITION_SYM)},
@@ -524,7 +526,8 @@ static SYMBOL symbols[] = {
{ "SWITCHES", SYM(SWITCHES_SYM)},
{ "TABLE", SYM(TABLE_SYM)},
{ "TABLES", SYM(TABLES)},
- { "TABLESPACE", SYM(TABLESPACE)},
+ { "TABLESPACE", SYM(TABLESPACE)},
+ { "TABLE_CHECKSUM", SYM(TABLE_CHECKSUM_SYM)},
{ "TEMPORARY", SYM(TEMPORARY)},
{ "TEMPTABLE", SYM(TEMPTABLE_SYM)},
{ "TERMINATED", SYM(TERMINATED)},
@@ -541,6 +544,7 @@ static SYMBOL symbols[] = {
{ "TO", SYM(TO_SYM)},
{ "TRAILING", SYM(TRAILING)},
{ "TRANSACTION", SYM(TRANSACTION_SYM)},
+ { "TRANSACTIONAL", SYM(TRANSACTIONAL_SYM)},
{ "TRIGGER", SYM(TRIGGER_SYM)},
{ "TRIGGERS", SYM(TRIGGERS_SYM)},
{ "TRUE", SYM(TRUE_SYM)},
diff --git a/sql/lock.cc b/sql/lock.cc
index a0d6faa6604..bb1c2c46284 100644
--- a/sql/lock.cc
+++ b/sql/lock.cc
@@ -34,8 +34,8 @@
This is followed by a call to thr_multi_lock() for all tables.
- When statement is done, we call mysql_unlock_tables().
- This will call thr_multi_unlock() followed by
- table_handler->external_lock(thd, F_UNLCK) for each table.
+ table_handler->external_lock(thd, F_UNLCK) followed by
+ thr_multi_unlock() for each table.
- Note that mysql_unlock_tables() may be called several times as
MySQL in some cases can free some tables earlier than others.
@@ -385,10 +385,10 @@ static int lock_external(THD *thd, TABLE **tables, uint count)
void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock)
{
DBUG_ENTER("mysql_unlock_tables");
- if (sql_lock->lock_count)
- thr_multi_unlock(sql_lock->locks,sql_lock->lock_count);
if (sql_lock->table_count)
VOID(unlock_external(thd,sql_lock->table,sql_lock->table_count));
+ if (sql_lock->lock_count)
+ thr_multi_unlock(sql_lock->locks,sql_lock->lock_count);
my_free((uchar*) sql_lock,MYF(0));
DBUG_VOID_RETURN;
}
diff --git a/sql/log.cc b/sql/log.cc
index ba457b3e10b..a0c35059902 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -2287,6 +2287,11 @@ bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
0, MYF(MY_WME | MY_WAIT_IF_FULL)))
{
+ /*
+ TODO: all operations creating/deleting the index file or a log, should
+ call my_sync_dir() or my_sync_dir_by_file() to be durable.
+ TODO: file creation should be done with my_create() not my_open().
+ */
if (index_file_nr >= 0)
my_close(index_file_nr,MYF(0));
return TRUE;
diff --git a/sql/log_event_old.h b/sql/log_event_old.h
index 81e55097905..4ae0b00aeac 100644
--- a/sql/log_event_old.h
+++ b/sql/log_event_old.h
@@ -112,9 +112,9 @@ public:
};
#if !defined(MYSQL_CLIENT)
- Write_rows_log_event_old(THD *thd, TABLE *table, ulong table_id,
+ Write_rows_log_event_old(THD *thd_arg, TABLE *table, ulong table_id,
MY_BITMAP const *cols, bool is_transactional)
- : Write_rows_log_event(thd, table, table_id, cols, is_transactional)
+ : Write_rows_log_event(thd_arg, table, table_id, cols, is_transactional)
{
}
#endif
@@ -158,9 +158,9 @@ public:
};
#if !defined(MYSQL_CLIENT)
- Update_rows_log_event_old(THD *thd, TABLE *table, ulong table_id,
+ Update_rows_log_event_old(THD *thd_arg, TABLE *table, ulong table_id,
MY_BITMAP const *cols, bool is_transactional)
- : Update_rows_log_event(thd, table, table_id, cols, is_transactional),
+ : Update_rows_log_event(thd_arg, table, table_id, cols, is_transactional),
m_after_image(NULL), m_memory(NULL)
{
}
@@ -205,9 +205,9 @@ public:
};
#if !defined(MYSQL_CLIENT)
- Delete_rows_log_event_old(THD *thd, TABLE *table, ulong table_id,
+ Delete_rows_log_event_old(THD *thd_arg, TABLE *table, ulong table_id,
MY_BITMAP const *cols, bool is_transactional)
- : Delete_rows_log_event(thd, table, table_id, cols, is_transactional),
+ : Delete_rows_log_event(thd_arg, table, table_id, cols, is_transactional),
m_after_image(NULL), m_memory(NULL)
{
}
diff --git a/sql/my_lock.c b/sql/my_lock.c
index f66d7282f72..276259b106a 100644
--- a/sql/my_lock.c
+++ b/sql/my_lock.c
@@ -25,7 +25,15 @@
#include <thr_alarm.h>
#include <errno.h>
- /* Lock a part of a file */
+/**
+ @breif Lock a part of a file
+
+ @note
+ This works like mysys/my_lock.c, with the exception that this function
+ uses the thr_alarm() to break long lock statements.
+ (mysys can't use thr_alarm() as by default the alarm handling doesn't
+ exists)
+*/
int my_lock(File fd,int locktype,my_off_t start,my_off_t length,myf MyFlags)
{
@@ -36,29 +44,34 @@ int my_lock(File fd,int locktype,my_off_t start,my_off_t length,myf MyFlags)
DBUG_ENTER("my_lock");
DBUG_PRINT("my",("Fd: %d Op: %d start: %ld Length: %ld MyFlags: %d",
fd,locktype,(ulong) start,(ulong) length,MyFlags));
- if (my_disable_locking)
+ if (my_disable_locking && ! (MyFlags & MY_FORCE_LOCK))
DBUG_RETURN(0); /* purecov: inspected */
+
m_lock.l_type=(short) locktype;
m_lock.l_whence=0L;
m_lock.l_start=(long) start;
m_lock.l_len=(long) length;
- wait_for_alarm=(MyFlags & MY_DONT_WAIT ? MY_HOW_OFTEN_TO_ALARM :
- (uint) 12*60*60);
if (fcntl(fd,F_SETLK,&m_lock) != -1) /* Check if we can lock */
DBUG_RETURN(0); /* Ok, file locked */
- DBUG_PRINT("info",("Was locked, trying with alarm"));
- if (!thr_alarm(&alarmed,wait_for_alarm,&alarm_buff))
- {
- int value;
- while ((value=fcntl(fd,F_SETLKW,&m_lock)) && !thr_got_alarm(&alarmed) &&
- errno == EINTR) ;
- thr_end_alarm(&alarmed);
- if (value != -1)
- DBUG_RETURN(0);
- }
- else
+
+ if (!(MyFlags & MY_NO_WAIT))
{
- errno=EINTR;
+ wait_for_alarm= (MyFlags & MY_SHORT_WAIT ? MY_HOW_OFTEN_TO_ALARM :
+ (uint) 12*60*60);
+ DBUG_PRINT("info",("Was locked, trying with alarm"));
+ if (!thr_alarm(&alarmed,wait_for_alarm,&alarm_buff))
+ {
+ int value;
+ while ((value=fcntl(fd,F_SETLKW,&m_lock)) && !thr_got_alarm(&alarmed) &&
+ errno == EINTR) ;
+ thr_end_alarm(&alarmed);
+ if (value != -1)
+ DBUG_RETURN(0);
+ }
+ else
+ {
+ errno=EINTR;
+ }
}
if (errno == EINTR || errno == EACCES)
my_errno=EAGAIN; /* Easier to check for this */
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index 0adf90dc258..b1880e4be70 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -1837,7 +1837,7 @@ extern ulong max_connections,max_connect_errors, connect_timeout;
extern ulong slave_net_timeout, slave_trans_retries;
extern uint max_user_connections;
extern ulong what_to_log,flush_time;
-extern ulong query_buff_size, thread_stack;
+extern ulong query_buff_size;
extern ulong max_prepared_stmt_count, prepared_stmt_count;
extern ulong binlog_cache_size, max_binlog_cache_size, open_files_limit;
extern ulong max_binlog_size, max_relay_log_size;
@@ -1941,7 +1941,10 @@ extern struct system_variables global_system_variables;
#ifdef MYSQL_SERVER
extern struct system_variables max_system_variables;
extern struct system_status_var global_status_var;
-extern struct rand_struct sql_rand;
+extern struct my_rnd_struct sql_rand;
+
+extern handlerton *maria_hton; /* @todo remove, make it static in ha_maria.cc
+ currently it's needed for sql_delete.cc */
extern const char *opt_date_time_formats[];
extern KNOWN_DATE_TIME_FORMAT known_date_time_formats[];
@@ -1959,10 +1962,11 @@ extern uint sql_command_flags[];
extern TYPELIB log_output_typelib;
/* optional things, have_* variables */
+extern SHOW_COMP_OPTION have_maria_db;
extern SHOW_COMP_OPTION have_community_features;
-
extern handlerton *partition_hton;
extern handlerton *myisam_hton;
+extern handlerton *maria_hton;
extern handlerton *heap_hton;
extern SHOW_COMP_OPTION have_ssl, have_symlink, have_dlopen;
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 2471ab3f81f..78a888ca4d1 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -16,6 +16,7 @@
#include "mysql_priv.h"
#include <m_ctype.h>
#include <my_dir.h>
+#include <my_bit.h>
#include "slave.h"
#include "rpl_mi.h"
#include "sql_repl.h"
@@ -353,6 +354,7 @@ static char *default_storage_engine_str;
static char compiled_default_collation_name[]= MYSQL_DEFAULT_COLLATION_NAME;
static I_List<THD> thread_cache;
static double long_query_time;
+static ulong opt_my_crc_dbug_check;
static pthread_cond_t COND_thread_cache, COND_flush_thread_cache;
@@ -455,7 +457,7 @@ uint volatile thread_count, thread_running;
ulonglong thd_startup_options;
ulong back_log, connect_timeout, concurrency, server_id;
ulong table_cache_size, table_def_size;
-ulong thread_stack, what_to_log;
+ulong what_to_log;
ulong query_buff_size, slow_launch_time, slave_open_temp_tables;
ulong open_files_limit, max_binlog_size, max_relay_log_size;
ulong slave_net_timeout, slave_trans_retries;
@@ -565,7 +567,7 @@ pthread_key(MEM_ROOT**,THR_MALLOC);
pthread_key(THD*, THR_THD);
pthread_mutex_t LOCK_mysql_create_db, LOCK_Acl, LOCK_open, LOCK_thread_count,
LOCK_mapped_file, LOCK_status, LOCK_global_read_lock,
- LOCK_error_log, LOCK_uuid_generator,
+ LOCK_error_log,
LOCK_delayed_insert, LOCK_delayed_status, LOCK_delayed_create,
LOCK_crypt, LOCK_bytes_sent, LOCK_bytes_received,
LOCK_global_system_variables,
@@ -619,7 +621,7 @@ static char **defaults_argv;
static char *opt_bin_logname;
static my_socket unix_sock,ip_sock;
-struct rand_struct sql_rand; ///< used by sql_class.cc:THD::THD()
+struct my_rnd_struct sql_rand; ///< used by sql_class.cc:THD::THD()
#ifndef EMBEDDED_LIBRARY
struct passwd *user_info;
@@ -1274,6 +1276,7 @@ void clean_up(bool print_message)
/* do the broadcast inside the lock to ensure that my_end() is not called */
(void) pthread_cond_broadcast(&COND_thread_count);
(void) pthread_mutex_unlock(&LOCK_thread_count);
+ my_uuid_end();
/*
The following lines may never be executed as the main thread may have
@@ -1345,7 +1348,6 @@ static void clean_up_mutexes()
(void) pthread_mutex_destroy(&LOCK_global_system_variables);
(void) rwlock_destroy(&LOCK_system_variables_hash);
(void) pthread_mutex_destroy(&LOCK_global_read_lock);
- (void) pthread_mutex_destroy(&LOCK_uuid_generator);
(void) pthread_mutex_destroy(&LOCK_prepared_stmt_count);
(void) pthread_cond_destroy(&COND_thread_count);
(void) pthread_cond_destroy(&COND_refresh);
@@ -2209,7 +2211,7 @@ or misconfigured. This error can also be caused by malfunctioning hardware.\n",
We will try our best to scrape up some info that will hopefully help diagnose\n\
the problem, but since we have already crashed, something is definitely wrong\n\
and this may fail.\n\n");
- fprintf(stderr, "key_buffer_size=%lu\n",
+ fprintf(stderr, "key_buffer_size=%lu\n",
(ulong) dflt_key_cache->key_cache_mem_size);
fprintf(stderr, "read_buffer_size=%ld\n", (long) global_system_variables.read_buff_size);
fprintf(stderr, "max_used_connections=%lu\n", max_used_connections);
@@ -2241,7 +2243,7 @@ the thread stack. Please read http://dev.mysql.com/doc/mysql/en/linux.html\n\n",
{
fprintf(stderr,"thd: 0x%lx\n",(long) thd);
print_stacktrace(thd ? (uchar*) thd->thread_stack : (uchar*) 0,
- thread_stack);
+ my_thread_stack_size);
}
if (thd)
{
@@ -2400,9 +2402,9 @@ static void start_signal_handler(void)
Peculiar things with ia64 platforms - it seems we only have half the
stack size in reality, so we have to double it here
*/
- pthread_attr_setstacksize(&thr_attr,thread_stack*2);
+ pthread_attr_setstacksize(&thr_attr,my_thread_stack_size*2);
#else
- pthread_attr_setstacksize(&thr_attr,thread_stack);
+ pthread_attr_setstacksize(&thr_attr,my_thread_stack_size);
#endif
#endif
@@ -2569,6 +2571,8 @@ extern "C" int my_message_sql(uint error, const char *str, myf MyFlags);
int my_message_sql(uint error, const char *str, myf MyFlags)
{
THD *thd;
+ MYSQL_ERROR::enum_warning_level level;
+ sql_print_message_func func;
DBUG_ENTER("my_message_sql");
DBUG_PRINT("error", ("error: %u message: '%s'", error, str));
/*
@@ -2576,16 +2580,36 @@ int my_message_sql(uint error, const char *str, myf MyFlags)
will be fixed
DBUG_ASSERT(error != 0);
*/
+ if (MyFlags & ME_JUST_INFO)
+ {
+ level= MYSQL_ERROR::WARN_LEVEL_NOTE;
+ func= sql_print_information;
+ }
+ else if (MyFlags & ME_JUST_WARNING)
+ {
+ level= MYSQL_ERROR::WARN_LEVEL_WARN;
+ func= sql_print_warning;
+ }
+ else
+ {
+ level= MYSQL_ERROR::WARN_LEVEL_ERROR;
+ func= sql_print_error;
+ }
+
if ((thd= current_thd))
{
/*
TODO: There are two exceptions mechanism (THD and sp_rcontext),
this could be improved by having a common stack of handlers.
*/
- if (thd->handle_error(error, str,
- MYSQL_ERROR::WARN_LEVEL_ERROR))
+ if (thd->handle_error(error, str, level))
DBUG_RETURN(0);
+ if (level == MYSQL_ERROR::WARN_LEVEL_WARN)
+ push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, error, str);
+ if (level != MYSQL_ERROR::WARN_LEVEL_ERROR)
+ goto to_error_log;
+
thd->is_slave_error= 1; // needed to catch query errors during replication
/*
@@ -2639,8 +2663,9 @@ int my_message_sql(uint error, const char *str, myf MyFlags)
thd->no_warnings_for_error= FALSE;
}
}
+to_error_log:
if (!thd || MyFlags & ME_NOREFRESH)
- sql_print_error("%s: %s",my_progname,str); /* purecov: inspected */
+ (*func)("%s: %s", my_progname_short, str); /* purecov: inspected */
DBUG_RETURN(0);
}
@@ -3254,7 +3279,6 @@ static int init_thread_environment()
(void) my_rwlock_init(&LOCK_system_variables_hash, NULL);
(void) pthread_mutex_init(&LOCK_global_read_lock, MY_MUTEX_INIT_FAST);
(void) pthread_mutex_init(&LOCK_prepared_stmt_count, MY_MUTEX_INIT_FAST);
- (void) pthread_mutex_init(&LOCK_uuid_generator, MY_MUTEX_INIT_FAST);
#ifdef HAVE_OPENSSL
(void) pthread_mutex_init(&LOCK_des_key_file,MY_MUTEX_INIT_FAST);
#ifndef HAVE_YASSL
@@ -3430,9 +3454,10 @@ static int init_server_components()
query_cache_set_min_res_unit(query_cache_min_res_unit);
query_cache_init();
query_cache_resize(query_cache_size);
- randominit(&sql_rand,(ulong) server_start_time,(ulong) server_start_time/2);
+ my_rnd_init(&sql_rand,(ulong) server_start_time,(ulong) server_start_time/2);
set_proper_floating_point_mode();
init_thr_lock();
+ my_uuid_init((ulong) (my_rnd(&sql_rand))*12345,12345);
#ifdef HAVE_REPLICATION
init_slave_list();
#endif
@@ -3463,6 +3488,9 @@ static int init_server_components()
}
}
+ /* set up the hook before initializing plugins which may use it */
+ error_handler_hook= my_message_sql;
+
if (xid_cache_init())
{
sql_print_error("Out of memory");
@@ -3599,6 +3627,13 @@ server.");
using_update_log=1;
}
+ /* call ha_init_key_cache() on all key caches to init them */
+ process_key_caches(&ha_init_key_cache);
+
+ /* Allow storage engine to give real error messages */
+ if (ha_init_errors())
+ DBUG_RETURN(1);
+
if (plugin_init(&defaults_argc, defaults_argv,
(opt_noacl ? PLUGIN_INIT_SKIP_PLUGIN_TABLE : 0) |
(opt_help ? PLUGIN_INIT_SKIP_INITIALIZATION : 0)))
@@ -3765,9 +3800,6 @@ server.");
if (opt_myisam_log)
(void) mi_log(1);
- /* call ha_init_key_cache() on all key caches to init them */
- process_key_caches(&ha_init_key_cache);
-
#if defined(HAVE_MLOCKALL) && defined(MCL_CURRENT) && !defined(EMBEDDED_LIBRARY)
if (locked_in_memory && !getuid())
{
@@ -3957,9 +3989,9 @@ int main(int argc, char **argv)
Peculiar things with ia64 platforms - it seems we only have half the
stack size in reality, so we have to double it here
*/
- pthread_attr_setstacksize(&connection_attrib,thread_stack*2);
+ pthread_attr_setstacksize(&connection_attrib,my_thread_stack_size*2);
#else
- pthread_attr_setstacksize(&connection_attrib,thread_stack);
+ pthread_attr_setstacksize(&connection_attrib,my_thread_stack_size);
#endif
#ifdef HAVE_PTHREAD_ATTR_GETSTACKSIZE
{
@@ -3970,15 +4002,15 @@ int main(int argc, char **argv)
stack_size/= 2;
#endif
/* We must check if stack_size = 0 as Solaris 2.9 can return 0 here */
- if (stack_size && stack_size < thread_stack)
+ if (stack_size && stack_size < my_thread_stack_size)
{
if (global_system_variables.log_warnings)
sql_print_warning("Asked for %lu thread stack, but got %ld",
- thread_stack, (long) stack_size);
+ my_thread_stack_size, (long) stack_size);
#if defined(__ia64__) || defined(__ia64)
- thread_stack= stack_size*2;
+ my_thread_stack_size= stack_size*2;
#else
- thread_stack= stack_size;
+ my_thread_stack_size= stack_size;
#endif
}
}
@@ -4069,7 +4101,6 @@ we force server id to 2, but this MySQL server will not act as a slave.");
init signals & alarm
After this we can't quit by a simple unireg_abort
*/
- error_handler_hook= my_message_sql;
start_signal_handler(); // Creates pidfile
if (mysql_rm_tmp_tables() || acl_init(opt_noacl) ||
@@ -5221,10 +5252,15 @@ enum options_mysqld
OPT_MAX_LENGTH_FOR_SORT_DATA,
OPT_MAX_WRITE_LOCK_COUNT, OPT_BULK_INSERT_BUFFER_SIZE,
OPT_MAX_ERROR_COUNT, OPT_MULTI_RANGE_COUNT, OPT_MYISAM_DATA_POINTER_SIZE,
+
OPT_MYISAM_BLOCK_SIZE, OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE,
OPT_MYISAM_MAX_SORT_FILE_SIZE, OPT_MYISAM_SORT_BUFFER_SIZE,
- OPT_MYISAM_USE_MMAP,
+ OPT_MYISAM_USE_MMAP, OPT_MYISAM_REPAIR_THREADS,
OPT_MYISAM_STATS_METHOD,
+
+ OPT_PAGECACHE_BUFFER_SIZE,
+ OPT_PAGECACHE_DIVISION_LIMIT, OPT_PAGECACHE_AGE_THRESHOLD,
+
OPT_NET_BUFFER_LENGTH, OPT_NET_RETRY_COUNT,
OPT_NET_READ_TIMEOUT, OPT_NET_WRITE_TIMEOUT,
OPT_OPEN_FILES_LIMIT,
@@ -5238,7 +5274,7 @@ enum options_mysqld
OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE,
OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE,
OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK,
- OPT_WAIT_TIMEOUT, OPT_MYISAM_REPAIR_THREADS,
+ OPT_WAIT_TIMEOUT,
OPT_ERROR_LOG_FILE,
OPT_DEFAULT_WEEK_FORMAT,
OPT_RANGE_ALLOC_BLOCK_SIZE, OPT_ALLOW_SUSPICIOUS_UDFS,
@@ -5291,7 +5327,7 @@ enum options_mysqld
OPT_SECURE_FILE_PRIV,
OPT_MIN_EXAMINED_ROW_LIMIT,
OPT_LOG_SLOW_SLAVE_STATEMENTS,
- OPT_OLD_MODE
+ OPT_DEBUG_CRC, OPT_OLD_MODE
};
@@ -5416,6 +5452,10 @@ struct my_option my_long_options[] =
#ifndef DBUG_OFF
{"debug", '#', "Debug log.", (uchar**) &default_dbug_option,
(uchar**) &default_dbug_option, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
+ {"debug-crc-break", OPT_DEBUG_CRC,
+ "Call my_debug_put_break_here() if crc matches this number (for debug).",
+ (uchar**) &opt_my_crc_dbug_check, (uchar**) &opt_my_crc_dbug_check,
+ 0, GET_ULONG, REQUIRED_ARG, 0, 0, ~(ulong) 0L, 0, 0, 0},
#endif
{"default-character-set", 'C', "Set the default character set (deprecated option, use --character-set-server instead).",
(uchar**) &default_character_set_name, (uchar**) &default_character_set_name,
@@ -6218,7 +6258,7 @@ log and this option does nothing anymore.",
"Max packetlength to send/receive from to server.",
(uchar**) &global_system_variables.max_allowed_packet,
(uchar**) &max_system_variables.max_allowed_packet, 0, GET_ULONG,
- REQUIRED_ARG, 1024*1024L, 1024, 1024L*1024L*1024L, MALLOC_OVERHEAD, 1024, 0},
+ REQUIRED_ARG, 1024*1024L, 1024, 1024L*1024L*1024L, 0, 1024, 0},
{"max_binlog_cache_size", OPT_MAX_BINLOG_CACHE_SIZE,
"Can be used to restrict the total size used to cache a multi-transaction query.",
(uchar**) &max_binlog_cache_size, (uchar**) &max_binlog_cache_size, 0,
@@ -6259,7 +6299,7 @@ The minimum value for this variable is 4096.",
"Joins that are probably going to read more than max_join_size records return an error.",
(uchar**) &global_system_variables.max_join_size,
(uchar**) &max_system_variables.max_join_size, 0, GET_HA_ROWS, REQUIRED_ARG,
- ~0L, 1, ~0L, 0, 1, 0},
+ ULONG_MAX, 1, ULONG_MAX, 0, 1, 0},
{"max_length_for_sort_data", OPT_MAX_LENGTH_FOR_SORT_DATA,
"Max number of bytes in sorted records.",
(uchar**) &global_system_variables.max_length_for_sort_data,
@@ -6296,7 +6336,7 @@ The minimum value for this variable is 4096.",
{"max_user_connections", OPT_MAX_USER_CONNECTIONS,
"The maximum number of active connections for a single user (0 = no limit).",
(uchar**) &max_user_connections, (uchar**) &max_user_connections, 0, GET_UINT,
- REQUIRED_ARG, 0, 1, UINT_MAX, 0, 1, 0},
+ REQUIRED_ARG, 0, 0, UINT_MAX, 0, 1, 0},
{"max_write_lock_count", OPT_MAX_WRITE_LOCK_COUNT,
"After this many write locks, allow some read locks to run in between.",
(uchar**) &max_write_lock_count, (uchar**) &max_write_lock_count, 0, GET_ULONG,
@@ -6322,12 +6362,6 @@ The minimum value for this variable is 4096.",
(uchar**) &myisam_data_pointer_size,
(uchar**) &myisam_data_pointer_size, 0, GET_ULONG, REQUIRED_ARG,
6, 2, 7, 0, 1, 0},
- {"myisam_max_extra_sort_file_size", OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE,
- "Deprecated option",
- (uchar**) &global_system_variables.myisam_max_extra_sort_file_size,
- (uchar**) &max_system_variables.myisam_max_extra_sort_file_size,
- 0, GET_ULL, REQUIRED_ARG, (ulonglong) MI_MAX_TEMP_LENGTH,
- 0, (ulonglong) MAX_FILE_SIZE, 0, 1, 0},
{"myisam_max_sort_file_size", OPT_MYISAM_MAX_SORT_FILE_SIZE,
"Don't use the fast sort index method to created index if the temporary file would get bigger than this.",
(uchar**) &global_system_variables.myisam_max_sort_file_size,
@@ -6343,7 +6377,7 @@ The minimum value for this variable is 4096.",
"The buffer that is allocated when sorting the index when doing a REPAIR or when creating indexes with CREATE INDEX or ALTER TABLE.",
(uchar**) &global_system_variables.myisam_sort_buff_size,
(uchar**) &max_system_variables.myisam_sort_buff_size, 0,
- GET_ULONG, REQUIRED_ARG, 8192*1024, 4, ~0L, 0, 1, 0},
+ GET_ULONG, REQUIRED_ARG, 8192*1024, 4, ULONG_MAX, 0, 1, 0},
{"myisam_use_mmap", OPT_MYISAM_USE_MMAP,
"Use memory mapping for reading and writing MyISAM tables",
(uchar**) &opt_myisam_use_mmap,
@@ -6375,7 +6409,7 @@ The minimum value for this variable is 4096.",
(uchar**) &global_system_variables.net_write_timeout,
(uchar**) &max_system_variables.net_write_timeout, 0, GET_ULONG,
REQUIRED_ARG, NET_WRITE_TIMEOUT, 1, LONG_TIMEOUT, 0, 1, 0},
- { "old", OPT_OLD_MODE, "Use compatible behavior.",
+ {"old", OPT_OLD_MODE, "Use compatible behavior.",
(uchar**) &global_system_variables.old_mode,
(uchar**) &max_system_variables.old_mode, 0, GET_BOOL, NO_ARG,
0, 0, 0, 0, 0, 0},
@@ -6403,10 +6437,10 @@ The minimum value for this variable is 4096.",
(uchar**) &opt_plugin_load, (uchar**) &opt_plugin_load, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"preload_buffer_size", OPT_PRELOAD_BUFFER_SIZE,
- "The size of the buffer that is allocated when preloading indexes",
- (uchar**) &global_system_variables.preload_buff_size,
- (uchar**) &max_system_variables.preload_buff_size, 0, GET_ULONG,
- REQUIRED_ARG, 32*1024L, 1024, 1024*1024*1024L, 0, 1, 0},
+ "The size of the buffer that is allocated when preloading indexes",
+ (uchar**) &global_system_variables.preload_buff_size,
+ (uchar**) &max_system_variables.preload_buff_size, 0, GET_ULONG,
+ REQUIRED_ARG, 32*1024L, 1024, 1024*1024*1024L, 0, 1, 0},
{"query_alloc_block_size", OPT_QUERY_ALLOC_BLOCK_SIZE,
"Allocation block size for query parsing and execution",
(uchar**) &global_system_variables.query_alloc_block_size,
@@ -6508,7 +6542,7 @@ The minimum value for this variable is 4096.",
"Each thread that needs to do a sort allocates a buffer of this size.",
(uchar**) &global_system_variables.sortbuff_size,
(uchar**) &max_system_variables.sortbuff_size, 0, GET_ULONG, REQUIRED_ARG,
- MAX_SORT_MEMORY, MIN_SORT_MEMORY+MALLOC_OVERHEAD*2, ~0L, MALLOC_OVERHEAD,
+ MAX_SORT_MEMORY, MIN_SORT_MEMORY+MALLOC_OVERHEAD*2, ULONG_MAX, MALLOC_OVERHEAD,
1, 0},
{"sync-binlog", OPT_SYNC_BINLOG,
"Synchronously flush binary log to disk after every #th event. "
@@ -6550,8 +6584,8 @@ The minimum value for this variable is 4096.",
REQUIRED_ARG, 20, 1, 16384, 0, 1, 0},
#endif
{"thread_stack", OPT_THREAD_STACK,
- "The stack size for each thread.", (uchar**) &thread_stack,
- (uchar**) &thread_stack, 0, GET_ULONG, REQUIRED_ARG,DEFAULT_THREAD_STACK,
+ "The stack size for each thread.", (uchar**) &my_thread_stack_size,
+ (uchar**) &my_thread_stack_size, 0, GET_ULONG, REQUIRED_ARG,DEFAULT_THREAD_STACK,
1024L*128L, ULONG_MAX, 0, 1024, 0},
{ "time_format", OPT_TIME_FORMAT,
"The TIME format (for future).",
@@ -6565,12 +6599,12 @@ The minimum value for this variable is 4096.",
(uchar**) &max_system_variables.tmp_table_size, 0, GET_ULL,
REQUIRED_ARG, 16*1024*1024L, 1024, MAX_MEM_TABLE_SIZE, 0, 1, 0},
{"transaction_alloc_block_size", OPT_TRANS_ALLOC_BLOCK_SIZE,
- "Allocation block size for various transaction-related structures",
+ "Allocation block size for transactions to be stored in binary log",
(uchar**) &global_system_variables.trans_alloc_block_size,
(uchar**) &max_system_variables.trans_alloc_block_size, 0, GET_ULONG,
REQUIRED_ARG, QUERY_ALLOC_BLOCK_SIZE, 1024, ULONG_MAX, 0, 1024, 0},
{"transaction_prealloc_size", OPT_TRANS_PREALLOC_SIZE,
- "Persistent buffer for various transaction-related structures",
+ "Persistent buffer for transactions to be stored in binary log",
(uchar**) &global_system_variables.trans_prealloc_size,
(uchar**) &max_system_variables.trans_prealloc_size, 0, GET_ULONG,
REQUIRED_ARG, TRANS_ALLOC_PREALLOC_SIZE, 1024, ULONG_MAX, 0, 1024, 0},
@@ -7228,9 +7262,11 @@ static void mysql_init_variables(void)
thread_cache.empty();
key_caches.empty();
if (!(dflt_key_cache= get_or_create_key_cache(default_key_cache_base.str,
- default_key_cache_base.length)))
+ default_key_cache_base.length)))
exit(1);
- multi_keycache_init(); /* set key_cache_hash.default_value = dflt_key_cache */
+
+ /* set key_cache_hash.default_value = dflt_key_cache */
+ multi_keycache_init();
/* Set directory paths */
strmake(language, LANGUAGE, sizeof(language)-1);
@@ -7245,7 +7281,7 @@ static void mysql_init_variables(void)
master_password= master_host= 0;
master_info_file= (char*) "master.info",
relay_log_info_file= (char*) "relay-log.info";
- master_ssl_key= master_ssl_cert= master_ssl_ca=
+ master_ssl_key= master_ssl_cert= master_ssl_ca=
master_ssl_capath= master_ssl_cipher= 0;
report_user= report_password = report_host= 0; /* TO BE DELETED */
opt_relay_logname= opt_relaylog_index_name= 0;
@@ -7902,7 +7938,7 @@ mysql_getopt_value(const char *keyname, uint key_length,
}
}
}
- return option->value;
+ return option->value;
}
@@ -7991,6 +8027,7 @@ static void get_options(int *argc,char **argv)
/* Set global variables based on startup options */
myisam_block_size=(uint) 1 << my_bit_log2(opt_myisam_block_size);
+ my_crc_dbug_check= opt_my_crc_dbug_check;
/* long_query_time is in microseconds */
global_system_variables.long_query_time= max_system_variables.long_query_time=
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index b8bdb604eea..a1ad097f12c 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -2142,9 +2142,6 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
quick=0;
needed_reg.clear_all();
quick_keys.clear_all();
- if ((specialflag & SPECIAL_SAFE_MODE) && ! force_quick_range ||
- !limit)
- DBUG_RETURN(0); /* purecov: inspected */
if (keys_to_use.is_clear_all())
DBUG_RETURN(0);
records= head->file->stats.records;
@@ -4315,7 +4312,6 @@ static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
}
info->out_rows *= selectivity_mult;
- DBUG_PRINT("info", ("info->total_cost= %g", info->total_cost));
if (is_cpk_scan)
{
diff --git a/sql/password.c b/sql/password.c
index 1ff67888ea4..43430f37f96 100644
--- a/sql/password.c
+++ b/sql/password.c
@@ -69,41 +69,12 @@
/*
New (MySQL 3.21+) random generation structure initialization
SYNOPSIS
- randominit()
+ my_rnd_init()
rand_st OUT Structure to initialize
seed1 IN First initialization parameter
seed2 IN Second initialization parameter
*/
-void randominit(struct rand_struct *rand_st, ulong seed1, ulong seed2)
-{ /* For mysql 3.21.# */
-#ifdef HAVE_purify
- bzero((char*) rand_st,sizeof(*rand_st)); /* Avoid UMC varnings */
-#endif
- rand_st->max_value= 0x3FFFFFFFL;
- rand_st->max_value_dbl=(double) rand_st->max_value;
- rand_st->seed1=seed1%rand_st->max_value ;
- rand_st->seed2=seed2%rand_st->max_value;
-}
-
-
-/*
- Generate random number.
- SYNOPSIS
- my_rnd()
- rand_st INOUT Structure used for number generation
- RETURN VALUE
- generated pseudo random number
-*/
-
-double my_rnd(struct rand_struct *rand_st)
-{
- rand_st->seed1=(rand_st->seed1*3+rand_st->seed2) % rand_st->max_value;
- rand_st->seed2=(rand_st->seed1+rand_st->seed2+33) % rand_st->max_value;
- return (((double) rand_st->seed1)/rand_st->max_value_dbl);
-}
-
-
/*
Generate binary hash from raw text string
Used for Pre-4.1 password handling
@@ -164,7 +135,7 @@ void make_scrambled_password_323(char *to, const char *password)
void scramble_323(char *to, const char *message, const char *password)
{
- struct rand_struct rand_st;
+ struct my_rnd_struct rand_st;
ulong hash_pass[2], hash_message[2];
if (password && password[0])
@@ -173,7 +144,7 @@ void scramble_323(char *to, const char *message, const char *password)
const char *message_end= message + SCRAMBLE_LENGTH_323;
hash_password(hash_pass,password, (uint) strlen(password));
hash_password(hash_message, message, SCRAMBLE_LENGTH_323);
- randominit(&rand_st,hash_pass[0] ^ hash_message[0],
+ my_rnd_init(&rand_st,hash_pass[0] ^ hash_message[0],
hash_pass[1] ^ hash_message[1]);
for (; message < message_end; message++)
*to++= (char) (floor(my_rnd(&rand_st)*31)+64);
@@ -206,13 +177,13 @@ my_bool
check_scramble_323(const char *scrambled, const char *message,
ulong *hash_pass)
{
- struct rand_struct rand_st;
+ struct my_rnd_struct rand_st;
ulong hash_message[2];
char buff[16],*to,extra; /* Big enough for check */
const char *pos;
hash_password(hash_message, message, SCRAMBLE_LENGTH_323);
- randominit(&rand_st,hash_pass[0] ^ hash_message[0],
+ my_rnd_init(&rand_st,hash_pass[0] ^ hash_message[0],
hash_pass[1] ^ hash_message[1]);
to=buff;
DBUG_ASSERT(sizeof(buff) > SCRAMBLE_LENGTH_323);
@@ -293,7 +264,8 @@ void make_password_from_salt_323(char *to, const ulong *salt)
rand_st INOUT structure used for number generation
*/
-void create_random_string(char *to, uint length, struct rand_struct *rand_st)
+void create_random_string(char *to, uint length,
+ struct my_rnd_struct *rand_st)
{
char *end= to + length;
/* Use pointer arithmetics as it is faster way to do so. */
diff --git a/sql/set_var.cc b/sql/set_var.cc
index 7dce5bf1a46..905b02cec7a 100644
--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -252,7 +252,7 @@ static sys_var_bool_ptr sys_local_infile(&vars, "local_infile",
static sys_var_trust_routine_creators
sys_trust_routine_creators(&vars, "log_bin_trust_routine_creators",
&trust_function_creators);
-static sys_var_bool_ptr
+static sys_var_bool_ptr
sys_trust_function_creators(&vars, "log_bin_trust_function_creators",
&trust_function_creators);
static sys_var_bool_ptr
@@ -393,10 +393,10 @@ static sys_var_thd_ulong sys_trans_alloc_block_size(&vars, "transaction_alloc_bl
static sys_var_thd_ulong sys_trans_prealloc_size(&vars, "transaction_prealloc_size",
&SV::trans_prealloc_size,
0, fix_trans_mem_root);
-sys_var_enum_const sys_thread_handling(&vars, "thread_handling",
- &SV::thread_handling,
- &thread_handling_typelib,
- NULL);
+sys_var_enum_const sys_thread_handling(&vars, "thread_handling",
+ &SV::thread_handling,
+ &thread_handling_typelib,
+ NULL);
#ifdef HAVE_QUERY_CACHE
static sys_var_long_ptr sys_query_cache_limit(&vars, "query_cache_limit",
@@ -644,6 +644,7 @@ static sys_var_have_plugin sys_have_csv(&vars, "have_csv", C_STRING_WITH_LEN("cs
static sys_var_have_variable sys_have_dlopen(&vars, "have_dynamic_loading", &have_dlopen);
static sys_var_have_variable sys_have_geometry(&vars, "have_geometry", &have_geometry);
static sys_var_have_plugin sys_have_innodb(&vars, "have_innodb", C_STRING_WITH_LEN("innodb"), MYSQL_STORAGE_ENGINE_PLUGIN);
+static sys_var_have_plugin sys_have_maria(&vars, "have_maria", C_STRING_WITH_LEN("maria"), MYSQL_STORAGE_ENGINE_PLUGIN);
static sys_var_have_plugin sys_have_ndbcluster(&vars, "have_ndbcluster", C_STRING_WITH_LEN("ndbcluster"), MYSQL_STORAGE_ENGINE_PLUGIN);
static sys_var_have_variable sys_have_openssl(&vars, "have_openssl", &have_ssl);
static sys_var_have_variable sys_have_ssl(&vars, "have_ssl", &have_ssl);
@@ -727,7 +728,7 @@ static SHOW_VAR fixed_vars[]= {
#ifdef HAVE_THR_SETCONCURRENCY
{"thread_concurrency", (char*) &concurrency, SHOW_LONG},
#endif
- {"thread_stack", (char*) &thread_stack, SHOW_LONG},
+ {"thread_stack", (char*) &my_thread_stack_size, SHOW_LONG},
};
@@ -1253,6 +1254,13 @@ uchar *sys_var_enum_const::value_ptr(THD *thd, enum_var_type type,
return (uchar*) enum_names->type_names[global_system_variables.*offset];
}
+
+uchar *sys_var_enum_const::value_ptr(THD *thd, enum_var_type type,
+ LEX_STRING *base)
+{
+ return (uchar*) enum_names->type_names[global_system_variables.*offset];
+}
+
bool sys_var_thd_ulong::check(THD *thd, set_var *var)
{
return (get_unsigned(thd, var) ||
@@ -2024,10 +2032,9 @@ KEY_CACHE *get_key_cache(LEX_STRING *cache_name)
if (!cache_name || ! cache_name->length)
cache_name= &default_key_cache_base;
return ((KEY_CACHE*) find_named(&key_caches,
- cache_name->str, cache_name->length, 0));
+ cache_name->str, cache_name->length, 0));
}
-
uchar *sys_var_key_cache_param::value_ptr(THD *thd, enum_var_type type,
LEX_STRING *base)
{
diff --git a/sql/set_var.h b/sql/set_var.h
index 5be54200c7d..723b31eb188 100644
--- a/sql/set_var.h
+++ b/sql/set_var.h
@@ -452,9 +452,9 @@ public:
{ chain_sys_var(chain); }
sys_var_thd_enum(sys_var_chain *chain, const char *name_arg, ulong SV::*offset_arg,
TYPELIB *typelib, sys_after_update_func func,
- sys_check_func check)
+ sys_check_func check_arg)
:sys_var_thd(name_arg,func), offset(offset_arg), enum_names(typelib),
- check_func(check)
+ check_func(check_arg)
{ chain_sys_var(chain); }
bool check(THD *thd, set_var *var)
{
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 75376c53f68..9e66bfd4b47 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -564,6 +564,7 @@ THD::THD()
cleanup_done= abort_on_warning= no_warnings_for_error= 0;
peer_port= 0; // For SHOW PROCESSLIST
transaction.m_pending_rows_event= 0;
+ transaction.on= 1;
#ifdef SIGNAL_WITH_VIO_CLOSE
active_vio = 0;
#endif
@@ -605,7 +606,7 @@ THD::THD()
tablespace_op=FALSE;
tmp= sql_rnd_with_mutex();
- randominit(&rand, tmp + (ulong) &rand, tmp + (ulong) ::global_query_id);
+ my_rnd_init(&rand, tmp + (ulong) &rand, tmp + (ulong) ::global_query_id);
substitute_null_with_insert_id = FALSE;
thr_lock_info_init(&lock_info); /* safety: will be reset after start */
thr_lock_owner_init(&main_lock_id, &lock_info);
diff --git a/sql/sql_class.h b/sql/sql_class.h
index e8f28b19213..3689bb343d5 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -232,18 +232,18 @@ struct system_variables
{
/*
How dynamically allocated system variables are handled:
-
+
The global_system_variables and max_system_variables are "authoritative"
They both should have the same 'version' and 'size'.
When attempting to access a dynamic variable, if the session version
is out of date, then the session version is updated and realloced if
neccessary and bytes copied from global to make up for missing data.
- */
+ */
ulong dynamic_variables_version;
char* dynamic_variables_ptr;
uint dynamic_variables_head; /* largest valid variable offset */
uint dynamic_variables_size; /* how many bytes are in use */
-
+
ulonglong myisam_max_extra_sort_file_size;
ulonglong myisam_max_sort_file_size;
ulonglong max_heap_table_size;
@@ -309,9 +309,9 @@ struct system_variables
my_bool low_priority_updates;
my_bool new_mode;
- /*
+ /*
compatibility option:
- - index usage hints (USE INDEX without a FOR clause) behave as in 5.0
+ - index usage hints (USE INDEX without a FOR clause) behave as in 5.0
*/
my_bool old_mode;
my_bool query_cache_wlock_invalidate;
@@ -535,7 +535,7 @@ class Server_side_cursor;
- prepared, that is, contain placeholders,
- opened as cursors. We maintain 1 to 1 relationship between
statement and cursor - if user wants to create another cursor for his
- query, we create another statement for it.
+ query, we create another statement for it.
To perform some action with statement we reset THD part to the state of
that statement, do the action, and then save back modified state from THD
to the statement. It will be changed in near future, and Statement will
@@ -586,7 +586,7 @@ public:
it. We will see the query_length field as either 0, or the right value
for it.
Assuming that the write and read of an n-bit memory field in an n-bit
- computer is atomic, we can avoid races in the above way.
+ computer is atomic, we can avoid races in the above way.
This printing is needed at least in SHOW PROCESSLIST and SHOW INNODB
STATUS.
*/
@@ -738,7 +738,7 @@ public:
{
return (*priv_host ? priv_host : (char *)"%");
}
-
+
bool set_user(char *user_arg);
#ifndef NO_EMBEDDED_ACCESS_CHECKS
@@ -1141,7 +1141,7 @@ public:
String packet; // dynamic buffer for network I/O
String convert_buffer; // buffer for charset conversions
struct sockaddr_in remote; // client socket address
- struct rand_struct rand; // used for authentication
+ struct my_rnd_struct rand; // used for authentication
struct system_variables variables; // Changeable local variables
struct system_status_var status_var; // Per thread statistic vars
struct system_status_var *initial_status_var; /* used by show status */
@@ -1204,7 +1204,7 @@ public:
/*
One thread can hold up to one named user-level lock. This variable
points to a lock object if the lock is present. See item_func.cc and
- chapter 'Miscellaneous functions', for functions GET_LOCK, RELEASE_LOCK.
+ chapter 'Miscellaneous functions', for functions GET_LOCK, RELEASE_LOCK.
*/
User_level_lock *ull;
#ifndef DBUG_OFF
@@ -1223,7 +1223,7 @@ public:
time_t start_time, user_time;
ulonglong connect_utime, thr_create_utime; // track down slow pthread_create
ulonglong start_utime, utime_after_lock;
-
+
thr_lock_type update_lock_default;
Delayed_insert *di;
@@ -1618,7 +1618,7 @@ public:
*/
bool is_slave_error;
bool bootstrap, cleanup_done;
-
+
/** is set if some thread specific value(s) used in a statement. */
bool thread_specific_used;
bool charset_is_system_charset, charset_is_collation_connection;
@@ -1650,10 +1650,10 @@ public:
ulong ulong_value;
ulonglong ulonglong_value;
} sys_var_tmp;
-
+
struct {
- /*
- If true, mysql_bin_log::write(Log_event) call will not write events to
+ /*
+ If true, mysql_bin_log::write(Log_event) call will not write events to
binlog, and maintain 2 below variables instead (use
mysql_bin_log.start_union_events to turn this on)
*/
@@ -1664,13 +1664,13 @@ public:
*/
bool unioned_events;
/*
- If TRUE, at least one mysql_bin_log::write(Log_event e), where
- e.cache_stmt == TRUE call has been made after last
+ If TRUE, at least one mysql_bin_log::write(Log_event e), where
+ e.cache_stmt == TRUE call has been made after last
mysql_bin_log.start_union_events() call.
*/
bool unioned_events_trans;
-
- /*
+
+ /*
'queries' (actually SP statements) that run under inside this binlog
union have thd->query_id >= first_query_id.
*/
@@ -1701,7 +1701,7 @@ public:
killing mysqld) where it's vital to not allocate excessive and not used
memory. Note, that we still don't return error from init_for_queries():
if preallocation fails, we should notice that at the first call to
- alloc_root.
+ alloc_root.
*/
void init_for_queries();
void change_user(void);
@@ -1731,12 +1731,12 @@ public:
The query can be logged row-based or statement-based
*/
ROW_QUERY_TYPE,
-
+
/*
The query has to be logged statement-based
*/
STMT_QUERY_TYPE,
-
+
/*
The query represents a change to a table in the "mysql"
database and is currently mapped to ROW_QUERY_TYPE.
@@ -1744,7 +1744,7 @@ public:
MYSQL_QUERY_TYPE,
QUERY_TYPE_COUNT
};
-
+
int binlog_query(enum_binlog_query_type qtype,
char const *query, ulong query_len,
bool is_trans, bool suppress_use,
@@ -1990,7 +1990,7 @@ public:
if ((temporary_tables == NULL) && (in_sub_stmt == 0) &&
(system_thread != SYSTEM_THREAD_NDBCLUSTER_BINLOG))
{
- current_stmt_binlog_row_based=
+ current_stmt_binlog_row_based=
test(variables.binlog_format == BINLOG_FORMAT_ROW);
}
}
@@ -2370,10 +2370,17 @@ public:
int prepare2(void) { return 0; }
};
+
+#ifdef WITH_MARIA_STORAGE_ENGINE
+#include <maria.h>
+#define ENGINE_COLUMNDEF MARIA_COLUMNDEF
+#else
#include <myisam.h>
+#define ENGINE_COLUMNDEF MI_COLUMNDEF
+#endif
-/*
- Param to create temporary tables when doing SELECT:s
+/*
+ Param to create temporary tables when doing SELECT:s
NOTE
This structure is copied using memcpy as a part of JOIN.
*/
@@ -2392,7 +2399,7 @@ public:
Copy_field *save_copy_field, *save_copy_field_end;
uchar *group_buff;
Item **items_to_copy; /* Fields in tmp table */
- MI_COLUMNDEF *recinfo,*start_recinfo;
+ ENGINE_COLUMNDEF *recinfo, *start_recinfo;
KEY *keyinfo;
ha_rows end_write_records;
uint field_count,sum_func_count,func_count;
@@ -2401,8 +2408,8 @@ public:
uint quick_group;
bool using_indirect_summary_function;
/* If >0 convert all blob fields to varchar(convert_blob_length) */
- uint convert_blob_length;
- CHARSET_INFO *table_charset;
+ uint convert_blob_length;
+ CHARSET_INFO *table_charset;
bool schema_table;
/*
True if GROUP BY and its aggregate functions are already computed
@@ -2536,7 +2543,7 @@ public:
else
db= db_arg;
}
- inline Table_ident(LEX_STRING table_arg)
+ inline Table_ident(LEX_STRING table_arg)
:table(table_arg), sel((SELECT_LEX_UNIT *)0)
{
db.str=0;
@@ -2582,7 +2589,7 @@ class user_var_entry
};
/*
- Unique -- class for unique (removing of duplicates).
+ Unique -- class for unique (removing of duplicates).
Puts all values to the TREE. If the tree becomes too big,
it's dumped to the file. User can request sorted values, or
just iterate through them. In the last case tree merging is performed in
@@ -2616,9 +2623,9 @@ public:
}
bool get(TABLE *table);
- static double get_use_cost(uint *buffer, uint nkeys, uint key_size,
+ static double get_use_cost(uint *buffer, uint nkeys, uint key_size,
ulonglong max_in_memory_size);
- inline static int get_cost_calc_buff_size(ulong nkeys, uint key_size,
+ inline static int get_cost_calc_buff_size(ulong nkeys, uint key_size,
ulonglong max_in_memory_size)
{
register ulonglong max_elems_in_tree=
@@ -2679,7 +2686,7 @@ class multi_update :public select_result_interceptor
uint table_count;
/*
List of tables referenced in the CHECK OPTION condition of
- the updated view excluding the updated table.
+ the updated view excluding the updated table.
*/
List <TABLE> unupdated_check_opt_tables;
Copy_field *copy_field;
diff --git a/sql/sql_crypt.cc b/sql/sql_crypt.cc
index ebd424f00f0..53babf517c1 100644
--- a/sql/sql_crypt.cc
+++ b/sql/sql_crypt.cc
@@ -38,7 +38,7 @@ SQL_CRYPT::SQL_CRYPT(const char *password)
void SQL_CRYPT::crypt_init(ulong *rand_nr)
{
uint i;
- randominit(&rand,rand_nr[0],rand_nr[1]);
+ my_rnd_init(&rand,rand_nr[0],rand_nr[1]);
for (i=0 ; i<=255; i++)
decode_buff[i]= (char) i;
diff --git a/sql/sql_crypt.h b/sql/sql_crypt.h
index f3db9adde25..7d803245b0b 100644
--- a/sql/sql_crypt.h
+++ b/sql/sql_crypt.h
@@ -20,7 +20,7 @@
class SQL_CRYPT :public Sql_alloc
{
- struct rand_struct rand,org_rand;
+ struct my_rnd_struct rand,org_rand;
char decode_buff[256],encode_buff[256];
uint shift;
void crypt_init(ulong *seed);
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 56748772523..1f6216efb2e 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -961,6 +961,7 @@ bool mysql_truncate(THD *thd, TABLE_LIST *table_list, bool dont_send_ok)
DBUG_ENTER("mysql_truncate");
bzero((char*) &create_info,sizeof(create_info));
+
/* If it is a temporary table, close and regenerate it */
if (!dont_send_ok && (table= find_temporary_table(thd, table_list)))
{
@@ -970,7 +971,8 @@ bool mysql_truncate(THD *thd, TABLE_LIST *table_list, bool dont_send_ok)
goto trunc_by_del;
table->file->info(HA_STATUS_AUTO | HA_STATUS_NO_LOCK);
-
+
+ create_info.options|= HA_LEX_CREATE_TMP_TABLE;
close_temporary_table(thd, table, 0, 0); // Don't free share
ha_create_table(thd, share->normalized_path.str,
share->db.str, share->table_name.str, &create_info, 1);
@@ -1001,7 +1003,8 @@ bool mysql_truncate(THD *thd, TABLE_LIST *table_list, bool dont_send_ok)
table_list->db, table_list->table_name);
DBUG_RETURN(TRUE);
}
- if (!ha_check_storage_engine_flag(ha_resolve_by_legacy_type(thd, table_type),
+ if (!ha_check_storage_engine_flag(ha_resolve_by_legacy_type(thd,
+ table_type),
HTON_CAN_RECREATE))
goto trunc_by_del;
@@ -1009,9 +1012,11 @@ bool mysql_truncate(THD *thd, TABLE_LIST *table_list, bool dont_send_ok)
DBUG_RETURN(TRUE);
}
- // Remove the .frm extension AIX 5.2 64-bit compiler bug (BUG#16155): this
- // crashes, replacement works. *(path + path_length - reg_ext_length)=
- // '\0';
+ /*
+ Remove the .frm extension AIX 5.2 64-bit compiler bug (BUG#16155): this
+ crashes, replacement works. *(path + path_length - reg_ext_length)=
+ '\0';
+ */
path[path_length - reg_ext_length] = 0;
VOID(pthread_mutex_lock(&LOCK_open));
error= ha_create_table(thd, path, table_list->db, table_list->table_name,
@@ -1046,12 +1051,15 @@ end:
trunc_by_del:
/* Probably InnoDB table */
ulonglong save_options= thd->options;
+ bool save_binlog_row_based= thd->current_stmt_binlog_row_based;
+
table_list->lock_type= TL_WRITE;
thd->options&= ~(OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT);
ha_enable_transaction(thd, FALSE);
mysql_init_select(thd->lex);
- bool save_binlog_row_based= thd->current_stmt_binlog_row_based;
thd->clear_current_stmt_binlog_row_based();
+
+ /* Delete all rows from table */
error= mysql_delete(thd, table_list, (COND*) 0, (SQL_LIST*) 0,
HA_POS_ERROR, LL(0), TRUE);
ha_enable_transaction(thd, TRUE);
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index f9dbd402de7..6a7356f17ee 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -3516,6 +3516,15 @@ select_create::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
thd->binlog_start_trans_and_stmt();
}
+ /*
+ If error during the CREATE SELECT we drop the table, so no need for
+ engines to do logging of insertions (optimization). We don't do it for
+ temporary tables (yet) as re-enabling causes an undesirable commit.
+ */
+ if (((thd->lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) == 0) &&
+ ha_enable_transaction(thd, FALSE))
+ DBUG_RETURN(-1);
+
if (!(table= create_table_from_items(thd, create_info, create_table,
alter_info, &values,
&extra_lock, hook_ptr)))
@@ -3667,8 +3676,10 @@ bool select_create::send_eof()
nevertheless.
*/
if (!table->s->tmp_table)
+ {
+ ha_enable_transaction(thd, TRUE);
ha_commit(thd); // Can fail, but we proceed anyway
-
+ }
table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
if (m_plock)
@@ -3694,6 +3705,9 @@ void select_create::abort()
select_insert::abort();
reenable_binlog(thd);
+ if (table && !table->s->tmp_table)
+ ha_enable_transaction(thd, TRUE);
+
/*
We roll back the statement, including truncating the transaction
cache of the binary log, if the statement failed.
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index ecee3fcb97f..22d9b057e96 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -4883,8 +4883,7 @@ check_access(THD *thd, ulong want_access, const char *db, ulong *save_priv,
(see SQLCOM_GRANT case, mysql_execute_command() function) and
set db_is_pattern according to 'dont_check_global_grants' value.
*/
- bool db_is_pattern= (test(want_access & GRANT_ACL) &&
- dont_check_global_grants);
+ bool db_is_pattern= ((want_access & GRANT_ACL) && dont_check_global_grants);
ulong dummy;
DBUG_ENTER("check_access");
DBUG_PRINT("enter",("db: %s want_access: %lu master_access: %lu",
@@ -5283,10 +5282,10 @@ bool check_stack_overrun(THD *thd, long margin,
long stack_used;
DBUG_ASSERT(thd == current_thd);
if ((stack_used=used_stack(thd->thread_stack,(char*) &stack_used)) >=
- (long) (thread_stack - margin))
+ (long) (my_thread_stack_size - margin))
{
sprintf(errbuff[0],ER(ER_STACK_OVERRUN_NEED_MORE),
- stack_used,thread_stack,margin);
+ stack_used,my_thread_stack_size,margin);
my_message(ER_STACK_OVERRUN_NEED_MORE,errbuff[0],MYF(0));
thd->fatal_error();
return 1;
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
index 2a86844c8c6..7ed3cd057d5 100644
--- a/sql/sql_plugin.cc
+++ b/sql/sql_plugin.cc
@@ -633,7 +633,7 @@ static plugin_ref intern_plugin_lock(LEX *lex, plugin_ref rc CALLER_INFO_PROTO)
*plugin= pi;
#endif
pi->ref_count++;
- DBUG_PRINT("info",("thd: 0x%lx, plugin: \"%s\", ref_count: %d",
+ DBUG_PRINT("info",("thd: 0x%lx plugin: \"%s\" ref_count: %d",
(long) current_thd, pi->name.str, pi->ref_count));
if (lex)
@@ -1937,7 +1937,6 @@ static int check_func_longlong(THD *thd, struct st_mysql_sys_var *var,
struct my_option options;
value->val_int(value, &tmp);
plugin_opt_set_limits(&options, var);
- *(ulonglong *)save= getopt_ull_limit_value(tmp, &options, &fixed);
if (var->flags & PLUGIN_VAR_UNSIGNED)
*(ulonglong *)save= getopt_ull_limit_value((ulonglong) tmp, &options,
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index c735f70529e..3267df95b63 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -31,10 +31,16 @@
#include "mysql_priv.h"
#include "sql_select.h"
#include "sql_cursor.h"
-
#include <m_ctype.h>
+#include <my_bit.h>
#include <hash.h>
#include <ft_global.h>
+#ifdef WITH_MARIA_STORAGE_ENGINE
+#include "../storage/maria/ha_maria.h"
+#define TMP_ENGINE_HTON maria_hton
+#else
+#define TMP_ENGINE_HTON myisam_hton
+#endif
const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref",
"MAYBE_REF","ALL","range","index","fulltext",
@@ -119,8 +125,14 @@ static COND *optimize_cond(JOIN *join, COND *conds,
Item::cond_result *cond_value);
static bool const_expression_in_where(COND *conds,Item *item, Item **comp_item);
static bool open_tmp_table(TABLE *table);
-static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
+static bool create_internal_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
ulonglong options);
+static bool create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
+ TMP_TABLE_PARAM *param,
+ int error,
+ bool ignore_last_dupp,
+ handlerton *hton,
+ const char *proc_info);
static int do_select(JOIN *join,List<Item> *fields,TABLE *tmp_table,
Procedure *proc);
@@ -9623,7 +9635,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
KEY *keyinfo;
KEY_PART_INFO *key_part_info;
Item **copy_func;
- MI_COLUMNDEF *recinfo;
+ ENGINE_COLUMNDEF *recinfo;
uint total_uneven_bit_length= 0;
bool force_copy_fields= param->force_copy_fields;
DBUG_ENTER("create_tmp_table");
@@ -9649,11 +9661,10 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
/*
No need to change table name to lower case as we are only creating
- MyISAM or HEAP tables here
+ MyISAM, Maria or HEAP tables here
*/
fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME);
-
if (group)
{
if (!param->quick_group)
@@ -9744,8 +9755,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
table->s= share;
init_tmp_table_share(thd, share, "", 0, tmpname, tmpname);
share->blob_field= blob_field;
- share->blob_ptr_size= mi_portable_sizeof_char_ptr;
- share->db_low_byte_first=1; // True for HEAP and MyISAM
+ share->blob_ptr_size= portable_sizeof_char_ptr;
+ share->db_low_byte_first=1; // True for HEAP, MyISAM and Maria
share->table_charset= param->table_charset;
share->primary_key= MAX_KEY; // Indicate no primary key
share->keys_for_keyread.init();
@@ -9877,6 +9888,12 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
*blob_field++= fieldnr;
blob_count++;
}
+ if (new_field->real_type() == MYSQL_TYPE_STRING ||
+ new_field->real_type() == MYSQL_TYPE_VARCHAR)
+ {
+ string_count++;
+ string_total_length+= new_field->pack_length();
+ }
if (item->marker == 4 && item->maybe_null)
{
group_null_items++;
@@ -9913,7 +9930,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
(select_options & (OPTION_BIG_TABLES | SELECT_SMALL_RESULT)) ==
OPTION_BIG_TABLES || (select_options & TMP_TABLE_FORCE_MYISAM))
{
- share->db_plugin= ha_lock_engine(0, myisam_hton);
+ share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON);
table->file= get_new_handler(share, &table->mem_root,
share->db_type());
if (group &&
@@ -9930,7 +9947,6 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
if (!table->file)
goto err;
-
if (!using_unique_constraint)
reclength+= group_null_items; // null flag is stored separately
@@ -10066,13 +10082,16 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
/* Make entry for create table */
recinfo->length=length;
if (field->flags & BLOB_FLAG)
- recinfo->type= (int) FIELD_BLOB;
+ recinfo->type= FIELD_BLOB;
else if (use_packed_rows &&
field->real_type() == MYSQL_TYPE_STRING &&
length >= MIN_STRING_LENGTH_TO_PACK_ROWS)
- recinfo->type=FIELD_SKIP_ENDSPACE;
+ recinfo->type= FIELD_SKIP_ENDSPACE;
+ else if (field->real_type() == MYSQL_TYPE_VARCHAR)
+ recinfo->type= FIELD_VARCHAR;
else
- recinfo->type=FIELD_NORMAL;
+ recinfo->type= FIELD_NORMAL;
+
if (!--hidden_field_count)
null_count=(null_count+7) & ~7; // move to next byte
@@ -10235,9 +10254,9 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
if (thd->is_fatal_error) // If end of memory
goto err; /* purecov: inspected */
share->db_record_offset= 1;
- if (share->db_type() == myisam_hton)
+ if (share->db_type() == TMP_ENGINE_HTON)
{
- if (create_myisam_tmp_table(table,param,select_options))
+ if (create_internal_tmp_table(table,param,select_options))
goto err;
}
if (open_tmp_table(table))
@@ -10306,7 +10325,7 @@ TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list)
table->s= share;
share->blob_field= blob_field;
share->fields= field_count;
- share->blob_ptr_size= mi_portable_sizeof_char_ptr;
+ share->blob_ptr_size= portable_sizeof_char_ptr;
setup_tmp_table_column_bitmaps(table, bitmaps);
/* Create all fields and calculate the total length of record */
@@ -10399,15 +10418,149 @@ static bool open_tmp_table(TABLE *table)
}
-static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
- ulonglong options)
+#ifdef WITH_MARIA_STORAGE_ENGINE
+
+/* Create internal Maria temporary table */
+
+static bool create_internal_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
+ ulonglong options)
+{
+ int error;
+ MARIA_KEYDEF keydef;
+ MARIA_UNIQUEDEF uniquedef;
+ KEY *keyinfo=param->keyinfo;
+ TABLE_SHARE *share= table->s;
+ MARIA_CREATE_INFO create_info;
+ DBUG_ENTER("create_internal_tmp_table");
+
+ if (share->keys)
+ { // Get keys for ni_create
+ bool using_unique_constraint=0;
+ HA_KEYSEG *seg= (HA_KEYSEG*) alloc_root(&table->mem_root,
+ sizeof(*seg) * keyinfo->key_parts);
+ if (!seg)
+ goto err;
+
+ bzero(seg, sizeof(*seg) * keyinfo->key_parts);
+ if (keyinfo->key_length >= table->file->max_key_length() ||
+ keyinfo->key_parts > table->file->max_key_parts() ||
+ share->uniques)
+ {
+ /* Can't create a key; Make a unique constraint instead of a key */
+ share->keys= 0;
+ share->uniques= 1;
+ using_unique_constraint=1;
+ bzero((char*) &uniquedef,sizeof(uniquedef));
+ uniquedef.keysegs=keyinfo->key_parts;
+ uniquedef.seg=seg;
+ uniquedef.null_are_equal=1;
+
+ /* Create extra column for hash value */
+ bzero((uchar*) param->recinfo,sizeof(*param->recinfo));
+ param->recinfo->type= FIELD_CHECK;
+ param->recinfo->length= MARIA_UNIQUE_HASH_LENGTH;
+ param->recinfo++;
+ share->reclength+= MARIA_UNIQUE_HASH_LENGTH;
+ }
+ else
+ {
+ /* Create an unique key */
+ bzero((char*) &keydef,sizeof(keydef));
+ keydef.flag=HA_NOSAME | HA_BINARY_PACK_KEY | HA_PACK_KEY;
+ keydef.keysegs= keyinfo->key_parts;
+ keydef.seg= seg;
+ }
+ for (uint i=0; i < keyinfo->key_parts ; i++,seg++)
+ {
+ Field *field=keyinfo->key_part[i].field;
+ seg->flag= 0;
+ seg->language= field->charset()->number;
+ seg->length= keyinfo->key_part[i].length;
+ seg->start= keyinfo->key_part[i].offset;
+ if (field->flags & BLOB_FLAG)
+ {
+ seg->type=
+ ((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ?
+ HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2);
+ seg->bit_start= (uint8)(field->pack_length() - share->blob_ptr_size);
+ seg->flag= HA_BLOB_PART;
+ seg->length=0; // Whole blob in unique constraint
+ }
+ else
+ {
+ seg->type= keyinfo->key_part[i].type;
+ /* Tell handler if it can do suffic space compression */
+ if (field->real_type() == MYSQL_TYPE_STRING &&
+ keyinfo->key_part[i].length > 4)
+ seg->flag|= HA_SPACE_PACK;
+ }
+ if (!(field->flags & NOT_NULL_FLAG))
+ {
+ seg->null_bit= field->null_bit;
+ seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]);
+ /*
+ We are using a GROUP BY on something that contains NULL
+ In this case we have to tell Maria that two NULL should
+ on INSERT be regarded at the same value
+ */
+ if (!using_unique_constraint)
+ keydef.flag|= HA_NULL_ARE_EQUAL;
+ }
+ }
+ }
+ bzero((char*) &create_info,sizeof(create_info));
+
+ if ((options & (OPTION_BIG_TABLES | SELECT_SMALL_RESULT)) ==
+ OPTION_BIG_TABLES)
+ create_info.data_file_length= ~(ulonglong) 0;
+
+ if ((error= maria_create(share->table_name.str,
+ share->reclength < 64 &&
+ !share->blob_fields ? STATIC_RECORD :
+ BLOCK_RECORD,
+ share->keys, &keydef,
+ (uint) (param->recinfo-param->start_recinfo),
+ param->start_recinfo,
+ share->uniques, &uniquedef,
+ &create_info,
+ HA_CREATE_TMP_TABLE)))
+ {
+ table->file->print_error(error,MYF(0)); /* purecov: inspected */
+ table->db_stat=0;
+ goto err;
+ }
+ status_var_increment(table->in_use->status_var.created_tmp_disk_tables);
+ share->db_record_offset= 1;
+ DBUG_RETURN(0);
+ err:
+ DBUG_RETURN(1);
+}
+
+
+bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
+ TMP_TABLE_PARAM *param,
+ int error,
+ bool ignore_last_dupp_key_error)
+{
+ return create_internal_tmp_table_from_heap2(thd, table, param, error,
+ ignore_last_dupp_key_error,
+ maria_hton,
+ "converting HEAP to Maria");
+}
+
+#else
+
+/* Create internal MyISAM temporary table */
+
+static bool create_internal_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
+ ulonglong options)
{
int error;
MI_KEYDEF keydef;
MI_UNIQUEDEF uniquedef;
KEY *keyinfo=param->keyinfo;
TABLE_SHARE *share= table->s;
- DBUG_ENTER("create_myisam_tmp_table");
+ DBUG_ENTER("create_internal_tmp_table");
if (share->keys)
{ // Get keys for ni_create
@@ -10510,55 +10663,43 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
}
-void
-free_tmp_table(THD *thd, TABLE *entry)
-{
- MEM_ROOT own_root= entry->mem_root;
- const char *save_proc_info;
- DBUG_ENTER("free_tmp_table");
- DBUG_PRINT("enter",("table: %s",entry->alias));
-
- save_proc_info=thd->proc_info;
- thd_proc_info(thd, "removing tmp table");
-
- if (entry->file)
- {
- if (entry->db_stat)
- entry->file->drop_table(entry->s->table_name.str);
- else
- entry->file->delete_table(entry->s->table_name.str);
- delete entry->file;
- }
-
- /* free blobs */
- for (Field **ptr=entry->field ; *ptr ; ptr++)
- (*ptr)->free();
- free_io_cache(entry);
-
- if (entry->temp_pool_slot != MY_BIT_NONE)
- bitmap_lock_clear_bit(&temp_pool, entry->temp_pool_slot);
+/**
+ If a HEAP table gets full, create a MyISAM table and copy all rows to this
+*/
- plugin_unlock(0, entry->s->db_plugin);
+bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
+ TMP_TABLE_PARAM *param,
+ int error,
+ bool ignore_last_dupp_key_error)
+{
+ return create_internal_tmp_table_from_heap2(thd, table, param, error,
+ ignore_last_dupp_key_error,
+ myisam_hton,
+ "converting HEAP to MyISAM");
+}
- free_root(&own_root, MYF(0)); /* the table is allocated in its own root */
- thd_proc_info(thd, save_proc_info);
+#endif /* WITH_MARIA_STORAGE_ENGINE */
- DBUG_VOID_RETURN;
-}
-/**
- If a HEAP table gets full, create a MyISAM table and copy all rows
- to this.
+/*
+ If a HEAP table gets full, create a internal table in MyISAM or Maria
+ and copy all rows to this
*/
-bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
- int error, bool ignore_last_dupp_key_error)
+
+static bool
+create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
+ TMP_TABLE_PARAM *param,
+ int error,
+ bool ignore_last_dupp_key_error,
+ handlerton *hton,
+ const char *proc_info)
{
TABLE new_table;
TABLE_SHARE share;
const char *save_proc_info;
int write_err;
- DBUG_ENTER("create_myisam_from_heap");
+ DBUG_ENTER("create_internal_tmp_table_from_heap2");
if (table->s->db_type() != heap_hton ||
error != HA_ERR_RECORD_FILE_FULL)
@@ -10569,15 +10710,15 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
new_table= *table;
share= *table->s;
new_table.s= &share;
- new_table.s->db_plugin= ha_lock_engine(thd, myisam_hton);
+ new_table.s->db_plugin= ha_lock_engine(thd, hton);
if (!(new_table.file= get_new_handler(&share, &new_table.mem_root,
new_table.s->db_type())))
DBUG_RETURN(1); // End of memory
save_proc_info=thd->proc_info;
- thd_proc_info(thd, "converting HEAP to MyISAM");
+ thd_proc_info(thd, proc_info);
- if (create_myisam_tmp_table(&new_table, param,
+ if (create_internal_tmp_table(&new_table, param,
thd->lex->select_lex.options | thd->options))
goto err2;
if (open_tmp_table(&new_table))
@@ -10640,7 +10781,7 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
table->use_all_columns();
if (save_proc_info)
thd_proc_info(thd, (!strcmp(save_proc_info,"Copying to tmp table") ?
- "Copying to tmp table on disk" : save_proc_info));
+ "Copying to tmp table on disk" : save_proc_info));
DBUG_RETURN(0);
err:
@@ -10658,6 +10799,43 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
}
+void
+free_tmp_table(THD *thd, TABLE *entry)
+{
+ MEM_ROOT own_root= entry->mem_root;
+ const char *save_proc_info;
+ DBUG_ENTER("free_tmp_table");
+ DBUG_PRINT("enter",("table: %s",entry->alias));
+
+ save_proc_info=thd->proc_info;
+ thd_proc_info(thd, "removing tmp table");
+
+ if (entry->file)
+ {
+ if (entry->db_stat)
+ entry->file->drop_table(entry->s->table_name.str);
+ else
+ entry->file->delete_table(entry->s->table_name.str);
+ delete entry->file;
+ }
+
+ /* free blobs */
+ for (Field **ptr=entry->field ; *ptr ; ptr++)
+ (*ptr)->free();
+ free_io_cache(entry);
+
+ if (entry->temp_pool_slot != MY_BIT_NONE)
+ bitmap_lock_clear_bit(&temp_pool, entry->temp_pool_slot);
+
+ plugin_unlock(0, entry->s->db_plugin);
+
+ free_root(&own_root, MYF(0)); /* the table is allocated in its own root */
+ thd_proc_info(thd, save_proc_info);
+
+ DBUG_VOID_RETURN;
+}
+
+
/**
@details
Rows produced by a join sweep may end up in a temporary table or be sent
@@ -12095,7 +12273,7 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
{
if (!table->file->is_fatal_error(error, HA_CHECK_DUP))
goto end;
- if (create_myisam_from_heap(join->thd, table, &join->tmp_table_param,
+ if (create_internal_tmp_table_from_heap(join->thd, table, &join->tmp_table_param,
error,1))
DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error
table->s->uniques=0; // To ensure rows are the same
@@ -12179,7 +12357,7 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
copy_funcs(join->tmp_table_param.items_to_copy);
if ((error=table->file->write_row(table->record[0])))
{
- if (create_myisam_from_heap(join->thd, table, &join->tmp_table_param,
+ if (create_internal_tmp_table_from_heap(join->thd, table, &join->tmp_table_param,
error, 0))
DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error
/* Change method to update rows */
@@ -12274,7 +12452,7 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
if (!join->having || join->having->val_int())
{
int error= table->file->write_row(table->record[0]);
- if (error && create_myisam_from_heap(join->thd, table,
+ if (error && create_internal_tmp_table_from_heap(join->thd, table,
&join->tmp_table_param,
error, 0))
DBUG_RETURN(NESTED_LOOP_ERROR);
@@ -13534,13 +13712,14 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
else if (!found)
{
found=1;
- file->position(record); // Remember position
+ if ((error= file->remember_rnd_pos()))
+ goto err;
}
}
if (!found)
break; // End of file
- /* Restart search on next row */
- error=file->restart_rnd_next(record,file->ref);
+ /* Restart search on saved row */
+ error=file->restart_rnd_next(record);
}
file->extra(HA_EXTRA_NO_CACHE);
@@ -15670,7 +15849,7 @@ int JOIN::rollup_write_data(uint idx, TABLE *table_arg)
copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]);
if ((write_error= table_arg->file->write_row(table_arg->record[0])))
{
- if (create_myisam_from_heap(thd, table_arg, &tmp_table_param,
+ if (create_internal_tmp_table_from_heap(thd, table_arg, &tmp_table_param,
write_error, 0))
return 1;
}
diff --git a/sql/sql_select.h b/sql/sql_select.h
index dbeace2ffa4..687ce575ebd 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -547,7 +547,7 @@ bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
uint elements, List<Item> &fields);
void copy_fields(TMP_TABLE_PARAM *param);
void copy_funcs(Item **func_ptr);
-bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
+bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
int error, bool ignore_last_dupp_error);
uint find_shortest_key(TABLE *table, const key_map *usable_keys);
Field* create_tmp_field_from_field(THD *thd, Field* org_field,
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index 177e84ab0a7..6614672dca6 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -72,6 +72,9 @@ static TYPELIB grant_types = { sizeof(grant_names)/sizeof(char **),
grant_names, NULL};
#endif
+/* Match the values of enum ha_choice */
+static const char *ha_choice_values[] = {"", "0", "1"};
+
static void store_key_options(THD *thd, String *packet, TABLE *table,
KEY *key_info);
@@ -164,15 +167,15 @@ static my_bool show_plugins(THD *thd, plugin_ref plugin,
switch (plug->license) {
case PLUGIN_LICENSE_GPL:
- table->field[9]->store(PLUGIN_LICENSE_GPL_STRING,
+ table->field[9]->store(PLUGIN_LICENSE_GPL_STRING,
strlen(PLUGIN_LICENSE_GPL_STRING), cs);
break;
case PLUGIN_LICENSE_BSD:
- table->field[9]->store(PLUGIN_LICENSE_BSD_STRING,
+ table->field[9]->store(PLUGIN_LICENSE_BSD_STRING,
strlen(PLUGIN_LICENSE_BSD_STRING), cs);
break;
default:
- table->field[9]->store(PLUGIN_LICENSE_PROPRIETARY_STRING,
+ table->field[9]->store(PLUGIN_LICENSE_PROPRIETARY_STRING,
strlen(PLUGIN_LICENSE_PROPRIETARY_STRING), cs);
break;
}
@@ -480,7 +483,7 @@ find_files(THD *thd, List<LEX_STRING> *files, const char *db,
file=dirp->dir_entry+i;
if (dir)
{ /* Return databases */
- if ((file->name[0] == '.' &&
+ if ((file->name[0] == '.' &&
((file->name[1] == '.' && file->name[2] == '\0') ||
file->name[1] == '\0')))
continue; /* . or .. */
@@ -506,7 +509,7 @@ find_files(THD *thd, List<LEX_STRING> *files, const char *db,
file_name_len= filename_to_tablename(file->name, uname, sizeof(uname));
if (wild && wild_compare(uname, wild, 0))
continue;
- if (!(file_name=
+ if (!(file_name=
thd->make_lex_string(file_name, uname, file_name_len, TRUE)))
{
my_dirend(dirp);
@@ -545,7 +548,7 @@ find_files(THD *thd, List<LEX_STRING> *files, const char *db,
continue;
}
#endif
- if (!(file_name=
+ if (!(file_name=
thd->make_lex_string(file_name, uname, file_name_len, TRUE)) ||
files->push_back(file_name))
{
@@ -584,7 +587,7 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list)
/*
Clear all messages with 'error' level status and
- issue a warning with 'warning' level status in
+ issue a warning with 'warning' level status in
case of invalid view and last error is ER_VIEW_INVALID
*/
mysql_reset_errors(thd, true);
@@ -774,7 +777,7 @@ mysqld_list_fields(THD *thd, TABLE_LIST *table_list, const char *wild)
Field **ptr,*field;
for (ptr=table->field ; (field= *ptr); ptr++)
{
- if (!wild || !wild[0] ||
+ if (!wild || !wild[0] ||
!wild_case_compare(system_charset_info, field->field_name,wild))
{
if (table_list->view)
@@ -984,13 +987,13 @@ static bool get_field_default_value(THD *thd, TABLE *table,
bool has_default;
bool has_now_default;
- /*
+ /*
We are using CURRENT_TIMESTAMP instead of NOW because it is
more standard
*/
- has_now_default= table->timestamp_field == field &&
+ has_now_default= table->timestamp_field == field &&
field->unireg_check != Field::TIMESTAMP_UN_FIELD;
-
+
has_default= (field->type() != FIELD_TYPE_BLOB &&
!(field->flags & NO_DEFAULT_VALUE_FLAG) &&
field->unireg_check != Field::NEXT_NUMBER &&
@@ -1045,11 +1048,11 @@ static bool get_field_default_value(THD *thd, TABLE *table,
to tailor the format of the statement. Can be
NULL, in which case only SQL_MODE is considered
when building the statement.
-
+
NOTE
Currently always return 0, but might return error code in the
future.
-
+
RETURN
0 OK
*/
@@ -1131,7 +1134,7 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
field->sql_type(type);
packet->append(type.ptr(), type.length(), system_charset_info);
- if (field->has_charset() &&
+ if (field->has_charset() &&
!(thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40)))
{
if (field->charset() != share->table_charset)
@@ -1139,8 +1142,8 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
packet->append(STRING_WITH_LEN(" CHARACTER SET "));
packet->append(field->charset()->csname);
}
- /*
- For string types dump collation name only if
+ /*
+ For string types dump collation name only if
collation is not primary for the given charset
*/
if (!(field->charset()->state & MY_CS_PRIMARY))
@@ -1167,11 +1170,11 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
packet->append(def_value.ptr(), def_value.length(), system_charset_info);
}
- if (!limited_mysql_mode && table->timestamp_field == field &&
+ if (!limited_mysql_mode && table->timestamp_field == field &&
field->unireg_check != Field::TIMESTAMP_DN_FIELD)
packet->append(STRING_WITH_LEN(" ON UPDATE CURRENT_TIMESTAMP"));
- if (field->unireg_check == Field::NEXT_NUMBER &&
+ if (field->unireg_check == Field::NEXT_NUMBER &&
!(thd->variables.sql_mode & MODE_NO_FIELD_OPTIONS))
packet->append(STRING_WITH_LEN(" AUTO_INCREMENT"));
@@ -1184,6 +1187,8 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
key_info= table->key_info;
bzero((char*) &create_info, sizeof(create_info));
+ /* Allow update_create_info to update row type */
+ create_info.row_type= share->row_type;
file->update_create_info(&create_info);
primary_key= share->primary_key;
@@ -1318,7 +1323,7 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
packet->append(buff, (uint) (end - buff));
}
-
+
if (share->table_charset &&
!(thd->variables.sql_mode & MODE_MYSQL323) &&
!(thd->variables.sql_mode & MODE_MYSQL40))
@@ -1368,19 +1373,25 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
packet->append(STRING_WITH_LEN(" PACK_KEYS=1"));
if (share->db_create_options & HA_OPTION_NO_PACK_KEYS)
packet->append(STRING_WITH_LEN(" PACK_KEYS=0"));
+ /* We use CHECKSUM, instead of TABLE_CHECKSUM, for backward compability */
if (share->db_create_options & HA_OPTION_CHECKSUM)
packet->append(STRING_WITH_LEN(" CHECKSUM=1"));
+ if (share->page_checksum != HA_CHOICE_UNDEF)
+ {
+ packet->append(STRING_WITH_LEN(" PAGE_CHECKSUM="));
+ packet->append(ha_choice_values[(uint) share->page_checksum], 1);
+ }
if (share->db_create_options & HA_OPTION_DELAY_KEY_WRITE)
packet->append(STRING_WITH_LEN(" DELAY_KEY_WRITE=1"));
- if (share->row_type != ROW_TYPE_DEFAULT)
+ if (create_info.row_type != ROW_TYPE_DEFAULT)
{
packet->append(STRING_WITH_LEN(" ROW_FORMAT="));
- packet->append(ha_row_type[(uint) share->row_type]);
+ packet->append(ha_row_type[(uint) create_info.row_type]);
}
if (share->transactional != HA_CHOICE_UNDEF)
{
packet->append(STRING_WITH_LEN(" TRANSACTIONAL="));
- packet->append(share->transactional == HA_CHOICE_YES ? "1" : "0", 1);
+ packet->append(ha_choice_values[(uint) share->transactional], 1);
}
if (table->s->key_block_size)
{
@@ -1483,7 +1494,7 @@ view_store_options(THD *thd, TABLE_LIST *table, String *buff)
/*
Append DEFINER clause to the given buffer.
-
+
SYNOPSIS
append_definer()
thd [in] thread handle
@@ -1512,7 +1523,7 @@ static void append_algorithm(TABLE_LIST *table, String *buff)
/*
Append DEFINER clause to the given buffer.
-
+
SYNOPSIS
append_definer()
thd [in] thread handle
@@ -1672,8 +1683,8 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
"%s:%u", tmp_sctx->host_or_ip, tmp->peer_port);
}
else
- thd_info->host= thd->strdup(tmp_sctx->host_or_ip[0] ?
- tmp_sctx->host_or_ip :
+ thd_info->host= thd->strdup(tmp_sctx->host_or_ip[0] ?
+ tmp_sctx->host_or_ip :
tmp_sctx->host ? tmp_sctx->host : "");
if ((thd_info->db=tmp->db)) // Safe test
thd_info->db=thd->strdup(thd_info->db);
@@ -1702,7 +1713,7 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
thd_info->query=0;
if (tmp->query)
{
- /*
+ /*
query_length is always set to 0 when we set query = NULL; see
the comment in sql_class.h why this prevents crashes in possible
races with query_length
@@ -1949,7 +1960,7 @@ void reset_status_vars()
/* Note that SHOW_LONG_NOFLUSH variables are not reset */
if (ptr->type == SHOW_LONG)
*(ulong*) ptr->value= 0;
- }
+ }
}
/*
@@ -2195,14 +2206,14 @@ void calc_sum_of_all_status(STATUS_VAR *to)
I_List_iterator<THD> it(threads);
THD *tmp;
-
+
/* Get global values as base */
*to= global_status_var;
-
+
/* Add to this status from existing threads */
while ((tmp= it++))
add_to_status(to, &tmp->status_var);
-
+
VOID(pthread_mutex_unlock(&LOCK_thread_count));
DBUG_VOID_RETURN;
}
@@ -2237,7 +2248,7 @@ bool schema_table_store_record(THD *thd, TABLE *table)
int error;
if ((error= table->file->ha_write_row(table->record[0])))
{
- if (create_myisam_from_heap(thd, table,
+ if (create_internal_tmp_table_from_heap(thd, table,
table->pos_in_table_list->schema_table_param,
error, 0))
return 1;
@@ -2259,17 +2270,17 @@ int make_table_list(THD *thd, SELECT_LEX *sel,
/**
- @brief Get lookup value from the part of 'WHERE' condition
+ @brief Get lookup value from the part of 'WHERE' condition
- @details This function gets lookup value from
- the part of 'WHERE' condition if it's possible and
+ @details This function gets lookup value from
+ the part of 'WHERE' condition if it's possible and
fill appropriate lookup_field_vals struct field
with this value.
@param[in] thd thread handler
@param[in] item_func part of WHERE condition
@param[in] table I_S table
- @param[in, out] lookup_field_vals Struct which holds lookup values
+ @param[in, out] lookup_field_vals Struct which holds lookup values
@return
0 success
@@ -2277,7 +2288,7 @@ int make_table_list(THD *thd, SELECT_LEX *sel,
*/
bool get_lookup_value(THD *thd, Item_func *item_func,
- TABLE_LIST *table,
+ TABLE_LIST *table,
LOOKUP_FIELD_VALUES *lookup_field_vals)
{
ST_SCHEMA_TABLE *schema_table= table->schema_table;
@@ -2343,16 +2354,16 @@ bool get_lookup_value(THD *thd, Item_func *item_func,
/**
- @brief Calculates lookup values from 'WHERE' condition
+ @brief Calculates lookup values from 'WHERE' condition
@details This function calculates lookup value(database name, table name)
- from 'WHERE' condition if it's possible and
+ from 'WHERE' condition if it's possible and
fill lookup_field_vals struct fields with these values.
@param[in] thd thread handler
@param[in] cond WHERE condition
@param[in] table I_S table
- @param[in, out] lookup_field_vals Struct which holds lookup values
+ @param[in, out] lookup_field_vals Struct which holds lookup values
@return
0 success
@@ -2501,7 +2512,7 @@ static COND * make_cond_for_info_schema(COND *cond, TABLE_LIST *table)
@param[in] thd thread handler
@param[in] cond WHERE condition
@param[in] tables I_S table
- @param[in, out] lookup_field_values Struct which holds lookup values
+ @param[in, out] lookup_field_values Struct which holds lookup values
@return
0 success
@@ -2563,7 +2574,7 @@ enum enum_schema_tables get_schema_table_idx(ST_SCHEMA_TABLE *schema_table)
idx_field_vals idx_field_vals->db_name contains db name or
wild string
with_i_schema returns 1 if we added 'IS' name to list
- otherwise returns 0
+ otherwise returns 0
RETURN
zero success
@@ -2587,7 +2598,7 @@ int make_db_list(THD *thd, List<LEX_STRING> *files,
LIKE clause (see also get_index_field_values() function)
*/
if (!lookup_field_vals->db_value.str ||
- !wild_case_compare(system_charset_info,
+ !wild_case_compare(system_charset_info,
INFORMATION_SCHEMA_NAME.str,
lookup_field_vals->db_value.str))
{
@@ -2631,7 +2642,7 @@ int make_db_list(THD *thd, List<LEX_STRING> *files,
}
-struct st_add_schema_table
+struct st_add_schema_table
{
List<LEX_STRING> *files;
const char *wild;
@@ -2695,7 +2706,7 @@ int schema_tables_add(THD *thd, List<LEX_STRING> *files, const char *wild)
else if (wild_compare(tmp_schema_table->table_name, wild, 0))
continue;
}
- if ((file_name=
+ if ((file_name=
thd->make_lex_string(file_name, tmp_schema_table->table_name,
strlen(tmp_schema_table->table_name), TRUE)) &&
!files->push_back(file_name))
@@ -2751,7 +2762,7 @@ make_table_name_list(THD *thd, List<LEX_STRING> *table_names, LEX *lex,
}
}
else
- {
+ {
if (table_names->push_back(&lookup_field_vals->table_value))
return 1;
/*
@@ -2812,7 +2823,7 @@ make_table_name_list(THD *thd, List<LEX_STRING> *table_names, LEX *lex,
@retval 1 error
*/
-static int
+static int
fill_schema_show_cols_or_idxs(THD *thd, TABLE_LIST *tables,
ST_SCHEMA_TABLE *schema_table,
Open_tables_state *open_tables_state_backup)
@@ -2839,7 +2850,7 @@ fill_schema_show_cols_or_idxs(THD *thd, TABLE_LIST *tables,
Let us set fake sql_command so views won't try to merge
themselves into main statement. If we don't do this,
SELECT * from information_schema.xxxx will cause problems.
- SQLCOM_SHOW_FIELDS is used because it satisfies 'only_view_structure()'
+ SQLCOM_SHOW_FIELDS is used because it satisfies 'only_view_structure()'
*/
lex->sql_command= SQLCOM_SHOW_FIELDS;
res= open_normal_and_derived_tables(thd, show_table_list,
@@ -2849,11 +2860,11 @@ fill_schema_show_cols_or_idxs(THD *thd, TABLE_LIST *tables,
get_all_tables() returns 1 on failure and 0 on success thus
return only these and not the result code of ::process_table()
- We should use show_table_list->alias instead of
+ We should use show_table_list->alias instead of
show_table_list->table_name because table_name
could be changed during opening of I_S tables. It's safe
- to use alias because alias contains original table name
- in this case(this part of code is used only for
+ to use alias because alias contains original table name
+ in this case(this part of code is used only for
'show columns' & 'show statistics' commands).
*/
table_name= thd->make_lex_string(&tmp_lex_string1, show_table_list->alias,
@@ -2863,7 +2874,7 @@ fill_schema_show_cols_or_idxs(THD *thd, TABLE_LIST *tables,
show_table_list->db_length, FALSE);
else
db_name= &show_table_list->view_db;
-
+
error= test(schema_table->process_table(thd, show_table_list,
table, res, db_name,
@@ -2901,7 +2912,7 @@ static int fill_schema_table_names(THD *thd, TABLE *table,
{
enum legacy_db_type not_used;
char path[FN_REFLEN];
- (void) build_table_filename(path, sizeof(path), db_name->str,
+ (void) build_table_filename(path, sizeof(path), db_name->str,
table_name->str, reg_ext, 0);
switch (mysql_frm_type(thd, path, &not_used)) {
case FRMTYPE_ERROR:
@@ -2989,7 +3000,7 @@ static uint get_table_open_method(TABLE_LIST *tables,
*/
static int fill_schema_table_from_frm(THD *thd,TABLE *table,
- ST_SCHEMA_TABLE *schema_table,
+ ST_SCHEMA_TABLE *schema_table,
LEX_STRING *db_name,
LEX_STRING *table_name,
enum enum_schema_tables schema_table_idx)
@@ -3016,7 +3027,7 @@ static int fill_schema_table_from_frm(THD *thd,TABLE *table,
res= 0;
goto err;
}
-
+
if (share->is_view)
{
if (schema_table->i_s_requested_object & OPEN_TABLE_ONLY)
@@ -3028,7 +3039,7 @@ static int fill_schema_table_from_frm(THD *thd,TABLE *table,
else if (schema_table->i_s_requested_object & OPEN_VIEW_FULL)
{
/*
- tell get_all_tables() to fall back to
+ tell get_all_tables() to fall back to
open_normal_and_derived_tables()
*/
res= 1;
@@ -3097,7 +3108,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
List<LEX_STRING> db_names;
List_iterator_fast<LEX_STRING> it(db_names);
COND *partial_cond= 0;
- uint derived_tables= lex->derived_tables;
+ uint derived_tables= lex->derived_tables;
int error= 1;
Open_tables_state open_tables_state_backup;
bool save_view_prepare_mode= lex->view_prepare_mode;
@@ -3118,7 +3129,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
*/
thd->reset_n_backup_open_tables_state(&open_tables_state_backup);
- /*
+ /*
this branch processes SHOW FIELDS, SHOW INDEXES commands.
see sql_parse.cc, prepare_schema_table() function where
this values are initialized
@@ -3142,7 +3153,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
if (!lookup_field_vals.wild_db_value && !lookup_field_vals.wild_table_value)
{
- /*
+ /*
if lookup value is empty string then
it's impossible table name or db name
*/
@@ -3160,7 +3171,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
!lookup_field_vals.wild_db_value)
tables->has_db_lookup_value= TRUE;
if (lookup_field_vals.table_value.length &&
- !lookup_field_vals.wild_table_value)
+ !lookup_field_vals.wild_table_value)
tables->has_table_lookup_value= TRUE;
if (tables->has_db_lookup_value && tables->has_table_lookup_value)
@@ -3184,7 +3195,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
while ((db_name= it++))
{
#ifndef NO_EMBEDDED_ACCESS_CHECKS
- if (!check_access(thd,SELECT_ACL, db_name->str,
+ if (!check_access(thd,SELECT_ACL, db_name->str,
&thd->col_access, 0, 1, with_i_schema) ||
sctx->master_access & (DB_ACLS | SHOW_DB_ACL) ||
acl_get(sctx->host, sctx->ip, sctx->priv_user, db_name->str, 0) ||
@@ -3214,7 +3225,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
{
/*
If table is I_S.tables and open_table_method is 0 (eg SKIP_OPEN)
- we can skip table opening and we don't have lookup value for
+ we can skip table opening and we don't have lookup value for
table name or lookup value is wild string(table name list is
already created by make_table_name_list() function).
*/
@@ -3236,7 +3247,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
}
else
{
- if (!(table_open_method & ~OPEN_FRM_ONLY) &&
+ if (!(table_open_method & ~OPEN_FRM_ONLY) &&
!with_i_schema)
{
if (!fill_schema_table_from_frm(thd, table, schema_table, db_name,
@@ -3290,10 +3301,10 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
else
{
/*
- We should use show_table_list->alias instead of
+ We should use show_table_list->alias instead of
show_table_list->table_name because table_name
could be changed during opening of I_S tables. It's safe
- to use alias because alias contains original table name
+ to use alias because alias contains original table name
in this case.
*/
thd->make_lex_string(&tmp_lex_string, show_table_list->alias,
@@ -3494,12 +3505,16 @@ static int get_schema_tables_record(THD *thd, TABLE_LIST *tables,
ptr=strmov(ptr," pack_keys=1");
if (share->db_create_options & HA_OPTION_NO_PACK_KEYS)
ptr=strmov(ptr," pack_keys=0");
+ /* We use CHECKSUM, instead of TABLE_CHECKSUM, for backward compability */
if (share->db_create_options & HA_OPTION_CHECKSUM)
ptr=strmov(ptr," checksum=1");
+ if (share->page_checksum != HA_CHOICE_UNDEF)
+ ptr= strxmov(ptr, " page_checksum=",
+ ha_choice_values[(uint) share->page_checksum], NullS);
if (share->db_create_options & HA_OPTION_DELAY_KEY_WRITE)
ptr=strmov(ptr," delay_key_write=1");
if (share->row_type != ROW_TYPE_DEFAULT)
- ptr=strxmov(ptr, " row_format=",
+ ptr=strxmov(ptr, " row_format=",
ha_row_type[(uint) share->row_type],
NullS);
if (share->transactional != HA_CHOICE_UNDEF)
@@ -3509,13 +3524,16 @@ static int get_schema_tables_record(THD *thd, TABLE_LIST *tables,
NullS);
}
#ifdef WITH_PARTITION_STORAGE_ENGINE
- if (show_table->s->db_type() == partition_hton &&
- show_table->part_info != NULL &&
+ if (show_table->s->db_type() == partition_hton &&
+ show_table->part_info != NULL &&
show_table->part_info->no_parts > 0)
ptr= strmov(ptr, " partitioned");
#endif
+ if (share->transactional != HA_CHOICE_UNDEF)
+ ptr= strxmov(ptr, " transactional=",
+ ha_choice_values[(uint) share->transactional], NullS);
table->field[19]->store(option_buff+1,
- (ptr == option_buff ? 0 :
+ (ptr == option_buff ? 0 :
(uint) (ptr-option_buff)-1), cs);
tmp_buff= (share->table_charset ?
@@ -3630,7 +3648,7 @@ static int get_schema_column_record(THD *thd, TABLE_LIST *tables,
/*
I.e. we are in SELECT FROM INFORMATION_SCHEMA.COLUMS
rather than in SHOW COLUMNS
- */
+ */
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
thd->main_da.sql_errno(), thd->main_da.message());
thd->clear_error();
@@ -3668,7 +3686,7 @@ static int get_schema_column_record(THD *thd, TABLE_LIST *tables,
uint col_access;
check_access(thd,SELECT_ACL | EXTRA_ACL, db_name->str,
&tables->grant.privilege, 0, 0, test(tables->schema_table));
- col_access= get_column_grant(thd, &tables->grant,
+ col_access= get_column_grant(thd, &tables->grant,
db_name->str, table_name->str,
field->field_name) & COL_ACLS;
if (!tables->schema_table && !col_access)
@@ -3691,7 +3709,7 @@ static int get_schema_column_record(THD *thd, TABLE_LIST *tables,
cs);
table->field[4]->store((longlong) count, TRUE);
field->sql_type(type);
- table->field[14]->store(type.ptr(), type.length(), cs);
+ table->field[14]->store(type.ptr(), type.length(), cs);
tmp_buff= strchr(type.ptr(), '(');
table->field[7]->store(type.ptr(),
(tmp_buff ? tmp_buff - type.ptr() :
@@ -3713,7 +3731,7 @@ static int get_schema_column_record(THD *thd, TABLE_LIST *tables,
uint32 octet_max_length= field->max_display_length();
if (is_blob && octet_max_length != (uint32) 4294967295U)
octet_max_length /= field->charset()->mbmaxlen;
- longlong char_max_len= is_blob ?
+ longlong char_max_len= is_blob ?
(longlong) octet_max_length / field->charset()->mbminlen :
(longlong) octet_max_length / field->charset()->mbmaxlen;
table->field[8]->store(char_max_len, TRUE);
@@ -3746,7 +3764,7 @@ static int get_schema_column_record(THD *thd, TABLE_LIST *tables,
field_length= field->max_display_length();
decimals= -1; // return NULL
break;
- case MYSQL_TYPE_FLOAT:
+ case MYSQL_TYPE_FLOAT:
case MYSQL_TYPE_DOUBLE:
field_length= field->field_length;
if (decimals == NOT_FIXED_DEC)
@@ -3812,7 +3830,7 @@ int fill_schema_charsets(THD *thd, TABLE_LIST *tables, COND *cond)
for (cs= all_charsets ; cs < all_charsets+255 ; cs++)
{
CHARSET_INFO *tmp_cs= cs[0];
- if (tmp_cs && (tmp_cs->state & MY_CS_PRIMARY) &&
+ if (tmp_cs && (tmp_cs->state & MY_CS_PRIMARY) &&
(tmp_cs->state & MY_CS_AVAILABLE) &&
!(tmp_cs->state & MY_CS_HIDDEN) &&
!(wild && wild[0] &&
@@ -3900,7 +3918,7 @@ int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond)
for (cl= all_charsets; cl < all_charsets+255 ;cl ++)
{
CHARSET_INFO *tmp_cl= cl[0];
- if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) ||
+ if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) ||
!my_charset_same(tmp_cs, tmp_cl))
continue;
if (!(wild && wild[0] &&
@@ -3934,13 +3952,13 @@ int fill_schema_coll_charset_app(THD *thd, TABLE_LIST *tables, COND *cond)
{
CHARSET_INFO **cl;
CHARSET_INFO *tmp_cs= cs[0];
- if (!tmp_cs || !(tmp_cs->state & MY_CS_AVAILABLE) ||
+ if (!tmp_cs || !(tmp_cs->state & MY_CS_AVAILABLE) ||
!(tmp_cs->state & MY_CS_PRIMARY))
continue;
for (cl= all_charsets; cl < all_charsets+255 ;cl ++)
{
CHARSET_INFO *tmp_cl= cl[0];
- if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) ||
+ if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) ||
!my_charset_same(tmp_cs,tmp_cl))
continue;
restore_record(table, s->default_values);
@@ -4006,7 +4024,7 @@ bool store_schema_proc(THD *thd, TABLE *table, TABLE *proc_table,
table->field[10]->store(STRING_WITH_LEN("SQL"), cs);
get_field(thd->mem_root, proc_table->field[6], &tmp_string);
table->field[11]->store(tmp_string.ptr(), tmp_string.length(), cs);
- table->field[12]->store(sp_data_access_name[enum_idx].str,
+ table->field[12]->store(sp_data_access_name[enum_idx].str,
sp_data_access_name[enum_idx].length , cs);
get_field(thd->mem_root, proc_table->field[7], &tmp_string);
table->field[14]->store(tmp_string.ptr(), tmp_string.length(), cs);
@@ -4299,10 +4317,10 @@ static int get_schema_views_record(THD *thd, TABLE_LIST *tables,
if (schema_table_store_record(thd, table))
DBUG_RETURN(1);
if (res && thd->is_error())
- push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
thd->main_da.sql_errno(), thd->main_da.message());
}
- if (res)
+ if (res)
thd->clear_error();
DBUG_RETURN(0);
}
@@ -4343,7 +4361,7 @@ static int get_schema_constraints_record(THD *thd, TABLE_LIST *tables,
TABLE *show_table= tables->table;
KEY *key_info=show_table->key_info;
uint primary_key= show_table->s->primary_key;
- show_table->file->info(HA_STATUS_VARIABLE |
+ show_table->file->info(HA_STATUS_VARIABLE |
HA_STATUS_NO_LOCK |
HA_STATUS_TIME);
for (uint i=0 ; i < show_table->s->keys ; i++, key_info++)
@@ -4372,7 +4390,7 @@ static int get_schema_constraints_record(THD *thd, TABLE_LIST *tables,
List_iterator_fast<FOREIGN_KEY_INFO> it(f_key_list);
while ((f_key_info=it++))
{
- if (store_constraints(thd, table, db_name, table_name,
+ if (store_constraints(thd, table, db_name, table_name,
f_key_info->forein_id->str,
strlen(f_key_info->forein_id->str),
"FOREIGN KEY", 11))
@@ -4531,7 +4549,7 @@ static int get_schema_key_column_usage_record(THD *thd,
TABLE *show_table= tables->table;
KEY *key_info=show_table->key_info;
uint primary_key= show_table->s->primary_key;
- show_table->file->info(HA_STATUS_VARIABLE |
+ show_table->file->info(HA_STATUS_VARIABLE |
HA_STATUS_NO_LOCK |
HA_STATUS_TIME);
for (uint i=0 ; i < show_table->s->keys ; i++, key_info++)
@@ -4548,8 +4566,8 @@ static int get_schema_key_column_usage_record(THD *thd,
restore_record(table, s->default_values);
store_key_column_usage(table, db_name, table_name,
key_info->name,
- strlen(key_info->name),
- key_part->field->field_name,
+ strlen(key_info->name),
+ key_part->field->field_name,
strlen(key_part->field->field_name),
(longlong) f_idx);
if (schema_table_store_record(thd, table))
@@ -4585,7 +4603,7 @@ static int get_schema_key_column_usage_record(THD *thd,
system_charset_info);
table->field[9]->set_notnull();
table->field[10]->store(f_key_info->referenced_table->str,
- f_key_info->referenced_table->length,
+ f_key_info->referenced_table->length,
system_charset_info);
table->field[10]->set_notnull();
table->field[11]->store(r_info->str, r_info->length,
@@ -4753,7 +4771,7 @@ static int get_schema_partitions_record(THD *thd, TABLE_LIST *tables,
tmp_res.append(partition_keywords[PKW_KEY].str,
partition_keywords[PKW_KEY].length);
else
- tmp_res.append(partition_keywords[PKW_HASH].str,
+ tmp_res.append(partition_keywords[PKW_HASH].str,
partition_keywords[PKW_HASH].length);
table->field[7]->store(tmp_res.ptr(), tmp_res.length(), cs);
break;
@@ -4789,7 +4807,7 @@ static int get_schema_partitions_record(THD *thd, TABLE_LIST *tables,
tmp_res.append(partition_keywords[PKW_KEY].str,
partition_keywords[PKW_KEY].length);
else
- tmp_res.append(partition_keywords[PKW_HASH].str,
+ tmp_res.append(partition_keywords[PKW_HASH].str,
partition_keywords[PKW_HASH].length);
table->field[8]->store(tmp_res.ptr(), tmp_res.length(), cs);
table->field[8]->set_notnull();
@@ -4868,7 +4886,7 @@ static int get_schema_partitions_record(THD *thd, TABLE_LIST *tables,
/* SUBPARTITION_ORDINAL_POSITION */
table->field[6]->store((longlong) ++subpart_pos, TRUE);
table->field[6]->set_notnull();
-
+
store_schema_partitions_record(thd, table, show_table, subpart_elem,
file, part_id);
part_id++;
@@ -5086,7 +5104,7 @@ copy_event_to_schema_table(THD *thd, TABLE *sch_table, TABLE *event_table)
else
sch_table->field[ISE_ON_COMPLETION]->
store(STRING_WITH_LEN("PRESERVE"), scs);
-
+
number_to_datetime(et.created, &time, 0, &not_used);
DBUG_ASSERT(not_used==0);
sch_table->field[ISE_CREATED]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
@@ -5207,7 +5225,7 @@ int fill_status(THD *thd, TABLE_LIST *tables, COND *cond)
tmp1= &tmp;
}
else
- {
+ {
option_type= OPT_SESSION;
tmp1= &thd->status_var;
}
@@ -5262,7 +5280,7 @@ get_referential_constraints_record(THD *thd, TABLE_LIST *tables,
{
List<FOREIGN_KEY_INFO> f_key_list;
TABLE *show_table= tables->table;
- show_table->file->info(HA_STATUS_VARIABLE |
+ show_table->file->info(HA_STATUS_VARIABLE |
HA_STATUS_NO_LOCK |
HA_STATUS_TIME);
@@ -5276,16 +5294,16 @@ get_referential_constraints_record(THD *thd, TABLE_LIST *tables,
table->field[9]->store(table_name->str, table_name->length, cs);
table->field[2]->store(f_key_info->forein_id->str,
f_key_info->forein_id->length, cs);
- table->field[4]->store(f_key_info->referenced_db->str,
+ table->field[4]->store(f_key_info->referenced_db->str,
f_key_info->referenced_db->length, cs);
- table->field[10]->store(f_key_info->referenced_table->str,
+ table->field[10]->store(f_key_info->referenced_table->str,
f_key_info->referenced_table->length, cs);
- table->field[5]->store(f_key_info->referenced_key_name->str,
+ table->field[5]->store(f_key_info->referenced_key_name->str,
f_key_info->referenced_key_name->length, cs);
table->field[6]->store(STRING_WITH_LEN("NONE"), cs);
- table->field[7]->store(f_key_info->update_method->str,
+ table->field[7]->store(f_key_info->update_method->str,
f_key_info->update_method->length, cs);
- table->field[8]->store(f_key_info->delete_method->str,
+ table->field[8]->store(f_key_info->delete_method->str,
f_key_info->delete_method->length, cs);
if (schema_table_store_record(thd, table))
DBUG_RETURN(1);
@@ -5294,7 +5312,7 @@ get_referential_constraints_record(THD *thd, TABLE_LIST *tables,
DBUG_RETURN(0);
}
-struct schema_table_ref
+struct schema_table_ref
{
const char *table_name;
ST_SCHEMA_TABLE *schema_table;
@@ -5361,7 +5379,7 @@ ST_SCHEMA_TABLE *find_schema_table(THD *thd, const char* table_name)
}
schema_table_a.table_name= table_name;
- if (plugin_foreach(thd, find_schema_table_in_plugin,
+ if (plugin_foreach(thd, find_schema_table_in_plugin,
MYSQL_INFORMATION_SCHEMA_PLUGIN, &schema_table_a))
DBUG_RETURN(schema_table_a.schema_table);
@@ -5435,7 +5453,7 @@ TABLE *create_schema_table(THD *thd, TABLE_LIST *table_list)
break;
case MYSQL_TYPE_FLOAT:
case MYSQL_TYPE_DOUBLE:
- if ((item= new Item_float(fields_info->field_name, 0.0, NOT_FIXED_DEC,
+ if ((item= new Item_float(fields_info->field_name, 0.0, NOT_FIXED_DEC,
fields_info->field_length)) == NULL)
DBUG_RETURN(NULL);
break;
@@ -5488,7 +5506,7 @@ TABLE *create_schema_table(THD *thd, TABLE_LIST *table_list)
tmp_table_param->schema_table= 1;
SELECT_LEX *select_lex= thd->lex->current_select;
if (!(table= create_tmp_table(thd, tmp_table_param,
- field_list, (ORDER*) 0, 0, 0,
+ field_list, (ORDER*) 0, 0, 0,
(select_lex->options | thd->options |
TMP_TABLE_ALL_COLUMNS),
HA_POS_ERROR, table_list->alias)))
@@ -5833,7 +5851,7 @@ bool get_schema_tables_result(JOIN *join,
thd->no_warnings_for_error= 1;
for (JOIN_TAB *tab= join->join_tab; tab < tmp_join_tab; tab++)
- {
+ {
if (!tab->table || !tab->table->pos_in_table_list)
break;
@@ -5952,17 +5970,17 @@ ST_FIELD_INFO tables_fields_info[]=
{"ROW_FORMAT", 10, MYSQL_TYPE_STRING, 0, 1, "Row_format", OPEN_FULL_TABLE},
{"TABLE_ROWS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Rows", OPEN_FULL_TABLE},
- {"AVG_ROW_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+ {"AVG_ROW_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Avg_row_length", OPEN_FULL_TABLE},
- {"DATA_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+ {"DATA_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Data_length", OPEN_FULL_TABLE},
{"MAX_DATA_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Max_data_length", OPEN_FULL_TABLE},
- {"INDEX_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+ {"INDEX_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Index_length", OPEN_FULL_TABLE},
{"DATA_FREE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Data_free", OPEN_FULL_TABLE},
- {"AUTO_INCREMENT", MY_INT64_NUM_DECIMAL_DIGITS , MYSQL_TYPE_LONGLONG, 0,
+ {"AUTO_INCREMENT", MY_INT64_NUM_DECIMAL_DIGITS , MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Auto_increment", OPEN_FULL_TABLE},
{"CREATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Create_time", OPEN_FULL_TABLE},
{"UPDATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Update_time", OPEN_FULL_TABLE},
@@ -6427,9 +6445,9 @@ ST_FIELD_INFO files_fields_info[]=
{"EXTENT_SIZE", 4, MYSQL_TYPE_LONGLONG, 0, 0, 0, SKIP_OPEN_TABLE},
{"INITIAL_SIZE", 21, MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE},
- {"MAXIMUM_SIZE", 21, MYSQL_TYPE_LONGLONG, 0,
+ {"MAXIMUM_SIZE", 21, MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE},
- {"AUTOEXTEND_SIZE", 21, MYSQL_TYPE_LONGLONG, 0,
+ {"AUTOEXTEND_SIZE", 21, MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE},
{"CREATION_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, 0, SKIP_OPEN_TABLE},
{"LAST_UPDATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, 0, SKIP_OPEN_TABLE},
@@ -6441,20 +6459,20 @@ ST_FIELD_INFO files_fields_info[]=
{"ROW_FORMAT", 10, MYSQL_TYPE_STRING, 0, 1, "Row_format", SKIP_OPEN_TABLE},
{"TABLE_ROWS", 21 , MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Rows", SKIP_OPEN_TABLE},
- {"AVG_ROW_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
+ {"AVG_ROW_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Avg_row_length", SKIP_OPEN_TABLE},
- {"DATA_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
+ {"DATA_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Data_length", SKIP_OPEN_TABLE},
- {"MAX_DATA_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
+ {"MAX_DATA_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Max_data_length", SKIP_OPEN_TABLE},
- {"INDEX_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
+ {"INDEX_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Index_length", SKIP_OPEN_TABLE},
- {"DATA_FREE", 21 , MYSQL_TYPE_LONGLONG, 0,
+ {"DATA_FREE", 21 , MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Data_free", SKIP_OPEN_TABLE},
{"CREATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Create_time", SKIP_OPEN_TABLE},
{"UPDATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Update_time", SKIP_OPEN_TABLE},
{"CHECK_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Check_time", SKIP_OPEN_TABLE},
- {"CHECKSUM", 21 , MYSQL_TYPE_LONGLONG, 0,
+ {"CHECKSUM", 21 , MYSQL_TYPE_LONGLONG, 0,
(MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Checksum", SKIP_OPEN_TABLE},
{"STATUS", 20, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE},
{"EXTRA", 255, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE},
@@ -6503,13 +6521,13 @@ ST_FIELD_INFO referential_constraints_fields_info[]=
ST_SCHEMA_TABLE schema_tables[]=
{
- {"CHARACTER_SETS", charsets_fields_info, create_schema_table,
+ {"CHARACTER_SETS", charsets_fields_info, create_schema_table,
fill_schema_charsets, make_character_sets_old_format, 0, -1, -1, 0, 0},
- {"COLLATIONS", collation_fields_info, create_schema_table,
+ {"COLLATIONS", collation_fields_info, create_schema_table,
fill_schema_collation, make_old_format, 0, -1, -1, 0, 0},
{"COLLATION_CHARACTER_SET_APPLICABILITY", coll_charset_app_fields_info,
create_schema_table, fill_schema_coll_charset_app, 0, 0, -1, -1, 0, 0},
- {"COLUMNS", columns_fields_info, create_schema_table,
+ {"COLUMNS", columns_fields_info, create_schema_table,
get_all_tables, make_columns_old_format, get_schema_column_record, 1, 2, 0,
OPTIMIZE_I_S_TABLE|OPEN_VIEW_FULL},
{"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table,
@@ -6541,7 +6559,7 @@ ST_SCHEMA_TABLE schema_tables[]=
{"REFERENTIAL_CONSTRAINTS", referential_constraints_fields_info,
create_schema_table, get_all_tables, 0, get_referential_constraints_record,
1, 9, 0, OPEN_TABLE_ONLY},
- {"ROUTINES", proc_fields_info, create_schema_table,
+ {"ROUTINES", proc_fields_info, create_schema_table,
fill_schema_proc, make_proc_old_format, 0, -1, -1, 0, 0},
{"SCHEMATA", schema_fields_info, create_schema_table,
fill_schema_schemata, make_schemata_old_format, 0, 1, -1, 0, 0},
@@ -6551,12 +6569,12 @@ ST_SCHEMA_TABLE schema_tables[]=
fill_status, make_old_format, 0, -1, -1, 0, 0},
{"SESSION_VARIABLES", variables_fields_info, create_schema_table,
fill_variables, make_old_format, 0, -1, -1, 0, 0},
- {"STATISTICS", stat_fields_info, create_schema_table,
+ {"STATISTICS", stat_fields_info, create_schema_table,
get_all_tables, make_old_format, get_schema_stat_record, 1, 2, 0,
OPEN_TABLE_ONLY|OPTIMIZE_I_S_TABLE},
- {"STATUS", variables_fields_info, create_schema_table, fill_status,
+ {"STATUS", variables_fields_info, create_schema_table, fill_status,
make_old_format, 0, -1, -1, 1, 0},
- {"TABLES", tables_fields_info, create_schema_table,
+ {"TABLES", tables_fields_info, create_schema_table,
get_all_tables, make_old_format, get_schema_tables_record, 1, 2, 0,
OPTIMIZE_I_S_TABLE},
{"TABLE_CONSTRAINTS", table_constraints_fields_info, create_schema_table,
@@ -6568,11 +6586,11 @@ ST_SCHEMA_TABLE schema_tables[]=
{"TRIGGERS", triggers_fields_info, create_schema_table,
get_all_tables, make_old_format, get_schema_triggers_record, 5, 6, 0,
OPEN_TABLE_ONLY},
- {"USER_PRIVILEGES", user_privileges_fields_info, create_schema_table,
+ {"USER_PRIVILEGES", user_privileges_fields_info, create_schema_table,
fill_schema_user_privileges, 0, 0, -1, -1, 0, 0},
{"VARIABLES", variables_fields_info, create_schema_table, fill_variables,
make_old_format, 0, -1, -1, 1, 0},
- {"VIEWS", view_fields_info, create_schema_table,
+ {"VIEWS", view_fields_info, create_schema_table,
get_all_tables, 0, get_schema_views_record, 1, 2, 0,
OPEN_VIEW_ONLY|OPTIMIZE_I_S_TABLE},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
@@ -6598,8 +6616,8 @@ int initialize_schema_table(st_plugin_int *plugin)
{
schema_table->create_table= create_schema_table;
schema_table->old_format= make_old_format;
- schema_table->idx_field1= -1,
- schema_table->idx_field2= -1;
+ schema_table->idx_field1= -1,
+ schema_table->idx_field2= -1;
/* Make the name available to the init() function. */
schema_table->table_name= plugin->name.str;
@@ -6610,7 +6628,7 @@ int initialize_schema_table(st_plugin_int *plugin)
plugin->name.str);
goto err;
}
-
+
/* Make sure the plugin name is not set inside the init() function. */
schema_table->table_name= plugin->name.str;
}
diff --git a/sql/sql_sort.h b/sql/sql_sort.h
index 1e9322f7f5b..f54b085eeda 100644
--- a/sql/sql_sort.h
+++ b/sql/sql_sort.h
@@ -34,7 +34,9 @@
the callback function 'unpack_addon_fields'.
*/
-typedef struct st_sort_addon_field { /* Sort addon packed field */
+typedef struct st_sort_addon_field
+{
+ /* Sort addon packed field */
Field *field; /* Original field */
uint offset; /* Offset from the last sorted field */
uint null_offset; /* Offset to to null bit from the last sorted field */
@@ -42,14 +44,6 @@ typedef struct st_sort_addon_field { /* Sort addon packed field */
uint8 null_bit; /* Null bit mask for the field */
} SORT_ADDON_FIELD;
-typedef struct st_buffpek { /* Struktur om sorteringsbuffrarna */
- my_off_t file_pos; /* Where we are in the sort file */
- uchar *base,*key; /* key pointers */
- ha_rows count; /* Number of rows in table */
- ulong mem_count; /* numbers of keys in memory */
- ulong max_keys; /* Max keys in buffert */
-} BUFFPEK;
-
struct BUFFPEK_COMPARE_CONTEXT
{
qsort_cmp2 key_compare;
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 5bd7d446cbd..8e3e1b7dd8a 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -3203,8 +3203,9 @@ bool mysql_create_table_no_lock(THD *thd,
if (check_engine(thd, table_name, create_info))
DBUG_RETURN(TRUE);
db_options= create_info->table_options;
- if (create_info->row_type == ROW_TYPE_DYNAMIC)
- db_options|=HA_OPTION_PACK_RECORD;
+ if (create_info->row_type != ROW_TYPE_FIXED &&
+ create_info->row_type != ROW_TYPE_DEFAULT)
+ db_options|= HA_OPTION_PACK_RECORD;
alias= table_case_name(create_info, table_name);
if (!(file= get_new_handler((TABLE_SHARE*) 0, thd->mem_root,
create_info->db_type)))
@@ -3744,8 +3745,9 @@ mysql_rename_table(handlerton *base, const char *old_db,
wait_while_table_is_used()
thd Thread handler
table Table to remove from cache
- function HA_EXTRA_PREPARE_FOR_DELETE if table is to be deleted
+ function HA_EXTRA_PREPARE_FOR_DROP if table is to be deleted
HA_EXTRA_FORCE_REOPEN if table is not be used
+ HA_EXTRA_PREPARE_FOR_REANME if table is to be renamed
NOTES
When returning, the table will be unusable for other threads until
the table is closed.
@@ -3755,7 +3757,7 @@ mysql_rename_table(handlerton *base, const char *old_db,
Win32 clients must also have a WRITE LOCK on the table !
*/
-void wait_while_table_is_used(THD *thd, TABLE *table,
+void wait_while_table_is_used(THD *thd,TABLE *table,
enum ha_extra_function function)
{
DBUG_ENTER("wait_while_table_is_used");
@@ -3764,8 +3766,7 @@ void wait_while_table_is_used(THD *thd, TABLE *table,
table->db_stat, table->s->version));
safe_mutex_assert_owner(&LOCK_open);
-
- VOID(table->file->extra(function));
+
/* Mark all tables that are in use as 'old' */
mysql_lock_abort(thd, table, TRUE); /* end threads waiting on lock */
@@ -3773,6 +3774,7 @@ void wait_while_table_is_used(THD *thd, TABLE *table,
remove_table_from_cache(thd, table->s->db.str,
table->s->table_name.str,
RTFC_WAIT_OTHER_THREAD_FLAG);
+ VOID(table->file->extra(function));
DBUG_VOID_RETURN;
}
@@ -3797,7 +3799,7 @@ void close_cached_table(THD *thd, TABLE *table)
{
DBUG_ENTER("close_cached_table");
- wait_while_table_is_used(thd, table, HA_EXTRA_PREPARE_FOR_DELETE);
+ wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN);
/* Close lock if this is not got with LOCK TABLES */
if (thd->lock)
{
@@ -5180,8 +5182,7 @@ compare_tables(TABLE *table,
}
/* Don't pack rows in old tables if the user has requested this. */
- if (create_info->row_type == ROW_TYPE_DYNAMIC ||
- (new_field->flags & BLOB_FLAG) ||
+ if ((new_field->flags & BLOB_FLAG) ||
new_field->sql_type == MYSQL_TYPE_VARCHAR &&
create_info->row_type != ROW_TYPE_FIXED)
create_info->table_options|= HA_OPTION_PACK_RECORD;
@@ -6682,7 +6683,7 @@ view_err:
if (lower_case_table_names)
my_casedn_str(files_charset_info, old_name);
- wait_while_table_is_used(thd, table, HA_EXTRA_PREPARE_FOR_DELETE);
+ wait_while_table_is_used(thd, table, HA_EXTRA_PREPARE_FOR_RENAME);
close_data_files_and_morph_locks(thd, db, table_name);
error=0;
@@ -7159,7 +7160,6 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list)
table_list->table= NULL;
bzero((char*) &create_info, sizeof(create_info));
- create_info.db_type= 0;
create_info.row_type=ROW_TYPE_NOT_USED;
create_info.default_table_charset=default_charset_info;
/* Force alter table to recreate table */
@@ -7251,6 +7251,9 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables,
for (uint i= 0; i < t->s->fields; i++ )
{
Field *f= t->field[i];
+ if (! thd->variables.old_mode &&
+ f->is_real_null(0))
+ continue;
if ((f->type() == MYSQL_TYPE_BLOB) ||
(f->type() == MYSQL_TYPE_VARCHAR))
{
diff --git a/sql/sql_test.cc b/sql/sql_test.cc
index 0fe299d4505..f1d7e4a7312 100644
--- a/sql/sql_test.cc
+++ b/sql/sql_test.cc
@@ -459,7 +459,7 @@ void mysql_print_status()
VOID(my_getwd(current_dir, sizeof(current_dir),MYF(0)));
printf("Current dir: %s\n", current_dir);
printf("Running threads: %d Stack size: %ld\n", thread_count,
- (long) thread_stack);
+ (long) my_thread_stack_size);
thr_print_locks(); // Write some debug info
#ifndef DBUG_OFF
print_cached_tables();
@@ -536,7 +536,7 @@ Estimated memory (with thread stack): %ld\n",
(int) info.uordblks,
(int) info.fordblks,
(int) info.keepcost,
- (long) (thread_count * thread_stack + info.hblkhd + info.arena));
+ (long) (thread_count * my_thread_stack_size + info.hblkhd + info.arena));
#endif
Events::dump_internal_status();
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index a48cff82715..90d7b4dfc60 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -63,9 +63,9 @@ bool select_union::send_data(List<Item> &values)
if ((error= table->file->ha_write_row(table->record[0])))
{
- /* create_myisam_from_heap will generate error if needed */
+ /* create_internal_tmp_table_from_heap will generate error if needed */
if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
- create_myisam_from_heap(thd, table, &tmp_table_param, error, 1))
+ create_internal_tmp_table_from_heap(thd, table, &tmp_table_param, error, 1))
return 1;
}
return 0;
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index 78dea6b7cdb..6d0d5933971 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -1678,7 +1678,7 @@ bool multi_update::send_data(List<Item> &not_used_values)
if (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE)
{
if (error &&
- create_myisam_from_heap(thd, tmp_table,
+ create_internal_tmp_table_from_heap(thd, tmp_table,
tmp_table_param + offset, error, 1))
{
do_update= 0;
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 9fa4f8585f5..42fc5b6cbe1 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -487,6 +487,7 @@ Item* handle_sql2003_note184_exception(THD *thd, Item* left, bool equal,
enum enum_tx_isolation tx_isolation;
enum Cast_target cast_type;
enum Item_udftype udf_type;
+ enum ha_choice choice;
CHARSET_INFO *charset;
thr_lock_type lock_type;
interval_type interval, interval_time_st;
@@ -881,6 +882,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token OWNER_SYM
%token PACK_KEYS_SYM
%token PAGE_SYM
+%token PAGE_CHECKSUM_SYM
%token PARAM_MARKER
%token PARSER_SYM
%token PARTIAL /* SQL-2003-N */
@@ -1021,6 +1023,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token TABLESPACE
%token TABLE_REF_PRIORITY
%token TABLE_SYM /* SQL-2003-R */
+%token TABLE_CHECKSUM_SYM
%token TEMPORARY /* SQL-2003-N */
%token TEMPTABLE_SYM
%token TERMINATED
@@ -1156,6 +1159,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%type <ulonglong_number>
ulonglong_num real_ulonglong_num size_number
+%type <choice> choice
+
%type <p_elem_value>
part_bit_expr
@@ -4263,6 +4268,16 @@ create_table_option:
Lex->create_info.table_options|= $3 ? HA_OPTION_CHECKSUM : HA_OPTION_NO_CHECKSUM;
Lex->create_info.used_fields|= HA_CREATE_USED_CHECKSUM;
}
+ | TABLE_CHECKSUM_SYM opt_equal ulong_num
+ {
+ Lex->create_info.table_options|= $3 ? HA_OPTION_CHECKSUM : HA_OPTION_NO_CHECKSUM;
+ Lex->create_info.used_fields|= HA_CREATE_USED_CHECKSUM;
+ }
+ | PAGE_CHECKSUM_SYM opt_equal choice
+ {
+ Lex->create_info.used_fields|= HA_CREATE_USED_PAGE_CHECKSUM;
+ Lex->create_info.page_checksum= $3;
+ }
| DELAY_KEY_WRITE_SYM opt_equal ulong_num
{
Lex->create_info.table_options|= $3 ? HA_OPTION_DELAY_KEY_WRITE : HA_OPTION_NO_DELAY_KEY_WRITE;
@@ -4322,11 +4337,10 @@ create_table_option:
Lex->create_info.used_fields|= HA_CREATE_USED_KEY_BLOCK_SIZE;
Lex->create_info.key_block_size= $3;
}
- | TRANSACTIONAL_SYM opt_equal ulong_num
+ | TRANSACTIONAL_SYM opt_equal choice
{
- Lex->create_info.used_fields|= HA_CREATE_USED_TRANSACTIONAL;
- Lex->create_info.transactional= ($3 != 0 ? HA_CHOICE_YES :
- HA_CHOICE_NO);
+ Lex->create_info.used_fields|= HA_CREATE_USED_TRANSACTIONAL;
+ Lex->create_info.transactional= $3;
}
;
@@ -8200,6 +8214,11 @@ dec_num:
| FLOAT_NUM
;
+choice:
+ ulong_num { $$= $1 != 0 ? HA_CHOICE_YES : HA_CHOICE_NO; }
+ | DEFAULT { $$= HA_CHOICE_UNDEF; }
+ ;
+
procedure_clause:
/* empty */
| PROCEDURE ident /* Procedure name */
@@ -10502,6 +10521,7 @@ keyword_sp:
| ONE_SYM {}
| PACK_KEYS_SYM {}
| PAGE_SYM {}
+ | PAGE_CHECKSUM_SYM {}
| PARTIAL {}
| PARTITIONING_SYM {}
| PARTITIONS_SYM {}
@@ -10570,6 +10590,7 @@ keyword_sp:
| SWAPS_SYM {}
| SWITCHES_SYM {}
| TABLES {}
+ | TABLE_CHECKSUM_SYM {}
| TABLESPACE {}
| TEMPORARY {}
| TEMPTABLE_SYM {}
diff --git a/sql/table.cc b/sql/table.cc
index cacb3a94582..4a9b8d6ca5f 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -711,7 +711,8 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
if (!head[32]) // New frm file in 3.23
{
share->avg_row_length= uint4korr(head+34);
- share->transactional= (ha_choice) head[39];
+ share->transactional= (ha_choice) (head[39] & 3);
+ share->page_checksum= (ha_choice) ((head[39] >> 2) & 3);
share->row_type= (row_type) head[40];
share->table_charset= get_charset((uint) head[38],MYF(0));
share->null_field_first= 1;
@@ -2447,7 +2448,9 @@ File create_frm(THD *thd, const char *name, const char *db,
int2store(fileinfo+16,reclength);
int4store(fileinfo+18,create_info->max_rows);
int4store(fileinfo+22,create_info->min_rows);
+ /* fileinfo[26] is set in mysql_create_frm() */
fileinfo[27]=2; // Use long pack-fields
+ /* fileinfo[28 & 29] is set to key_info_length in mysql_create_frm() */
create_info->table_options|=HA_OPTION_LONG_BLOB_PTR; // Use portable blob pointers
int2store(fileinfo+30,create_info->table_options);
fileinfo[32]=0; // No filename anymore
@@ -2455,9 +2458,10 @@ File create_frm(THD *thd, const char *name, const char *db,
int4store(fileinfo+34,create_info->avg_row_length);
fileinfo[38]= (create_info->default_table_charset ?
create_info->default_table_charset->number : 0);
- fileinfo[39]= (uchar) create_info->transactional;
+ fileinfo[39]= (uchar) ((uint) create_info->transactional |
+ ((uint) create_info->page_checksum << 2));
fileinfo[40]= (uchar) create_info->row_type;
- /* Next few bytes were for RAID support */
+ /* Next few bytes where for RAID support */
fileinfo[41]= 0;
fileinfo[42]= 0;
fileinfo[43]= 0;
@@ -2508,6 +2512,8 @@ void update_create_info_from_table(HA_CREATE_INFO *create_info, TABLE *table)
create_info->default_table_charset= share->table_charset;
create_info->table_charset= 0;
create_info->comment= share->comment;
+ create_info->transactional= share->transactional;
+ create_info->page_checksum= share->page_checksum;
DBUG_VOID_RETURN;
}
diff --git a/sql/table.h b/sql/table.h
index 284885658e0..24948d0a076 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -311,6 +311,7 @@ typedef struct st_table_share
enum row_type row_type; /* How rows are stored */
enum tmp_table_type tmp_table;
enum ha_choice transactional;
+ enum ha_choice page_checksum;
uint ref_count; /* How many TABLE objects uses this */
uint open_count; /* Number of tables in open list */
diff --git a/sql/unireg.cc b/sql/unireg.cc
index dbdefd8d5b1..e5f230841f6 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -320,8 +320,10 @@ bool mysql_create_frm(THD *thd, const char *file_name,
my_free(keybuff, MYF(0));
if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE) &&
- my_sync(file, MYF(MY_WME)))
- goto err2;
+ (my_sync(file, MYF(MY_WME)) ||
+ my_sync_dir_by_file(file_name, MYF(MY_WME))))
+ goto err2;
+
if (my_close(file,MYF(MY_WME)))
goto err3;
@@ -505,7 +507,7 @@ static uint pack_keys(uchar *keybuff, uint key_count, KEY *keyinfo,
int2store(pos+6, key->block_size);
pos+=8;
key_parts+=key->key_parts;
- DBUG_PRINT("loop", ("flags: %lu key_parts: %d at 0x%lx",
+ DBUG_PRINT("loop", ("flags: %lu key_parts: %d key_part: 0x%lx",
key->flags, key->key_parts,
(long) key->key_part));
for (key_part=key->key_part,key_part_end=key_part+key->key_parts ;
diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc
index c9fab79a4c5..56d77c2b8b1 100644
--- a/storage/csv/ha_tina.cc
+++ b/storage/csv/ha_tina.cc
@@ -445,7 +445,7 @@ ha_tina::ha_tina(handlerton *hton, TABLE_SHARE *table_arg)
*/
current_position(0), next_position(0), local_saved_data_file_length(0),
file_buff(0), chain_alloced(0), chain_size(DEFAULT_CHAIN_LENGTH),
- local_data_file_version(0), records_is_known(0)
+ local_data_file_version(0), records_is_known(0), curr_lock_type(F_UNLCK)
{
/* Set our original buffers from pre-allocated memory */
buffer.set((char*)byte_buffer, IO_SIZE, &my_charset_bin);
@@ -1454,6 +1454,14 @@ int ha_tina::delete_all_rows()
DBUG_RETURN(rc);
}
+int ha_tina::external_lock(THD *thd __attribute__((unused)), int lock_type)
+{
+ if (lock_type==F_UNLCK && curr_lock_type == F_WRLCK)
+ update_status();
+ curr_lock_type= lock_type;
+ return 0;
+}
+
/*
Called by the database to lock the table. Keep in mind that this
is an internal lock.
@@ -1468,7 +1476,7 @@ THR_LOCK_DATA **ha_tina::store_lock(THD *thd,
return to;
}
-/*
+/*
Create a table. You do not want to leave the table open after a call to
this (the database will call ::open() if it needs to).
*/
diff --git a/storage/csv/ha_tina.h b/storage/csv/ha_tina.h
index 5ce09783b9b..9a9c2399745 100644
--- a/storage/csv/ha_tina.h
+++ b/storage/csv/ha_tina.h
@@ -84,6 +84,8 @@ class ha_tina: public handler
bool records_is_known;
private:
+ int curr_lock_type;
+
bool get_write_pos(off_t *end_pos, tina_set *closest_hole);
int open_update_temp_file_if_needed();
int init_tina_writer();
@@ -154,6 +156,8 @@ public:
bool check_if_incompatible_data(HA_CREATE_INFO *info,
uint table_changes);
+ int external_lock(THD *thd, int lock_type);
+
THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
enum thr_lock_type lock_type);
diff --git a/storage/maria/CMakeLists.txt b/storage/maria/CMakeLists.txt
new file mode 100644
index 00000000000..17d3aa53839
--- /dev/null
+++ b/storage/maria/CMakeLists.txt
@@ -0,0 +1,64 @@
+# Copyright (C) 2007 MySQL AB
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+INCLUDE("${PROJECT_SOURCE_DIR}/win/mysql_manifest.cmake")
+
+SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
+SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
+
+INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
+ ${CMAKE_SOURCE_DIR}/sql
+ ${CMAKE_SOURCE_DIR}/regex
+ ${CMAKE_SOURCE_DIR}/extra/yassl/include)
+SET(MARIA_SOURCES ft_maria.c ha_maria.cc lockman.c ma_bitmap.c ma_blockrec.c
+ ma_cache.c ma_changed.c ma_check.c ma_checkpoint.c ma_checksum.c
+ ma_close.c ma_commit.c ma_control_file.c ma_create.c ma_dbug.c
+ ma_delete.c ma_delete_all.c ma_delete_table.c ma_dynrec.c
+ ma_extra.c ma_ft_boolean_search.c ma_ft_eval.c ma_ft_nlq_search.c
+ ma_ft_parser.c ma_ft_stem.c ma_ft_test1.c ma_ft_update.c ma_info.c
+ ma_init.c ma_key.c ma_keycache.c ma_least_recently_dirtied.c
+ ma_locking.c ma_loghandler.c ma_open.c ma_packrec.c ma_page.c
+ ma_pagecache.c ma_pagecaches.c ma_panic.c ma_preload.c ma_range.c
+ ma_recovery.c ma_rename.c ma_rfirst.c ma_rkey.c ma_rlast.c
+ ma_rnext.c ma_rnext_same.c ma_rprev.c ma_rrnd.c ma_rsame.c
+ ma_rsamepos.c ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c
+ ma_rt_test.c ma_scan.c ma_search.c ma_sort.c ma_sp_key.c
+ ma_sp_test.c ma_static.c ma_statrec.c
+ ma_unique.c ma_update.c ma_write.c tablockman.c trnman.c
+ ha_maria.h maria_def.h)
+
+IF(NOT SOURCE_SUBLIBS)
+
+ ADD_LIBRARY(maria ${MARIA_SOURCES})
+
+ADD_EXECUTABLE(maria_ftdump maria_ftdump.c)
+TARGET_LINK_LIBRARIES(maria_ftdump maria myisam mysys debug dbug strings zlib wsock32)
+
+ADD_EXECUTABLE(maria_chk maria_chk.c)
+TARGET_LINK_LIBRARIES(maria_chk maria myisam mysys debug dbug strings zlib wsock32)
+
+ADD_EXECUTABLE(maria_read_log maria_read_log.c)
+TARGET_LINK_LIBRARIES(maria_read_log maria myisam mysys debug dbug strings zlib wsock32)
+
+ADD_EXECUTABLE(maria_pack maria_pack.c)
+TARGET_LINK_LIBRARIES(maria_pack maria myisam mysys debug dbug strings zlib wsock32)
+
+IF(EMBED_MANIFESTS)
+ MYSQL_EMBED_MANIFEST("maria_ftdump" "asInvoker")
+ MYSQL_EMBED_MANIFEST("maria_chk" "asInvoker")
+ MYSQL_EMBED_MANIFEST("maria_read_log" "asInvoker")
+ MYSQL_EMBED_MANIFEST("maria_pack" "asInvoker")
+ENDIF(EMBED_MANIFESTS)
+
+ENDIF(NOT SOURCE_SUBLIBS)
diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am
new file mode 100644
index 00000000000..8c3fdd6dd38
--- /dev/null
+++ b/storage/maria/Makefile.am
@@ -0,0 +1,185 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+MYSQLDATAdir = $(localstatedir)
+MYSQLSHAREdir = $(pkgdatadir)
+MYSQLBASEdir= $(prefix)
+MYSQLLIBdir= $(pkglibdir)
+INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \
+ -I$(top_srcdir)/regex \
+ -I$(top_srcdir)/sql \
+ -I$(srcdir)
+WRAPLIBS=
+
+LDADD =
+
+DEFS = @DEFS@
+
+# "." is needed first because tests in unittest need libmaria
+SUBDIRS = . unittest
+
+EXTRA_DIST = ma_test_all.sh ma_test_all.res ma_ft_stem.c CMakeLists.txt plug.in ma_test_recovery
+pkgdata_DATA = ma_test_all ma_test_all.res ma_test_recovery
+pkglib_LIBRARIES = libmaria.a
+bin_PROGRAMS = maria_chk maria_pack maria_ftdump maria_read_log
+maria_chk_DEPENDENCIES= $(LIBRARIES)
+# Only reason to link with libmyisam.a here is that it's where some fulltext
+# pieces are (but soon we'll remove fulltext dependencies from Maria).
+# For now, it imposes that storage/myisam be built before storage/maria.
+maria_chk_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+maria_pack_DEPENDENCIES=$(LIBRARIES)
+maria_pack_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+maria_read_log_DEPENDENCIES=$(LIBRARIES)
+maria_read_log_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test
+noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \
+ ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h \
+ ma_ft_eval.h trnman.h lockman.h tablockman.h \
+ ma_control_file.h ha_maria.h ma_blockrec.h \
+ ma_loghandler.h ma_loghandler_lsn.h ma_pagecache.h \
+ ma_checkpoint.h ma_recovery.h ma_commit.h \
+ trnman_public.h ma_check_standalone.h ma_key_recover.h
+ma_test1_DEPENDENCIES= $(LIBRARIES)
+ma_test1_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+ma_test2_DEPENDENCIES= $(LIBRARIES)
+ma_test2_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+ma_test3_DEPENDENCIES= $(LIBRARIES)
+ma_test3_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+#ma_ft_test1_DEPENDENCIES= $(LIBRARIES)
+#ma_ft_eval_DEPENDENCIES= $(LIBRARIES)
+maria_ftdump_DEPENDENCIES= $(LIBRARIES)
+maria_ftdump_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+ma_rt_test_DEPENDENCIES= $(LIBRARIES)
+ma_rt_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+ma_sp_test_DEPENDENCIES= $(LIBRARIES)
+ma_sp_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \
+ ma_rnext.c ma_rnext_same.c \
+ ma_search.c ma_page.c ma_key_recover.c ma_key.c \
+ ma_locking.c \
+ ma_rrnd.c ma_scan.c ma_cache.c \
+ ma_statrec.c ma_packrec.c ma_dynrec.c \
+ ma_blockrec.c ma_bitmap.c \
+ ma_update.c ma_write.c ma_unique.c \
+ ma_delete.c \
+ ma_rprev.c ma_rfirst.c ma_rlast.c ma_rsame.c \
+ ma_rsamepos.c ma_panic.c ma_close.c ma_create.c\
+ ma_range.c ma_dbug.c ma_checksum.c \
+ ma_changed.c ma_static.c ma_delete_all.c \
+ ma_delete_table.c ma_rename.c ma_check.c \
+ ma_keycache.c ma_preload.c ma_ft_parser.c \
+ ma_ft_update.c ma_ft_boolean_search.c \
+ ma_ft_nlq_search.c ft_maria.c ma_sort.c \
+ ha_maria.cc trnman.c lockman.c tablockman.c \
+ ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c \
+ ma_sp_key.c ma_control_file.c ma_loghandler.c \
+ ma_pagecache.c ma_pagecaches.c \
+ ma_checkpoint.c ma_recovery.c ma_commit.c \
+ ma_pagecrc.c
+CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA?
+
+SUFFIXES = .sh
+
+.sh:
+ @RM@ -f $@ $@-t
+ @SED@ \
+ -e 's!@''bindir''@!$(bindir)!g' \
+ -e 's!@''scriptdir''@!$(bindir)!g' \
+ -e 's!@''prefix''@!$(prefix)!g' \
+ -e 's!@''datadir''@!$(datadir)!g' \
+ -e 's!@''localstatedir''@!$(localstatedir)!g' \
+ -e 's!@''libexecdir''@!$(libexecdir)!g' \
+ -e 's!@''CC''@!@CC@!'\
+ -e 's!@''CXX''@!@CXX@!'\
+ -e 's!@''GXX''@!@GXX@!'\
+ -e 's!@''PERL''@!@PERL@!' \
+ -e 's!@''CFLAGS''@!@SAVE_CFLAGS@!'\
+ -e 's!@''CXXFLAGS''@!@SAVE_CXXFLAGS@!'\
+ -e 's!@''LDFLAGS''@!@SAVE_LDFLAGS@!'\
+ -e 's!@''VERSION''@!@VERSION@!' \
+ -e 's!@''MYSQL_SERVER_SUFFIX''@!@MYSQL_SERVER_SUFFIX@!' \
+ -e 's!@''COMPILATION_COMMENT''@!@COMPILATION_COMMENT@!' \
+ -e 's!@''MACHINE_TYPE''@!@MACHINE_TYPE@!' \
+ -e 's!@''HOSTNAME''@!@HOSTNAME@!' \
+ -e 's!@''SYSTEM_TYPE''@!@SYSTEM_TYPE@!' \
+ -e 's!@''CHECK_PID''@!@CHECK_PID@!' \
+ -e 's!@''FIND_PROC''@!@FIND_PROC@!' \
+ -e 's!@''MYSQLD_DEFAULT_SWITCHES''@!@MYSQLD_DEFAULT_SWITCHES@!' \
+ -e 's!@''MYSQL_UNIX_ADDR''@!@MYSQL_UNIX_ADDR@!' \
+ -e 's!@''TARGET_LINUX''@!@TARGET_LINUX@!' \
+ -e "s!@""CONF_COMMAND""@!@CONF_COMMAND@!" \
+ -e 's!@''MYSQLD_USER''@!@MYSQLD_USER@!' \
+ -e 's!@''sysconfdir''@!@sysconfdir@!' \
+ -e 's!@''SHORT_MYSQL_INTRO''@!@SHORT_MYSQL_INTRO@!' \
+ -e 's!@''SHARED_LIB_VERSION''@!@SHARED_LIB_VERSION@!' \
+ -e 's!@''MYSQL_BASE_VERSION''@!@MYSQL_BASE_VERSION@!' \
+ -e 's!@''MYSQL_NO_DASH_VERSION''@!@MYSQL_NO_DASH_VERSION@!' \
+ -e 's!@''MYSQL_TCP_PORT''@!@MYSQL_TCP_PORT@!' \
+ -e 's!@''PERL_DBI_VERSION''@!@PERL_DBI_VERSION@!' \
+ -e 's!@''PERL_DBD_VERSION''@!@PERL_DBD_VERSION@!' \
+ -e 's!@''PERL_DATA_DUMPER''@!@PERL_DATA_DUMPER@!' \
+ $< > $@-t
+ @CHMOD@ +x $@-t
+ @MV@ $@-t $@
+
+tags:
+ etags *.h *.c *.cc
+
+unittests = unittest
+
+test:
+ perl $(top_srcdir)/unittest/unit.pl run $(unittests)
+
+test-verbose:
+ HARNESS_VERBOSE=1 perl $(top_srcdir)/unittest/unit.pl run $(unittests)
+
+# Don't update the files from bitkeeper
+%::SCCS/s.%
diff --git a/storage/maria/ft_maria.c b/storage/maria/ft_maria.c
new file mode 100644
index 00000000000..1b082f904d0
--- /dev/null
+++ b/storage/maria/ft_maria.c
@@ -0,0 +1,48 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+/*
+ This function is for interface functions between fulltext and maria
+*/
+
+#include "ma_ftdefs.h"
+
+FT_INFO *maria_ft_init_search(uint flags, void *info, uint keynr,
+ uchar *query, uint query_len, CHARSET_INFO *cs,
+ uchar *record)
+{
+ FT_INFO *res;
+ if (flags & FT_BOOL)
+ res= maria_ft_init_boolean_search((MARIA_HA *) info, keynr, query,
+ query_len, cs);
+ else
+ res= maria_ft_init_nlq_search((MARIA_HA *) info, keynr, query, query_len,
+ flags, record);
+ return res;
+}
+
+const struct _ft_vft _ma_ft_vft_nlq = {
+ maria_ft_nlq_read_next, maria_ft_nlq_find_relevance,
+ maria_ft_nlq_close_search, maria_ft_nlq_get_relevance,
+ maria_ft_nlq_reinit_search
+};
+const struct _ft_vft _ma_ft_vft_boolean = {
+ maria_ft_boolean_read_next, maria_ft_boolean_find_relevance,
+ maria_ft_boolean_close_search, maria_ft_boolean_get_relevance,
+ maria_ft_boolean_reinit_search
+};
+
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
new file mode 100644
index 00000000000..cd13f19d646
--- /dev/null
+++ b/storage/maria/ha_maria.cc
@@ -0,0 +1,2746 @@
+/* Copyright (C) 2006,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation // gcc: Class implementation
+#endif
+
+#define MYSQL_SERVER 1
+#include "mysql_priv.h"
+#include <mysql/plugin.h>
+#include <m_ctype.h>
+#include <my_dir.h>
+#include <myisampack.h>
+#include <my_bit.h>
+#include "ha_maria.h"
+#include "trnman_public.h"
+
+C_MODE_START
+#include "maria_def.h"
+#include "ma_rt_index.h"
+#include "ma_blockrec.h"
+#include "ma_checkpoint.h"
+#include "ma_recovery.h"
+C_MODE_END
+
+/*
+ Note that in future versions, only *transactional* Maria tables can
+ rollback, so this flag should be up or down conditionally.
+*/
+#define MARIA_CANNOT_ROLLBACK HA_NO_TRANSACTIONS
+#ifdef MARIA_CANNOT_ROLLBACK
+#define trans_register_ha(A, B, C) do { /* nothing */ } while(0)
+#endif
+
+ulong pagecache_division_limit, pagecache_age_threshold;
+ulonglong pagecache_buffer_size;
+
+/**
+ @todo For now there is no way for a user to set a different value of
+ maria_recover_options, i.e. auto-check-and-repair is always disabled.
+ We could enable it. As the auto-repair is initiated when opened from the
+ SQL layer (open_unireg_entry(), check_and_repair()), it does not happen
+ when Maria's Recovery internally opens the table to apply log records to
+ it, which is good. It would happen only after Recovery, if the table is
+ still corrupted.
+*/
+ulong maria_recover_options= HA_RECOVER_NONE;
+handlerton *maria_hton;
+
+/* bits in maria_recover_options */
+const char *maria_recover_names[]=
+{
+ "DEFAULT", "BACKUP", "FORCE", "QUICK", NullS
+};
+TYPELIB maria_recover_typelib=
+{
+ array_elements(maria_recover_names) - 1, "",
+ maria_recover_names, NULL
+};
+
+const char *maria_stats_method_names[]=
+{
+ "nulls_unequal", "nulls_equal",
+ "nulls_ignored", NullS
+};
+TYPELIB maria_stats_method_typelib=
+{
+ array_elements(maria_stats_method_names) - 1, "",
+ maria_stats_method_names, NULL
+};
+
+/* transactions log purge mode */
+const char *maria_translog_purge_type_names[]=
+{
+ "immediate", "external", "at_flush", NullS
+};
+TYPELIB maria_translog_purge_type_typelib=
+{
+ array_elements(maria_translog_purge_type_names) - 1, "",
+ maria_translog_purge_type_names, NULL
+};
+const char *maria_sync_log_dir_names[]=
+{
+ "NEVER", "NEWFILE", "ALWAYS", NullS
+};
+
+TYPELIB maria_sync_log_dir_typelib=
+{
+ array_elements(maria_sync_log_dir_names) - 1, "",
+ maria_sync_log_dir_names, NULL
+};
+
+/** @brief Interval between background checkpoints in seconds */
+static ulong checkpoint_interval;
+static void update_checkpoint_interval(MYSQL_THD thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr, void *save);
+static void update_log_file_size(MYSQL_THD thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr, void *save);
+
+static MYSQL_SYSVAR_ULONG(block_size, maria_block_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Block size to be used for MARIA index pages.", 0, 0,
+ MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH,
+ MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH);
+
+static MYSQL_SYSVAR_ULONG(checkpoint_interval, checkpoint_interval,
+ PLUGIN_VAR_RQCMDARG,
+ "Interval between automatic checkpoints, in seconds;"
+ " 0 means 'no automatic checkpoints'.",
+ NULL, update_checkpoint_interval, 30, 0, UINT_MAX, 1);
+
+static MYSQL_SYSVAR_BOOL(page_checksum, maria_page_checksums, 0,
+ "Maintain page checksums (can be overridden per table "
+ "with PAGE_CHECKSUM clause in CREATE TABLE)", 0, 0, 1);
+
+static MYSQL_SYSVAR_ULONG(log_file_size, log_file_size,
+ PLUGIN_VAR_RQCMDARG,
+ "Limit for transaction log size",
+ NULL, update_log_file_size, TRANSLOG_FILE_SIZE,
+ TRANSLOG_MIN_FILE_SIZE, 0xffffffffL, TRANSLOG_PAGE_SIZE);
+
+static MYSQL_SYSVAR_ENUM(log_purge_type, log_purge_type,
+ PLUGIN_VAR_RQCMDARG,
+ "Specifies how maria transactional log will be purged. "
+ "Possible values of name are \"immediate\", \"external\" "
+ "and \"at_flush\"",
+ NULL, NULL, TRANSLOG_PURGE_IMMIDIATE,
+ &maria_translog_purge_type_typelib);
+
+static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size,
+ maria_max_temp_length, PLUGIN_VAR_RQCMDARG,
+ "Don't use the fast sort index method to created index if the "
+ "temporary file would get bigger than this.",
+ 0, 0, MAX_FILE_SIZE, 0, MAX_FILE_SIZE, 1024*1024);
+
+static MYSQL_SYSVAR_ULONG(pagecache_age_threshold,
+ pagecache_age_threshold, PLUGIN_VAR_RQCMDARG,
+ "This characterizes the number of hits a hot block has to be untouched "
+ "until it is considered aged enough to be downgraded to a warm block. "
+ "This specifies the percentage ratio of that number of hits to the "
+ "total number of blocks in the page cache.", 0, 0,
+ 300, 100, ~0L, 100);
+
+static MYSQL_SYSVAR_ULONGLONG(pagecache_buffer_size, pagecache_buffer_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "The size of the buffer used for index blocks for Maria tables. "
+ "Increase this to get better index handling (for all reads and multiple "
+ "writes) to as much as you can afford.", 0, 0,
+ KEY_CACHE_SIZE, MALLOC_OVERHEAD, ~(ulong) 0, IO_SIZE);
+
+static MYSQL_SYSVAR_ULONG(pagecache_division_limit, pagecache_division_limit,
+ PLUGIN_VAR_RQCMDARG,
+ "The minimum percentage of warm blocks in key cache", 0, 0,
+ 100, 1, 100, 1);
+
+static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG,
+ "Number of threads to use when repairing maria tables. The value of 1 "
+ "disables parallel repair.",
+ 0, 0, 1, 1, ~0L, 1);
+
+static MYSQL_THDVAR_ULONG(sort_buffer_size, PLUGIN_VAR_RQCMDARG,
+ "The buffer that is allocated when sorting the index when doing a "
+ "REPAIR or when creating indexes with CREATE INDEX or ALTER TABLE.",
+ 0, 0, 8192*1024, 4, ~0L, 1);
+
+static MYSQL_THDVAR_ENUM(stats_method, PLUGIN_VAR_RQCMDARG,
+ "Specifies how maria index statistics collection code should threat "
+ "NULLs. Possible values of name are \"nulls_unequal\", \"nulls_equal\", "
+ "and \"nulls_ignored\".", 0, 0, 0, &maria_stats_method_typelib);
+
+static MYSQL_SYSVAR_ENUM(sync_log_dir, sync_log_dir, PLUGIN_VAR_RQCMDARG,
+ "Controls syncing directory after log file growth and new file "
+ "creation. Possible values of are \"never\", \"newfile\" and "
+ "\"always\")", NULL, NULL, TRANSLOG_SYNC_DIR_NEWFILE,
+ &maria_sync_log_dir_typelib);
+
+/*****************************************************************************
+** MARIA tables
+*****************************************************************************/
+
+static handler *maria_create_handler(handlerton *hton,
+ TABLE_SHARE * table,
+ MEM_ROOT *mem_root)
+{
+ return new (mem_root) ha_maria(hton, table);
+}
+
+
+// collect errors printed by maria_check routines
+
+static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type,
+ const char *fmt, va_list args)
+{
+ THD *thd= (THD *) param->thd;
+ Protocol *protocol= thd->protocol;
+ uint length, msg_length;
+ char msgbuf[MARIA_MAX_MSG_BUF];
+ char name[NAME_LEN * 2 + 2];
+
+ msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
+ msgbuf[sizeof(msgbuf) - 1]= 0; // healthy paranoia
+
+ DBUG_PRINT(msg_type, ("message: %s", msgbuf));
+
+ if (!thd->vio_ok())
+ {
+ sql_print_error(msgbuf);
+ return;
+ }
+
+ if (param->testflag &
+ (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR))
+ {
+ my_message(ER_NOT_KEYFILE, msgbuf, MYF(MY_WME));
+ return;
+ }
+ length= (uint) (strxmov(name, param->db_name, ".", param->table_name,
+ NullS) - name);
+ /*
+ TODO: switch from protocol to push_warning here. The main reason we didn't
+ it yet is parallel repair. Due to following trace:
+ ma_check_print_msg/push_warning/sql_alloc/my_pthread_getspecific_ptr.
+
+ Also we likely need to lock mutex here (in both cases with protocol and
+ push_warning).
+ */
+ protocol->prepare_for_resend();
+ protocol->store(name, length, system_charset_info);
+ protocol->store(param->op_name, system_charset_info);
+ protocol->store(msg_type, system_charset_info);
+ protocol->store(msgbuf, msg_length, system_charset_info);
+ if (protocol->write())
+ sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n",
+ msgbuf);
+ return;
+}
+
+
+/*
+ Convert TABLE object to Maria key and column definition
+
+ SYNOPSIS
+ table2maria()
+ table_arg in TABLE object.
+ keydef_out out Maria key definition.
+ recinfo_out out Maria column definition.
+ records_out out Number of fields.
+
+ DESCRIPTION
+ This function will allocate and initialize Maria key and column
+ definition for further use in ma_create or for a check for underlying
+ table conformance in merge engine.
+
+ RETURN VALUE
+ 0 OK
+ # error code
+*/
+
+static int table2maria(TABLE *table_arg, data_file_type row_type,
+ MARIA_KEYDEF **keydef_out,
+ MARIA_COLUMNDEF **recinfo_out, uint *records_out,
+ MARIA_CREATE_INFO *create_info)
+{
+ uint i, j, recpos, minpos, fieldpos, temp_length, length;
+ enum ha_base_keytype type= HA_KEYTYPE_BINARY;
+ uchar *record;
+ KEY *pos;
+ MARIA_KEYDEF *keydef;
+ MARIA_COLUMNDEF *recinfo, *recinfo_pos;
+ HA_KEYSEG *keyseg;
+ TABLE_SHARE *share= table_arg->s;
+ uint options= share->db_options_in_use;
+ DBUG_ENTER("table2maria");
+
+ if (row_type == BLOCK_RECORD)
+ options|= HA_OPTION_PACK_RECORD;
+
+ if (!(my_multi_malloc(MYF(MY_WME),
+ recinfo_out, (share->fields * 2 + 2) * sizeof(MARIA_COLUMNDEF),
+ keydef_out, share->keys * sizeof(MARIA_KEYDEF),
+ &keyseg,
+ (share->key_parts + share->keys) * sizeof(HA_KEYSEG),
+ NullS)))
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM); /* purecov: inspected */
+ keydef= *keydef_out;
+ recinfo= *recinfo_out;
+ pos= table_arg->key_info;
+ for (i= 0; i < share->keys; i++, pos++)
+ {
+ keydef[i].flag= (pos->flags & (HA_NOSAME | HA_FULLTEXT | HA_SPATIAL));
+ keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ?
+ (pos->flags & HA_SPATIAL ? HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) :
+ pos->algorithm;
+ keydef[i].block_length= pos->block_size;
+ keydef[i].seg= keyseg;
+ keydef[i].keysegs= pos->key_parts;
+ for (j= 0; j < pos->key_parts; j++)
+ {
+ Field *field= pos->key_part[j].field;
+ type= field->key_type();
+ keydef[i].seg[j].flag= pos->key_part[j].key_part_flag;
+
+ if (options & HA_OPTION_PACK_KEYS ||
+ (pos->flags & (HA_PACK_KEY | HA_BINARY_PACK_KEY |
+ HA_SPACE_PACK_USED)))
+ {
+ if (pos->key_part[j].length > 8 &&
+ (type == HA_KEYTYPE_TEXT ||
+ type == HA_KEYTYPE_NUM ||
+ (type == HA_KEYTYPE_BINARY && !field->zero_pack())))
+ {
+ /* No blobs here */
+ if (j == 0)
+ keydef[i].flag|= HA_PACK_KEY;
+ if (!(field->flags & ZEROFILL_FLAG) &&
+ (field->type() == MYSQL_TYPE_STRING ||
+ field->type() == MYSQL_TYPE_VAR_STRING ||
+ ((int) (pos->key_part[j].length - field->decimals())) >= 4))
+ keydef[i].seg[j].flag|= HA_SPACE_PACK;
+ }
+ else if (j == 0 && (!(pos->flags & HA_NOSAME) || pos->key_length > 16))
+ keydef[i].flag|= HA_BINARY_PACK_KEY;
+ }
+ keydef[i].seg[j].type= (int) type;
+ keydef[i].seg[j].start= pos->key_part[j].offset;
+ keydef[i].seg[j].length= pos->key_part[j].length;
+ keydef[i].seg[j].bit_start= keydef[i].seg[j].bit_end=
+ keydef[i].seg[j].bit_length= 0;
+ keydef[i].seg[j].bit_pos= 0;
+ keydef[i].seg[j].language= field->charset()->number;
+
+ if (field->null_ptr)
+ {
+ keydef[i].seg[j].null_bit= field->null_bit;
+ keydef[i].seg[j].null_pos= (uint) (field->null_ptr-
+ (uchar*) table_arg->record[0]);
+ }
+ else
+ {
+ keydef[i].seg[j].null_bit= 0;
+ keydef[i].seg[j].null_pos= 0;
+ }
+ if (field->type() == MYSQL_TYPE_BLOB ||
+ field->type() == MYSQL_TYPE_GEOMETRY)
+ {
+ keydef[i].seg[j].flag|= HA_BLOB_PART;
+ /* save number of bytes used to pack length */
+ keydef[i].seg[j].bit_start= (uint) (field->pack_length() -
+ share->blob_ptr_size);
+ }
+ else if (field->type() == MYSQL_TYPE_BIT)
+ {
+ keydef[i].seg[j].bit_length= ((Field_bit *) field)->bit_len;
+ keydef[i].seg[j].bit_start= ((Field_bit *) field)->bit_ofs;
+ keydef[i].seg[j].bit_pos= (uint) (((Field_bit *) field)->bit_ptr -
+ (uchar*) table_arg->record[0]);
+ }
+ }
+ keyseg+= pos->key_parts;
+ }
+ if (table_arg->found_next_number_field)
+ keydef[share->next_number_index].flag|= HA_AUTO_KEY;
+ record= table_arg->record[0];
+ recpos= 0;
+ recinfo_pos= recinfo;
+ create_info->null_bytes= table_arg->s->null_bytes;
+
+ while (recpos < (uint) share->reclength)
+ {
+ Field **field, *found= 0;
+ minpos= share->reclength;
+ length= 0;
+
+ for (field= table_arg->field; *field; field++)
+ {
+ if ((fieldpos= (*field)->offset(record)) >= recpos &&
+ fieldpos <= minpos)
+ {
+ /* skip null fields */
+ if (!(temp_length= (*field)->pack_length_in_rec()))
+ continue; /* Skip null-fields */
+ if (! found || fieldpos < minpos ||
+ (fieldpos == minpos && temp_length < length))
+ {
+ minpos= fieldpos;
+ found= *field;
+ length= temp_length;
+ }
+ }
+ }
+ DBUG_PRINT("loop", ("found: 0x%lx recpos: %d minpos: %d length: %d",
+ (long) found, recpos, minpos, length));
+ if (!found)
+ break;
+
+ if (found->flags & BLOB_FLAG)
+ recinfo_pos->type= FIELD_BLOB;
+ else if (found->type() == MYSQL_TYPE_VARCHAR)
+ recinfo_pos->type= FIELD_VARCHAR;
+ else if (!(options & HA_OPTION_PACK_RECORD) ||
+ (found->zero_pack() && (found->flags & PRI_KEY_FLAG)))
+ recinfo_pos->type= FIELD_NORMAL;
+ else if (found->zero_pack())
+ recinfo_pos->type= FIELD_SKIP_ZERO;
+ else
+ recinfo_pos->type= ((length <= 3 ||
+ (found->flags & ZEROFILL_FLAG)) ?
+ FIELD_NORMAL :
+ found->type() == MYSQL_TYPE_STRING ||
+ found->type() == MYSQL_TYPE_VAR_STRING ?
+ FIELD_SKIP_ENDSPACE :
+ FIELD_SKIP_PRESPACE);
+ if (found->null_ptr)
+ {
+ recinfo_pos->null_bit= found->null_bit;
+ recinfo_pos->null_pos= (uint) (found->null_ptr -
+ (uchar*) table_arg->record[0]);
+ }
+ else
+ {
+ recinfo_pos->null_bit= 0;
+ recinfo_pos->null_pos= 0;
+ }
+ (recinfo_pos++)->length= (uint16) length;
+ recpos= minpos + length;
+ DBUG_PRINT("loop", ("length: %d type: %d",
+ recinfo_pos[-1].length,recinfo_pos[-1].type));
+ }
+ *records_out= (uint) (recinfo_pos - recinfo);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Check for underlying table conformance
+
+ SYNOPSIS
+ maria_check_definition()
+ t1_keyinfo in First table key definition
+ t1_recinfo in First table record definition
+ t1_keys in Number of keys in first table
+ t1_recs in Number of records in first table
+ t2_keyinfo in Second table key definition
+ t2_recinfo in Second table record definition
+ t2_keys in Number of keys in second table
+ t2_recs in Number of records in second table
+ strict in Strict check switch
+
+ DESCRIPTION
+ This function compares two Maria definitions. By intention it was done
+ to compare merge table definition against underlying table definition.
+ It may also be used to compare dot-frm and MAI definitions of Maria
+ table as well to compare different Maria table definitions.
+
+ For merge table it is not required that number of keys in merge table
+ must exactly match number of keys in underlying table. When calling this
+ function for underlying table conformance check, 'strict' flag must be
+ set to false, and converted merge definition must be passed as t1_*.
+
+ Otherwise 'strict' flag must be set to 1 and it is not required to pass
+ converted dot-frm definition as t1_*.
+
+ RETURN VALUE
+ 0 - Equal definitions.
+ 1 - Different definitions.
+
+ TODO
+ - compare FULLTEXT keys;
+ - compare SPATIAL keys;
+ - compare FIELD_SKIP_ZERO which is converted to FIELD_NORMAL correctly
+ (should be correctly detected in table2maria).
+*/
+
+int maria_check_definition(MARIA_KEYDEF *t1_keyinfo,
+ MARIA_COLUMNDEF *t1_recinfo,
+ uint t1_keys, uint t1_recs,
+ MARIA_KEYDEF *t2_keyinfo,
+ MARIA_COLUMNDEF *t2_recinfo,
+ uint t2_keys, uint t2_recs, bool strict)
+{
+ uint i, j;
+ DBUG_ENTER("maria_check_definition");
+ if ((strict ? t1_keys != t2_keys : t1_keys > t2_keys))
+ {
+ DBUG_PRINT("error", ("Number of keys differs: t1_keys=%u, t2_keys=%u",
+ t1_keys, t2_keys));
+ DBUG_RETURN(1);
+ }
+ if (t1_recs != t2_recs)
+ {
+ DBUG_PRINT("error", ("Number of recs differs: t1_recs=%u, t2_recs=%u",
+ t1_recs, t2_recs));
+ DBUG_RETURN(1);
+ }
+ for (i= 0; i < t1_keys; i++)
+ {
+ HA_KEYSEG *t1_keysegs= t1_keyinfo[i].seg;
+ HA_KEYSEG *t2_keysegs= t2_keyinfo[i].seg;
+ if (t1_keyinfo[i].flag & HA_FULLTEXT && t2_keyinfo[i].flag & HA_FULLTEXT)
+ continue;
+ else if (t1_keyinfo[i].flag & HA_FULLTEXT ||
+ t2_keyinfo[i].flag & HA_FULLTEXT)
+ {
+ DBUG_PRINT("error", ("Key %d has different definition", i));
+ DBUG_PRINT("error", ("t1_fulltext= %d, t2_fulltext=%d",
+ test(t1_keyinfo[i].flag & HA_FULLTEXT),
+ test(t2_keyinfo[i].flag & HA_FULLTEXT)));
+ DBUG_RETURN(1);
+ }
+ if (t1_keyinfo[i].flag & HA_SPATIAL && t2_keyinfo[i].flag & HA_SPATIAL)
+ continue;
+ else if (t1_keyinfo[i].flag & HA_SPATIAL ||
+ t2_keyinfo[i].flag & HA_SPATIAL)
+ {
+ DBUG_PRINT("error", ("Key %d has different definition", i));
+ DBUG_PRINT("error", ("t1_spatial= %d, t2_spatial=%d",
+ test(t1_keyinfo[i].flag & HA_SPATIAL),
+ test(t2_keyinfo[i].flag & HA_SPATIAL)));
+ DBUG_RETURN(1);
+ }
+ if (t1_keyinfo[i].keysegs != t2_keyinfo[i].keysegs ||
+ t1_keyinfo[i].key_alg != t2_keyinfo[i].key_alg)
+ {
+ DBUG_PRINT("error", ("Key %d has different definition", i));
+ DBUG_PRINT("error", ("t1_keysegs=%d, t1_key_alg=%d",
+ t1_keyinfo[i].keysegs, t1_keyinfo[i].key_alg));
+ DBUG_PRINT("error", ("t2_keysegs=%d, t2_key_alg=%d",
+ t2_keyinfo[i].keysegs, t2_keyinfo[i].key_alg));
+ DBUG_RETURN(1);
+ }
+ for (j= t1_keyinfo[i].keysegs; j--;)
+ {
+ if (t1_keysegs[j].type != t2_keysegs[j].type ||
+ t1_keysegs[j].language != t2_keysegs[j].language ||
+ t1_keysegs[j].null_bit != t2_keysegs[j].null_bit ||
+ t1_keysegs[j].length != t2_keysegs[j].length)
+ {
+ DBUG_PRINT("error", ("Key segment %d (key %d) has different "
+ "definition", j, i));
+ DBUG_PRINT("error", ("t1_type=%d, t1_language=%d, t1_null_bit=%d, "
+ "t1_length=%d",
+ t1_keysegs[j].type, t1_keysegs[j].language,
+ t1_keysegs[j].null_bit, t1_keysegs[j].length));
+ DBUG_PRINT("error", ("t2_type=%d, t2_language=%d, t2_null_bit=%d, "
+ "t2_length=%d",
+ t2_keysegs[j].type, t2_keysegs[j].language,
+ t2_keysegs[j].null_bit, t2_keysegs[j].length));
+
+ DBUG_RETURN(1);
+ }
+ }
+ }
+
+ for (i= 0; i < t1_recs; i++)
+ {
+ MARIA_COLUMNDEF *t1_rec= &t1_recinfo[i];
+ MARIA_COLUMNDEF *t2_rec= &t2_recinfo[i];
+ /*
+ FIELD_SKIP_ZERO can be changed to FIELD_NORMAL in maria_create,
+ see NOTE1 in ma_create.c
+ */
+ if ((t1_rec->type != t2_rec->type &&
+ !(t1_rec->type == (int) FIELD_SKIP_ZERO &&
+ t1_rec->length == 1 &&
+ t2_rec->type == (int) FIELD_NORMAL)) ||
+ t1_rec->length != t2_rec->length ||
+ t1_rec->null_bit != t2_rec->null_bit)
+ {
+ DBUG_PRINT("error", ("Field %d has different definition", i));
+ DBUG_PRINT("error", ("t1_type=%d, t1_length=%d, t1_null_bit=%d",
+ t1_rec->type, t1_rec->length, t1_rec->null_bit));
+ DBUG_PRINT("error", ("t2_type=%d, t2_length=%d, t2_null_bit=%d",
+ t2_rec->type, t2_rec->length, t2_rec->null_bit));
+ DBUG_RETURN(1);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+extern "C" {
+
+volatile int *_ma_killed_ptr(HA_CHECK *param)
+{
+ /* In theory Unsafe conversion, but should be ok for now */
+ return (int*) &(((THD *) (param->thd))->killed);
+}
+
+
+void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...)
+{
+ va_list args;
+ DBUG_ENTER("_ma_check_print_error");
+ param->error_printed |= 1;
+ param->out_flag |= O_DATA_LOST;
+ va_start(args, fmt);
+ _ma_check_print_msg(param, "error", fmt, args);
+ va_end(args);
+ DBUG_VOID_RETURN;
+}
+
+
+void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...)
+{
+ va_list args;
+ DBUG_ENTER("_ma_check_print_info");
+ va_start(args, fmt);
+ _ma_check_print_msg(param, "info", fmt, args);
+ va_end(args);
+ DBUG_VOID_RETURN;
+}
+
+
+void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...)
+{
+ va_list args;
+ DBUG_ENTER("_ma_check_print_warning");
+ param->warning_printed= 1;
+ param->out_flag |= O_DATA_LOST;
+ va_start(args, fmt);
+ _ma_check_print_msg(param, "warning", fmt, args);
+ va_end(args);
+ DBUG_VOID_RETURN;
+}
+
+}
+
+
+ha_maria::ha_maria(handlerton *hton, TABLE_SHARE *table_arg):
+handler(hton, table_arg), file(0),
+int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER |
+ HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
+ HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY |
+ HA_FILE_BASED | HA_CAN_GEOMETRY | MARIA_CANNOT_ROLLBACK |
+ HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS |
+ HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT),
+can_enable_indexes(1)
+{}
+
+
+handler *ha_maria::clone(MEM_ROOT *mem_root)
+{
+ ha_maria *new_handler= static_cast <ha_maria *>(handler::clone(mem_root));
+ if (new_handler)
+ new_handler->file->state= file->state;
+ return new_handler;
+}
+
+
+static const char *ha_maria_exts[]=
+{
+ MARIA_NAME_IEXT,
+ MARIA_NAME_DEXT,
+ NullS
+};
+
+
+const char **ha_maria::bas_ext() const
+{
+ return ha_maria_exts;
+}
+
+
+const char *ha_maria::index_type(uint key_number)
+{
+ return ((table->key_info[key_number].flags & HA_FULLTEXT) ?
+ "FULLTEXT" :
+ (table->key_info[key_number].flags & HA_SPATIAL) ?
+ "SPATIAL" :
+ (table->key_info[key_number].algorithm == HA_KEY_ALG_RTREE) ?
+ "RTREE" : "BTREE");
+}
+
+
+double ha_maria::scan_time()
+{
+ if (file->s->data_file_type == BLOCK_RECORD)
+ return ulonglong2double(stats.data_file_length - file->s->block_size) / max(file->s->block_size / 2, IO_SIZE) + 2;
+ return handler::scan_time();
+}
+
+/*
+ We need to be able to store at least two keys on an index page as the
+ splitting algorithms depends on this. (With only one key on a page
+ we also can't use any compression, which may make the index file much
+ larger)
+ We use HA_MAX_KEY_BUFF as this is a stack restriction imposed by the
+ handler interface.
+
+ We also need to reserve place for a record pointer (8) and 3 bytes
+ per key segment to store the length of the segment + possible null bytes.
+ These extra bytes are required here so that maria_create() will surely
+ accept any keys created which the returned key data storage length.
+*/
+
+uint ha_maria::max_supported_key_length() const
+{
+ uint tmp= (maria_max_key_length() - 8 - HA_MAX_KEY_SEG*3);
+ return min(HA_MAX_KEY_BUFF, tmp);
+}
+
+
+#ifdef HAVE_REPLICATION
+int ha_maria::net_read_dump(NET * net)
+{
+ int data_fd= file->dfile.file;
+ int error= 0;
+
+ my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME));
+ for (;;)
+ {
+ ulong packet_len= my_net_read(net);
+ if (!packet_len)
+ break; // end of file
+ if (packet_len == packet_error)
+ {
+ sql_print_error("ha_maria::net_read_dump - read error ");
+ error= -1;
+ goto err;
+ }
+ if (my_write(data_fd, (uchar *) net->read_pos, (uint) packet_len,
+ MYF(MY_WME | MY_FNABP)))
+ {
+ error= errno;
+ goto err;
+ }
+ }
+err:
+ return error;
+}
+
+
+int ha_maria::dump(THD * thd, int fd)
+{
+ MARIA_SHARE *share= file->s;
+ NET *net= &thd->net;
+ uint block_size= share->block_size;
+ my_off_t bytes_to_read= share->state.state.data_file_length;
+ int data_fd= file->dfile.file;
+ uchar *buf= (uchar *) my_malloc(block_size, MYF(MY_WME));
+ if (!buf)
+ return ENOMEM;
+
+ int error= 0;
+ my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME));
+ for (; bytes_to_read > 0;)
+ {
+ size_t bytes= my_read(data_fd, buf, block_size, MYF(MY_WME));
+ if (bytes == MY_FILE_ERROR)
+ {
+ error= errno;
+ goto err;
+ }
+
+ if (fd >= 0)
+ {
+ if (my_write(fd, buf, bytes, MYF(MY_WME | MY_FNABP)))
+ {
+ error= errno ? errno : EPIPE;
+ goto err;
+ }
+ }
+ else
+ {
+ if (my_net_write(net, buf, bytes))
+ {
+ error= errno ? errno : EPIPE;
+ goto err;
+ }
+ }
+ bytes_to_read -= bytes;
+ }
+
+ if (fd < 0)
+ {
+ if (my_net_write(net, (uchar*) "", 0))
+ error= errno ? errno : EPIPE;
+ net_flush(net);
+ }
+
+err:
+ my_free((uchar*) buf, MYF(0));
+ return error;
+}
+#endif /* HAVE_REPLICATION */
+
+ /* Name is here without an extension */
+
+int ha_maria::open(const char *name, int mode, uint test_if_locked)
+{
+ uint i;
+
+#ifdef NOT_USED
+ /*
+ If the user wants to have memory mapped data files, add an
+ open_flag. Do not memory map temporary tables because they are
+ expected to be inserted and thus extended a lot. Memory mapping is
+ efficient for files that keep their size, but very inefficient for
+ growing files. Using an open_flag instead of calling ma_extra(...
+ HA_EXTRA_MMAP ...) after maxs_open() has the advantage that the
+ mapping is not repeated for every open, but just done on the initial
+ open, when the MyISAM share is created. Every time the server
+ requires to open a new instance of a table it calls this method. We
+ will always supply HA_OPEN_MMAP for a permanent table. However, the
+ Maria storage engine will ignore this flag if this is a secondary
+ open of a table that is in use by other threads already (if the
+ Maria share exists already).
+ */
+ if (!(test_if_locked & HA_OPEN_TMP_TABLE) && opt_maria_use_mmap)
+ test_if_locked|= HA_OPEN_MMAP;
+#endif
+
+ if (!(file= maria_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER)))
+ return (my_errno ? my_errno : -1);
+
+ if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE))
+ VOID(maria_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0));
+
+ info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
+ if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED))
+ VOID(maria_extra(file, HA_EXTRA_WAIT_LOCK, 0));
+ if ((data_file_type= file->s->data_file_type) != STATIC_RECORD)
+ int_table_flags |= HA_REC_NOT_IN_SEQ;
+ if (!file->s->base.born_transactional)
+ {
+ /*
+ INSERT DELAYED cannot work with transactional tables (because it cannot
+ stand up to "when client gets ok the data is safe on disk": the record
+ may not even be inserted). In the future, we could enable it back (as a
+ client doing INSERT DELAYED knows the specificities; but we then should
+ make sure to regularly commit in the delayed_insert thread).
+ */
+ int_table_flags|= HA_CAN_INSERT_DELAYED;
+ }
+ if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
+ int_table_flags |= HA_HAS_CHECKSUM;
+
+ for (i= 0; i < table->s->keys; i++)
+ {
+ plugin_ref parser= table->key_info[i].parser;
+ if (table->key_info[i].flags & HA_USES_PARSER)
+ file->s->keyinfo[i].parser=
+ (struct st_mysql_ftparser *)plugin_decl(parser)->info;
+ table->key_info[i].block_size= file->s->keyinfo[i].block_length;
+ }
+ my_errno= 0;
+ return my_errno;
+}
+
+
+int ha_maria::close(void)
+{
+ MARIA_HA *tmp= file;
+ file= 0;
+ return maria_close(tmp);
+}
+
+
+int ha_maria::write_row(uchar * buf)
+{
+ ha_statistic_increment(&SSV::ha_write_count);
+
+ /* If we have a timestamp column, update it to the current time */
+ if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
+ table->timestamp_field->set_time();
+
+ /*
+ If we have an auto_increment column and we are writing a changed row
+ or a new row, then update the auto_increment value in the record.
+ */
+ if (table->next_number_field && buf == table->record[0])
+ {
+ int error;
+ if ((error= update_auto_increment()))
+ return error;
+ }
+ return maria_write(file, buf);
+}
+
+
+int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
+{
+ if (!file)
+ return HA_ADMIN_INTERNAL_ERROR;
+ int error;
+ HA_CHECK param;
+ MARIA_SHARE *share= file->s;
+ const char *old_proc_info= thd->proc_info;
+
+ thd->proc_info= "Checking table";
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "check";
+ param.db_name= table->s->db.str;
+ param.table_name= table->alias;
+ param.testflag= check_opt->flags | T_CHECK | T_SILENT;
+ param.stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
+
+ if (!(table->db_stat & HA_READ_ONLY))
+ param.testflag |= T_STATISTICS;
+ param.using_global_keycache= 1;
+
+ if (!maria_is_crashed(file) &&
+ (((param.testflag & T_CHECK_ONLY_CHANGED) &&
+ !(share->state.changed & (STATE_CHANGED | STATE_CRASHED |
+ STATE_CRASHED_ON_REPAIR)) &&
+ share->state.open_count == 0) ||
+ ((param.testflag & T_FAST) && (share->state.open_count ==
+ (uint) (share->global_changed ? 1 :
+ 0)))))
+ return HA_ADMIN_ALREADY_DONE;
+
+ error= maria_chk_status(&param, file); // Not fatal
+ error= maria_chk_size(&param, file);
+ if (!error)
+ error |= maria_chk_del(&param, file, param.testflag);
+ if (!error)
+ error= maria_chk_key(&param, file);
+ if (!error)
+ {
+ if ((!(param.testflag & T_QUICK) &&
+ ((share->options &
+ (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ||
+ (param.testflag & (T_EXTEND | T_MEDIUM)))) || maria_is_crashed(file))
+ {
+ uint old_testflag= param.testflag;
+ param.testflag |= T_MEDIUM;
+ if (!(error= init_io_cache(&param.read_cache, file->dfile.file,
+ my_default_record_cache_size, READ_CACHE,
+ share->pack.header_length, 1, MYF(MY_WME))))
+ {
+ error= maria_chk_data_link(&param, file, param.testflag & T_EXTEND);
+ end_io_cache(&(param.read_cache));
+ }
+ param.testflag= old_testflag;
+ }
+ }
+ if (!error)
+ {
+ if ((share->state.changed & (STATE_CHANGED |
+ STATE_CRASHED_ON_REPAIR |
+ STATE_CRASHED | STATE_NOT_ANALYZED)) ||
+ (param.testflag & T_STATISTICS) || maria_is_crashed(file))
+ {
+ file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+ pthread_mutex_lock(&share->intern_lock);
+ share->state.changed &= ~(STATE_CHANGED | STATE_CRASHED |
+ STATE_CRASHED_ON_REPAIR);
+ if (!(table->db_stat & HA_READ_ONLY))
+ error= maria_update_state_info(&param, file, UPDATE_TIME | UPDATE_OPEN_COUNT |
+ UPDATE_STAT);
+ pthread_mutex_unlock(&share->intern_lock);
+ info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
+ HA_STATUS_CONST);
+ }
+ }
+ else if (!maria_is_crashed(file) && !thd->killed)
+ {
+ maria_mark_crashed(file);
+ file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+ }
+
+ thd->proc_info= old_proc_info;
+ return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
+}
+
+
+/*
+ Analyze the key distribution in the table
+ As the table may be only locked for read, we have to take into account that
+ two threads may do an analyze at the same time!
+*/
+
+int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
+{
+ int error= 0;
+ HA_CHECK param;
+ MARIA_SHARE *share= file->s;
+
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "analyze";
+ param.db_name= table->s->db.str;
+ param.table_name= table->alias;
+ param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS |
+ T_DONT_CHECK_CHECKSUM);
+ param.using_global_keycache= 1;
+ param.stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
+
+ if (!(share->state.changed & STATE_NOT_ANALYZED))
+ return HA_ADMIN_ALREADY_DONE;
+
+ error= maria_chk_key(&param, file);
+ if (!error)
+ {
+ pthread_mutex_lock(&share->intern_lock);
+ error= maria_update_state_info(&param, file, UPDATE_STAT);
+ pthread_mutex_unlock(&share->intern_lock);
+ }
+ else if (!maria_is_crashed(file) && !thd->killed)
+ maria_mark_crashed(file);
+ return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
+}
+
+
+int ha_maria::restore(THD * thd, HA_CHECK_OPT *check_opt)
+{
+ HA_CHECK_OPT tmp_check_opt;
+ char *backup_dir= thd->lex->backup_dir;
+ char src_path[FN_REFLEN], dst_path[FN_REFLEN];
+ char table_name[FN_REFLEN];
+ int error;
+ const char *errmsg;
+ DBUG_ENTER("restore");
+
+ VOID(tablename_to_filename(table->s->table_name.str, table_name,
+ sizeof(table_name)));
+
+ if (fn_format_relative_to_data_home(src_path, table_name, backup_dir,
+ MARIA_NAME_DEXT))
+ DBUG_RETURN(HA_ADMIN_INVALID);
+
+ strxmov(dst_path, table->s->normalized_path.str, MARIA_NAME_DEXT, NullS);
+ if (my_copy(src_path, dst_path, MYF(MY_WME)))
+ {
+ error= HA_ADMIN_FAILED;
+ errmsg= "Failed in my_copy (Error %d)";
+ goto err;
+ }
+
+ tmp_check_opt.init();
+ tmp_check_opt.flags |= T_VERY_SILENT | T_CALC_CHECKSUM | T_QUICK;
+ DBUG_RETURN(repair(thd, &tmp_check_opt));
+
+err:
+ {
+ HA_CHECK param;
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "restore";
+ param.db_name= table->s->db.str;
+ param.table_name= table->s->table_name.str;
+ param.testflag= 0;
+ _ma_check_print_error(&param, errmsg, my_errno);
+ DBUG_RETURN(error);
+ }
+}
+
+
+int ha_maria::backup(THD * thd, HA_CHECK_OPT *check_opt)
+{
+ char *backup_dir= thd->lex->backup_dir;
+ char src_path[FN_REFLEN], dst_path[FN_REFLEN];
+ char table_name[FN_REFLEN];
+ int error;
+ const char *errmsg;
+ DBUG_ENTER("ha_maria::backup");
+
+ VOID(tablename_to_filename(table->s->table_name.str, table_name,
+ sizeof(table_name)));
+
+ if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir,
+ reg_ext))
+ {
+ errmsg= "Failed in fn_format() for .frm file (errno: %d)";
+ error= HA_ADMIN_INVALID;
+ goto err;
+ }
+
+ strxmov(src_path, table->s->normalized_path.str, reg_ext, NullS);
+ if (my_copy(src_path, dst_path,
+ MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE)))
+ {
+ error= HA_ADMIN_FAILED;
+ errmsg= "Failed copying .frm file (errno: %d)";
+ goto err;
+ }
+
+ /* Change extension */
+ if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir,
+ MARIA_NAME_DEXT))
+ {
+ errmsg= "Failed in fn_format() for .MYD file (errno: %d)";
+ error= HA_ADMIN_INVALID;
+ goto err;
+ }
+
+ strxmov(src_path, table->s->normalized_path.str, MARIA_NAME_DEXT, NullS);
+ if (_ma_flush_table_files(file, MARIA_FLUSH_DATA, FLUSH_FORCE_WRITE,
+ FLUSH_KEEP))
+ {
+ error= HA_ADMIN_FAILED;
+ errmsg= "Failed in flush (Error %d)";
+ goto err;
+ }
+ if (my_copy(src_path, dst_path,
+ MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE)))
+ {
+ errmsg= "Failed copying .MYD file (errno: %d)";
+ error= HA_ADMIN_FAILED;
+ goto err;
+ }
+ DBUG_RETURN(HA_ADMIN_OK);
+
+err:
+ {
+ HA_CHECK param;
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "backup";
+ param.db_name= table->s->db.str;
+ param.table_name= table->s->table_name.str;
+ param.testflag= 0;
+ _ma_check_print_error(&param, errmsg, my_errno);
+ DBUG_RETURN(error);
+ }
+}
+
+
+int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
+{
+ int error;
+ HA_CHECK param;
+ ha_rows start_records;
+
+ if (!file)
+ return HA_ADMIN_INTERNAL_ERROR;
+
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "repair";
+ param.testflag= ((check_opt->flags & ~(T_EXTEND)) |
+ T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM |
+ (check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT));
+ param.sort_buffer_length= check_opt->sort_buffer_size;
+ start_records= file->state->records;
+ while ((error= repair(thd, param, 0)) && param.retry_repair)
+ {
+ param.retry_repair= 0;
+ if (test_all_bits(param.testflag,
+ (uint) (T_RETRY_WITHOUT_QUICK | T_QUICK)))
+ {
+ param.testflag &= ~T_RETRY_WITHOUT_QUICK;
+ sql_print_information("Retrying repair of: '%s' without quick",
+ table->s->path.str);
+ continue;
+ }
+ param.testflag &= ~T_QUICK;
+ if ((param.testflag & T_REP_BY_SORT))
+ {
+ param.testflag= (param.testflag & ~T_REP_BY_SORT) | T_REP;
+ sql_print_information("Retrying repair of: '%s' with keycache",
+ table->s->path.str);
+ continue;
+ }
+ break;
+ }
+ if (!error && start_records != file->state->records &&
+ !(check_opt->flags & T_VERY_SILENT))
+ {
+ char llbuff[22], llbuff2[22];
+ sql_print_information("Found %s of %s rows when repairing '%s'",
+ llstr(file->state->records, llbuff),
+ llstr(start_records, llbuff2),
+ table->s->path.str);
+ }
+ return error;
+}
+
+int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt)
+{
+ int error;
+ if (!file)
+ return HA_ADMIN_INTERNAL_ERROR;
+ HA_CHECK param;
+
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "optimize";
+ param.testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE |
+ T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX);
+ param.sort_buffer_length= check_opt->sort_buffer_size;
+ if ((error= repair(thd, param, 1)) && param.retry_repair)
+ {
+ sql_print_warning("Warning: Optimize table got errno %d on %s.%s, retrying",
+ my_errno, param.db_name, param.table_name);
+ param.testflag &= ~T_REP_BY_SORT;
+ error= repair(thd, param, 1);
+ }
+ return error;
+}
+
+
+int ha_maria::repair(THD *thd, HA_CHECK &param, bool do_optimize)
+{
+ int error= 0;
+ uint local_testflag= param.testflag;
+ bool optimize_done= !do_optimize, statistics_done= 0;
+ const char *old_proc_info= thd->proc_info;
+ char fixed_name[FN_REFLEN];
+ MARIA_SHARE *share= file->s;
+ ha_rows rows= file->state->records;
+ DBUG_ENTER("ha_maria::repair");
+
+ /*
+ Normally this method is entered with a properly opened table. If the
+ repair fails, it can be repeated with more elaborate options. Under
+ special circumstances it can happen that a repair fails so that it
+ closed the data file and cannot re-open it. In this case file->dfile
+ is set to -1. We must not try another repair without an open data
+ file. (Bug #25289)
+ */
+ if (file->dfile.file == -1)
+ {
+ sql_print_information("Retrying repair of: '%s' failed. "
+ "Please try REPAIR EXTENDED or maria_chk",
+ table->s->path.str);
+ DBUG_RETURN(HA_ADMIN_FAILED);
+ }
+
+ param.db_name= table->s->db.str;
+ param.table_name= table->alias;
+ param.tmpfile_createflag= O_RDWR | O_TRUNC;
+ param.using_global_keycache= 1;
+ param.thd= thd;
+ param.tmpdir= &mysql_tmpdir_list;
+ param.out_flag= 0;
+ strmov(fixed_name, file->s->open_file_name);
+
+ // Don't lock tables if we have used LOCK TABLE
+ if (!thd->locked_tables &&
+ maria_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK))
+ {
+ _ma_check_print_error(&param, ER(ER_CANT_LOCK), my_errno);
+ DBUG_RETURN(HA_ADMIN_FAILED);
+ }
+
+ if (!do_optimize ||
+ ((file->state->del ||
+ ((file->s->data_file_type != BLOCK_RECORD) &&
+ share->state.split != file->state->records)) &&
+ (!(param.testflag & T_QUICK) ||
+ (share->state.changed & (STATE_NOT_OPTIMIZED_KEYS |
+ STATE_NOT_OPTIMIZED_ROWS)))))
+ {
+ ulonglong key_map= ((local_testflag & T_CREATE_MISSING_KEYS) ?
+ maria_get_mask_all_keys_active(share->base.keys) :
+ share->state.key_map);
+ uint save_testflag= param.testflag;
+ if (maria_test_if_sort_rep(file, file->state->records, key_map, 0) &&
+ (local_testflag & T_REP_BY_SORT))
+ {
+ local_testflag |= T_STATISTICS;
+ param.testflag |= T_STATISTICS; // We get this for free
+ statistics_done= 1;
+ /* TODO: Remove BLOCK_RECORD test when parallel works with blocks */
+ if (THDVAR(thd,repair_threads) > 1 &&
+ file->s->data_file_type != BLOCK_RECORD)
+ {
+ char buf[40];
+ /* TODO: respect maria_repair_threads variable */
+ my_snprintf(buf, 40, "Repair with %d threads", my_count_bits(key_map));
+ thd->proc_info= buf;
+ param.testflag|= T_REP_PARALLEL;
+ error= maria_repair_parallel(&param, file, fixed_name,
+ param.testflag & T_QUICK);
+ thd->proc_info= "Repair done"; // to reset proc_info, as
+ // it was pointing to local buffer
+ }
+ else
+ {
+ thd->proc_info= "Repair by sorting";
+ param.testflag|= T_REP_BY_SORT;
+ error= maria_repair_by_sort(&param, file, fixed_name,
+ param.testflag & T_QUICK);
+ }
+ }
+ else
+ {
+ thd->proc_info= "Repair with keycache";
+ param.testflag &= ~(T_REP_BY_SORT | T_REP_PARALLEL);
+ error= maria_repair(&param, file, fixed_name, param.testflag & T_QUICK);
+ /**
+ @todo RECOVERY BUG we do things with the index file
+ (maria_sort_index() after the above which already has logged the
+ record and bumped create_rename_lsn. Is it ok?
+ */
+ }
+ param.testflag= save_testflag;
+ optimize_done= 1;
+ }
+ if (!error)
+ {
+ if ((local_testflag & T_SORT_INDEX) &&
+ (share->state.changed & STATE_NOT_SORTED_PAGES))
+ {
+ optimize_done= 1;
+ thd->proc_info= "Sorting index";
+ error= maria_sort_index(&param, file, fixed_name);
+ }
+ if (!statistics_done && (local_testflag & T_STATISTICS))
+ {
+ if (share->state.changed & STATE_NOT_ANALYZED)
+ {
+ optimize_done= 1;
+ thd->proc_info= "Analyzing";
+ error= maria_chk_key(&param, file);
+ }
+ else
+ local_testflag &= ~T_STATISTICS; // Don't update statistics
+ }
+ }
+ thd->proc_info= "Saving state";
+ pthread_mutex_lock(&share->intern_lock);
+ if (!error)
+ {
+ if ((share->state.changed & STATE_CHANGED) || maria_is_crashed(file))
+ {
+ share->state.changed &= ~(STATE_CHANGED | STATE_CRASHED |
+ STATE_CRASHED_ON_REPAIR);
+ file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+ }
+ /*
+ the following 'if', thought conceptually wrong,
+ is a useful optimization nevertheless.
+ */
+ if (file->state != &file->s->state.state)
+ file->s->state.state= *file->state;
+ if (file->s->base.auto_key)
+ _ma_update_auto_increment_key(&param, file, 1);
+ if (optimize_done)
+ error= maria_update_state_info(&param, file,
+ UPDATE_TIME | UPDATE_OPEN_COUNT |
+ (local_testflag &
+ T_STATISTICS ? UPDATE_STAT : 0));
+ info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
+ HA_STATUS_CONST);
+ if (rows != file->state->records && !(param.testflag & T_VERY_SILENT))
+ {
+ char llbuff[22], llbuff2[22];
+ _ma_check_print_warning(&param, "Number of rows changed from %s to %s",
+ llstr(rows, llbuff),
+ llstr(file->state->records, llbuff2));
+ }
+ }
+ else
+ {
+ maria_mark_crashed_on_repair(file);
+ file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+ maria_update_state_info(&param, file, 0);
+ }
+ pthread_mutex_unlock(&share->intern_lock);
+ thd->proc_info= old_proc_info;
+ if (!thd->locked_tables)
+ {
+ /**
+ @todo RECOVERY BUG find why this is needed. Monty says it's because a
+ new non-transactional table is created by maria_repair(): find how this
+ new table's state influences the old one's.
+ */
+ _ma_reenable_logging_for_table(file->s);
+ maria_lock_database(file, F_UNLCK);
+ }
+ DBUG_RETURN(error ? HA_ADMIN_FAILED :
+ !optimize_done ? HA_ADMIN_ALREADY_DONE : HA_ADMIN_OK);
+}
+
+
+/*
+ Assign table indexes to a specific key cache.
+*/
+
+int ha_maria::assign_to_keycache(THD * thd, HA_CHECK_OPT *check_opt)
+{
+#if 0 && NOT_IMPLEMENTED
+ PAGECACHE *new_pagecache= check_opt->pagecache;
+ const char *errmsg= 0;
+ int error= HA_ADMIN_OK;
+ ulonglong map;
+ TABLE_LIST *table_list= table->pos_in_table_list;
+ DBUG_ENTER("ha_maria::assign_to_keycache");
+
+
+ table->keys_in_use_for_query.clear_all();
+
+ if (table_list->process_index_hints(table))
+ DBUG_RETURN(HA_ADMIN_FAILED);
+ map= ~(ulonglong) 0;
+ if (!table->keys_in_use_for_query.is_clear_all())
+ /* use all keys if there's no list specified by the user through hints */
+ map= table->keys_in_use_for_query.to_ulonglong();
+
+ if ((error= maria_assign_to_pagecache(file, map, new_pagecache)))
+ {
+ char buf[STRING_BUFFER_USUAL_SIZE];
+ my_snprintf(buf, sizeof(buf),
+ "Failed to flush to index file (errno: %d)", error);
+ errmsg= buf;
+ error= HA_ADMIN_CORRUPT;
+ }
+
+ if (error != HA_ADMIN_OK)
+ {
+ /* Send error to user */
+ HA_CHECK param;
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "assign_to_keycache";
+ param.db_name= table->s->db.str;
+ param.table_name= table->s->table_name.str;
+ param.testflag= 0;
+ _ma_check_print_error(&param, errmsg);
+ }
+ DBUG_RETURN(error);
+#else
+ return HA_ADMIN_NOT_IMPLEMENTED;
+#endif
+}
+
+
+/*
+ Preload pages of the index file for a table into the key cache.
+*/
+
+int ha_maria::preload_keys(THD * thd, HA_CHECK_OPT *check_opt)
+{
+ int error;
+ const char *errmsg;
+ ulonglong map;
+ TABLE_LIST *table_list= table->pos_in_table_list;
+ my_bool ignore_leaves= table_list->ignore_leaves;
+ char buf[ERRMSGSIZE+20];
+
+ DBUG_ENTER("ha_maria::preload_keys");
+
+ table->keys_in_use_for_query.clear_all();
+
+ if (table_list->process_index_hints(table))
+ DBUG_RETURN(HA_ADMIN_FAILED);
+
+ map= ~(ulonglong) 0;
+ /* Check validity of the index references */
+ if (!table->keys_in_use_for_query.is_clear_all())
+ /* use all keys if there's no list specified by the user through hints */
+ map= table->keys_in_use_for_query.to_ulonglong();
+
+ maria_extra(file, HA_EXTRA_PRELOAD_BUFFER_SIZE,
+ (void*) &thd->variables.preload_buff_size);
+
+ if ((error= maria_preload(file, map, ignore_leaves)))
+ {
+ switch (error) {
+ case HA_ERR_NON_UNIQUE_BLOCK_SIZE:
+ errmsg= "Indexes use different block sizes";
+ break;
+ case HA_ERR_OUT_OF_MEM:
+ errmsg= "Failed to allocate buffer";
+ break;
+ default:
+ my_snprintf(buf, ERRMSGSIZE,
+ "Failed to read from index file (errno: %d)", my_errno);
+ errmsg= buf;
+ }
+ error= HA_ADMIN_FAILED;
+ goto err;
+ }
+
+ DBUG_RETURN(HA_ADMIN_OK);
+
+err:
+ {
+ HA_CHECK param;
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "preload_keys";
+ param.db_name= table->s->db.str;
+ param.table_name= table->s->table_name.str;
+ param.testflag= 0;
+ _ma_check_print_error(&param, errmsg);
+ DBUG_RETURN(error);
+ }
+}
+
+
+/*
+ Disable indexes, making it persistent if requested.
+
+ SYNOPSIS
+ disable_indexes()
+ mode mode of operation:
+ HA_KEY_SWITCH_NONUNIQ disable all non-unique keys
+ HA_KEY_SWITCH_ALL disable all keys
+ HA_KEY_SWITCH_NONUNIQ_SAVE dis. non-uni. and make persistent
+ HA_KEY_SWITCH_ALL_SAVE dis. all keys and make persistent
+
+ IMPLEMENTATION
+ HA_KEY_SWITCH_NONUNIQ is not implemented.
+ HA_KEY_SWITCH_ALL_SAVE is not implemented.
+
+ RETURN
+ 0 ok
+ HA_ERR_WRONG_COMMAND mode not implemented.
+*/
+
+int ha_maria::disable_indexes(uint mode)
+{
+ int error;
+
+ if (mode == HA_KEY_SWITCH_ALL)
+ {
+ /* call a storage engine function to switch the key map */
+ error= maria_disable_indexes(file);
+ }
+ else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
+ {
+ maria_extra(file, HA_EXTRA_NO_KEYS, 0);
+ info(HA_STATUS_CONST); // Read new key info
+ error= 0;
+ }
+ else
+ {
+ /* mode not implemented */
+ error= HA_ERR_WRONG_COMMAND;
+ }
+ return error;
+}
+
+
+/*
+ Enable indexes, making it persistent if requested.
+
+ SYNOPSIS
+ enable_indexes()
+ mode mode of operation:
+ HA_KEY_SWITCH_NONUNIQ enable all non-unique keys
+ HA_KEY_SWITCH_ALL enable all keys
+ HA_KEY_SWITCH_NONUNIQ_SAVE en. non-uni. and make persistent
+ HA_KEY_SWITCH_ALL_SAVE en. all keys and make persistent
+
+ DESCRIPTION
+ Enable indexes, which might have been disabled by disable_index() before.
+ The modes without _SAVE work only if both data and indexes are empty,
+ since the MARIA repair would enable them persistently.
+ To be sure in these cases, call handler::delete_all_rows() before.
+
+ IMPLEMENTATION
+ HA_KEY_SWITCH_NONUNIQ is not implemented.
+ HA_KEY_SWITCH_ALL_SAVE is not implemented.
+
+ RETURN
+ 0 ok
+ !=0 Error, among others:
+ HA_ERR_CRASHED data or index is non-empty. Delete all rows and retry.
+ HA_ERR_WRONG_COMMAND mode not implemented.
+*/
+
+int ha_maria::enable_indexes(uint mode)
+{
+ int error;
+
+ if (maria_is_all_keys_active(file->s->state.key_map, file->s->base.keys))
+ {
+ /* All indexes are enabled already. */
+ return 0;
+ }
+
+ if (mode == HA_KEY_SWITCH_ALL)
+ {
+ error= maria_enable_indexes(file);
+ /*
+ Do not try to repair on error,
+ as this could make the enabled state persistent,
+ but mode==HA_KEY_SWITCH_ALL forbids it.
+ */
+ }
+ else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
+ {
+ THD *thd= current_thd;
+ HA_CHECK param;
+ const char *save_proc_info= thd->proc_info;
+ thd->proc_info= "Creating index";
+ maria_chk_init(&param);
+ param.op_name= "recreating_index";
+ param.testflag= (T_SILENT | T_REP_BY_SORT | T_QUICK |
+ T_CREATE_MISSING_KEYS);
+ param.myf_rw &= ~MY_WAIT_IF_FULL;
+ param.sort_buffer_length= THDVAR(thd,sort_buffer_size);
+ param.stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
+ param.tmpdir= &mysql_tmpdir_list;
+ if ((error= (repair(thd, param, 0) != HA_ADMIN_OK)) && param.retry_repair)
+ {
+ sql_print_warning("Warning: Enabling keys got errno %d on %s.%s, retrying",
+ my_errno, param.db_name, param.table_name);
+ /* Repairing by sort failed. Now try standard repair method. */
+ param.testflag &= ~(T_REP_BY_SORT | T_QUICK);
+ error= (repair(thd, param, 0) != HA_ADMIN_OK);
+ /*
+ If the standard repair succeeded, clear all error messages which
+ might have been set by the first repair. They can still be seen
+ with SHOW WARNINGS then.
+ */
+#ifndef EMBEDDED_LIBRARY
+ if (!error)
+ thd->clear_error();
+#endif /* EMBEDDED_LIBRARY */
+ }
+ info(HA_STATUS_CONST);
+ thd->proc_info= save_proc_info;
+ }
+ else
+ {
+ /* mode not implemented */
+ error= HA_ERR_WRONG_COMMAND;
+ }
+ return error;
+}
+
+
+/*
+ Test if indexes are disabled.
+
+
+ SYNOPSIS
+ indexes_are_disabled()
+ no parameters
+
+
+ RETURN
+ 0 indexes are not disabled
+ 1 all indexes are disabled
+ [2 non-unique indexes are disabled - NOT YET IMPLEMENTED]
+*/
+
+int ha_maria::indexes_are_disabled(void)
+{
+ return maria_indexes_are_disabled(file);
+}
+
+
+/*
+ prepare for a many-rows insert operation
+ e.g. - disable indexes (if they can be recreated fast) or
+ activate special bulk-insert optimizations
+
+ SYNOPSIS
+ start_bulk_insert(rows)
+ rows Rows to be inserted
+ 0 if we don't know
+
+ NOTICE
+ Do not forget to call end_bulk_insert() later!
+*/
+
+void ha_maria::start_bulk_insert(ha_rows rows)
+{
+ DBUG_ENTER("ha_maria::start_bulk_insert");
+ THD *thd= current_thd;
+ ulong size= min(thd->variables.read_buff_size,
+ (ulong) (table->s->avg_row_length * rows));
+ DBUG_PRINT("info", ("start_bulk_insert: rows %lu size %lu",
+ (ulong) rows, size));
+
+ /* don't enable row cache if too few rows */
+ if (!rows || (rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE))
+ maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &size);
+
+ can_enable_indexes= (maria_is_all_keys_active(file->s->state.key_map,
+ file->s->base.keys));
+
+ if (!(specialflag & SPECIAL_SAFE_MODE))
+ {
+ /*
+ Only disable old index if the table was empty and we are inserting
+ a lot of rows.
+ We should not do this for only a few rows as this is slower and
+ we don't want to update the key statistics based of only a few rows.
+ */
+ if (file->state->records == 0 && can_enable_indexes &&
+ (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES))
+ maria_disable_non_unique_index(file, rows);
+ else if (!file->bulk_insert &&
+ (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT))
+ {
+ maria_init_bulk_insert(file, thd->variables.bulk_insert_buff_size, rows);
+ }
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ end special bulk-insert optimizations,
+ which have been activated by start_bulk_insert().
+
+ SYNOPSIS
+ end_bulk_insert()
+ no arguments
+
+ RETURN
+ 0 OK
+ != 0 Error
+*/
+
+int ha_maria::end_bulk_insert()
+{
+ int err;
+ DBUG_ENTER("ha_maria::end_bulk_insert");
+ maria_end_bulk_insert(file);
+ err= maria_extra(file, HA_EXTRA_NO_CACHE, 0);
+ DBUG_RETURN(err ? err : can_enable_indexes ?
+ enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE) : 0);
+}
+
+
+bool ha_maria::check_and_repair(THD *thd)
+{
+ int error= 0;
+ int marked_crashed;
+ char *old_query;
+ uint old_query_length;
+ HA_CHECK_OPT check_opt;
+ DBUG_ENTER("ha_maria::check_and_repair");
+
+ check_opt.init();
+ check_opt.flags= T_MEDIUM | T_AUTO_REPAIR;
+ // Don't use quick if deleted rows
+ if (!file->state->del && (maria_recover_options & HA_RECOVER_QUICK))
+ check_opt.flags |= T_QUICK;
+ sql_print_warning("Checking table: '%s'", table->s->path.str);
+
+ old_query= thd->query;
+ old_query_length= thd->query_length;
+ pthread_mutex_lock(&LOCK_thread_count);
+ thd->query= table->s->table_name.str;
+ thd->query_length= table->s->table_name.length;
+ pthread_mutex_unlock(&LOCK_thread_count);
+
+ if ((marked_crashed= maria_is_crashed(file)) || check(thd, &check_opt))
+ {
+ sql_print_warning("Recovering table: '%s'", table->s->path.str);
+ check_opt.flags=
+ ((maria_recover_options & HA_RECOVER_BACKUP ? T_BACKUP_DATA : 0) |
+ (marked_crashed ? 0 : T_QUICK) |
+ (maria_recover_options & HA_RECOVER_FORCE ? 0 : T_SAFE_REPAIR) |
+ T_AUTO_REPAIR);
+ if (repair(thd, &check_opt))
+ error= 1;
+ }
+ pthread_mutex_lock(&LOCK_thread_count);
+ thd->query= old_query;
+ thd->query_length= old_query_length;
+ pthread_mutex_unlock(&LOCK_thread_count);
+ DBUG_RETURN(error);
+}
+
+
+bool ha_maria::is_crashed() const
+{
+ return (file->s->state.changed & STATE_CRASHED ||
+ (my_disable_locking && file->s->state.open_count));
+}
+
+
+int ha_maria::update_row(const uchar * old_data, uchar * new_data)
+{
+ ha_statistic_increment(&SSV::ha_update_count);
+ if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
+ table->timestamp_field->set_time();
+ return maria_update(file, old_data, new_data);
+}
+
+
+int ha_maria::delete_row(const uchar * buf)
+{
+ ha_statistic_increment(&SSV::ha_delete_count);
+ return maria_delete(file, buf);
+}
+
+
+int ha_maria::index_read_map(uchar * buf, const uchar * key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag)
+{
+ DBUG_ASSERT(inited == INDEX);
+ ha_statistic_increment(&SSV::ha_read_key_count);
+ int error= maria_rkey(file, buf, active_index, key, keypart_map, find_flag);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::index_read_idx_map(uchar * buf, uint index, const uchar * key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag)
+{
+ ha_statistic_increment(&SSV::ha_read_key_count);
+ int error= maria_rkey(file, buf, index, key, keypart_map, find_flag);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::index_read_last_map(uchar * buf, const uchar * key,
+ key_part_map keypart_map)
+{
+ DBUG_ENTER("ha_maria::index_read_last_map");
+ DBUG_ASSERT(inited == INDEX);
+ ha_statistic_increment(&SSV::ha_read_key_count);
+ int error= maria_rkey(file, buf, active_index, key, keypart_map,
+ HA_READ_PREFIX_LAST);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ DBUG_RETURN(error);
+}
+
+
+int ha_maria::index_next(uchar * buf)
+{
+ DBUG_ASSERT(inited == INDEX);
+ ha_statistic_increment(&SSV::ha_read_next_count);
+ int error= maria_rnext(file, buf, active_index);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::index_prev(uchar * buf)
+{
+ DBUG_ASSERT(inited == INDEX);
+ ha_statistic_increment(&SSV::ha_read_prev_count);
+ int error= maria_rprev(file, buf, active_index);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::index_first(uchar * buf)
+{
+ DBUG_ASSERT(inited == INDEX);
+ ha_statistic_increment(&SSV::ha_read_first_count);
+ int error= maria_rfirst(file, buf, active_index);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::index_last(uchar * buf)
+{
+ DBUG_ASSERT(inited == INDEX);
+ ha_statistic_increment(&SSV::ha_read_last_count);
+ int error= maria_rlast(file, buf, active_index);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::index_next_same(uchar * buf,
+ const uchar *key __attribute__ ((unused)),
+ uint length __attribute__ ((unused)))
+{
+ DBUG_ASSERT(inited == INDEX);
+ ha_statistic_increment(&SSV::ha_read_next_count);
+ int error= maria_rnext_same(file, buf);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::rnd_init(bool scan)
+{
+ if (scan)
+ return maria_scan_init(file);
+ return maria_reset(file); // Free buffers
+}
+
+
+int ha_maria::rnd_end()
+{
+ /* Safe to call even if we don't have started a scan */
+ maria_scan_end(file);
+ return 0;
+}
+
+
+int ha_maria::rnd_next(uchar *buf)
+{
+ ha_statistic_increment(&SSV::ha_read_rnd_next_count);
+ int error= maria_scan(file, buf);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::remember_rnd_pos()
+{
+ return (*file->s->scan_remember_pos)(file, &remember_pos);
+}
+
+
+int ha_maria::restart_rnd_next(uchar *buf)
+{
+ (*file->s->scan_restore_pos)(file, remember_pos);
+ return rnd_next(buf);
+}
+
+
+int ha_maria::rnd_pos(uchar *buf, uchar *pos)
+{
+ ha_statistic_increment(&SSV::ha_read_rnd_count);
+ int error= maria_rrnd(file, buf, my_get_ptr(pos, ref_length));
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+void ha_maria::position(const uchar *record)
+{
+ my_off_t row_position= maria_position(file);
+ my_store_ptr(ref, ref_length, row_position);
+}
+
+
+int ha_maria::info(uint flag)
+{
+ MARIA_INFO maria_info;
+ char name_buff[FN_REFLEN];
+
+ (void) maria_status(file, &maria_info, flag);
+ if (flag & HA_STATUS_VARIABLE)
+ {
+ stats.records= maria_info.records;
+ stats.deleted= maria_info.deleted;
+ stats.data_file_length= maria_info.data_file_length;
+ stats.index_file_length= maria_info.index_file_length;
+ stats.delete_length= maria_info.delete_length;
+ stats.check_time= maria_info.check_time;
+ stats.mean_rec_length= maria_info.mean_reclength;
+ }
+ if (flag & HA_STATUS_CONST)
+ {
+ TABLE_SHARE *share= table->s;
+ stats.max_data_file_length= maria_info.max_data_file_length;
+ stats.max_index_file_length= maria_info.max_index_file_length;
+ stats.create_time= maria_info.create_time;
+ ref_length= maria_info.reflength;
+ share->db_options_in_use= maria_info.options;
+ stats.block_size= maria_block_size;
+
+ /* Update share */
+ if (share->tmp_table == NO_TMP_TABLE)
+ pthread_mutex_lock(&share->mutex);
+ share->keys_in_use.set_prefix(share->keys);
+ share->keys_in_use.intersect_extended(maria_info.key_map);
+ share->keys_for_keyread.intersect(share->keys_in_use);
+ share->db_record_offset= maria_info.record_offset;
+ if (share->key_parts)
+ {
+ ulong *to= table->key_info[0].rec_per_key, *end;
+ double *from= maria_info.rec_per_key;
+ for (end= to+ share->key_parts ; to < end ; to++, from++)
+ *to= (ulong) (*from + 0.5);
+ }
+ if (share->tmp_table == NO_TMP_TABLE)
+ pthread_mutex_unlock(&share->mutex);
+
+ /*
+ Set data_file_name and index_file_name to point at the symlink value
+ if table is symlinked (Ie; Real name is not same as generated name)
+ */
+ data_file_name= index_file_name= 0;
+ fn_format(name_buff, file->s->open_file_name, "", MARIA_NAME_DEXT,
+ MY_APPEND_EXT | MY_UNPACK_FILENAME);
+ if (strcmp(name_buff, maria_info.data_file_name))
+ data_file_name=maria_info.data_file_name;
+ fn_format(name_buff, file->s->open_file_name, "", MARIA_NAME_IEXT,
+ MY_APPEND_EXT | MY_UNPACK_FILENAME);
+ if (strcmp(name_buff, maria_info.index_file_name))
+ index_file_name=maria_info.index_file_name;
+ }
+ if (flag & HA_STATUS_ERRKEY)
+ {
+ errkey= maria_info.errkey;
+ my_store_ptr(dup_ref, ref_length, maria_info.dup_key_pos);
+ }
+ /* Faster to always update, than to do it based on flag */
+ stats.update_time= maria_info.update_time;
+ stats.auto_increment_value= maria_info.auto_increment;
+
+ return 0;
+}
+
+
+int ha_maria::extra(enum ha_extra_function operation)
+{
+ if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD)
+ return 0;
+ return maria_extra(file, operation, 0);
+}
+
+int ha_maria::reset(void)
+{
+ return maria_reset(file);
+}
+
+/* To be used with WRITE_CACHE and EXTRA_CACHE */
+
+int ha_maria::extra_opt(enum ha_extra_function operation, ulong cache_size)
+{
+ if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_WRITE_CACHE)
+ return 0;
+ return maria_extra(file, operation, (void*) &cache_size);
+}
+
+
+int ha_maria::delete_all_rows()
+{
+ return maria_delete_all_rows(file);
+}
+
+
+int ha_maria::delete_table(const char *name)
+{
+ return maria_delete_table(name);
+}
+
+#define THD_TRN (*(TRN **)thd_ha_data(thd, maria_hton))
+
+int ha_maria::external_lock(THD *thd, int lock_type)
+{
+ TRN *trn= THD_TRN;
+ DBUG_ENTER("ha_maria::external_lock");
+ /*
+ We don't test now_transactional because it may vary between lock/unlock
+ and thus confuse our reference counting.
+ It is critical to skip non-transactional tables: user-visible temporary
+ tables get an external_lock() when read/written for the first time, but no
+ corresponding unlock (they just stay locked and are later dropped while
+ locked); if a tmp table was transactional, "SELECT FROM non_tmp, tmp"
+ would never commit as its "locked_tables" count would stay 1.
+ When Maria has has_transactions()==TRUE, open_temporary_table()
+ (sql_base.cc) will use TRANSACTIONAL_TMP_TABLE and thus the
+ external_lock(F_UNLCK) will happen and we can then allow the user to
+ create transactional temporary tables.
+ */
+ if (!file->s->base.born_transactional)
+ goto skip_transaction;
+ if (lock_type != F_UNLCK)
+ {
+ if (!trn) /* no transaction yet - open it now */
+ {
+ trn= trnman_new_trn(& thd->mysys_var->mutex,
+ & thd->mysys_var->suspend,
+ thd->thread_stack + STACK_DIRECTION *
+ (my_thread_stack_size - STACK_MIN_SIZE));
+ if (unlikely(!trn))
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+
+ DBUG_PRINT("info", ("THD_TRN set to 0x%lx", (ulong)trn));
+ THD_TRN= trn;
+ if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
+ trans_register_ha(thd, TRUE, maria_hton);
+ }
+ this->file->trn= trn;
+ if (!trnman_increment_locked_tables(trn))
+ {
+ trans_register_ha(thd, FALSE, maria_hton);
+ trnman_new_statement(trn);
+ }
+ if (!thd->transaction.on)
+ {
+ /*
+ No need to log REDOs/UNDOs. If this is an internal temporary table
+ which will be renamed to a permanent table (like in ALTER TABLE),
+ the rename happens after unlocking so will be durable (and the table
+ will get its create_rename_lsn).
+ Note: if we wanted to enable users to have an old backup and apply
+ tons of archived logs to roll-forward, we could then not disable
+ REDOs/UNDOs in this case.
+ */
+ DBUG_PRINT("info", ("Disabling logging for table"));
+ _ma_tmp_disable_logging_for_table(file, TRUE);
+ }
+ }
+ else
+ {
+ _ma_reenable_logging_for_table(file->s);
+ this->file->trn= 0; /* TODO: remove it also in commit and rollback */
+ if (trn && trnman_has_locked_tables(trn))
+ {
+ if (!trnman_decrement_locked_tables(trn))
+ {
+ /* autocommit ? rollback a transaction */
+#ifdef MARIA_CANNOT_ROLLBACK
+ if (ma_commit(trn))
+ DBUG_RETURN(1);
+ THD_TRN= 0;
+#else
+ if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
+ {
+ trnman_rollback_trn(trn);
+ DBUG_PRINT("info", ("THD_TRN set to 0x0"));
+ THD_TRN= 0;
+ }
+#endif
+ }
+ }
+ }
+skip_transaction:
+ DBUG_RETURN(maria_lock_database(file, !table->s->tmp_table ?
+ lock_type : ((lock_type == F_UNLCK) ?
+ F_UNLCK : F_EXTRA_LCK)));
+}
+
+int ha_maria::start_stmt(THD *thd, thr_lock_type lock_type)
+{
+ TRN *trn= THD_TRN;
+ if (file->s->base.born_transactional)
+ {
+ DBUG_ASSERT(trn); // this may be called only after external_lock()
+ DBUG_ASSERT(trnman_has_locked_tables(trn));
+ DBUG_ASSERT(lock_type != F_UNLCK);
+ /*
+ As external_lock() was already called, don't increment locked_tables.
+ Note that we call the function below possibly several times when
+ statement starts (once per table). This is ok as long as that function
+ does cheap operations. Otherwise, we will need to do it only on first
+ call to start_stmt().
+ */
+ trnman_new_statement(trn);
+ }
+ return 0;
+}
+
+THR_LOCK_DATA **ha_maria::store_lock(THD *thd,
+ THR_LOCK_DATA **to,
+ enum thr_lock_type lock_type)
+{
+ if (lock_type != TL_IGNORE && file->lock.type == TL_UNLOCK)
+ file->lock.type= lock_type;
+ *to++= &file->lock;
+ return to;
+}
+
+
+void ha_maria::update_create_info(HA_CREATE_INFO *create_info)
+{
+ ha_maria::info(HA_STATUS_AUTO | HA_STATUS_CONST);
+ if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
+ {
+ create_info->auto_increment_value= stats.auto_increment_value;
+ }
+ create_info->data_file_name= data_file_name;
+ create_info->index_file_name= index_file_name;
+ /* We need to restore the row type as Maria can change it */
+ if (create_info->row_type != ROW_TYPE_DEFAULT &&
+ !(create_info->used_fields & HA_CREATE_USED_ROW_FORMAT))
+ create_info->row_type= get_row_type();
+}
+
+
+enum row_type ha_maria::get_row_type() const
+{
+ switch (file->s->data_file_type) {
+ case STATIC_RECORD: return ROW_TYPE_FIXED;
+ case DYNAMIC_RECORD: return ROW_TYPE_DYNAMIC;
+ case BLOCK_RECORD: return ROW_TYPE_PAGE;
+ case COMPRESSED_RECORD: return ROW_TYPE_COMPRESSED;
+ default: return ROW_TYPE_NOT_USED;
+ }
+}
+
+
+static enum data_file_type maria_row_type(HA_CREATE_INFO *info,
+ my_bool ignore_transactional)
+{
+ if (info->transactional == HA_CHOICE_YES && ! ignore_transactional)
+ return BLOCK_RECORD;
+ switch (info->row_type) {
+ case ROW_TYPE_FIXED: return STATIC_RECORD;
+ case ROW_TYPE_DYNAMIC: return DYNAMIC_RECORD;
+ default: return BLOCK_RECORD;
+ }
+}
+
+
+int ha_maria::create(const char *name, register TABLE *table_arg,
+ HA_CREATE_INFO *ha_create_info)
+{
+ int error;
+ uint create_flags= 0, record_count, i;
+ char buff[FN_REFLEN];
+ MARIA_KEYDEF *keydef;
+ MARIA_COLUMNDEF *recinfo;
+ MARIA_CREATE_INFO create_info;
+ TABLE_SHARE *share= table_arg->s;
+ uint options= share->db_options_in_use;
+ enum data_file_type row_type;
+ DBUG_ENTER("ha_maria::create");
+
+ for (i= 0; i < share->keys; i++)
+ {
+ if (table_arg->key_info[i].flags & HA_USES_PARSER)
+ {
+ create_flags|= HA_CREATE_RELIES_ON_SQL_LAYER;
+ break;
+ }
+ }
+ /* Note: BLOCK_RECORD is used if table is transactional */
+ row_type= maria_row_type(ha_create_info, 0);
+ if (ha_create_info->transactional == HA_CHOICE_YES &&
+ ha_create_info->row_type != ROW_TYPE_PAGE &&
+ ha_create_info->row_type != ROW_TYPE_NOT_USED &&
+ ha_create_info->row_type != ROW_TYPE_DEFAULT)
+ push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "Row format set to PAGE because of TRANSACTIONAL=1 option");
+
+ bzero((char*) &create_info, sizeof(create_info));
+ if ((error= table2maria(table_arg, row_type, &keydef, &recinfo,
+ &record_count, &create_info)))
+ DBUG_RETURN(error); /* purecov: inspected */
+ create_info.max_rows= share->max_rows;
+ create_info.reloc_rows= share->min_rows;
+ create_info.with_auto_increment= share->next_number_key_offset == 0;
+ create_info.auto_increment= (ha_create_info->auto_increment_value ?
+ ha_create_info->auto_increment_value -1 :
+ (ulonglong) 0);
+ create_info.data_file_length= ((ulonglong) share->max_rows *
+ share->avg_row_length);
+ create_info.data_file_name= ha_create_info->data_file_name;
+ create_info.index_file_name= ha_create_info->index_file_name;
+
+ /*
+ Table is transactional:
+ - If the user specify that table is transactional (in this case
+ row type is forced to BLOCK_RECORD)
+ - If they specify BLOCK_RECORD without specifying transactional behaviour
+
+ Shouldn't this test be pushed down to maria_create()? Because currently,
+ ma_test1 -T crashes: it creates a table with DYNAMIC_RECORD but has
+ born_transactional==1, which confuses some recovery-related code.
+ */
+ create_info.transactional= (row_type == BLOCK_RECORD &&
+ ha_create_info->transactional != HA_CHOICE_NO);
+
+ if (ha_create_info->options & HA_LEX_CREATE_TMP_TABLE)
+ create_flags|= HA_CREATE_TMP_TABLE;
+ if (ha_create_info->options & HA_CREATE_KEEP_FILES)
+ create_flags|= HA_CREATE_KEEP_FILES;
+ if (options & HA_OPTION_PACK_RECORD)
+ create_flags|= HA_PACK_RECORD;
+ if (options & HA_OPTION_CHECKSUM)
+ create_flags|= HA_CREATE_CHECKSUM;
+ if (options & HA_OPTION_DELAY_KEY_WRITE)
+ create_flags|= HA_CREATE_DELAY_KEY_WRITE;
+ if ((ha_create_info->page_checksum == HA_CHOICE_UNDEF && maria_page_checksums) ||
+ ha_create_info->page_checksum == HA_CHOICE_YES)
+ create_flags|= HA_CREATE_PAGE_CHECKSUM;
+
+ /* TODO: Check that the following fn_format is really needed */
+ error=
+ maria_create(fn_format(buff, name, "", "",
+ MY_UNPACK_FILENAME | MY_APPEND_EXT),
+ row_type, share->keys, keydef,
+ record_count, recinfo,
+ 0, (MARIA_UNIQUEDEF *) 0,
+ &create_info, create_flags);
+
+ my_free((uchar*) recinfo, MYF(0));
+ DBUG_RETURN(error);
+}
+
+
+int ha_maria::rename_table(const char *from, const char *to)
+{
+ return maria_rename(from, to);
+}
+
+
+void ha_maria::get_auto_increment(ulonglong offset, ulonglong increment,
+ ulonglong nb_desired_values,
+ ulonglong *first_value,
+ ulonglong *nb_reserved_values)
+{
+ ulonglong nr;
+ int error;
+ uchar key[HA_MAX_KEY_LENGTH];
+
+ if (!table->s->next_number_key_offset)
+ { // Autoincrement at key-start
+ ha_maria::info(HA_STATUS_AUTO);
+ *first_value= stats.auto_increment_value;
+ /* Maria has only table-level lock for now, so reserves to +inf */
+ *nb_reserved_values= ULONGLONG_MAX;
+ return;
+ }
+
+ /* it's safe to call the following if bulk_insert isn't on */
+ maria_flush_bulk_insert(file, table->s->next_number_index);
+
+ (void) extra(HA_EXTRA_KEYREAD);
+ key_copy(key, table->record[0],
+ table->key_info + table->s->next_number_index,
+ table->s->next_number_key_offset);
+ error= maria_rkey(file, table->record[1], (int) table->s->next_number_index,
+ key, make_prev_keypart_map(table->s->next_number_keypart),
+ HA_READ_PREFIX_LAST);
+ if (error)
+ nr= 1;
+ else
+ {
+ /* Get data from record[1] */
+ nr= ((ulonglong) table->next_number_field->
+ val_int_offset(table->s->rec_buff_length) + 1);
+ }
+ extra(HA_EXTRA_NO_KEYREAD);
+ *first_value= nr;
+ /*
+ MySQL needs to call us for next row: assume we are inserting ("a",null)
+ here, we return 3, and next this statement will want to insert ("b",null):
+ there is no reason why ("b",3+1) would be the good row to insert: maybe it
+ already exists, maybe 3+1 is too large...
+ */
+ *nb_reserved_values= 1;
+}
+
+
+/*
+ Find out how many rows there is in the given range
+
+ SYNOPSIS
+ records_in_range()
+ inx Index to use
+ min_key Start of range. Null pointer if from first key
+ max_key End of range. Null pointer if to last key
+
+ NOTES
+ min_key.flag can have one of the following values:
+ HA_READ_KEY_EXACT Include the key in the range
+ HA_READ_AFTER_KEY Don't include key in range
+
+ max_key.flag can have one of the following values:
+ HA_READ_BEFORE_KEY Don't include key in range
+ HA_READ_AFTER_KEY Include all 'end_key' values in the range
+
+ RETURN
+ HA_POS_ERROR Something is wrong with the index tree.
+ 0 There is no matching keys in the given range
+ number > 0 There is approximately 'number' matching rows in
+ the range.
+*/
+
+ha_rows ha_maria::records_in_range(uint inx, key_range *min_key,
+ key_range *max_key)
+{
+ return (ha_rows) maria_records_in_range(file, (int) inx, min_key, max_key);
+}
+
+
+int ha_maria::ft_read(uchar * buf)
+{
+ int error;
+
+ if (!ft_handler)
+ return -1;
+
+ thread_safe_increment(table->in_use->status_var.ha_read_next_count,
+ &LOCK_status); // why ?
+
+ error= ft_handler->please->read_next(ft_handler, (char*) buf);
+
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+uint ha_maria::checksum() const
+{
+ return (uint) file->state->checksum;
+}
+
+
+bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *create_info,
+ uint table_changes)
+{
+ uint options= table->s->db_options_in_use;
+
+ if (create_info->auto_increment_value != stats.auto_increment_value ||
+ create_info->data_file_name != data_file_name ||
+ create_info->index_file_name != index_file_name ||
+ (maria_row_type(create_info, 1) != data_file_type &&
+ create_info->row_type != ROW_TYPE_DEFAULT) ||
+ table_changes == IS_EQUAL_NO ||
+ table_changes & IS_EQUAL_PACK_LENGTH) // Not implemented yet
+ return COMPATIBLE_DATA_NO;
+
+ if ((options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM |
+ HA_OPTION_DELAY_KEY_WRITE)) !=
+ (create_info->table_options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM |
+ HA_OPTION_DELAY_KEY_WRITE)))
+ return COMPATIBLE_DATA_NO;
+ return COMPATIBLE_DATA_YES;
+}
+
+
+static int maria_hton_panic(handlerton *hton, ha_panic_function flag)
+{
+ /* If no background checkpoints, we need to do one now */
+ return ((checkpoint_interval == 0) ?
+ ma_checkpoint_execute(CHECKPOINT_FULL, FALSE) : 0) | maria_panic(flag);
+}
+
+
+static int maria_commit(handlerton *hton __attribute__ ((unused)),
+ THD *thd, bool all)
+{
+ TRN *trn= THD_TRN;
+ DBUG_ENTER("maria_commit");
+ trnman_reset_locked_tables(trn);
+ /* statement or transaction ? */
+ if ((thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && !all)
+ DBUG_RETURN(0); // end of statement
+ DBUG_PRINT("info", ("THD_TRN set to 0x0"));
+ THD_TRN= 0;
+ DBUG_RETURN(ma_commit(trn)); // end of transaction
+}
+
+
+static int maria_rollback(handlerton *hton __attribute__ ((unused)),
+ THD *thd, bool all)
+{
+ TRN *trn= THD_TRN;
+ DBUG_ENTER("maria_rollback");
+ trnman_reset_locked_tables(trn);
+ /* statement or transaction ? */
+ if ((thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && !all)
+ {
+ trnman_rollback_statement(trn);
+ DBUG_RETURN(0); // end of statement
+ }
+ DBUG_PRINT("info", ("THD_TRN set to 0x0"));
+ THD_TRN= 0;
+ DBUG_RETURN(trnman_rollback_trn(trn) ?
+ HA_ERR_OUT_OF_MEM : 0); // end of transaction
+}
+
+
+
+/**
+ @brief flush log handler
+
+ @param hton maria handlerton (unused)
+
+ @retval FALSE OK
+ @retval TRUE Error
+*/
+
+bool maria_flush_logs(handlerton *hton)
+{
+ return test(translog_purge_at_flush());
+}
+
+
+#define SHOW_MSG_LEN (FN_REFLEN + 20)
+/**
+ @brief show status handler
+
+ @param hton maria handlerton
+ @param thd thread handler
+ @param print print function
+ @param stat type of status
+*/
+
+bool maria_show_status(handlerton *hton,
+ THD *thd,
+ stat_print_fn *print,
+ enum ha_stat_type stat)
+{
+ char engine_name[]= "maria";
+ switch (stat)
+ {
+ case HA_ENGINE_LOGS:
+ {
+ TRANSLOG_ADDRESS horizon= translog_get_horizon();
+ uint32 last_file= LSN_FILE_NO(horizon);
+ uint32 first_needed= translog_get_first_needed_file();
+ uint32 first_file= translog_get_first_file(horizon);
+ uint32 i;
+ const char unknown[]= "unknown";
+ const char needed[]= "in use";
+ const char unneeded[]= "free";
+ char path[FN_REFLEN];
+
+ if (first_file == 0)
+ {
+ const char error[]= "error";
+ print(thd, engine_name, sizeof(engine_name),
+ STRING_WITH_LEN(""), error, sizeof(error));
+ break;
+ }
+
+ for (i= first_file; i <= last_file; i++)
+ {
+ char *file;
+ const char *status;
+ uint length, status_len;
+ MY_STAT stat_buff, *stat;
+ const char error[]= "can't stat";
+ char object[SHOW_MSG_LEN];
+ file= translog_filename_by_fileno(i, path);
+ if (!(stat= my_stat(file, &stat_buff, MYF(MY_WME))))
+ {
+ status= error;
+ status_len= sizeof(error);
+ length= snprintf(object, SHOW_MSG_LEN, "Size unknown ; %s", file);
+ }
+ else
+ {
+ if (first_needed == 0)
+ {
+ status= unknown;
+ status_len= sizeof(unknown);
+ }
+ else if (i < first_needed)
+ {
+ status= unneeded;
+ status_len= sizeof(unneeded);
+ }
+ else
+ {
+ status= needed;
+ status_len= sizeof(needed);
+ }
+ length= snprintf(object, SHOW_MSG_LEN, "Size %12lu ; %s",
+ (ulong) stat->st_size, file);
+ }
+
+ print(thd, engine_name, sizeof(engine_name),
+ object, length, status, status_len);
+ }
+ break;
+ }
+ case HA_ENGINE_STATUS:
+ case HA_ENGINE_MUTEX:
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int ha_maria_init(void *p)
+{
+ int res;
+ maria_hton= (handlerton *)p;
+ maria_hton->state= SHOW_OPTION_YES;
+ maria_hton->db_type= DB_TYPE_UNKNOWN;
+ maria_hton->create= maria_create_handler;
+ maria_hton->panic= maria_hton_panic;
+ maria_hton->commit= maria_commit;
+ maria_hton->rollback= maria_rollback;
+ maria_hton->flush_logs= maria_flush_logs;
+ maria_hton->show_status= maria_show_status;
+ /* TODO: decide if we support Maria being used for log tables */
+ maria_hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES;
+ bzero(maria_log_pagecache, sizeof(*maria_log_pagecache));
+ maria_data_root= mysql_real_data_home;
+ maria_tmpdir= &mysql_tmpdir_list; /* For REDO */
+ res= maria_init() || ma_control_file_create_or_open() ||
+ !init_pagecache(maria_pagecache,
+ pagecache_buffer_size, pagecache_division_limit,
+ pagecache_age_threshold, MARIA_KEY_BLOCK_LENGTH, 0) ||
+ !init_pagecache(maria_log_pagecache,
+ TRANSLOG_PAGECACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE, 0) ||
+ translog_init(maria_data_root, log_file_size,
+ MYSQL_VERSION_ID, server_id, maria_log_pagecache,
+ TRANSLOG_DEFAULT_FLAGS, 0) ||
+ maria_recover() ||
+ ma_checkpoint_init(checkpoint_interval);
+ maria_multi_threaded= TRUE;
+ return res ? HA_ERR_INITIALIZATION : 0;
+}
+
+
+#ifdef HAVE_QUERY_CACHE
+/**
+ @brief Register a named table with a call back function to the query cache.
+
+ @param thd The thread handle
+ @param table_key A pointer to the table name in the table cache
+ @param key_length The length of the table name
+ @param[out] engine_callback The pointer to the storage engine call back
+ function, currently 0
+ @param[out] engine_data Engine data will be set to 0.
+
+ @note Despite the name of this function, it is used to check each statement
+ before it is cached and not to register a table or callback function.
+
+ @see handler::register_query_cache_table
+
+ @return The error code. The engine_data and engine_callback will be set to 0.
+ @retval TRUE Success
+ @retval FALSE An error occurred
+*/
+
+my_bool ha_maria::register_query_cache_table(THD *thd, char *table_name,
+ uint table_name_len,
+ qc_engine_callback
+ *engine_callback,
+ ulonglong *engine_data)
+{
+ /*
+ No call back function is needed to determine if a cached statement
+ is valid or not.
+ */
+ *engine_callback= 0;
+
+ /*
+ No engine data is needed.
+ */
+ *engine_data= 0;
+
+ /*
+ If a concurrent INSERT has happened just before the currently processed
+ SELECT statement, the total size of the table is unknown.
+
+ To determine if the table size is known, the current thread's snap shot of
+ the table size with the actual table size are compared.
+
+ If the table size is unknown the SELECT statement can't be cached.
+ */
+ ulonglong actual_data_file_length;
+ ulonglong current_data_file_length;
+
+ /*
+ POSIX visibility rules specify that "2. Whatever memory values a
+ thread can see when it unlocks a mutex <...> can also be seen by any
+ thread that later locks the same mutex". In this particular case,
+ concurrent insert thread had modified the data_file_length in
+ MYISAM_SHARE before it has unlocked (or even locked)
+ structure_guard_mutex. So, here we're guaranteed to see at least that
+ value after we've locked the same mutex. We can see a later value
+ (modified by some other thread) though, but it's ok, as we only want
+ to know if the variable was changed, the actual new value doesn't matter
+ */
+ actual_data_file_length= file->s->state.state.data_file_length;
+ current_data_file_length= file->save_state.data_file_length;
+
+ if (current_data_file_length != actual_data_file_length)
+ {
+ /* Don't cache current statement. */
+ return FALSE;
+ }
+
+ /* It is ok to try to cache current statement. */
+ return TRUE;
+}
+#endif
+
+static struct st_mysql_sys_var* system_variables[]= {
+ MYSQL_SYSVAR(block_size),
+ MYSQL_SYSVAR(checkpoint_interval),
+ MYSQL_SYSVAR(page_checksum),
+ MYSQL_SYSVAR(log_file_size),
+ MYSQL_SYSVAR(log_purge_type),
+ MYSQL_SYSVAR(max_sort_file_size),
+ MYSQL_SYSVAR(pagecache_age_threshold),
+ MYSQL_SYSVAR(pagecache_buffer_size),
+ MYSQL_SYSVAR(pagecache_division_limit),
+ MYSQL_SYSVAR(repair_threads),
+ MYSQL_SYSVAR(sort_buffer_size),
+ MYSQL_SYSVAR(stats_method),
+ MYSQL_SYSVAR(sync_log_dir),
+ NULL
+};
+
+
+/**
+ @brief Updates the checkpoint interval and restarts the background thread.
+*/
+
+static void update_checkpoint_interval(MYSQL_THD thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr, void *save)
+{
+ ma_checkpoint_end();
+ ma_checkpoint_init(*(ulong *)var_ptr= (ulong)(*(long *)save));
+}
+
+/**
+ @brief Updates the transaction log file limit.
+*/
+
+static void update_log_file_size(MYSQL_THD thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr, void *save)
+{
+ uint32 size= (uint32)((ulong)(*(long *)save));
+ translog_set_file_size(size);
+ *(ulong *)var_ptr= size;
+}
+
+static SHOW_VAR status_variables[]= {
+ {"Maria_pagecache_blocks_not_flushed", (char*) &maria_pagecache_var.global_blocks_changed, SHOW_LONG_NOFLUSH},
+ {"Maria_pagecache_blocks_unused", (char*) &maria_pagecache_var.blocks_unused, SHOW_LONG_NOFLUSH},
+ {"Maria_pagecache_blocks_used", (char*) &maria_pagecache_var.blocks_used, SHOW_LONG_NOFLUSH},
+ {"Maria_pagecache_read_requests", (char*) &maria_pagecache_var.global_cache_r_requests, SHOW_LONGLONG},
+ {"Maria_pagecache_reads", (char*) &maria_pagecache_var.global_cache_read, SHOW_LONGLONG},
+ {"Maria_pagecache_write_requests", (char*) &maria_pagecache_var.global_cache_w_requests, SHOW_LONGLONG},
+ {"Maria_pagecache_writes", (char*) &maria_pagecache_var.global_cache_write, SHOW_LONGLONG},
+ {NullS, NullS, SHOW_LONG}
+};
+
+struct st_mysql_storage_engine maria_storage_engine=
+{ MYSQL_HANDLERTON_INTERFACE_VERSION };
+
+mysql_declare_plugin(maria)
+{
+ MYSQL_STORAGE_ENGINE_PLUGIN,
+ &maria_storage_engine,
+ "MARIA",
+ "MySQL AB",
+ "Traditional transactional MySQL tables",
+ PLUGIN_LICENSE_GPL,
+ ha_maria_init, /* Plugin Init */
+ NULL, /* Plugin Deinit */
+ 0x0100, /* 1.0 */
+ status_variables, /* status variables */
+ system_variables, /* system variables */
+ NULL
+}
+mysql_declare_plugin_end;
diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h
new file mode 100644
index 00000000000..cb4efffff5f
--- /dev/null
+++ b/storage/maria/ha_maria.h
@@ -0,0 +1,155 @@
+/* Copyright (C) 2006,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface /* gcc class implementation */
+#endif
+
+/* class for the maria handler */
+
+#include <maria.h>
+
+#define HA_RECOVER_NONE 0 /* No automatic recover */
+#define HA_RECOVER_DEFAULT 1 /* Automatic recover active */
+#define HA_RECOVER_BACKUP 2 /* Make a backupfile on recover */
+#define HA_RECOVER_FORCE 4 /* Recover even if we loose rows */
+#define HA_RECOVER_QUICK 8 /* Don't check rows in data file */
+
+extern ulong maria_sort_buffer_size;
+extern TYPELIB maria_recover_typelib;
+extern ulong maria_recover_options;
+
+class ha_maria :public handler
+{
+ MARIA_HA *file;
+ ulonglong int_table_flags;
+ MARIA_RECORD_POS remember_pos;
+ char *data_file_name, *index_file_name;
+ enum data_file_type data_file_type;
+ bool can_enable_indexes;
+ int repair(THD * thd, HA_CHECK &param, bool optimize);
+
+public:
+ ha_maria(handlerton *hton, TABLE_SHARE * table_arg);
+ ~ha_maria() {}
+ handler *clone(MEM_ROOT *mem_root);
+ const char *table_type() const
+ { return "MARIA"; }
+ const char *index_type(uint key_number);
+ const char **bas_ext() const;
+ ulonglong table_flags() const
+ { return int_table_flags; }
+ ulong index_flags(uint inx, uint part, bool all_parts) const
+ {
+ return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ?
+ 0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
+ HA_READ_ORDER | HA_KEYREAD_ONLY);
+ }
+ uint max_supported_keys() const
+ { return MARIA_MAX_KEY; }
+ uint max_supported_key_length() const;
+ uint max_supported_key_part_length() const
+ { return max_supported_key_length(); }
+ enum row_type get_row_type() const;
+ uint checksum() const;
+ virtual double scan_time();
+
+ int open(const char *name, int mode, uint test_if_locked);
+ int close(void);
+ int write_row(uchar * buf);
+ int update_row(const uchar * old_data, uchar * new_data);
+ int delete_row(const uchar * buf);
+ int index_read_map(uchar * buf, const uchar * key, key_part_map keypart_map,
+ enum ha_rkey_function find_flag);
+ int index_read_idx_map(uchar * buf, uint idx, const uchar * key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag);
+ int index_read_last_map(uchar * buf, const uchar * key,
+ key_part_map keypart_map);
+ int index_next(uchar * buf);
+ int index_prev(uchar * buf);
+ int index_first(uchar * buf);
+ int index_last(uchar * buf);
+ int index_next_same(uchar * buf, const uchar * key, uint keylen);
+ int ft_init()
+ {
+ if (!ft_handler)
+ return 1;
+ ft_handler->please->reinit_search(ft_handler);
+ return 0;
+ }
+ FT_INFO *ft_init_ext(uint flags, uint inx, String * key)
+ {
+ return maria_ft_init_search(flags, file, inx,
+ (uchar *) key->ptr(), key->length(),
+ key->charset(), table->record[0]);
+ }
+ int ft_read(uchar * buf);
+ int rnd_init(bool scan);
+ int rnd_end(void);
+ int rnd_next(uchar * buf);
+ int rnd_pos(uchar * buf, uchar * pos);
+ int remember_rnd_pos();
+ int restart_rnd_next(uchar * buf);
+ void position(const uchar * record);
+ int info(uint);
+ int extra(enum ha_extra_function operation);
+ int extra_opt(enum ha_extra_function operation, ulong cache_size);
+ int reset(void);
+ int external_lock(THD * thd, int lock_type);
+ int start_stmt(THD *thd, thr_lock_type lock_type);
+ int delete_all_rows(void);
+ int disable_indexes(uint mode);
+ int enable_indexes(uint mode);
+ int indexes_are_disabled(void);
+ void start_bulk_insert(ha_rows rows);
+ int end_bulk_insert();
+ ha_rows records_in_range(uint inx, key_range * min_key, key_range * max_key);
+ void update_create_info(HA_CREATE_INFO * create_info);
+ int create(const char *name, TABLE * form, HA_CREATE_INFO * create_info);
+ THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to,
+ enum thr_lock_type lock_type);
+ virtual void get_auto_increment(ulonglong offset, ulonglong increment,
+ ulonglong nb_desired_values,
+ ulonglong *first_value,
+ ulonglong *nb_reserved_values);
+ int rename_table(const char *from, const char *to);
+ int delete_table(const char *name);
+ int check(THD * thd, HA_CHECK_OPT * check_opt);
+ int analyze(THD * thd, HA_CHECK_OPT * check_opt);
+ int repair(THD * thd, HA_CHECK_OPT * check_opt);
+ bool check_and_repair(THD * thd);
+ bool is_crashed() const;
+ bool auto_repair() const
+ { return maria_recover_options != 0; }
+ int optimize(THD * thd, HA_CHECK_OPT * check_opt);
+ int restore(THD * thd, HA_CHECK_OPT * check_opt);
+ int backup(THD * thd, HA_CHECK_OPT * check_opt);
+ int assign_to_keycache(THD * thd, HA_CHECK_OPT * check_opt);
+ int preload_keys(THD * thd, HA_CHECK_OPT * check_opt);
+ bool check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes);
+#ifdef HAVE_REPLICATION
+ int dump(THD * thd, int fd);
+ int net_read_dump(NET * net);
+#endif
+#ifdef HAVE_QUERY_CACHE
+ my_bool register_query_cache_table(THD *thd, char *table_key,
+ uint key_length,
+ qc_engine_callback
+ *engine_callback,
+ ulonglong *engine_data);
+#endif
+};
diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c
new file mode 100644
index 00000000000..88642461216
--- /dev/null
+++ b/storage/maria/lockman.c
@@ -0,0 +1,786 @@
+/* QQ: TODO - allocate everything from dynarrays !!! (benchmark) */
+/* QQ: TODO instant duration locks */
+/* QQ: #warning automatically place S instead of LS if possible */
+
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Generic Lock Manager
+
+ Lock manager handles locks on "resources", a resource must be uniquely
+ identified by a 64-bit number. Lock manager itself does not imply
+ anything about the nature of a resource - it can be a row, a table, a
+ database, or just anything.
+
+ Locks belong to "lock owners". A Lock owner is uniquely identified by a
+ 16-bit number. A function loid2lo must be provided by the application
+ that takes such a number as an argument and returns a LOCK_OWNER
+ structure.
+
+ Lock levels are completely defined by three tables. Lock compatibility
+ matrix specifies which locks can be held at the same time on a resource.
+ Lock combining matrix specifies what lock level has the same behaviour as
+ a pair of two locks of given levels. getlock_result matrix simplifies
+ intention locking and lock escalation for an application, basically it
+ defines which locks are intention locks and which locks are "loose"
+ locks. It is only used to provide better diagnostics for the
+ application, lock manager itself does not differentiate between normal,
+ intention, and loose locks.
+
+ Internally lock manager is based on a lock-free hash, see lf_hash.c for
+ details. All locks are stored in a hash, with a resource id as a search
+ key, so all locks for the same resource will be considered collisions and
+ will be put in a one (lock-free) linked list. The main lock-handling
+ logic is in the inner loop that searches for a lock in such a linked
+ list - lockfind().
+
+ This works as follows. Locks generally are added to the end of the list
+ (with one exception, see below). When scanning the list it is always
+ possible to determine what locks are granted (active) and what locks are
+ waiting - first lock is obviously active, the second is active if it's
+ compatible with the first, and so on, a lock is active if it's compatible
+ with all previous locks and all locks before it are also active.
+ To calculate the "compatible with all previous locks" all locks are
+ accumulated in prev_lock variable using lock_combining_matrix.
+
+ Lock upgrades: when a thread that has a lock on a given resource,
+ requests a new lock on the same resource and the old lock is not enough
+ to satisfy new lock requirements (which is defined by
+ lock_combining_matrix[old_lock][new_lock] != old_lock), a new lock is
+ placed in the list. Depending on other locks it is immediately active or
+ it will wait for other locks. Here's an exception to "locks are added
+ to the end" rule - upgraded locks are added after the last active lock
+ but before all waiting locks. Old lock (the one we upgraded from) is
+ not removed from the list, indeed it may be needed if the new lock was
+ in a savepoint that gets rolled back. So old lock is marked as "ignored"
+ (IGNORE_ME flag). New lock gets an UPGRADED flag.
+
+ Loose locks add an important exception to the above. Loose locks do not
+ always commute with other locks. In the list IX-LS both locks are active,
+ while in the LS-IX list only the first lock is active. This creates a
+ problem in lock upgrades. If the list was IX-LS and the owner of the
+ first lock wants to place LS lock (which can be immediately granted), the
+ IX lock is upgraded to LSIX and the list becomes IX-LS-LSIX, which,
+ according to the lock compatibility matrix means that the last lock is
+ waiting - of course it all happened because IX and LS were swapped and
+ they don't commute. To work around this there's ACTIVE flag which is set
+ in every lock that never waited (was placed active), and this flag
+ overrides "compatible with all previous locks" rule.
+
+ When a lock is placed to the end of the list it's either compatible with
+ all locks and all locks are active - new lock becomes active at once, or
+ it conflicts with some of the locks, in this case in the 'blocker'
+ variable a conflicting lock is returned and the calling thread waits on a
+ pthread condition in the LOCK_OWNER structure of the owner of the
+ conflicting lock. Or a new lock is compatible with all locks, but some
+ existing locks are not compatible with each other (example: request IS,
+ when the list is S-IX) - that is not all locks are active. In this case a
+ first waiting lock is returned in the 'blocker' variable, lockman_getlock()
+ notices that a "blocker" does not conflict with the requested lock, and
+ "dereferences" it, to find the lock that it's waiting on. The calling
+ thread than begins to wait on the same lock.
+
+ To better support table-row relations where one needs to lock the table
+ with an intention lock before locking the row, extended diagnostics is
+ provided. When an intention lock (presumably on a table) is granted,
+ lockman_getlock() returns one of GOT_THE_LOCK (no need to lock the row,
+ perhaps the thread already has a normal lock on this table),
+ GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE (need to lock the row, as usual),
+ GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE (only need to check
+ whether it's possible to lock the row, but no need to lock it - perhaps
+ the thread has a loose lock on this table). This is defined by
+ getlock_result[] table.
+*/
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <my_bit.h>
+#include <lf.h>
+#include "lockman.h"
+
+/*
+ Lock compatibility matrix.
+
+ It's asymmetric. Read it as "Somebody has the lock <value in the row
+ label>, can I set the lock <value in the column label> ?"
+
+ ') Though you can take LS lock while somebody has S lock, it makes no
+ sense - it's simpler to take S lock too.
+
+ 1 - compatible
+ 0 - incompatible
+ -1 - "impossible", so that we can assert the impossibility.
+*/
+static int lock_compatibility_matrix[10][10]=
+{ /* N S X IS IX SIX LS LX SLX LSIX */
+ { -1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */
+ { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* S */
+ { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* X */
+ { -1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, /* IS */
+ { -1, 0, 0, 1, 1, 0, 1, 1, 0, 1 }, /* IX */
+ { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, /* SIX */
+ { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* LS */
+ { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* LX */
+ { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* SLX */
+ { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 } /* LSIX */
+};
+
+/*
+ Lock combining matrix.
+
+ It's symmetric. Read it as "what lock level L is identical to the
+ set of two locks A and B"
+
+ One should never get N from it, we assert the impossibility
+*/
+static enum lockman_lock_type lock_combining_matrix[10][10]=
+{/* N S X IS IX SIX LS LX SLX LSIX */
+ { N, S, X, IS, IX, SIX, S, SLX, SLX, SIX}, /* N */
+ { S, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */
+ { X, X, X, X, X, X, X, X, X, X}, /* X */
+ { IS, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */
+ { IX, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */
+ { SIX, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */
+ { LS, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */
+ { LX, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */
+ { SLX, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */
+ { LSIX, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */
+};
+
+#define REPEAT_ONCE_MORE 0
+#define OK_TO_PLACE_THE_LOCK 1
+#define OK_TO_PLACE_THE_REQUEST 2
+#define ALREADY_HAVE_THE_LOCK 4
+#define ALREADY_HAVE_THE_REQUEST 8
+#define PLACE_NEW_DISABLE_OLD 16
+#define REQUEST_NEW_DISABLE_OLD 32
+#define RESOURCE_WAS_UNLOCKED 64
+
+#define NEED_TO_WAIT (OK_TO_PLACE_THE_REQUEST | ALREADY_HAVE_THE_REQUEST |\
+ REQUEST_NEW_DISABLE_OLD)
+#define ALREADY_HAVE (ALREADY_HAVE_THE_LOCK | ALREADY_HAVE_THE_REQUEST)
+#define LOCK_UPGRADE (PLACE_NEW_DISABLE_OLD | REQUEST_NEW_DISABLE_OLD)
+
+
+/*
+ the return codes for lockman_getlock
+
+ It's asymmetric. Read it as "I have the lock <value in the row label>,
+ what value should be returned for <value in the column label> ?"
+
+ 0 means impossible combination (assert!)
+
+ Defines below help to preserve the table structure.
+ I/L/A values are self explanatory
+ x means the combination is possible (assert should not crash)
+ but it cannot happen in row locks, only in table locks (S,X),
+ or lock escalations (LS,LX)
+*/
+#define I GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE
+#define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE
+#define A GOT_THE_LOCK
+#define x GOT_THE_LOCK
+static enum lockman_getlock_result getlock_result[10][10]=
+{/* N S X IS IX SIX LS LX SLX LSIX */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* N */
+ { 0, x, 0, A, 0, 0, x, 0, 0, 0}, /* S */
+ { 0, x, x, A, A, 0, x, x, 0, 0}, /* X */
+ { 0, 0, 0, I, 0, 0, 0, 0, 0, 0}, /* IS */
+ { 0, 0, 0, I, I, 0, 0, 0, 0, 0}, /* IX */
+ { 0, x, 0, A, I, 0, x, 0, 0, 0}, /* SIX */
+ { 0, 0, 0, L, 0, 0, x, 0, 0, 0}, /* LS */
+ { 0, 0, 0, L, L, 0, x, x, 0, 0}, /* LX */
+ { 0, x, 0, A, L, 0, x, x, 0, 0}, /* SLX */
+ { 0, 0, 0, L, I, 0, x, 0, 0, 0} /* LSIX */
+};
+#undef I
+#undef L
+#undef A
+#undef x
+
+LF_REQUIRE_PINS(4);
+
+typedef struct lockman_lock {
+ uint64 resource;
+ struct lockman_lock *lonext;
+ intptr volatile link;
+ uint32 hashnr;
+ /* QQ: TODO - remove hashnr from LOCK */
+ uint16 loid;
+ uchar lock; /* sizeof(uchar) <= sizeof(enum) */
+ uchar flags;
+} LOCK;
+
+#define IGNORE_ME 1
+#define UPGRADED 2
+#define ACTIVE 4
+
+typedef struct {
+ intptr volatile *prev;
+ LOCK *curr, *next;
+ LOCK *blocker, *upgrade_from;
+} CURSOR;
+
+#define PTR(V) (LOCK *)((V) & (~(intptr)1))
+#define DELETED(V) ((V) & 1)
+
+/*
+ NOTE
+ cursor is positioned in either case
+ pins[0..3] are used, they are NOT removed on return
+*/
+static int lockfind(LOCK * volatile *head, LOCK *node,
+ CURSOR *cursor, LF_PINS *pins)
+{
+ uint32 hashnr, cur_hashnr;
+ uint64 resource, cur_resource;
+ intptr cur_link;
+ my_bool cur_active, compatible, upgrading, prev_active;
+ enum lockman_lock_type lock, prev_lock, cur_lock;
+ uint16 loid, cur_loid;
+ int cur_flags, flags;
+
+ hashnr= node->hashnr;
+ resource= node->resource;
+ lock= node->lock;
+ loid= node->loid;
+ flags= node->flags;
+
+retry:
+ cursor->prev= (intptr *)head;
+ prev_lock= N;
+ cur_active= TRUE;
+ compatible= TRUE;
+ upgrading= FALSE;
+ cursor->blocker= cursor->upgrade_from= 0;
+ _lf_unpin(pins, 3);
+ do {
+ cursor->curr= PTR(*cursor->prev);
+ _lf_pin(pins, 1, cursor->curr);
+ } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF);
+ for (;;)
+ {
+ if (!cursor->curr)
+ break;
+ do {
+ cur_link= cursor->curr->link;
+ cursor->next= PTR(cur_link);
+ _lf_pin(pins, 0, cursor->next);
+ } while (cur_link != cursor->curr->link && LF_BACKOFF);
+ cur_hashnr= cursor->curr->hashnr;
+ cur_resource= cursor->curr->resource;
+ cur_lock= cursor->curr->lock;
+ cur_loid= cursor->curr->loid;
+ cur_flags= cursor->curr->flags;
+ if (*cursor->prev != (intptr)cursor->curr)
+ {
+ (void)LF_BACKOFF;
+ goto retry;
+ }
+ if (!DELETED(cur_link))
+ {
+ if (cur_hashnr > hashnr ||
+ (cur_hashnr == hashnr && cur_resource >= resource))
+ {
+ if (cur_hashnr > hashnr || cur_resource > resource)
+ break;
+ /* ok, we have a lock for this resource */
+ DBUG_ASSERT(lock_compatibility_matrix[prev_lock][cur_lock] >= 0);
+ DBUG_ASSERT(lock_compatibility_matrix[cur_lock][lock] >= 0);
+ if ((cur_flags & IGNORE_ME) && ! (flags & IGNORE_ME))
+ {
+ DBUG_ASSERT(cur_active);
+ if (cur_loid == loid)
+ cursor->upgrade_from= cursor->curr;
+ }
+ else
+ {
+ prev_active= cur_active;
+ if (cur_flags & ACTIVE)
+ DBUG_ASSERT(prev_active == TRUE);
+ else
+ cur_active&= lock_compatibility_matrix[prev_lock][cur_lock];
+ if (upgrading && !cur_active /*&& !(cur_flags & UPGRADED)*/)
+ break;
+ if (prev_active && !cur_active)
+ {
+ cursor->blocker= cursor->curr;
+ _lf_pin(pins, 3, cursor->curr);
+ }
+ if (cur_loid == loid)
+ {
+ /* we already have a lock on this resource */
+ DBUG_ASSERT(lock_combining_matrix[cur_lock][lock] != N);
+ DBUG_ASSERT(!upgrading || (flags & IGNORE_ME));
+ if (lock_combining_matrix[cur_lock][lock] == cur_lock)
+ {
+ /* new lock is compatible */
+ if (cur_active)
+ {
+ cursor->blocker= cursor->curr; /* loose-locks! */
+ _lf_unpin(pins, 3); /* loose-locks! */
+ return ALREADY_HAVE_THE_LOCK;
+ }
+ else
+ return ALREADY_HAVE_THE_REQUEST;
+ }
+ /* not compatible, upgrading */
+ upgrading= TRUE;
+ cursor->upgrade_from= cursor->curr;
+ }
+ else
+ {
+ if (!lock_compatibility_matrix[cur_lock][lock])
+ {
+ compatible= FALSE;
+ cursor->blocker= cursor->curr;
+ _lf_pin(pins, 3, cursor->curr);
+ }
+ }
+ prev_lock= lock_combining_matrix[prev_lock][cur_lock];
+ DBUG_ASSERT(prev_lock != N);
+ }
+ }
+ cursor->prev= &(cursor->curr->link);
+ _lf_pin(pins, 2, cursor->curr);
+ }
+ else
+ {
+ if (my_atomic_casptr((void **)cursor->prev,
+ (void **)&cursor->curr, cursor->next))
+ _lf_alloc_free(pins, cursor->curr);
+ else
+ {
+ (void)LF_BACKOFF;
+ goto retry;
+ }
+ }
+ cursor->curr= cursor->next;
+ _lf_pin(pins, 1, cursor->curr);
+ }
+ /*
+ either the end of lock list - no more locks for this resource,
+ or upgrading and the end of active lock list
+ */
+ if (upgrading)
+ {
+ if (compatible /*&& prev_active*/)
+ return PLACE_NEW_DISABLE_OLD;
+ else
+ return REQUEST_NEW_DISABLE_OLD;
+ }
+ if (cur_active && compatible)
+ {
+ /*
+ either no locks for this resource or all are compatible.
+ ok to place the lock in any case.
+ */
+ return prev_lock == N ? RESOURCE_WAS_UNLOCKED
+ : OK_TO_PLACE_THE_LOCK;
+ }
+ /* we have a lock conflict. ok to place a lock request. And wait */
+ return OK_TO_PLACE_THE_REQUEST;
+}
+
+/*
+ NOTE
+ it uses pins[0..3], on return pins 0..2 are removed, pin 3 (blocker) stays
+*/
+static int lockinsert(LOCK * volatile *head, LOCK *node, LF_PINS *pins,
+ LOCK **blocker)
+{
+ CURSOR cursor;
+ int res;
+
+ do
+ {
+ res= lockfind(head, node, &cursor, pins);
+ DBUG_ASSERT(res != ALREADY_HAVE_THE_REQUEST);
+ if (!(res & ALREADY_HAVE))
+ {
+ if (res & LOCK_UPGRADE)
+ {
+ node->flags|= UPGRADED;
+ node->lock= lock_combining_matrix[cursor.upgrade_from->lock][node->lock];
+ }
+ if (!(res & NEED_TO_WAIT))
+ node->flags|= ACTIVE;
+ node->link= (intptr)cursor.curr;
+ DBUG_ASSERT(node->link != (intptr)node);
+ DBUG_ASSERT(cursor.prev != &node->link);
+ if (!my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node))
+ {
+ res= REPEAT_ONCE_MORE;
+ node->flags&= ~ACTIVE;
+ }
+ if (res & LOCK_UPGRADE)
+ cursor.upgrade_from->flags|= IGNORE_ME;
+ /*
+ QQ: is this OK ? if a reader has already read upgrade_from,
+ it may find it conflicting with node :(
+ - see the last test from test_lockman_simple()
+ */
+ }
+
+ } while (res == REPEAT_ONCE_MORE);
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ _lf_unpin(pins, 2);
+ /*
+ note that blocker is not necessarily pinned here (when it's == curr).
+ this is ok as in such a case it's either a dummy node for
+ initialize_bucket() and dummy nodes don't need pinning,
+ or it's a lock of the same transaction for lockman_getlock,
+ and it cannot be removed by another thread
+ */
+ *blocker= cursor.blocker;
+ return res;
+}
+
+/*
+ NOTE
+ it uses pins[0..3], on return pins 0..2 are removed, pin 3 (blocker) stays
+*/
+static int lockpeek(LOCK * volatile *head, LOCK *node, LF_PINS *pins,
+ LOCK **blocker)
+{
+ CURSOR cursor;
+ int res;
+
+ res= lockfind(head, node, &cursor, pins);
+
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ _lf_unpin(pins, 2);
+ if (blocker)
+ *blocker= cursor.blocker;
+ return res;
+}
+
+/*
+ NOTE
+ it uses pins[0..3], on return all pins are removed.
+
+ One _must_ have the lock (or request) to call this
+*/
+static int lockdelete(LOCK * volatile *head, LOCK *node, LF_PINS *pins)
+{
+ CURSOR cursor;
+ int res;
+
+ do
+ {
+ res= lockfind(head, node, &cursor, pins);
+ DBUG_ASSERT(res & ALREADY_HAVE);
+
+ if (cursor.upgrade_from)
+ cursor.upgrade_from->flags&= ~IGNORE_ME;
+
+ /*
+ XXX this does not work with savepoints, as old lock is left ignored.
+ It cannot be unignored, as would basically mean moving the lock back
+ in the lock chain (from upgraded). And the latter is not allowed -
+ because it breaks list scanning. So old ignored lock must be deleted,
+ new - same - lock must be installed right after the lock we're deleting,
+ then we can delete. Good news is - this is only required when rolling
+ back a savepoint.
+ */
+ if (my_atomic_casptr((void **)&(cursor.curr->link),
+ (void **)&cursor.next, 1+(char *)cursor.next))
+ {
+ if (my_atomic_casptr((void **)cursor.prev,
+ (void **)&cursor.curr, cursor.next))
+ _lf_alloc_free(pins, cursor.curr);
+ else
+ lockfind(head, node, &cursor, pins);
+ }
+ else
+ {
+ res= REPEAT_ONCE_MORE;
+ if (cursor.upgrade_from)
+ cursor.upgrade_from->flags|= IGNORE_ME;
+ }
+ } while (res == REPEAT_ONCE_MORE);
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ _lf_unpin(pins, 2);
+ _lf_unpin(pins, 3);
+ return res;
+}
+
+void lockman_init(LOCKMAN *lm, loid_to_lo_func *func, uint timeout)
+{
+ lf_alloc_init(&lm->alloc, sizeof(LOCK), offsetof(LOCK, lonext));
+ lf_dynarray_init(&lm->array, sizeof(LOCK **));
+ lm->size= 1;
+ lm->count= 0;
+ lm->loid_to_lo= func;
+ lm->lock_timeout= timeout;
+}
+
+void lockman_destroy(LOCKMAN *lm)
+{
+ LOCK *el= *(LOCK **)_lf_dynarray_lvalue(&lm->array, 0);
+ while (el)
+ {
+ intptr next= el->link;
+ if (el->hashnr & 1)
+ lf_alloc_direct_free(&lm->alloc, el);
+ else
+ my_free((void *)el, MYF(0));
+ el= (LOCK *)next;
+ }
+ lf_alloc_destroy(&lm->alloc);
+ lf_dynarray_destroy(&lm->array);
+}
+
+/* TODO: optimize it */
+#define MAX_LOAD 1
+
+static void initialize_bucket(LOCKMAN *lm, LOCK * volatile *node,
+ uint bucket, LF_PINS *pins)
+{
+ int res;
+ uint parent= my_clear_highest_bit(bucket);
+ LOCK *dummy= (LOCK *)my_malloc(sizeof(LOCK), MYF(MY_WME));
+ LOCK **tmp= 0, *cur;
+ LOCK * volatile *el= _lf_dynarray_lvalue(&lm->array, parent);
+
+ if (*el == NULL && bucket)
+ initialize_bucket(lm, el, parent, pins);
+ dummy->hashnr= my_reverse_bits(bucket);
+ dummy->loid= 0;
+ dummy->lock= X; /* doesn't matter, in fact */
+ dummy->resource= 0;
+ dummy->flags= 0;
+ res= lockinsert(el, dummy, pins, &cur);
+ DBUG_ASSERT(res & (ALREADY_HAVE_THE_LOCK | RESOURCE_WAS_UNLOCKED));
+ if (res & ALREADY_HAVE_THE_LOCK)
+ {
+ my_free((void *)dummy, MYF(0));
+ dummy= cur;
+ }
+ my_atomic_casptr((void **)node, (void **)&tmp, dummy);
+}
+
+static inline uint calc_hash(uint64 resource)
+{
+ const uchar *pos= (uchar *)&resource;
+ ulong nr1= 1, nr2= 4, i;
+ for (i= 0; i < sizeof(resource) ; i++, pos++)
+ {
+ nr1^= (ulong) ((((uint) nr1 & 63)+nr2) * ((uint)*pos)) + (nr1 << 8);
+ nr2+= 3;
+ }
+ return nr1 & INT_MAX32;
+}
+
+/*
+ RETURN
+ see enum lockman_getlock_result
+ NOTE
+ uses pins[0..3], they're removed on return
+*/
+enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo,
+ uint64 resource,
+ enum lockman_lock_type lock)
+{
+ int res;
+ uint csize, bucket, hashnr;
+ LOCK *node, * volatile *el, *blocker;
+ LF_PINS *pins= lo->pins;
+ enum lockman_lock_type old_lock;
+
+ DBUG_ASSERT(lo->loid);
+ lf_rwlock_by_pins(pins);
+ node= (LOCK *)_lf_alloc_new(pins);
+ node->flags= 0;
+ node->lock= lock;
+ node->loid= lo->loid;
+ node->resource= resource;
+ hashnr= calc_hash(resource);
+ bucket= hashnr % lm->size;
+ el= _lf_dynarray_lvalue(&lm->array, bucket);
+ if (*el == NULL)
+ initialize_bucket(lm, el, bucket, pins);
+ node->hashnr= my_reverse_bits(hashnr) | 1;
+ res= lockinsert(el, node, pins, &blocker);
+ if (res & ALREADY_HAVE)
+ {
+ int r;
+ old_lock= blocker->lock;
+ _lf_alloc_free(pins, node);
+ lf_rwunlock_by_pins(pins);
+ r= getlock_result[old_lock][lock];
+ DBUG_ASSERT(r);
+ return r;
+ }
+ /* a new value was added to the hash */
+ csize= lm->size;
+ if ((my_atomic_add32(&lm->count, 1)+1.0) / csize > MAX_LOAD)
+ my_atomic_cas32(&lm->size, &csize, csize*2);
+ node->lonext= lo->all_locks;
+ lo->all_locks= node;
+ for ( ; res & NEED_TO_WAIT; res= lockpeek(el, node, pins, &blocker))
+ {
+ LOCK_OWNER *wait_for_lo;
+ ulonglong deadline;
+ struct timespec timeout;
+
+ _lf_assert_pin(pins, 3); /* blocker must be pinned here */
+ wait_for_lo= lm->loid_to_lo(blocker->loid);
+
+ /*
+ now, this is tricky. blocker is not necessarily a LOCK
+ we're waiting for. If it's compatible with what we want,
+ then we're waiting for a lock that blocker is waiting for
+ (see two places where blocker is set in lockfind)
+ In the latter case, let's "dereference" it
+ */
+ if (lock_compatibility_matrix[blocker->lock][lock])
+ {
+ blocker= wait_for_lo->all_locks;
+ _lf_pin(pins, 3, blocker);
+ if (blocker != wait_for_lo->all_locks)
+ continue;
+ wait_for_lo= wait_for_lo->waiting_for;
+ }
+
+ /*
+ note that the blocker transaction may have ended by now,
+ its LOCK_OWNER and short id were reused, so 'wait_for_lo' may point
+ to an unrelated - albeit valid - LOCK_OWNER
+ */
+ if (!wait_for_lo)
+ continue;
+
+ lo->waiting_for= wait_for_lo;
+ lf_rwunlock_by_pins(pins);
+
+ /*
+ We lock a mutex - it may belong to a wrong LOCK_OWNER, but it must
+ belong to _some_ LOCK_OWNER. It means, we can never free() a LOCK_OWNER,
+ if there're other active LOCK_OWNERs.
+ */
+ /* QQ: race condition here */
+ pthread_mutex_lock(wait_for_lo->mutex);
+ if (DELETED(blocker->link))
+ {
+ /*
+ blocker transaction was ended, or a savepoint that owned
+ the lock was rolled back. Either way - the lock was removed
+ */
+ pthread_mutex_unlock(wait_for_lo->mutex);
+ lf_rwlock_by_pins(pins);
+ continue;
+ }
+
+ /* yuck. waiting */
+ deadline= my_getsystime() + lm->lock_timeout * 10000;
+ timeout.tv_sec= deadline/10000000;
+ timeout.tv_nsec= (deadline % 10000000) * 100;
+ do
+ {
+ pthread_cond_timedwait(wait_for_lo->cond, wait_for_lo->mutex, &timeout);
+ } while (!DELETED(blocker->link) && my_getsystime() < deadline);
+ pthread_mutex_unlock(wait_for_lo->mutex);
+ lf_rwlock_by_pins(pins);
+ if (!DELETED(blocker->link))
+ {
+ /*
+ timeout.
+ note that we _don't_ release the lock request here.
+ Instead we're relying on the caller to abort the transaction,
+ and release all locks at once - see lockman_release_locks()
+ */
+ _lf_unpin(pins, 3);
+ lf_rwunlock_by_pins(pins);
+ return DIDNT_GET_THE_LOCK;
+ }
+ }
+ lo->waiting_for= 0;
+ _lf_assert_unpin(pins, 3); /* unpin should not be needed */
+ lf_rwunlock_by_pins(pins);
+ return getlock_result[lock][lock];
+}
+
+/*
+ RETURN
+ 0 - deleted
+ 1 - didn't (not found)
+ NOTE
+ see lockdelete() for pin usage notes
+*/
+int lockman_release_locks(LOCKMAN *lm, LOCK_OWNER *lo)
+{
+ LOCK * volatile *el, *node, *next;
+ uint bucket;
+ LF_PINS *pins= lo->pins;
+
+ pthread_mutex_lock(lo->mutex);
+ lf_rwlock_by_pins(pins);
+ for (node= lo->all_locks; node; node= next)
+ {
+ next= node->lonext;
+ bucket= calc_hash(node->resource) % lm->size;
+ el= _lf_dynarray_lvalue(&lm->array, bucket);
+ if (*el == NULL)
+ initialize_bucket(lm, el, bucket, pins);
+ lockdelete(el, node, pins);
+ my_atomic_add32(&lm->count, -1);
+ }
+ lf_rwunlock_by_pins(pins);
+ lo->all_locks= 0;
+ /* now signal all waiters */
+ pthread_cond_broadcast(lo->cond);
+ pthread_mutex_unlock(lo->mutex);
+ return 0;
+}
+
+#ifdef MY_LF_EXTRA_DEBUG
+static const char *lock2str[]=
+{ "N", "S", "X", "IS", "IX", "SIX", "LS", "LX", "SLX", "LSIX" };
+/*
+ NOTE
+ the function below is NOT thread-safe !!!
+*/
+void print_lockhash(LOCKMAN *lm)
+{
+ LOCK *el= *(LOCK **)_lf_dynarray_lvalue(&lm->array, 0);
+ printf("hash: size %u count %u\n", lm->size, lm->count);
+ while (el)
+ {
+ intptr next= el->link;
+ if (el->hashnr & 1)
+ {
+ printf("0x%08lx { resource %lu, loid %u, lock %s",
+ (long) el->hashnr, (ulong) el->resource, el->loid,
+ lock2str[el->lock]);
+ if (el->flags & IGNORE_ME) printf(" IGNORE_ME");
+ if (el->flags & UPGRADED) printf(" UPGRADED");
+ if (el->flags & ACTIVE) printf(" ACTIVE");
+ if (DELETED(next)) printf(" ***DELETED***");
+ printf("}\n");
+ }
+ else
+ {
+ /*printf("0x%08x { dummy }\n", el->hashnr);*/
+ DBUG_ASSERT(el->resource == 0 && el->loid == 0 && el->lock == X);
+ }
+ el= PTR(next);
+ }
+}
+#endif
diff --git a/storage/maria/lockman.h b/storage/maria/lockman.h
new file mode 100644
index 00000000000..82ab483896f
--- /dev/null
+++ b/storage/maria/lockman.h
@@ -0,0 +1,76 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _lockman_h
+#define _lockman_h
+
+/*
+ Lock levels:
+ ^^^^^^^^^^^
+
+ N - "no lock", not a lock, used sometimes internally to simplify the code
+ S - Shared
+ X - eXclusive
+ IS - Intention Shared
+ IX - Intention eXclusive
+ SIX - Shared + Intention eXclusive
+ LS - Loose Shared
+ LX - Loose eXclusive
+ SLX - Shared + Loose eXclusive
+ LSIX - Loose Shared + Intention eXclusive
+*/
+enum lockman_lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST };
+
+struct lockman_lock;
+
+typedef struct st_lock_owner LOCK_OWNER;
+struct st_lock_owner {
+ LF_PINS *pins; /* must be allocated from lockman's pinbox */
+ struct lockman_lock *all_locks; /* a LIFO */
+ LOCK_OWNER *waiting_for;
+ pthread_cond_t *cond; /* transactions waiting for this, wait on 'cond' */
+ pthread_mutex_t *mutex; /* mutex is required to use 'cond' */
+ uint16 loid;
+};
+
+typedef LOCK_OWNER *loid_to_lo_func(uint16);
+typedef struct {
+ LF_DYNARRAY array; /* hash itself */
+ LF_ALLOCATOR alloc; /* allocator for elements */
+ int32 volatile size; /* size of array */
+ int32 volatile count; /* number of elements in the hash */
+ uint lock_timeout;
+ loid_to_lo_func *loid_to_lo;
+} LOCKMAN;
+#define DIDNT_GET_THE_LOCK 0
+enum lockman_getlock_result {
+ NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT,
+ GOT_THE_LOCK,
+ GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE,
+ GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE
+};
+
+void lockman_init(LOCKMAN *, loid_to_lo_func *, uint);
+void lockman_destroy(LOCKMAN *);
+enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo,
+ uint64 resource,
+ enum lockman_lock_type lock);
+int lockman_release_locks(LOCKMAN *, LOCK_OWNER *);
+
+#ifdef EXTRA_DEBUG
+void print_lockhash(LOCKMAN *lm);
+#endif
+
+#endif
diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c
new file mode 100644
index 00000000000..4dc188c31a5
--- /dev/null
+++ b/storage/maria/ma_bitmap.c
@@ -0,0 +1,2510 @@
+/* Copyright (C) 2007 Michael Widenius
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Bitmap handling (for records in block)
+
+ The data file starts with a bitmap page, followed by as many data
+ pages as the bitmap can cover. After this there is a new bitmap page
+ and more data pages etc.
+
+ The bitmap code assumes there is always an active bitmap page and thus
+ that there is at least one bitmap page in the file
+
+ Structure of bitmap page:
+
+ Fixed size records (to be implemented later):
+
+ 2 bits are used to indicate:
+
+ 0 Empty
+ 1 0-75 % full (at least room for 2 records)
+ 2 75-100 % full (at least room for one record)
+ 3 100 % full (no more room for records)
+
+ Assuming 8K pages, this will allow us to map:
+ 8192 (bytes per page) * 4 (pages mapped per byte) * 8192 (page size)= 256M
+
+ (For Maria this will be 7*4 * 8192 = 224K smaller because of LSN)
+
+ Note that for fixed size rows, we can't add more columns without doing
+ a full reorganization of the table. The user can always force a dynamic
+ size row format by specifying ROW_FORMAT=dynamic.
+
+
+ Dynamic size records:
+
+ 3 bits are used to indicate
+
+ 0 Empty page
+ 1 0-30 % full (at least room for 3 records)
+ 2 30-60 % full (at least room for 2 records)
+ 3 60-90 % full (at least room for one record)
+ 4 100 % full (no more room for records)
+ 5 Tail page, 0-40 % full
+ 6 Tail page, 40-80 % full
+ 7 Full tail page or full blob page
+
+ Assuming 8K pages, this will allow us to map:
+ 8192 (bytes per page) * 8 bits/byte / 3 bits/page * 8192 (page size)= 170.7M
+
+ Note that values 1-3 may be adjust for each individual table based on
+ 'min record length'. Tail pages are for overflow data which can be of
+ any size and thus doesn't have to be adjusted for different tables.
+ If we add more columns to the table, some of the originally calculated
+ 'cut off' points may not be optimal, but they shouldn't be 'drasticly
+ wrong'.
+
+ When allocating data from the bitmap, we are trying to do it in a
+ 'best fit' manner. Blobs and varchar blocks are given out in large
+ continuous extents to allow fast access to these. Before allowing a
+ row to 'flow over' to other blocks, we will compact the page and use
+ all space on it. If there is many rows in the page, we will ensure
+ there is *LEFT_TO_GROW_ON_SPLIT* bytes left on the page to allow other
+ rows to grow.
+
+ The bitmap format allows us to extend the row file in big chunks, if needed.
+
+ When calculating the size for a packed row, we will calculate the following
+ things separately:
+ - Row header + null_bits + empty_bits fixed size segments etc.
+ - Size of all char/varchar fields
+ - Size of each blob field
+
+ The bitmap handler will get all the above information and return
+ either one page or a set of pages to put the different parts.
+
+ Bitmaps are read on demand in response to insert/delete/update operations.
+ The following bitmap pointers will be cached and stored on disk on close:
+ - Current insert_bitmap; When inserting new data we will first try to
+ fill this one.
+ - First bitmap which is not completely full. This is updated when we
+ free data with an update or delete.
+
+ While flushing out bitmaps, we will cache the status of the bitmap in memory
+ to avoid having to read a bitmap for insert of new data that will not
+ be of any use
+ - Total empty space
+ - Largest number of continuous pages
+
+ Bitmap ONLY goes to disk in the following scenarios
+ - The file is closed (and we flush all changes to disk)
+ - On checkpoint
+ (Ie: When we do a checkpoint, we have to ensure that all bitmaps are
+ put on disk even if they are not in the page cache).
+ - When explicitely requested (for example on backup or after recvoery,
+ to simplify things)
+
+ The flow of writing a row is that:
+ - Lock the bitmap
+ - Decide which data pages we will write to
+ - Mark them full in the bitmap page so that other threads do not try to
+ use the same data pages as us
+ - We unlock the bitmap
+ - Write the data pages
+ - Lock the bitmap
+ - Correct the bitmap page with the true final occupation of the data
+ pages (that is, we marked pages full but when we are done we realize
+ we didn't fill them)
+ - Unlock the bitmap.
+*/
+
+#include "maria_def.h"
+#include "ma_blockrec.h"
+
+#define FULL_HEAD_PAGE 4
+#define FULL_TAIL_PAGE 7
+
+/*#define WRONG_BITMAP_FLUSH 1*/ /*define only for provoking bugs*/
+#undef WRONG_BITMAP_FLUSH
+
+static my_bool _ma_read_bitmap_page(MARIA_SHARE *share,
+ MARIA_FILE_BITMAP *bitmap,
+ ulonglong page);
+
+
+/* Write bitmap page to key cache */
+
+static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
+ MARIA_FILE_BITMAP *bitmap)
+{
+ DBUG_ENTER("write_changed_bitmap");
+ DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size);
+ DBUG_ASSERT(bitmap->file.write_callback != 0);
+ DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
+
+ if ((bitmap->non_flushable == 0)
+#ifdef WRONG_BITMAP_FLUSH
+ || 1
+#endif
+ )
+ {
+ my_bool res= pagecache_write(share->pagecache,
+ &bitmap->file, bitmap->page, 0,
+ (uchar*) bitmap->map, PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE);
+ DBUG_RETURN(res);
+ }
+ else
+ {
+ /**
+ @todo RECOVERY BUG
+ Not flushable: its content is not reflected by the log, to honour WAL we
+ must keep the bitmap page pinned. Scenario of INSERT:
+ REDO - UNDO (written to log but not forced)
+ bitmap goes to page cache (because other INSERT needs to)
+ and then to disk (pagecache eviction)
+ crash: recovery will not find REDO-UNDO, table is corrupted.
+ Solutions:
+ give LSNs to bitmap pages or change pagecache to flush all log when
+ flushing a bitmap page or keep bitmap page pinned until checkpoint.
+ */
+ MARIA_PINNED_PAGE page_link;
+ int res= pagecache_write(share->pagecache,
+ &bitmap->file, bitmap->page, 0,
+ (uchar*) bitmap->map, PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE, PAGECACHE_PIN,
+ PAGECACHE_WRITE_DELAY, &page_link.link,
+ LSN_IMPOSSIBLE);
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&bitmap->pinned_pages, (void*) &page_link);
+ DBUG_RETURN(res);
+ }
+}
+
+/*
+ Initialize bitmap variables in share
+
+ SYNOPSIS
+ _ma_bitmap_init()
+ share Share handler
+ file data file handler
+
+ NOTES
+ This is called the first time a file is opened.
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
+{
+ uint aligned_bit_blocks;
+ uint max_page_size;
+ MARIA_FILE_BITMAP *bitmap= &share->bitmap;
+ uint size= share->block_size;
+#ifndef DBUG_OFF
+ /* We want to have a copy of the bitmap to be able to print differences */
+ size*= 2;
+#endif
+
+ if (((bitmap->map= (uchar*) my_malloc(size, MYF(MY_WME))) == NULL) ||
+ my_init_dynamic_array(&bitmap->pinned_pages,
+ sizeof(MARIA_PINNED_PAGE), 1, 1))
+ return 1;
+
+ bitmap->block_size= share->block_size;
+ bitmap->file.file= file;
+ bitmap->file.callback_data= (uchar*) share;
+ bitmap->file.write_fail= &maria_page_write_failure;
+ if (share->temporary)
+ {
+ bitmap->file.read_callback= &maria_page_crc_check_none;
+ bitmap->file.write_callback= &maria_page_filler_set_none;
+ }
+ else
+ {
+ bitmap->file.read_callback= &maria_page_crc_check_bitmap;
+ if (share->options & HA_OPTION_PAGE_CHECKSUM)
+ bitmap->file.write_callback= &maria_page_crc_set_normal;
+ else
+ bitmap->file.write_callback= &maria_page_filler_set_bitmap;
+ }
+
+ /* Size needs to be aligned on 6 */
+ aligned_bit_blocks= (share->block_size - PAGE_SUFFIX_SIZE) / 6;
+ bitmap->total_size= aligned_bit_blocks * 6;
+ /*
+ In each 6 bytes, we have 6*8/3 = 16 pages covered
+ The +1 is to add the bitmap page, as this doesn't have to be covered
+ */
+ bitmap->pages_covered= aligned_bit_blocks * 16 + 1;
+ bitmap->flush_all_requested= bitmap->non_flushable= 0;
+
+ /* Update size for bits */
+ /* TODO; Make this dependent of the row size */
+ max_page_size= share->block_size - PAGE_OVERHEAD_SIZE;
+ bitmap->sizes[0]= max_page_size; /* Empty page */
+ bitmap->sizes[1]= max_page_size - max_page_size * 30 / 100;
+ bitmap->sizes[2]= max_page_size - max_page_size * 60 / 100;
+ bitmap->sizes[3]= max_page_size - max_page_size * 90 / 100;
+ bitmap->sizes[4]= 0; /* Full page */
+ bitmap->sizes[5]= max_page_size - max_page_size * 40 / 100;
+ bitmap->sizes[6]= max_page_size - max_page_size * 80 / 100;
+ bitmap->sizes[7]= 0;
+
+ pthread_mutex_init(&share->bitmap.bitmap_lock, MY_MUTEX_INIT_SLOW);
+ pthread_cond_init(&share->bitmap.bitmap_cond, 0);
+
+ _ma_bitmap_reset_cache(share);
+
+ if (share->state.first_bitmap_with_space == ~(ulonglong) 0)
+ {
+ /* Start scanning for free space from start of file */
+ share->state.first_bitmap_with_space = 0;
+ }
+ return 0;
+}
+
+
+/*
+ Free data allocated by _ma_bitmap_init
+
+ SYNOPSIS
+ _ma_bitmap_end()
+ share Share handler
+*/
+
+my_bool _ma_bitmap_end(MARIA_SHARE *share)
+{
+ my_bool res= _ma_bitmap_flush(share);
+ pthread_mutex_destroy(&share->bitmap.bitmap_lock);
+ pthread_cond_destroy(&share->bitmap.bitmap_cond);
+ delete_dynamic(&share->bitmap.pinned_pages);
+ my_free((uchar*) share->bitmap.map, MYF(MY_ALLOW_ZERO_PTR));
+ share->bitmap.map= 0;
+ return res;
+}
+
+
+/*
+ Send updated bitmap to the page cache
+
+ SYNOPSIS
+ _ma_bitmap_flush()
+ share Share handler
+
+ NOTES
+ In the future, _ma_bitmap_flush() will be called to flush changes don't
+ by this thread (ie, checking the changed flag is ok). The reason we
+ check it again in the mutex is that if someone else did a flush at the
+ same time, we don't have to do the write.
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+my_bool _ma_bitmap_flush(MARIA_SHARE *share)
+{
+ my_bool res= 0;
+ DBUG_ENTER("_ma_bitmap_flush");
+ if (share->bitmap.changed)
+ {
+ pthread_mutex_lock(&share->bitmap.bitmap_lock);
+ if (share->bitmap.changed)
+ {
+ res= write_changed_bitmap(share, &share->bitmap);
+ share->bitmap.changed= 0;
+ }
+ pthread_mutex_unlock(&share->bitmap.bitmap_lock);
+ }
+ DBUG_RETURN(res);
+}
+
+
+/**
+ Dirty-page filtering criteria for bitmap pages
+
+ @param type Page's type
+ @param pageno Page's number
+ @param rec_lsn Page's rec_lsn
+ @param arg pages_covered of bitmap
+*/
+
+static enum pagecache_flush_filter_result
+filter_flush_bitmap_pages(enum pagecache_page_type type
+ __attribute__ ((unused)),
+ pgcache_page_no_t pageno,
+ LSN rec_lsn __attribute__ ((unused)),
+ void *arg)
+{
+ return ((pageno % (*(ulong*)arg)) == 0);
+}
+
+
+/**
+ Flushes current bitmap page to the pagecache, and then all bitmap pages
+ from pagecache to the file. Used by Checkpoint.
+
+ @param share Table's share
+*/
+
+my_bool _ma_bitmap_flush_all(MARIA_SHARE *share)
+{
+ my_bool res= 0;
+ MARIA_FILE_BITMAP *bitmap= &share->bitmap;
+ DBUG_ENTER("_ma_bitmap_flush_all");
+ pthread_mutex_lock(&bitmap->bitmap_lock);
+ if (bitmap->changed)
+ {
+ bitmap->flush_all_requested= TRUE;
+#ifndef WRONG_BITMAP_FLUSH
+ while (bitmap->non_flushable > 0)
+ {
+ DBUG_PRINT("info", ("waiting for bitmap to be flushable"));
+ pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
+ }
+#endif
+ /*
+ Bitmap is in a flushable state: its contents in memory are reflected by
+ log records (complete REDO-UNDO groups) and all bitmap pages are
+ unpinned. We keep the mutex to preserve this situation, and flush to the
+ file.
+ */
+ res= write_changed_bitmap(share, bitmap);
+ bitmap->changed= FALSE;
+ /*
+ We do NOT use FLUSH_KEEP_LAZY because we must be sure that bitmap
+ pages have been flushed. That's a condition of correctness of
+ Recovery: data pages may have been all flushed, if we write the
+ checkpoint record Recovery will start from after their REDOs. If
+ bitmap page was not flushed, as the REDOs about it will be skipped, it
+ will wrongly not be recovered. If bitmap pages had a rec_lsn it would
+ be different.
+ There should be no pinned pages as bitmap->non_flushable==0.
+ */
+ if (flush_pagecache_blocks_with_filter(share->pagecache,
+ &bitmap->file, FLUSH_KEEP,
+ filter_flush_bitmap_pages,
+ &bitmap->pages_covered) &
+ PCFLUSH_PINNED_AND_ERROR)
+ res= TRUE;
+ bitmap->flush_all_requested= FALSE;
+ /*
+ Some well-behaved threads may be waiting for flush_all_requested to
+ become false, wake them up.
+ */
+ DBUG_PRINT("info", ("bitmap flusher waking up others"));
+ pthread_cond_broadcast(&bitmap->bitmap_cond);
+ }
+ pthread_mutex_unlock(&bitmap->bitmap_lock);
+ DBUG_RETURN(res);
+}
+
+
+/**
+ @brief Unpin all pinned bitmap pages
+
+ @param share Table's share
+
+ @return Operation status
+ @retval 0 ok
+*/
+
+static void _ma_bitmap_unpin_all(MARIA_SHARE *share)
+{
+ MARIA_FILE_BITMAP *bitmap= &share->bitmap;
+ MARIA_PINNED_PAGE *page_link= ((MARIA_PINNED_PAGE*)
+ dynamic_array_ptr(&bitmap->pinned_pages, 0));
+ MARIA_PINNED_PAGE *pinned_page= page_link + bitmap->pinned_pages.elements;
+ DBUG_ENTER("_ma_bitmap_unpin_all");
+ DBUG_PRINT("info", ("pinned: %u", bitmap->pinned_pages.elements));
+ while (pinned_page-- != page_link)
+ pagecache_unlock_by_link(share->pagecache, pinned_page->link,
+ pinned_page->unlock, PAGECACHE_UNPIN,
+ LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, TRUE);
+ bitmap->pinned_pages.elements= 0;
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Intialize bitmap in memory to a zero bitmap
+
+ SYNOPSIS
+ _ma_bitmap_delete_all()
+ share Share handler
+
+ NOTES
+ This is called on maria_delete_all_rows (truncate data file).
+*/
+
+void _ma_bitmap_delete_all(MARIA_SHARE *share)
+{
+ MARIA_FILE_BITMAP *bitmap= &share->bitmap;
+ if (bitmap->map) /* Not in create */
+ {
+ bzero(bitmap->map, bitmap->block_size);
+ bitmap->changed= 1;
+ bitmap->page= 0;
+ bitmap->used_size= bitmap->total_size;
+ }
+}
+
+
+/**
+ @brief Reset bitmap caches
+
+ @fn _ma_bitmap_reset_cache()
+ @param share Maria share
+
+ @notes
+ This is called after we have swapped file descriptors and we want
+ bitmap to forget all cached information
+*/
+
+void _ma_bitmap_reset_cache(MARIA_SHARE *share)
+{
+ MARIA_FILE_BITMAP *bitmap= &share->bitmap;
+
+ if (bitmap->map) /* If using bitmap */
+ {
+ /* Forget changes in current bitmap page */
+ bitmap->changed= 0;
+
+ /*
+ We can't read a page yet, as in some case we don't have an active
+ page cache yet.
+ Pretend we have a dummy, full and not changed bitmap page in memory.
+ */
+ bitmap->page= ~(ulonglong) 0;
+ bitmap->used_size= bitmap->total_size;
+ bfill(bitmap->map, share->block_size, 255);
+#ifndef DBUG_OFF
+ memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size);
+#endif
+ }
+}
+
+
+/*
+ Return bitmap pattern for the smallest head block that can hold 'size'
+
+ SYNOPSIS
+ size_to_head_pattern()
+ bitmap Bitmap
+ size Requested size
+
+ RETURN
+ 0-3 For a description of the bitmap sizes, see the header
+*/
+
+static uint size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
+{
+ if (size <= bitmap->sizes[3])
+ return 3;
+ if (size <= bitmap->sizes[2])
+ return 2;
+ if (size <= bitmap->sizes[1])
+ return 1;
+ DBUG_ASSERT(size <= bitmap->sizes[0]);
+ return 0;
+}
+
+
+/*
+ Return bitmap pattern for head block where there is size bytes free
+
+ SYNOPSIS
+ _ma_free_size_to_head_pattern()
+ bitmap Bitmap
+ size Requested size
+
+ RETURN
+ 0-4 (Possible bitmap patterns for head block)
+*/
+
+uint _ma_free_size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
+{
+ if (size < bitmap->sizes[3])
+ return 4;
+ if (size < bitmap->sizes[2])
+ return 3;
+ if (size < bitmap->sizes[1])
+ return 2;
+ return (size < bitmap->sizes[0]) ? 1 : 0;
+}
+
+
+/*
+ Return bitmap pattern for the smallest tail block that can hold 'size'
+
+ SYNOPSIS
+ size_to_tail_pattern()
+ bitmap Bitmap
+ size Requested size
+
+ RETURN
+ 0, 5 or 6 For a description of the bitmap sizes, see the header
+*/
+
+static uint size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
+{
+ if (size <= bitmap->sizes[6])
+ return 6;
+ if (size <= bitmap->sizes[5])
+ return 5;
+ DBUG_ASSERT(size <= bitmap->sizes[0]);
+ return 0;
+}
+
+
+/*
+ Return bitmap pattern for tail block where there is size bytes free
+
+ SYNOPSIS
+ free_size_to_tail_pattern()
+ bitmap Bitmap
+ size Requested size
+
+ RETURN
+ 0, 5, 6, 7 For a description of the bitmap sizes, see the header
+*/
+
+static uint free_size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
+{
+ if (size >= bitmap->sizes[0])
+ return 0; /* Revert to empty page */
+ if (size < bitmap->sizes[6])
+ return 7;
+ if (size < bitmap->sizes[5])
+ return 6;
+ return 5;
+}
+
+
+/*
+ Return size guranteed to be available on a page
+
+ SYNOPSIS
+ pattern_to_head_size()
+ bitmap Bitmap
+ pattern Pattern (0-7)
+
+ RETURN
+ 0 - block_size
+*/
+
+static inline uint pattern_to_size(MARIA_FILE_BITMAP *bitmap, uint pattern)
+{
+ DBUG_ASSERT(pattern <= 7);
+ return bitmap->sizes[pattern];
+}
+
+
+/*
+ Print bitmap for debugging
+
+ SYNOPSIS
+ _ma_print_bitmap()
+ bitmap Bitmap to print
+
+ IMPLEMENTATION
+ Prints all changed bits since last call to _ma_print_bitmap().
+ This is done by having a copy of the last bitmap in
+ bitmap->map+bitmap->block_size.
+*/
+
+#ifndef DBUG_OFF
+
+const char *bits_to_txt[]=
+{
+ "empty", "00-30% full", "30-60% full", "60-90% full", "full",
+ "tail 00-40 % full", "tail 40-80 % full", "tail/blob full"
+};
+
+static void _ma_print_bitmap_changes(MARIA_FILE_BITMAP *bitmap)
+{
+ uchar *pos, *end, *org_pos;
+ ulong page;
+
+ end= bitmap->map + bitmap->used_size;
+ DBUG_LOCK_FILE;
+ fprintf(DBUG_FILE,"\nBitmap page changes at page %lu\n",
+ (ulong) bitmap->page);
+
+ page= (ulong) bitmap->page+1;
+ for (pos= bitmap->map, org_pos= bitmap->map + bitmap->block_size ;
+ pos < end ;
+ pos+= 6, org_pos+= 6)
+ {
+ ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */
+ ulonglong org_bits= uint6korr(org_pos);
+ uint i;
+
+ /*
+ Test if there is any changes in the next 16 bitmaps (to not have to
+ loop through all bits if we know they are the same)
+ */
+ if (bits != org_bits)
+ {
+ for (i= 0; i < 16 ; i++, bits>>= 3, org_bits>>= 3)
+ {
+ if ((bits & 7) != (org_bits & 7))
+ fprintf(DBUG_FILE, "Page: %8lu %s -> %s\n", page+i,
+ bits_to_txt[org_bits & 7], bits_to_txt[bits & 7]);
+ }
+ }
+ page+= 16;
+ }
+ fputc('\n', DBUG_FILE);
+ DBUG_UNLOCK_FILE;
+ memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size);
+}
+
+
+/* Print content of bitmap for debugging */
+
+void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data,
+ ulonglong page)
+{
+ uchar *pos, *end;
+ char llbuff[22];
+
+ end= bitmap->map + bitmap->used_size;
+ DBUG_LOCK_FILE;
+ fprintf(DBUG_FILE,"\nDump of bitmap page at %s\n", llstr(page, llbuff));
+
+ page++; /* Skip bitmap page */
+ for (pos= data, end= pos + bitmap->total_size;
+ pos < end ;
+ pos+= 6)
+ {
+ ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */
+
+ /*
+ Test if there is any changes in the next 16 bitmaps (to not have to
+ loop through all bits if we know they are the same)
+ */
+ if (bits)
+ {
+ uint i;
+ for (i= 0; i < 16 ; i++, bits>>= 3)
+ {
+ if (bits & 7)
+ fprintf(DBUG_FILE, "Page: %8s %s\n", llstr(page+i, llbuff),
+ bits_to_txt[bits & 7]);
+ }
+ }
+ page+= 16;
+ }
+ fputc('\n', DBUG_FILE);
+ DBUG_UNLOCK_FILE;
+}
+
+#endif /* DBUG_OFF */
+
+
+/***************************************************************************
+ Reading & writing bitmap pages
+***************************************************************************/
+
+/*
+ Read a given bitmap page
+
+ SYNOPSIS
+ read_bitmap_page()
+ info Maria handler
+ bitmap Bitmap handler
+ page Page to read
+
+ TODO
+ Update 'bitmap->used_size' to real size of used bitmap
+
+ NOTE
+ We don't always have share->bitmap.bitmap_lock here
+ (when called from_ma_check_bitmap_data() for example).
+
+ RETURN
+ 0 ok
+ 1 error (Error writing old bitmap or reading bitmap page)
+*/
+
+static my_bool _ma_read_bitmap_page(MARIA_SHARE *share,
+ MARIA_FILE_BITMAP *bitmap,
+ ulonglong page)
+{
+ my_off_t end_of_page= (page + 1) * bitmap->block_size;
+ my_bool res;
+ DBUG_ENTER("_ma_read_bitmap_page");
+ DBUG_ASSERT(page % bitmap->pages_covered == 0);
+ DBUG_ASSERT(!bitmap->changed);
+
+ bitmap->page= page;
+ if (end_of_page > share->state.state.data_file_length)
+ {
+ /*
+ Inexistent or half-created page (could be crash in the middle of
+ _ma_bitmap_create_first(), before appending maria_bitmap_marker).
+ */
+ /**
+ @todo RECOVERY BUG
+ We are updating data_file_length before writing any log record for the
+ row operation. What if now state is flushed by a checkpoint with the
+ new value, and crash before the checkpoint record is written, recovery
+ may not even open the table (no log records) so not fix
+ data_file_length ("WAL violation")?
+ Scenario: assume share->id==0, then:
+ thread 1 (here) thread 2 (checkpoint)
+ update data_file_length
+ copy state to memory, flush log
+ set share->id and write FILE_ID (not flushed)
+ see share->id!=0 so flush state
+ crash
+ FILE_ID will be missing, Recovery will not open table and not fix
+ data_file_length. This bug should be fixed with other "checkpoint vs
+ bitmap" bugs.
+ One possibility will be logging a standalone LOGREC_CREATE_BITMAP in a
+ separate transaction (using dummy_transaction_object).
+ */
+ share->state.state.data_file_length= end_of_page;
+ bzero(bitmap->map, bitmap->block_size);
+ bitmap->used_size= 0;
+#ifndef DBUG_OFF
+ memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size);
+#endif
+ DBUG_RETURN(0);
+ }
+ bitmap->used_size= bitmap->total_size;
+ DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size);
+ res= pagecache_read(share->pagecache,
+ &bitmap->file, page, 0,
+ (uchar*) bitmap->map,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == NULL;
+
+ /*
+ We can't check maria_bitmap_marker here as if the bitmap page
+ previously had a true checksum and the user switched mode to not checksum
+ this may have any value, except maria_normal_page_marker.
+
+ Using maria_normal_page_marker gives us a protection against bugs
+ when running without any checksums.
+ */
+
+#ifndef DBUG_OFF
+ if (!res)
+ memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size);
+#endif
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Change to another bitmap page
+
+ SYNOPSIS
+ _ma_change_bitmap_page()
+ info Maria handler
+ bitmap Bitmap handler
+ page Bitmap page to read
+
+ NOTES
+ If old bitmap was changed, write it out before reading new one
+ We return empty bitmap if page is outside of file size
+
+ RETURN
+ 0 ok
+ 1 error (Error writing old bitmap or reading bitmap page)
+*/
+
+static my_bool _ma_change_bitmap_page(MARIA_HA *info,
+ MARIA_FILE_BITMAP *bitmap,
+ ulonglong page)
+{
+ DBUG_ENTER("_ma_change_bitmap_page");
+
+ if (bitmap->changed)
+ {
+ if (write_changed_bitmap(info->s, bitmap))
+ DBUG_RETURN(1);
+ bitmap->changed= 0;
+ }
+ DBUG_RETURN(_ma_read_bitmap_page(info->s, bitmap, page));
+}
+
+
+/*
+ Read next suitable bitmap
+
+ SYNOPSIS
+ move_to_next_bitmap()
+ bitmap Bitmap handle
+
+ NOTES
+ The found bitmap may be full, so calling function may need to call this
+ repeatedly until it finds enough space.
+
+ TODO
+ Add cache of bitmaps to not read something that is not usable
+
+ RETURN
+ 0 ok
+ 1 error (either couldn't save old bitmap or read new one)
+*/
+
+static my_bool move_to_next_bitmap(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap)
+{
+ ulonglong page= bitmap->page;
+ MARIA_STATE_INFO *state= &info->s->state;
+ DBUG_ENTER("move_to_next_bitmap");
+
+ if (state->first_bitmap_with_space != ~(ulonglong) 0 &&
+ state->first_bitmap_with_space != page)
+ {
+ page= state->first_bitmap_with_space;
+ state->first_bitmap_with_space= ~(ulonglong) 0;
+ }
+ else
+ page+= bitmap->pages_covered;
+ DBUG_RETURN(_ma_change_bitmap_page(info, bitmap, page));
+}
+
+
+/****************************************************************************
+ Allocate data in bitmaps
+****************************************************************************/
+
+/*
+ Store data in 'block' and mark the place used in the bitmap
+
+ SYNOPSIS
+ fill_block()
+ bitmap Bitmap handle
+ block Store data about what we found
+ best_data Pointer to best 6 uchar aligned area in bitmap->map
+ best_pos Which bit in *best_data the area starts
+ 0 = first bit pattern, 1 second bit pattern etc
+ best_bits The original value of the bits at best_pos
+ fill_pattern Bitmap pattern to store in best_data[best_pos]
+
+ NOTES
+ We mark all pages to be 'TAIL's, which means that
+ block->page_count is really a row position inside the page.
+*/
+
+static void fill_block(MARIA_FILE_BITMAP *bitmap,
+ MARIA_BITMAP_BLOCK *block,
+ uchar *best_data, uint best_pos, uint best_bits,
+ uint fill_pattern)
+{
+ uint page, offset, tmp;
+ uchar *data;
+
+ /* For each 6 bytes we have 6*8/3= 16 patterns */
+ page= (best_data - bitmap->map) / 6 * 16 + best_pos;
+ block->page= bitmap->page + 1 + page;
+ block->page_count= 1 + TAIL_BIT;
+ block->empty_space= pattern_to_size(bitmap, best_bits);
+ block->sub_blocks= 1;
+ block->org_bitmap_value= best_bits;
+ block->used= BLOCKUSED_TAIL; /* See _ma_bitmap_release_unused() */
+
+ /*
+ Mark place used by reading/writing 2 bytes at a time to handle
+ bitmaps in overlapping bytes
+ */
+ best_pos*= 3;
+ data= best_data+ best_pos / 8;
+ offset= best_pos & 7;
+ tmp= uint2korr(data);
+
+ /* we turn off the 3 bits and replace them with fill_pattern */
+ tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset);
+ int2store(data, tmp);
+ bitmap->changed= 1;
+ DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
+}
+
+
+/*
+ Allocate data for head block
+
+ SYNOPSIS
+ allocate_head()
+ bitmap bitmap
+ size Size of data region we need to store
+ block Store found information here
+
+ IMPLEMENTATION
+ Find the best-fit page to put a region of 'size'
+ This is defined as the first page of the set of pages
+ with the smallest free space that can hold 'size'.
+
+ RETURN
+ 0 ok (block is updated)
+ 1 error (no space in bitmap; block is not touched)
+*/
+
+
+static my_bool allocate_head(MARIA_FILE_BITMAP *bitmap, uint size,
+ MARIA_BITMAP_BLOCK *block)
+{
+ uint min_bits= size_to_head_pattern(bitmap, size);
+ uchar *data= bitmap->map, *end= data + bitmap->used_size;
+ uchar *best_data= 0;
+ uint best_bits= (uint) -1, best_pos;
+ DBUG_ENTER("allocate_head");
+
+ LINT_INIT(best_pos);
+ DBUG_ASSERT(size <= FULL_PAGE_SIZE(bitmap->block_size));
+
+ for (; data < end; data += 6)
+ {
+ ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */
+ uint i;
+
+ /*
+ Skip common patterns
+ We can skip empty pages (if we already found a match) or
+ anything matching the following pattern as this will be either
+ a full page or a tail page
+ */
+ if ((!bits && best_data) ||
+ ((bits & LL(04444444444444444)) == LL(04444444444444444)))
+ continue;
+ for (i= 0; i < 16 ; i++, bits >>= 3)
+ {
+ uint pattern= bits & 7;
+ if (pattern <= min_bits)
+ {
+ /* There is enough space here */
+ if (pattern == min_bits)
+ {
+ /* There is exactly enough space here, return this page */
+ best_bits= min_bits;
+ best_data= data;
+ best_pos= i;
+ goto found;
+ }
+ if ((int) pattern > (int) best_bits)
+ {
+ /*
+ There is more than enough space here and it's better than what
+ we have found so far. Remember it, as we will choose it if we
+ don't find anything in this bitmap page.
+ */
+ best_bits= pattern;
+ best_data= data;
+ best_pos= i;
+ }
+ }
+ }
+ }
+ if (!best_data) /* Found no place */
+ {
+ if (bitmap->used_size == bitmap->total_size)
+ DBUG_RETURN(1); /* No space in bitmap */
+ /* Allocate data at end of bitmap */
+ bitmap->used_size+= 6;
+ best_data= data;
+ best_pos= best_bits= 0;
+ }
+
+found:
+ fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_HEAD_PAGE);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Allocate data for tail block
+
+ SYNOPSIS
+ allocate_tail()
+ bitmap bitmap
+ size Size of block we need to find
+ block Store found information here
+
+ RETURN
+ 0 ok (block is updated)
+ 1 error (no space in bitmap; block is not touched)
+*/
+
+
+static my_bool allocate_tail(MARIA_FILE_BITMAP *bitmap, uint size,
+ MARIA_BITMAP_BLOCK *block)
+{
+ uint min_bits= size_to_tail_pattern(bitmap, size);
+ uchar *data= bitmap->map, *end= data + bitmap->used_size;
+ uchar *best_data= 0;
+ uint best_bits= (uint) -1, best_pos;
+ DBUG_ENTER("allocate_tail");
+ DBUG_PRINT("enter", ("size: %u", size));
+
+ LINT_INIT(best_pos);
+ DBUG_ASSERT(size <= FULL_PAGE_SIZE(bitmap->block_size));
+
+ for (; data < end; data += 6)
+ {
+ ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */
+ uint i;
+
+ /*
+ Skip common patterns
+ We can skip empty pages (if we already found a match) or
+ the following patterns: 1-4 (head pages, not suitable for tail) or
+ 7 (full tail page). See 'Dynamic size records' comment at start of file.
+
+ At the moment we only skip full tail pages (ie, all bits are
+ set) as this is easy to detect with one simple test and is a
+ quite common case if we have blobs.
+ */
+
+ if ((!bits && best_data) || bits == LL(0xffffffffffff))
+ continue;
+ for (i= 0; i < 16; i++, bits >>= 3)
+ {
+ uint pattern= bits & 7;
+ if (pattern <= min_bits && (!pattern || pattern >= 5))
+ {
+ if (pattern == min_bits)
+ {
+ best_bits= min_bits;
+ best_data= data;
+ best_pos= i;
+ goto found;
+ }
+ if ((int) pattern > (int) best_bits)
+ {
+ best_bits= pattern;
+ best_data= data;
+ best_pos= i;
+ }
+ }
+ }
+ }
+ if (!best_data)
+ {
+ if (bitmap->used_size == bitmap->total_size)
+ DBUG_RETURN(1);
+ /* Allocate data at end of bitmap */
+ best_data= end;
+ bitmap->used_size+= 6;
+ best_pos= best_bits= 0;
+ }
+
+found:
+ fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_TAIL_PAGE);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Allocate data for full blocks
+
+ SYNOPSIS
+ allocate_full_pages()
+ bitmap bitmap
+ pages_needed Total size in pages (bitmap->total_size) we would like to have
+ block Store found information here
+ full_page 1 if we are not allowed to split extent
+
+ IMPLEMENTATION
+ We will return the smallest area >= size. If there is no such
+ block, we will return the biggest area that satisfies
+ area_size >= min(BLOB_SEGMENT_MIN_SIZE*full_page_size, size)
+
+ To speed up searches, we will only consider areas that has at least 16 free
+ pages starting on an even boundary. When finding such an area, we will
+ extend it with all previous and following free pages. This will ensure
+ we don't get holes between areas
+
+ RETURN
+ # Blocks used
+ 0 error (no space in bitmap; block is not touched)
+*/
+
+static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap,
+ ulong pages_needed,
+ MARIA_BITMAP_BLOCK *block, my_bool full_page)
+{
+ uchar *data= bitmap->map, *data_end= data + bitmap->used_size;
+ uchar *page_end= data + bitmap->total_size;
+ uchar *best_data= 0;
+ uint min_size;
+ uint best_area_size, best_prefix_area_size, best_suffix_area_size;
+ uint page, size;
+ ulonglong best_prefix_bits;
+ DBUG_ENTER("allocate_full_pages");
+ DBUG_PRINT("enter", ("pages_needed: %lu", pages_needed));
+
+ /* Following variables are only used if best_data is set */
+ LINT_INIT(best_prefix_bits);
+ LINT_INIT(best_prefix_area_size);
+ LINT_INIT(best_suffix_area_size);
+
+ min_size= pages_needed;
+ if (!full_page && min_size > BLOB_SEGMENT_MIN_SIZE)
+ min_size= BLOB_SEGMENT_MIN_SIZE;
+ best_area_size= ~(uint) 0;
+
+ for (; data < page_end; data+= 6)
+ {
+ ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */
+ uchar *data_start;
+ ulonglong prefix_bits= 0;
+ uint area_size, prefix_area_size, suffix_area_size;
+
+ /* Find area with at least 16 free pages */
+ if (bits)
+ continue;
+ data_start= data;
+ /* Find size of area */
+ for (data+=6 ; data < data_end ; data+= 6)
+ {
+ if ((bits= uint6korr(data)))
+ break;
+ }
+ area_size= (data - data_start) / 6 * 16;
+ if (area_size >= best_area_size)
+ continue;
+ prefix_area_size= suffix_area_size= 0;
+ if (!bits)
+ {
+ /*
+ End of page; All the rest of the bits on page are part of area
+ This is needed because bitmap->used_size only covers the set bits
+ in the bitmap.
+ */
+ area_size+= (page_end - data) / 6 * 16;
+ if (area_size >= best_area_size)
+ break;
+ data= page_end;
+ }
+ else
+ {
+ /* Add bits at end of page */
+ for (; !(bits & 7); bits >>= 3)
+ suffix_area_size++;
+ area_size+= suffix_area_size;
+ }
+ if (data_start != bitmap->map)
+ {
+ /* Add bits before page */
+ bits= prefix_bits= uint6korr(data_start - 6);
+ DBUG_ASSERT(bits != 0);
+ /* 111 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 */
+ if (!(bits & LL(07000000000000000)))
+ {
+ data_start-= 6;
+ do
+ {
+ prefix_area_size++;
+ bits<<= 3;
+ } while (!(bits & LL(07000000000000000)));
+ area_size+= prefix_area_size;
+ /* Calculate offset to page from data_start */
+ prefix_area_size= 16 - prefix_area_size;
+ }
+ }
+ if (area_size >= min_size && area_size <= best_area_size)
+ {
+ best_data= data_start;
+ best_area_size= area_size;
+ best_prefix_bits= prefix_bits;
+ best_prefix_area_size= prefix_area_size;
+ best_suffix_area_size= suffix_area_size;
+
+ /* Prefer to put data in biggest possible area */
+ if (area_size <= pages_needed)
+ min_size= area_size;
+ else
+ min_size= pages_needed;
+ }
+ }
+ if (!best_data)
+ DBUG_RETURN(0); /* No room on page */
+
+ /*
+ Now allocate min(pages_needed, area_size), starting from
+ best_start + best_prefix_area_size
+ */
+ if (best_area_size > pages_needed)
+ best_area_size= pages_needed;
+
+ /* For each 6 bytes we have 6*8/3= 16 patterns */
+ page= ((best_data - bitmap->map) * 8) / 3 + best_prefix_area_size;
+ block->page= bitmap->page + 1 + page;
+ block->page_count= best_area_size;
+ block->empty_space= 0;
+ block->sub_blocks= 1;
+ block->org_bitmap_value= 0;
+ block->used= 0;
+ DBUG_PRINT("info", ("page: %lu page_count: %u",
+ (ulong) block->page, block->page_count));
+
+ if (best_prefix_area_size)
+ {
+ ulonglong tmp;
+ /* Convert offset back to bits */
+ best_prefix_area_size= 16 - best_prefix_area_size;
+ if (best_area_size < best_prefix_area_size)
+ {
+ tmp= (LL(1) << best_area_size*3) - 1;
+ best_area_size= best_prefix_area_size; /* for easy end test */
+ }
+ else
+ tmp= (LL(1) << best_prefix_area_size*3) - 1;
+ tmp<<= (16 - best_prefix_area_size) * 3;
+ DBUG_ASSERT((best_prefix_bits & tmp) == 0);
+ best_prefix_bits|= tmp;
+ int6store(best_data, best_prefix_bits);
+ if (!(best_area_size-= best_prefix_area_size))
+ {
+ DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
+ DBUG_RETURN(block->page_count);
+ }
+ best_data+= 6;
+ }
+ best_area_size*= 3; /* Bits to set */
+ size= best_area_size/8; /* Bytes to set */
+ bfill(best_data, size, 255);
+ best_data+= size;
+ if ((best_area_size-= size * 8))
+ {
+ /* fill last uchar */
+ *best_data|= (uchar) ((1 << best_area_size) -1);
+ best_data++;
+ }
+ if (data_end < best_data)
+ bitmap->used_size= (uint) (best_data - bitmap->map);
+ bitmap->changed= 1;
+ DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
+ DBUG_RETURN(block->page_count);
+}
+
+
+/****************************************************************************
+ Find right bitmaps where to store data
+****************************************************************************/
+
+/*
+ Find right bitmap and position for head block
+
+ SYNOPSIS
+ find_head()
+ info Maria handler
+ length Size of data region we need store
+ position Position in bitmap_blocks where to store the
+ information for the head block.
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool find_head(MARIA_HA *info, uint length, uint position)
+{
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ MARIA_BITMAP_BLOCK *block;
+ /*
+ There is always place for the head block in bitmap_blocks as these are
+ preallocated at _ma_init_block_record().
+ */
+ block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *);
+
+ while (allocate_head(bitmap, length, block))
+ if (move_to_next_bitmap(info, bitmap))
+ return 1;
+ return 0;
+}
+
+
+/*
+ Find right bitmap and position for tail
+
+ SYNOPSIS
+ find_tail()
+ info Maria handler
+ length Size of data region we need store
+ position Position in bitmap_blocks where to store the
+ information for the head block.
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool find_tail(MARIA_HA *info, uint length, uint position)
+{
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ MARIA_BITMAP_BLOCK *block;
+ DBUG_ENTER("find_tail");
+
+ /* Needed, as there is no error checking in dynamic_element */
+ if (allocate_dynamic(&info->bitmap_blocks, position))
+ DBUG_RETURN(1);
+ block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *);
+
+ while (allocate_tail(bitmap, length, block))
+ if (move_to_next_bitmap(info, bitmap))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Find right bitmap and position for full blocks in one extent
+
+ SYNOPSIS
+ find_mid()
+ info Maria handler.
+ pages How many pages to allocate.
+ position Position in bitmap_blocks where to store the
+ information for the head block.
+ NOTES
+ This is used to allocate the main extent after the 'head' block
+ (Ie, the middle part of the head-middle-tail entry)
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool find_mid(MARIA_HA *info, ulong pages, uint position)
+{
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ MARIA_BITMAP_BLOCK *block;
+ block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *);
+
+ while (!allocate_full_pages(bitmap, pages, block, 1))
+ {
+ if (move_to_next_bitmap(info, bitmap))
+ return 1;
+ }
+ return 0;
+}
+
+
+/*
+ Find right bitmap and position for putting a blob
+
+ SYNOPSIS
+ find_blob()
+ info Maria handler.
+ length Length of the blob
+
+ NOTES
+ The extents are stored last in info->bitmap_blocks
+
+ IMPLEMENTATION
+ Allocate all full pages for the block + optionally one tail
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool find_blob(MARIA_HA *info, ulong length)
+{
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ uint full_page_size= FULL_PAGE_SIZE(info->s->block_size);
+ ulong pages;
+ uint rest_length, used;
+ uint first_block_pos;
+ MARIA_BITMAP_BLOCK *first_block= 0;
+ DBUG_ENTER("find_blob");
+ DBUG_PRINT("enter", ("length: %lu", length));
+ LINT_INIT(first_block_pos);
+
+ pages= length / full_page_size;
+ rest_length= (uint) (length - pages * full_page_size);
+ if (rest_length >= MAX_TAIL_SIZE(info->s->block_size))
+ {
+ pages++;
+ rest_length= 0;
+ }
+
+ if (pages)
+ {
+ MARIA_BITMAP_BLOCK *block;
+ if (allocate_dynamic(&info->bitmap_blocks,
+ info->bitmap_blocks.elements +
+ pages / BLOB_SEGMENT_MIN_SIZE + 2))
+ DBUG_RETURN(1);
+ first_block_pos= info->bitmap_blocks.elements;
+ block= dynamic_element(&info->bitmap_blocks, info->bitmap_blocks.elements,
+ MARIA_BITMAP_BLOCK*);
+ first_block= block;
+ do
+ {
+ used= allocate_full_pages(bitmap,
+ (pages >= 65535 ? 65535 : (uint) pages), block,
+ 0);
+ if (!used)
+ {
+ if (move_to_next_bitmap(info, bitmap))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ pages-= used;
+ info->bitmap_blocks.elements++;
+ block++;
+ }
+ } while (pages != 0);
+ }
+ if (rest_length && find_tail(info, rest_length,
+ info->bitmap_blocks.elements++))
+ DBUG_RETURN(1);
+ if (first_block)
+ first_block->sub_blocks= info->bitmap_blocks.elements - first_block_pos;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Find pages to put ALL blobs
+
+ SYNOPSIS
+ allocate_blobs()
+ info Maria handler
+ row Information of what is in the row (from calc_record_size())
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool allocate_blobs(MARIA_HA *info, MARIA_ROW *row)
+{
+ ulong *length, *end;
+ uint elements;
+ /*
+ Reserve size for:
+ head block
+ one extent
+ tail block
+ */
+ elements= info->bitmap_blocks.elements;
+ for (length= row->blob_lengths, end= length + info->s->base.blobs;
+ length < end; length++)
+ {
+ if (*length && find_blob(info, *length))
+ return 1;
+ }
+ row->extents_count= (info->bitmap_blocks.elements - elements);
+ return 0;
+}
+
+
+/*
+ Store in the bitmap the new size for a head page
+
+ SYNOPSIS
+ use_head()
+ info Maria handler
+ page Page number to update
+ (Note that caller guarantees this is in the active
+ bitmap)
+ size How much free space is left on the page
+ block_position In which info->bitmap_block we have the
+ information about the head block.
+
+ NOTES
+ This is used on update where we are updating an existing head page
+*/
+
+static void use_head(MARIA_HA *info, ulonglong page, uint size,
+ uint block_position)
+{
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ MARIA_BITMAP_BLOCK *block;
+ uchar *data;
+ uint offset, tmp, offset_page;
+
+ block= dynamic_element(&info->bitmap_blocks, block_position,
+ MARIA_BITMAP_BLOCK*);
+ block->page= page;
+ block->page_count= 1 + TAIL_BIT;
+ block->empty_space= size;
+ block->sub_blocks= 1;
+ block->used= BLOCKUSED_TAIL;
+
+ /*
+ Mark place used by reading/writing 2 bytes at a time to handle
+ bitmaps in overlapping bytes
+ */
+ offset_page= (uint) (page - bitmap->page - 1) * 3;
+ offset= offset_page & 7;
+ data= bitmap->map + offset_page / 8;
+ tmp= uint2korr(data);
+ block->org_bitmap_value= (tmp >> offset) & 7;
+ tmp= (tmp & ~(7 << offset)) | (FULL_HEAD_PAGE << offset);
+ int2store(data, tmp);
+ bitmap->changed= 1;
+ DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
+}
+
+
+/*
+ Find out where to split the row (ie, what goes in head, middle, tail etc)
+
+ SYNOPSIS
+ find_where_to_split_row()
+ share Maria share
+ row Information of what is in the row (from calc_record_size())
+ extents_length Number of bytes needed to store all extents
+ split_size Free size on the page (The head length must be less
+ than this)
+
+ RETURN
+ row_length for the head block.
+*/
+
+static uint find_where_to_split_row(MARIA_SHARE *share, MARIA_ROW *row,
+ uint extents_length, uint split_size)
+{
+ uint row_length= row->base_length;
+ uint *lengths, *lengths_end;
+
+ DBUG_ASSERT(row_length < split_size);
+ /*
+ Store first in all_field_lengths the different parts that are written
+ to the row. This needs to be in same order as in
+ ma_block_rec.c::write_block_record()
+ */
+ row->null_field_lengths[-3]= extents_length;
+ row->null_field_lengths[-2]= share->base.fixed_not_null_fields_length;
+ row->null_field_lengths[-1]= row->field_lengths_length;
+ for (lengths= row->null_field_lengths - EXTRA_LENGTH_FIELDS,
+ lengths_end= (lengths + share->base.pack_fields - share->base.blobs +
+ EXTRA_LENGTH_FIELDS); lengths < lengths_end; lengths++)
+ {
+ if (row_length + *lengths > split_size)
+ break;
+ row_length+= *lengths;
+ }
+ return row_length;
+}
+
+
+/*
+ Find where to write the middle parts of the row and the tail
+
+ SYNOPSIS
+ write_rest_of_head()
+ info Maria handler
+ position Position in bitmap_blocks. Is 0 for rows that needs
+ full blocks (ie, has a head, middle part and optional tail)
+ rest_length How much left of the head block to write.
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool write_rest_of_head(MARIA_HA *info, uint position,
+ ulong rest_length)
+{
+ MARIA_SHARE *share= info->s;
+ uint full_page_size= FULL_PAGE_SIZE(share->block_size);
+ MARIA_BITMAP_BLOCK *block;
+ DBUG_ENTER("write_rest_of_head");
+ DBUG_PRINT("enter", ("position: %u rest_length: %lu", position,
+ rest_length));
+
+ if (position == 0)
+ {
+ /* Write out full pages */
+ uint pages= rest_length / full_page_size;
+
+ rest_length%= full_page_size;
+ if (rest_length >= MAX_TAIL_SIZE(share->block_size))
+ {
+ /* Put tail on a full page */
+ pages++;
+ rest_length= 0;
+ }
+ if (find_mid(info, pages, 1))
+ DBUG_RETURN(1);
+ /*
+ Insert empty block after full pages, to allow write_block_record() to
+ split segment into used + free page
+ */
+ block= dynamic_element(&info->bitmap_blocks, 2, MARIA_BITMAP_BLOCK*);
+ block->page_count= 0;
+ block->used= 0;
+ }
+ if (rest_length)
+ {
+ if (find_tail(info, rest_length, ELEMENTS_RESERVED_FOR_MAIN_PART - 1))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ /* Empty tail block */
+ block= dynamic_element(&info->bitmap_blocks,
+ ELEMENTS_RESERVED_FOR_MAIN_PART - 1,
+ MARIA_BITMAP_BLOCK *);
+ block->page_count= 0;
+ block->used= 0;
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Find where to store one row
+
+ SYNPOSIS
+ _ma_bitmap_find_place()
+ info Maria handler
+ row Information about row to write
+ blocks Store data about allocated places here
+
+ RETURN
+ 0 ok
+ row->space_on_head_page contains minimum number of bytes we
+ expect to put on the head page.
+ 1 error
+ my_errno is set to error
+*/
+
+my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row,
+ MARIA_BITMAP_BLOCKS *blocks)
+{
+ MARIA_SHARE *share= info->s;
+ my_bool res= 1;
+ uint full_page_size, position, max_page_size;
+ uint head_length, row_length, rest_length, extents_length;
+ DBUG_ENTER("_ma_bitmap_find_place");
+
+ blocks->count= 0;
+ blocks->tail_page_skipped= blocks->page_skipped= 0;
+ row->extents_count= 0;
+
+ /*
+ Reserve place for the following blocks:
+ - Head block
+ - Full page block
+ - Marker block to allow write_block_record() to split full page blocks
+ into full and free part
+ - Tail block
+ */
+
+ info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART;
+ max_page_size= (share->block_size - PAGE_OVERHEAD_SIZE);
+
+ pthread_mutex_lock(&share->bitmap.bitmap_lock);
+
+ if (row->total_length <= max_page_size)
+ {
+ /* Row fits in one page */
+ position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1;
+ if (find_head(info, (uint) row->total_length, position))
+ goto abort;
+ row->space_on_head_page= row->total_length;
+ goto end;
+ }
+
+ /*
+ First allocate all blobs (so that we can find out the needed size for
+ the main block.
+ */
+ if (row->blob_length && allocate_blobs(info, row))
+ goto abort;
+
+ extents_length= row->extents_count * ROW_EXTENT_SIZE;
+ if ((head_length= (row->head_length + extents_length)) <= max_page_size)
+ {
+ /* Main row part fits into one page */
+ position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1;
+ if (find_head(info, head_length, position))
+ goto abort;
+ row->space_on_head_page= head_length;
+ goto end;
+ }
+
+ /* Allocate enough space */
+ head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE;
+
+ /* The first segment size is stored in 'row_length' */
+ row_length= find_where_to_split_row(share, row, extents_length,
+ max_page_size);
+
+ full_page_size= FULL_PAGE_SIZE(share->block_size);
+ position= 0;
+ if (head_length - row_length <= full_page_size)
+ position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */
+ if (find_head(info, row_length, position))
+ goto abort;
+ row->space_on_head_page= row_length;
+ rest_length= head_length - row_length;
+ if (write_rest_of_head(info, position, rest_length))
+ goto abort;
+
+end:
+ blocks->block= dynamic_element(&info->bitmap_blocks, position,
+ MARIA_BITMAP_BLOCK*);
+ blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position;
+ /* First block's page_count is for all blocks */
+ blocks->count= info->bitmap_blocks.elements - position;
+ res= 0;
+
+abort:
+ pthread_mutex_unlock(&share->bitmap.bitmap_lock);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Find where to put row on update (when head page is already defined)
+
+ SYNPOSIS
+ _ma_bitmap_find_new_place()
+ info Maria handler
+ row Information about row to write
+ page On which page original row was stored
+ free_size Free size on head page
+ blocks Store data about allocated places here
+
+ NOTES
+ This function is only called when the new row can't fit in the space of
+ the old row in the head page.
+
+ This is essently same as _ma_bitmap_find_place() except that
+ we don't call find_head() to search in bitmaps where to put the page.
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *row,
+ ulonglong page, uint free_size,
+ MARIA_BITMAP_BLOCKS *blocks)
+{
+ MARIA_SHARE *share= info->s;
+ my_bool res= 1;
+ uint position;
+ uint head_length, row_length, rest_length, extents_length;
+ ulonglong bitmap_page;
+ DBUG_ENTER("_ma_bitmap_find_new_place");
+
+ blocks->count= 0;
+ blocks->tail_page_skipped= blocks->page_skipped= 0;
+ row->extents_count= 0;
+ info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART;
+
+ pthread_mutex_lock(&share->bitmap.bitmap_lock);
+ bitmap_page= page / share->bitmap.pages_covered;
+ bitmap_page*= share->bitmap.pages_covered;
+
+ if (share->bitmap.page != bitmap_page &&
+ _ma_change_bitmap_page(info, &share->bitmap, bitmap_page))
+ goto abort;
+
+ /*
+ First allocate all blobs (so that we can find out the needed size for
+ the main block.
+ */
+ if (row->blob_length && allocate_blobs(info, row))
+ goto abort;
+
+ extents_length= row->extents_count * ROW_EXTENT_SIZE;
+ if ((head_length= (row->head_length + extents_length)) <= free_size)
+ {
+ /* Main row part fits into one page */
+ position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1;
+ use_head(info, page, head_length, position);
+ goto end;
+ }
+
+ /* Allocate enough space */
+ head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE;
+
+ /* The first segment size is stored in 'row_length' */
+ row_length= find_where_to_split_row(share, row, extents_length, free_size);
+
+ position= 0;
+ if (head_length - row_length < MAX_TAIL_SIZE(share->block_size))
+ position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */
+ use_head(info, page, row_length, position);
+ rest_length= head_length - row_length;
+
+ if (write_rest_of_head(info, position, rest_length))
+ goto abort;
+
+end:
+ blocks->block= dynamic_element(&info->bitmap_blocks, position,
+ MARIA_BITMAP_BLOCK*);
+ blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position;
+ /* First block's page_count is for all blocks */
+ blocks->count= info->bitmap_blocks.elements - position;
+ res= 0;
+
+abort:
+ pthread_mutex_unlock(&share->bitmap.bitmap_lock);
+ DBUG_RETURN(res);
+}
+
+
+/****************************************************************************
+ Clear and reset bits
+****************************************************************************/
+
+/*
+ Set fill pattern for a page
+
+ set_page_bits()
+ info Maria handler
+ bitmap Bitmap handler
+ page Adress to page
+ fill_pattern Pattern (not size) for page
+
+ NOTES
+ Page may not be part of active bitmap
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
+ ulonglong page, uint fill_pattern)
+{
+ ulonglong bitmap_page;
+ uint offset_page, offset, tmp, org_tmp;
+ uchar *data;
+ DBUG_ENTER("set_page_bits");
+
+ bitmap_page= page - page % bitmap->pages_covered;
+ if (bitmap_page != bitmap->page &&
+ _ma_change_bitmap_page(info, bitmap, bitmap_page))
+ DBUG_RETURN(1);
+
+ /* Find page number from start of bitmap */
+ offset_page= page - bitmap->page - 1;
+ /*
+ Mark place used by reading/writing 2 bytes at a time to handle
+ bitmaps in overlapping bytes
+ */
+ offset_page*= 3;
+ offset= offset_page & 7;
+ data= bitmap->map + offset_page / 8;
+ org_tmp= tmp= uint2korr(data);
+ tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset);
+ if (tmp == org_tmp)
+ DBUG_RETURN(0); /* No changes */
+ int2store(data, tmp);
+
+ bitmap->changed= 1;
+ DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
+ if (fill_pattern != 3 && fill_pattern != 7)
+ set_if_smaller(info->s->state.first_bitmap_with_space, bitmap_page);
+ /*
+ Note that if the condition above is false (page is full), and all pages of
+ this bitmap are now full, and that bitmap page was
+ first_bitmap_with_space, we don't modify first_bitmap_with_space, indeed
+ its value still tells us where to start our search for a bitmap with space
+ (which is for sure after this full one).
+ That does mean that first_bitmap_with_space is only a lower bound.
+ */
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Get bitmap pattern for a given page
+
+ SYNOPSIS
+ get_page_bits()
+ info Maria handler
+ bitmap Bitmap handler
+ page Page number
+
+ RETURN
+ 0-7 Bitmap pattern
+ ~0 Error (couldn't read page)
+*/
+
+static uint get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
+ ulonglong page)
+{
+ ulonglong bitmap_page;
+ uint offset_page, offset, tmp;
+ uchar *data;
+ DBUG_ENTER("get_page_bits");
+
+ bitmap_page= page - page % bitmap->pages_covered;
+ if (bitmap_page != bitmap->page &&
+ _ma_change_bitmap_page(info, bitmap, bitmap_page))
+ DBUG_RETURN(~ (uint) 0);
+
+ /* Find page number from start of bitmap */
+ offset_page= page - bitmap->page - 1;
+ /*
+ Mark place used by reading/writing 2 bytes at a time to handle
+ bitmaps in overlapping bytes
+ */
+ offset_page*= 3;
+ offset= offset_page & 7;
+ data= bitmap->map + offset_page / 8;
+ tmp= uint2korr(data);
+ DBUG_RETURN((tmp >> offset) & 7);
+}
+
+
+/*
+ Mark all pages in a region as free
+
+ SYNOPSIS
+ _ma_bitmap_reset_full_page_bits()
+ info Maria handler
+ bitmap Bitmap handler
+ page Start page
+ page_count Number of pages
+
+ NOTES
+ We assume that all pages in region is covered by same bitmap
+ One must have a lock on info->s->bitmap.bitmap_lock
+
+ RETURN
+ 0 ok
+ 1 Error (when reading bitmap)
+*/
+
+my_bool _ma_bitmap_reset_full_page_bits(MARIA_HA *info,
+ MARIA_FILE_BITMAP *bitmap,
+ ulonglong page, uint page_count)
+{
+ ulonglong bitmap_page;
+ uint offset, bit_start, bit_count, tmp;
+ uchar *data;
+ DBUG_ENTER("_ma_bitmap_reset_full_page_bits");
+ DBUG_PRINT("enter", ("page: %lu page_count: %u", (ulong) page, page_count));
+ safe_mutex_assert_owner(&info->s->bitmap.bitmap_lock);
+
+ bitmap_page= page - page % bitmap->pages_covered;
+ if (bitmap_page != bitmap->page &&
+ _ma_change_bitmap_page(info, bitmap, bitmap_page))
+ DBUG_RETURN(1);
+
+ /* Find page number from start of bitmap */
+ page= page - bitmap->page - 1;
+
+ /* Clear bits from 'page * 3' -> '(page + page_count) * 3' */
+ bit_start= page * 3;
+ bit_count= page_count * 3;
+
+ data= bitmap->map + bit_start / 8;
+ offset= bit_start & 7;
+
+ tmp= (255 << offset); /* Bits to keep */
+ if (bit_count + offset < 8)
+ {
+ /* Only clear bits between 'offset' and 'offset+bit_count-1' */
+ tmp^= (255 << (offset + bit_count));
+ }
+ *data&= ~tmp;
+
+ if ((int) (bit_count-= (8 - offset)) > 0)
+ {
+ uint fill;
+ data++;
+ /*
+ -1 is here to avoid one 'if' statement and to let the following code
+ handle the last byte
+ */
+ if ((fill= (bit_count - 1) / 8))
+ {
+ bzero(data, fill);
+ data+= fill;
+ }
+ bit_count-= fill * 8; /* Bits left to clear */
+ tmp= (1 << bit_count) - 1;
+ *data&= ~tmp;
+ }
+ set_if_smaller(info->s->state.first_bitmap_with_space, bitmap_page);
+ bitmap->changed= 1;
+ DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
+ DBUG_RETURN(0);
+}
+
+/*
+ Set all pages in a region as used
+
+ SYNOPSIS
+ _ma_bitmap_set_full_page_bits()
+ info Maria handler
+ bitmap Bitmap handler
+ page Start page
+ page_count Number of pages
+
+ NOTES
+ We assume that all pages in region is covered by same bitmap
+ One must have a lock on info->s->bitmap.bitmap_lock
+
+ RETURN
+ 0 ok
+ 1 Error (when reading bitmap)
+*/
+
+my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info,
+ MARIA_FILE_BITMAP *bitmap,
+ ulonglong page, uint page_count)
+{
+ ulonglong bitmap_page;
+ uint offset, bit_start, bit_count, tmp;
+ uchar *data;
+ DBUG_ENTER("_ma_bitmap_set_full_page_bits");
+ DBUG_PRINT("enter", ("page: %lu page_count: %u", (ulong) page, page_count));
+ safe_mutex_assert_owner(&info->s->bitmap.bitmap_lock);
+
+ bitmap_page= page - page % bitmap->pages_covered;
+ if (bitmap_page != bitmap->page &&
+ _ma_change_bitmap_page(info, bitmap, bitmap_page))
+ DBUG_RETURN(1);
+
+ /* Find page number from start of bitmap */
+ page= page - bitmap->page - 1;
+
+ /* Set bits from 'page * 3' -> '(page + page_count) * 3' */
+ bit_start= page * 3;
+ bit_count= page_count * 3;
+
+ data= bitmap->map + bit_start / 8;
+ offset= bit_start & 7;
+
+ tmp= (255 << offset); /* Bits to keep */
+ if (bit_count + offset < 8)
+ {
+ /* Only set bits between 'offset' and 'offset+bit_count-1' */
+ tmp^= (255 << (offset + bit_count));
+ }
+ *data|= tmp;
+
+ if ((int) (bit_count-= (8 - offset)) > 0)
+ {
+ uint fill;
+ data++;
+ /*
+ -1 is here to avoid one 'if' statement and to let the following code
+ handle the last byte
+ */
+ if ((fill= (bit_count - 1) / 8))
+ {
+ bfill(data, fill, 255);
+ data+= fill;
+ }
+ bit_count-= fill * 8; /* Bits left to set */
+ tmp= (1 << bit_count) - 1;
+ *data|= tmp;
+ }
+ bitmap->changed= 1;
+ DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
+ DBUG_RETURN(0);
+}
+
+
+/**
+ Make a transition of MARIA_FILE_BITMAP::non_flushable.
+ If the bitmap becomes flushable, which requires that REDO-UNDO has been
+ logged and all bitmap pages touched by the thread have a correct
+ allocation, it unpins all bitmap pages, and if _ma_bitmap_flush_all() is
+ waiting (in practice it is a checkpoint), it wakes it up.
+ If the bitmap becomes or stays unflushable, the function merely records it
+ unless a concurrent _ma_bitmap_flush_all() is happening, in which case the
+ function first waits for the flush to be done.
+
+ @param share Table's share
+ @param non_flushable_inc Increment of MARIA_FILE_BITMAP::non_flushable
+ (-1 or +1).
+*/
+
+void _ma_bitmap_flushable(MARIA_SHARE *share, int non_flushable_inc)
+{
+ MARIA_FILE_BITMAP *bitmap;
+
+ /*
+ Not transactional tables are never automaticly flushed and needs no
+ protection
+ */
+#ifndef EXTRA_DEBUG
+ if (!share->now_transactional)
+ return;
+#endif
+
+ bitmap= &share->bitmap;
+ if (non_flushable_inc == -1)
+ {
+ pthread_mutex_lock(&bitmap->bitmap_lock);
+ DBUG_ASSERT(bitmap->non_flushable > 0);
+ if (--bitmap->non_flushable == 0)
+ {
+ _ma_bitmap_unpin_all(share);
+ if (unlikely(bitmap->flush_all_requested))
+ {
+ DBUG_PRINT("info", ("bitmap flushable waking up flusher"));
+ pthread_cond_broadcast(&bitmap->bitmap_cond);
+ }
+ }
+ DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
+ pthread_mutex_unlock(&bitmap->bitmap_lock);
+ return;
+ }
+ DBUG_ASSERT(non_flushable_inc == 1);
+ /* It is a read without mutex because only an optimization */
+ if (unlikely(bitmap->flush_all_requested))
+ {
+ /*
+ _ma_bitmap_flush_all() is waiting for the bitmap to become
+ flushable. Not the moment to make the bitmap unflushable or more
+ unflushable; let's rather back off and wait. If we didn't do this, with
+ multiple writers, there may always be one thread causing the bitmap to
+ be unflushable and _ma_bitmap_flush_all() would wait for long.
+ There should not be a deadlock because if our thread increased
+ non_flushable (and thus _ma_bitmap_flush_all() is waiting for at least
+ our thread), it is not going to increase it more so is not going to come
+ here.
+ */
+ pthread_mutex_lock(&bitmap->bitmap_lock);
+ while (bitmap->flush_all_requested)
+ {
+ DBUG_PRINT("info", ("waiting for bitmap flusher"));
+ pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
+ }
+ pthread_mutex_unlock(&bitmap->bitmap_lock);
+ }
+ /*
+ Ok to set without mutex: we didn't touch the bitmap's content yet; when we
+ touch it we will take the mutex.
+ */
+ bitmap->non_flushable++;
+ DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
+}
+
+
+/*
+ Correct bitmap pages to reflect the true allocation
+
+ SYNOPSIS
+ _ma_bitmap_release_unused()
+ info Maria handle
+ blocks Bitmap blocks
+
+ IMPLEMENTATION
+ If block->used & BLOCKUSED_TAIL is set:
+ If block->used & BLOCKUSED_USED is set, then the bits for the
+ corresponding page is set according to block->empty_space
+ If block->used & BLOCKUSED_USED is not set, then the bits for
+ the corresponding page is set to org_bitmap_value;
+
+ If block->used & BLOCKUSED_TAIL is not set:
+ if block->used is not set, the bits for the corresponding page are
+ cleared
+
+ For the first block (head block) the logic is same as for a tail block
+
+ Note that we may have 'filler blocks' that are used to split a block
+ in half; These can be recognized by that they have page_count == 0.
+
+ RETURN
+ 0 ok
+ 1 error (Couldn't write or read bitmap page)
+*/
+
+my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks)
+{
+ MARIA_BITMAP_BLOCK *block= blocks->block, *end= block + blocks->count;
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ uint bits, current_bitmap_value;
+ DBUG_ENTER("_ma_bitmap_release_unused");
+
+ /*
+ We can skip FULL_HEAD_PAGE (4) as the page was marked as 'full'
+ when we allocated space in the page
+ */
+ current_bitmap_value= FULL_HEAD_PAGE;
+
+ pthread_mutex_lock(&bitmap->bitmap_lock);
+
+ /* First handle head block */
+ if (block->used & BLOCKUSED_USED)
+ {
+ DBUG_PRINT("info", ("head empty_space: %u", block->empty_space));
+ bits= _ma_free_size_to_head_pattern(bitmap, block->empty_space);
+ if (block->used & BLOCKUSED_USE_ORG_BITMAP)
+ current_bitmap_value= block->org_bitmap_value;
+ }
+ else
+ bits= block->org_bitmap_value;
+ if (bits != current_bitmap_value &&
+ set_page_bits(info, bitmap, block->page, bits))
+ goto err;
+
+
+ /* Handle all full pages and tail pages (for head page and blob) */
+ for (block++; block < end; block++)
+ {
+ uint page_count;
+ if (!block->page_count)
+ continue; /* Skip 'filler blocks' */
+
+ page_count= block->page_count;
+ if (block->used & BLOCKUSED_TAIL)
+ {
+ /* The bitmap page is only one page */
+ page_count= 1;
+ if (block->used & BLOCKUSED_USED)
+ {
+ DBUG_PRINT("info", ("tail empty_space: %u", block->empty_space));
+ bits= free_size_to_tail_pattern(bitmap, block->empty_space);
+ }
+ else
+ bits= block->org_bitmap_value;
+
+ /*
+ The page has all bits set; The following test is an optimization
+ to not set the bits to the same value as before.
+ */
+ if (bits != FULL_TAIL_PAGE &&
+ set_page_bits(info, bitmap, block->page, bits))
+ goto err;
+ }
+ if (!(block->used & BLOCKUSED_USED) &&
+ _ma_bitmap_reset_full_page_bits(info, bitmap,
+ block->page, page_count))
+ goto err;
+ }
+
+ if (--bitmap->non_flushable == 0)
+ {
+ _ma_bitmap_unpin_all(info->s);
+ if (unlikely(bitmap->flush_all_requested))
+ {
+ DBUG_PRINT("info", ("bitmap flushable waking up flusher"));
+ pthread_cond_broadcast(&bitmap->bitmap_cond);
+ }
+ }
+ DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
+
+ pthread_mutex_unlock(&bitmap->bitmap_lock);
+ DBUG_RETURN(0);
+
+err:
+ pthread_mutex_unlock(&bitmap->bitmap_lock);
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Free full pages from bitmap and pagecache
+
+ SYNOPSIS
+ _ma_bitmap_free_full_pages()
+ info Maria handle
+ extents Extents (as stored on disk)
+ count Number of extents
+
+ IMPLEMENTATION
+ Mark all full pages (not tails) from extents as free, both in bitmap
+ and page cache.
+
+ RETURN
+ 0 ok
+ 1 error (Couldn't write or read bitmap page)
+*/
+
+my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const uchar *extents,
+ uint count)
+{
+ DBUG_ENTER("_ma_bitmap_free_full_pages");
+
+ pthread_mutex_lock(&info->s->bitmap.bitmap_lock);
+ for (; count--; extents+= ROW_EXTENT_SIZE)
+ {
+ ulonglong page= uint5korr(extents);
+ uint page_count= uint2korr(extents + ROW_EXTENT_PAGE_SIZE);
+ if (!(page_count & TAIL_BIT))
+ {
+ if (page == 0 && page_count == 0)
+ continue; /* Not used extent */
+ if (pagecache_delete_pages(info->s->pagecache, &info->dfile, page,
+ page_count, PAGECACHE_LOCK_WRITE, 1) ||
+ _ma_bitmap_reset_full_page_bits(info, &info->s->bitmap, page,
+ page_count))
+ {
+ pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
+ DBUG_RETURN(1);
+ }
+ }
+ }
+ pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Mark in the bitmap how much free space there is on a page
+
+ SYNOPSIS
+ _ma_bitmap_set()
+ info Maria handler
+ page Adress to page
+ head 1 if page is a head page, 0 if tail page
+ empty_space How much empty space there is on page
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+my_bool _ma_bitmap_set(MARIA_HA *info, ulonglong page, my_bool head,
+ uint empty_space)
+{
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ uint bits;
+ my_bool res;
+ DBUG_ENTER("_ma_bitmap_set");
+
+ pthread_mutex_lock(&info->s->bitmap.bitmap_lock);
+ bits= (head ?
+ _ma_free_size_to_head_pattern(bitmap, empty_space) :
+ free_size_to_tail_pattern(bitmap, empty_space));
+ res= set_page_bits(info, bitmap, page, bits);
+ pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Check that bitmap pattern is correct for a page
+
+ NOTES
+ Used in maria_chk
+
+ SYNOPSIS
+ _ma_check_bitmap_data()
+ info Maria handler
+ page_type What kind of page this is
+ page Adress to page
+ empty_space Empty space on page
+ bitmap_pattern Store here the pattern that was in the bitmap for the
+ page. This is always updated.
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+my_bool _ma_check_bitmap_data(MARIA_HA *info,
+ enum en_page_type page_type, ulonglong page,
+ uint empty_space, uint *bitmap_pattern)
+{
+ uint bits;
+ switch (page_type) {
+ case UNALLOCATED_PAGE:
+ case MAX_PAGE_TYPE:
+ bits= 0;
+ break;
+ case HEAD_PAGE:
+ bits= _ma_free_size_to_head_pattern(&info->s->bitmap, empty_space);
+ break;
+ case TAIL_PAGE:
+ bits= free_size_to_tail_pattern(&info->s->bitmap, empty_space);
+ break;
+ case BLOB_PAGE:
+ bits= FULL_TAIL_PAGE;
+ break;
+ default:
+ bits= 0; /* to satisfy compiler */
+ DBUG_ASSERT(0);
+ }
+ return (*bitmap_pattern= get_page_bits(info, &info->s->bitmap, page)) !=
+ bits;
+}
+
+
+/*
+ Check if the page type matches the one that we have in the bitmap
+
+ SYNOPSIS
+ _ma_check_if_right_bitmap_type()
+ info Maria handler
+ page_type What kind of page this is
+ page Adress to page
+ bitmap_pattern Store here the pattern that was in the bitmap for the
+ page. This is always updated.
+
+ NOTES
+ Used in maria_chk
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info,
+ enum en_page_type page_type,
+ ulonglong page,
+ uint *bitmap_pattern)
+{
+ if ((*bitmap_pattern= get_page_bits(info, &info->s->bitmap, page)) > 7)
+ return 1; /* Couldn't read page */
+ switch (page_type) {
+ case HEAD_PAGE:
+ return *bitmap_pattern < 1 || *bitmap_pattern > 4;
+ case TAIL_PAGE:
+ return *bitmap_pattern < 5;
+ case BLOB_PAGE:
+ return *bitmap_pattern != 7;
+ default:
+ break;
+ }
+ DBUG_ASSERT(0);
+ return 1;
+}
+
+
+/**
+ @brief create the first bitmap page of a freshly created data file
+
+ @param share table's share
+
+ @return Operation status
+ @retval 0 OK
+ @retval !=0 Error
+*/
+
+int _ma_bitmap_create_first(MARIA_SHARE *share)
+{
+ uint block_size= share->bitmap.block_size;
+ File file= share->bitmap.file.file;
+ char marker[CRC_SIZE];
+
+ /*
+ Next write operation of the page will write correct CRC
+ if it is needed
+ */
+ int4store(marker, MARIA_NO_CRC_BITMAP_PAGE);
+
+ if (my_chsize(file, block_size - sizeof(marker),
+ 0, MYF(MY_WME)) ||
+ my_pwrite(file, marker, sizeof(marker),
+ block_size - sizeof(marker),
+ MYF(MY_NABP | MY_WME)))
+ return 1;
+ share->state.state.data_file_length= block_size;
+ _ma_bitmap_delete_all(share);
+ return 0;
+}
diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c
new file mode 100644
index 00000000000..c31238368de
--- /dev/null
+++ b/storage/maria/ma_blockrec.c
@@ -0,0 +1,6100 @@
+/* Copyright (C) 2007 Michael Widenius
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Storage of records in block
+
+ Some clarifications about the abbrev used:
+
+ NULL fields -> Fields that may have contain a NULL value.
+ Not null fields -> Fields that may not contain a NULL value.
+ Critical fields -> Fields that can't be null and can't be dropped without
+ causing a table reorganization.
+
+
+ Maria will have a LSN at start of each page (excluding the bitmap pages)
+
+ The different page types that are in a data file are:
+
+ Bitmap pages Map of free pages in the next extent (8192 page size
+ gives us 256M of mapped pages / bitmap)
+ Head page Start of rows are stored on this page.
+ A rowid always points to a head page
+ Blob page This page is totally filled with data from one blob or by
+ a set of long VARCHAR/CHAR fields
+ Tail page This contains the last part from different rows, blobs
+ or varchar fields.
+
+ The data file starts with a bitmap page, followed by as many data
+ pages as the bitmap can cover. After this there is a new bitmap page
+ and more data pages etc.
+
+ For information about the bitmap page, see ma_bitmap.c
+
+ Structure of data and tail page:
+
+ The page has a row directory at end of page to allow us to do deletes
+ without having to reorganize the page. It also allows us to later store
+ some more bytes after each row to allow them to grow without having to move
+ around other rows.
+
+ Page header:
+
+ LSN 7 bytes Log position for last page change
+ PAGE_TYPE 1 uchar 1 for head / 2 for tail / 3 for blob
+ DIR_COUNT 1 uchar Number of row/tail entries on page
+ FREE_DIR_LINK 1 uchar Pointer to first free director entry or 255 if no
+ empty space 2 bytes Empty space on page
+
+ The most significant bit in PAGE_TYPE is set to 1 if the data on the page
+ can be compacted to get more space. (PAGE_CAN_BE_COMPACTED)
+
+ Row data
+
+ Row directory of NO entries, that consist of the following for each row
+ (in reverse order; i.e., first record is stored last):
+
+ Position 2 bytes Position of row on page
+ Length 2 bytes Length of entry
+
+ For Position and Length, the 1 most significant bit of the position and
+ the 1 most significant bit of the length could be used for some states of
+ the row (in other words, we should try to keep these reserved)
+
+ Position is 0 if the entry is not used. In this case length[0] points
+ to a previous free entry (255 if no previous entry) and length[1]
+ to the next free entry (or 255 if last free entry). This works because
+ the directory entry 255 can never be marked free (if the first directory
+ entry is freed, the directory is shrinked).
+
+ checksum 4 bytes Reserved for full page read testing and live backup.
+
+ ----------------
+
+ Structure of blob pages:
+
+ LSN 7 bytes Log position for last page change
+ PAGE_TYPE 1 uchar 3
+
+ data
+
+ -----------------
+
+ Row data structure:
+
+ Flag 1 uchar Marker of which header field exists
+ TRANSID 6 bytes TRANSID of changing transaction
+ (optional, added on insert and first
+ update/delete)
+ VER_PTR 7 bytes Pointer to older version in log
+ (undo record)
+ (optional, added after first
+ update/delete)
+ DELETE_TRANSID 6 bytes (optional). TRANSID of original row.
+ Added on delete.
+ Nulls_extended 1 uchar To allow us to add new DEFAULT NULL
+ fields (optional, added after first
+ change of row after alter table)
+ Number of ROW_EXTENT's 1-3 uchar Length encoded, optional
+ This is the number of extents the
+ row is split into
+ First row_extent 7 uchar Pointer to first row extent (optional)
+
+ Total length of length array 1-3 uchar Only used if we have
+ char/varchar/blob fields.
+ Row checksum 1 uchar Only if table created with checksums
+ Null_bits .. One bit for each NULL field (a field that may
+ have the value NULL)
+ Empty_bits .. One bit for each field that may be 'empty'.
+ (Both for null and not null fields).
+ This bit is 1 if the value for the field is
+ 0 or empty string.
+
+ field_offsets 2 byte/offset
+ For each 32'th field, there is one offset
+ that points to where the field information
+ starts in the block. This is to provide
+ fast access to later field in the row
+ when we only need to return a small
+ set of fields.
+ TODO: Implement this.
+
+ Things marked above as 'optional' will only be present if the
+ corresponding bit is set in 'Flag' field. Flag gives us a way to
+ get more space on a page when doing page compaction as we don't need
+ to store TRANSID that have committed before the smallest running
+ transaction we have in memory.
+
+ Data in the following order:
+ (Field order is precalculated when table is created)
+
+ Critical fixed length, not null, fields. (Note, these can't be dropped)
+ Fixed length, null fields
+
+ Length array, 1-4 uchar per field for all CHAR/VARCHAR/BLOB fields.
+ Number of bytes used in length array per entry is depending on max length
+ for field.
+
+ ROW_EXTENT's
+ CHAR data (space stripped)
+ VARCHAR data
+ BLOB data
+
+ Fields marked in null_bits or empty_bits are not stored in data part or
+ length array.
+
+ If row doesn't fit into the given block, then the first EXTENT will be
+ stored last on the row. This is done so that we don't break any field
+ data in the middle.
+
+ We first try to store the full row into one block. If that's not possible
+ we move out each big blob into their own extents. If this is not enough we
+ move out a concatenation of all varchars to their own extent.
+
+ Each blob and the concatenated char/varchar fields are stored the following
+ way:
+ - Store the parts in as many full-contiguous pages as possible.
+ - The last part, that doesn't fill a full page, is stored in tail page.
+
+ When doing an insert of a new row, we don't have to have
+ VER_PTR in the row. This will make rows that are not changed stored
+ efficiently. On update and delete we would add TRANSID (if it was an old
+ committed row) and VER_PTR to
+ the row. On row page compaction we can easily detect rows where
+ TRANSID was committed before the longest running transaction
+ started and we can then delete TRANSID and VER_PTR from the row to
+ gain more space.
+
+ If a row is deleted in Maria, we change TRANSID to the deleting
+ transaction's id, change VER_PTR to point to the undo record for the delete,
+ and add DELETE_TRANSID (the id of the transaction which last
+ inserted/updated the row before its deletion). DELETE_TRANSID allows an old
+ transaction to avoid reading the log to know if it can see the last version
+ before delete (in other words it reduces the probability of having to follow
+ VER_PTR). TODO: depending on a compilation option, evaluate the performance
+ impact of not storing DELETE_TRANSID (which would make the row smaller).
+
+ Description of the different parts:
+
+ Flag is coded as:
+
+ Description bit
+ TRANS_ID_exists 0
+ VER_PTR_exists 1
+ Row is deleted 2 (Means that DELETE_TRANSID exists)
+ Nulls_extended_exists 3
+ Row is split 7 This means that 'Number_of_row_extents' exists
+
+ Nulls_extended is the number of new DEFAULT NULL fields in the row
+ compared to the number of DEFAULT NULL fields when the first version
+ of the table was created. If Nulls_extended doesn't exist in the row,
+ we know it's 0 as this must be one of the original rows from when the
+ table was created first time. This coding allows us to add 255*8 =
+ 2048 new fields without requiring a full alter table.
+
+ Empty_bits is used to allow us to store 0, 0.0, empty string, empty
+ varstring and empty blob efficiently. (This is very good for data
+ warehousing where NULL's are often regarded as evil). Having this
+ bitmap also allows us to drop information of a field during a future
+ delete if field was deleted with ALTER TABLE DROP COLUMN. To be able
+ to handle DROP COLUMN, we must store in the index header the fields
+ that has been dropped. When unpacking a row we will ignore dropped
+ fields. When storing a row, we will mark a dropped field either with a
+ null in the null bit map or in the empty_bits and not store any data
+ for it.
+ TODO: Add code for handling dropped fields.
+
+
+ A ROW EXTENT is range of pages. One ROW_EXTENT is coded as:
+
+ START_PAGE 5 bytes
+ PAGE_COUNT 2 bytes. High bit is used to indicate tail page/
+ end of blob
+ With 8K pages, we can cover 256M in one extent. This coding gives us a
+ maximum file size of 2^40*8192 = 8192 tera
+
+ As an example of ROW_EXTENT handling, assume a row with one integer
+ field (value 5), two big VARCHAR fields (size 250 and 8192*3), and 2
+ big BLOB fields that we have updated.
+
+ The record format for storing this into an empty file would be:
+
+ Page 1:
+
+ 00 00 00 00 00 00 00 LSN
+ 01 Only one row in page
+ FF No free dir entry
+ xx xx Empty space on page
+
+ 10 Flag: row split, VER_PTR exists
+ 01 00 00 00 00 00 TRANSID 1
+ 00 00 00 00 00 01 00 VER_PTR to first block in LOG file 1
+ 5 Number of row extents
+ 02 00 00 00 00 03 00 VARCHAR's are stored in full pages 2,3,4
+ 0 No null fields
+ 0 No empty fields
+ 05 00 00 00 00 00 80 Tail page for VARCHAR, rowid 0
+ 06 00 00 00 00 80 00 First blob, stored at page 6-133
+ 05 00 00 00 00 01 80 Tail of first blob (896 bytes) at page 5
+ 86 00 00 00 00 80 00 Second blob, stored at page 134-262
+ 05 00 00 00 00 02 80 Tail of second blob (896 bytes) at page 5
+ 05 00 5 integer
+ FA Length of first varchar field (size 250)
+ 00 60 Length of second varchar field (size 8192*3)
+ 00 60 10 First medium BLOB, 1M
+ 01 00 10 00 Second BLOB, 1M
+ xx xx xx xx xx xx Varchars are stored here until end of page
+
+ ..... until end of page
+
+ 09 00 F4 1F Start position 9, length 8180
+ xx xx xx xx Checksum
+*/
+
+#include "maria_def.h"
+#include "ma_blockrec.h"
+#include "trnman.h"
+#include "ma_key_recover.h"
+#include <lf.h>
+
+/*
+ Struct for having a cursor over a set of extent.
+ This is used to loop over all extents for a row when reading
+ the row data. It's also used to store the tail positions for
+ a read row to be used by a later update/delete command.
+*/
+
+typedef struct st_maria_extent_cursor
+{
+ /*
+ Pointer to packed uchar array of extents for the row.
+ Format is described above in the header
+ */
+ uchar *extent;
+ /* Where data starts on page; Only for debugging */
+ uchar *data_start;
+ /* Position to all tails in the row. Updated when reading a row */
+ MARIA_RECORD_POS *tail_positions;
+ /* Current page */
+ ulonglong page;
+ /* How many pages in the page region */
+ uint page_count;
+ /* What kind of lock to use for tail pages */
+ enum pagecache_page_lock lock_for_tail_pages;
+ /* Total number of extents (i.e., entries in the 'extent' slot) */
+ uint extent_count;
+ /* <> 0 if current extent is a tail page; Set while using cursor */
+ uint tail;
+ /* Position for tail on tail page */
+ uint tail_row_nr;
+ /*
+ == 1 if we are working on the first extent (i.e., the one that is stored in
+ the row header, not an extent that is stored as part of the row data).
+ */
+ my_bool first_extent;
+} MARIA_EXTENT_CURSOR;
+
+
+/**
+ @brief Structure for passing down info to write_hook_for_clr_end().
+ This hooks needs to know the variation of the live checksum caused by the
+ current operation to update state.checksum under log's mutex,
+ needs to know the transaction's previous undo_lsn to set
+ trn->undo_lsn under log mutex, and needs to know the type of UNDO being
+ undone now to modify state.records under log mutex.
+*/
+
+/** S:share,D:checksum_delta,E:expression,P:pointer_into_record,L:length */
+#define store_checksum_in_rec(S,D,E,P,L) do \
+ { \
+ D= 0; \
+ if ((S)->calc_checksum != NULL) \
+ { \
+ D= (E); \
+ ha_checksum_store(P, D); \
+ L+= HA_CHECKSUM_STORE_SIZE; \
+ } \
+ } while (0)
+
+static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails);
+static my_bool delete_head_or_tail(MARIA_HA *info,
+ ulonglong page, uint record_number,
+ my_bool head, my_bool from_update);
+#ifndef DBUG_OFF
+static void _ma_print_directory(uchar *buff, uint block_size);
+#endif
+static void compact_page(uchar *buff, uint block_size, uint rownr,
+ my_bool extend_block);
+static uchar *store_page_range(uchar *to, MARIA_BITMAP_BLOCK *block,
+ uint block_size, ulong length,
+ uint *tot_ranges);
+static size_t fill_insert_undo_parts(MARIA_HA *info, const uchar *record,
+ LEX_STRING *log_parts,
+ uint *log_parts_count);
+static size_t fill_update_undo_parts(MARIA_HA *info, const uchar *oldrec,
+ const uchar *newrec,
+ LEX_STRING *log_parts,
+ uint *log_parts_count);
+
+/****************************************************************************
+ Initialization
+****************************************************************************/
+
+/*
+ Initialize data needed for block structures
+*/
+
+
+/* Size of the different header elements for a row */
+
+static uchar header_sizes[]=
+{
+ TRANSID_SIZE,
+ VERPTR_SIZE,
+ TRANSID_SIZE, /* Delete transid */
+ 1 /* Null extends */
+};
+
+/*
+ Calculate array of all used headers
+
+ Used to speed up:
+
+ size= 1;
+ if (flag & 1)
+ size+= TRANSID_SIZE;
+ if (flag & 2)
+ size+= VERPTR_SIZE;
+ if (flag & 4)
+ size+= TRANSID_SIZE
+ if (flag & 8)
+ size+= 1;
+
+ NOTES
+ This is called only once at startup of Maria
+*/
+
+static uchar total_header_size[1 << array_elements(header_sizes)];
+#define PRECALC_HEADER_BITMASK (array_elements(total_header_size) -1)
+
+void _ma_init_block_record_data(void)
+{
+ uint i;
+ bzero(total_header_size, sizeof(total_header_size));
+ total_header_size[0]= FLAG_SIZE; /* Flag uchar */
+ for (i= 1; i < array_elements(total_header_size); i++)
+ {
+ uint size= FLAG_SIZE, j, bit;
+ for (j= 0; (bit= (1 << j)) <= i; j++)
+ {
+ if (i & bit)
+ size+= header_sizes[j];
+ }
+ total_header_size[i]= size;
+ }
+}
+
+
+my_bool _ma_once_init_block_record(MARIA_SHARE *share, File data_file)
+{
+
+ share->base.max_data_file_length=
+ (((ulonglong) 1 << ((share->base.rec_reflength-1)*8))-1) *
+ share->block_size;
+#if SIZEOF_OFF_T == 4
+ set_if_smaller(share->base.max_data_file_length, INT_MAX32);
+#endif
+ return _ma_bitmap_init(share, data_file);
+}
+
+
+my_bool _ma_once_end_block_record(MARIA_SHARE *share)
+{
+ int res= _ma_bitmap_end(share);
+ if (share->bitmap.file.file >= 0)
+ {
+ if (flush_pagecache_blocks(share->pagecache, &share->bitmap.file,
+ share->temporary ? FLUSH_IGNORE_CHANGED :
+ FLUSH_RELEASE))
+ res= 1;
+ /*
+ File must be synced as it is going out of the maria_open_list and so
+ becoming unknown to Checkpoint.
+ */
+ if (share->now_transactional &&
+ my_sync(share->bitmap.file.file, MYF(MY_WME)))
+ res= 1;
+ if (my_close(share->bitmap.file.file, MYF(MY_WME)))
+ res= 1;
+ /*
+ Trivial assignment to guard against multiple invocations
+ (May happen if file are closed but we want to keep the maria object
+ around a bit longer)
+ */
+ share->bitmap.file.file= -1;
+ }
+ if (share->id != 0)
+ translog_deassign_id_from_share(share);
+ return res;
+}
+
+
+/* Init info->cur_row structure */
+
+my_bool _ma_init_block_record(MARIA_HA *info)
+{
+ MARIA_ROW *row= &info->cur_row, *new_row= &info->new_row;
+ uint default_extents;
+ DBUG_ENTER("_ma_init_block_record");
+
+ if (!my_multi_malloc(MY_WME,
+ &row->empty_bits, info->s->base.pack_bytes,
+ &row->field_lengths,
+ info->s->base.max_field_lengths + 2,
+ &row->blob_lengths, sizeof(ulong) * info->s->base.blobs,
+ &row->null_field_lengths, (sizeof(uint) *
+ (info->s->base.fields -
+ info->s->base.blobs +
+ EXTRA_LENGTH_FIELDS)),
+ &row->tail_positions, (sizeof(MARIA_RECORD_POS) *
+ (info->s->base.blobs + 2)),
+ &new_row->empty_bits, info->s->base.pack_bytes,
+ &new_row->field_lengths,
+ info->s->base.max_field_lengths + 2,
+ &new_row->blob_lengths,
+ sizeof(ulong) * info->s->base.blobs,
+ &new_row->null_field_lengths, (sizeof(uint) *
+ (info->s->base.fields -
+ info->s->base.blobs +
+ EXTRA_LENGTH_FIELDS)),
+ &info->log_row_parts,
+ sizeof(*info->log_row_parts) *
+ (TRANSLOG_INTERNAL_PARTS + 2 +
+ info->s->base.fields + 3),
+ &info->update_field_data,
+ (info->s->base.fields * 4 +
+ info->s->base.max_field_lengths + 1 + 4),
+ NullS, 0))
+ DBUG_RETURN(1);
+ /* Skip over bytes used to store length of field length for logging */
+ row->field_lengths+= 2;
+ new_row->field_lengths+= 2;
+
+ /* Reserve some initial space to avoid mallocs during execution */
+ default_extents= (ELEMENTS_RESERVED_FOR_MAIN_PART + 1 +
+ (AVERAGE_BLOB_SIZE /
+ FULL_PAGE_SIZE(info->s->block_size) /
+ BLOB_SEGMENT_MIN_SIZE));
+
+ if (my_init_dynamic_array(&info->bitmap_blocks,
+ sizeof(MARIA_BITMAP_BLOCK), default_extents,
+ 64))
+ goto err;
+ info->cur_row.extents_buffer_length= default_extents * ROW_EXTENT_SIZE;
+ if (!(info->cur_row.extents= my_malloc(info->cur_row.extents_buffer_length,
+ MYF(MY_WME))))
+ goto err;
+
+ row->base_length= new_row->base_length= info->s->base_length;
+
+ /*
+ We need to reserve 'EXTRA_LENGTH_FIELDS' number of parts in
+ null_field_lengths to allow splitting of rows in 'find_where_to_split_row'
+ */
+ row->null_field_lengths+= EXTRA_LENGTH_FIELDS;
+ new_row->null_field_lengths+= EXTRA_LENGTH_FIELDS;
+
+ DBUG_RETURN(0);
+
+err:
+ _ma_end_block_record(info);
+ DBUG_RETURN(1);
+}
+
+
+void _ma_end_block_record(MARIA_HA *info)
+{
+ DBUG_ENTER("_ma_end_block_record");
+ my_free((uchar*) info->cur_row.empty_bits, MYF(MY_ALLOW_ZERO_PTR));
+ delete_dynamic(&info->bitmap_blocks);
+ my_free((uchar*) info->cur_row.extents, MYF(MY_ALLOW_ZERO_PTR));
+ /*
+ The data file is closed, when needed, in ma_once_end_block_record().
+ The following protects us from doing an extra, not allowed, close
+ in maria_close()
+ */
+ info->dfile.file= -1;
+ DBUG_VOID_RETURN;
+}
+
+
+/****************************************************************************
+ Helper functions
+****************************************************************************/
+
+/*
+ Return the next unused postion on the page after a directory entry.
+
+ SYNOPSIS
+ start_of_next_entry()
+ dir Directory entry to be used. This can not be the
+ the last entry on the page!
+
+ RETURN
+ # Position in page where next entry starts.
+ Everything between the '*dir' and this are free to be used.
+*/
+
+static inline uint start_of_next_entry(uchar *dir)
+{
+ uchar *prev;
+ /*
+ Find previous used entry. (There is always a previous entry as
+ the directory never starts with a deleted entry)
+ */
+ for (prev= dir - DIR_ENTRY_SIZE ;
+ prev[0] == 0 && prev[1] == 0 ;
+ prev-= DIR_ENTRY_SIZE)
+ {}
+ return (uint) uint2korr(prev);
+}
+
+
+/*
+ Return the offset where the previous entry ends (before on page)
+
+ SYNOPSIS
+ end_of_previous_entry()
+ dir Address for current directory entry
+ end Address to last directory entry
+
+ RETURN
+ # Position where previous entry ends (smallest address on page)
+ Everything between # and current entry are free to be used.
+*/
+
+
+static inline uint end_of_previous_entry(uchar *dir, uchar *end)
+{
+ uchar *pos;
+ for (pos= dir + DIR_ENTRY_SIZE ; pos < end ; pos+= DIR_ENTRY_SIZE)
+ {
+ uint offset;
+ if ((offset= uint2korr(pos)))
+ return offset + uint2korr(pos+2);
+ }
+ return PAGE_HEADER_SIZE;
+}
+
+
+/**
+ @brief Extend a record area to fit a given size block
+
+ @fn extend_area_on_page()
+ @param buff Page buffer
+ @param dir Pointer to dir entry in buffer
+ @param rownr Row number we working on
+ @param block_size Block size of buffer
+ @param request_length How much data we want to put at [dir]
+ @param empty_space Total empty space in buffer
+
+ IMPLEMENTATION
+ The logic is as follows (same as in _ma_update_block_record())
+ - If new data fits in old block, use old block.
+ - Extend block with empty space before block. If enough, use it.
+ - Extend block with empty space after block. If enough, use it.
+ - Use compact_page() to get all empty space at dir.
+
+ RETURN
+ @retval 0 ok
+ @retval ret_offset Pointer to store offset to found area
+ @retval ret_length Pointer to store length of found area
+ @retval [dir] rec_offset is store here too
+
+ @retval 1 error (wrong info in block)
+*/
+
+static my_bool extend_area_on_page(uchar *buff, uchar *dir,
+ uint rownr, uint block_size,
+ uint request_length,
+ uint *empty_space, uint *ret_offset,
+ uint *ret_length)
+{
+ uint rec_offset, length;
+ uint max_entry= (uint) buff[DIR_COUNT_OFFSET];
+ DBUG_ENTER("extend_area_on_page");
+
+ rec_offset= uint2korr(dir);
+ if (rec_offset)
+ {
+ /* Extending old row; Mark current space as 'free' */
+ length= uint2korr(dir + 2);
+ DBUG_PRINT("info", ("rec_offset: %u length: %u request_length: %u "
+ "empty_space: %u",
+ rec_offset, length, request_length, *empty_space));
+
+ *empty_space+= length;
+ }
+ else
+ {
+ /* Reusing free directory entry; Free it from the directory list */
+ if (dir[2] == END_OF_DIR_FREE_LIST)
+ buff[DIR_FREE_OFFSET]= dir[3];
+ else
+ {
+ uchar *prev_dir= dir_entry_pos(buff, block_size, (uint) dir[2]);
+ DBUG_ASSERT(uint2korr(prev_dir) == 0 && prev_dir[3] == (uchar) rownr);
+ prev_dir[3]= dir[3];
+ }
+ if (dir[3] != END_OF_DIR_FREE_LIST)
+ {
+ uchar *next_dir= dir_entry_pos(buff, block_size, (uint) dir[3]);
+ DBUG_ASSERT(uint2korr(next_dir) == 0 && next_dir[2] == (uchar) rownr);
+ next_dir[2]= dir[2];
+ }
+ rec_offset= start_of_next_entry(dir);
+ length= 0;
+ }
+ if (length < request_length)
+ {
+ uint old_rec_offset;
+ /*
+ New data did not fit in old position.
+ Find first possible position where to put new data.
+ */
+ old_rec_offset= rec_offset;
+ rec_offset= end_of_previous_entry(dir, buff + block_size -
+ PAGE_SUFFIX_SIZE);
+ length+= (uint) (old_rec_offset - rec_offset);
+ /*
+ old_rec_offset is 0 if we are doing an insert into a not allocated block.
+ This can only happen during REDO of INSERT
+ */
+ if (!old_rec_offset || length < request_length)
+ {
+ /*
+ Did not fit in current block + empty space. Extend with
+ empty space after block.
+ */
+ if (rownr == max_entry - 1)
+ {
+ /* Last entry; Everything is free between this and directory */
+ length= ((block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE * max_entry) -
+ rec_offset);
+ }
+ else
+ length= start_of_next_entry(dir) - rec_offset;
+ DBUG_ASSERT((int) length > 0);
+ if (length < request_length)
+ {
+ /* Not enough continues space, compact page to get more */
+ int2store(dir, rec_offset);
+ compact_page(buff, block_size, rownr, 1);
+ rec_offset= uint2korr(dir);
+ length= uint2korr(dir+2);
+ if (length < request_length)
+ {
+ DBUG_PRINT("error", ("Not enough space: "
+ "length: %u request_length: %u",
+ length, request_length));
+ my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */
+ DBUG_RETURN(1); /* Error in block */
+ }
+ *empty_space= length; /* All space is here */
+ }
+ }
+ }
+ int2store(dir, rec_offset);
+ *ret_offset= rec_offset;
+ *ret_length= length;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Check that a region is all zero
+
+ SYNOPSIS
+ check_if_zero()
+ pos Start of memory to check
+ length length of memory region
+
+ NOTES
+ Used mainly to detect rows with wrong extent information
+*/
+
+my_bool _ma_check_if_zero(uchar *pos, uint length)
+{
+ uchar *end;
+ for (end= pos+ length; pos != end ; pos++)
+ if (pos[0] != 0)
+ return 1;
+ return 0;
+}
+
+
+/*
+ @brief Copy not changed fields from 'from' to 'to'
+
+ @notes
+ Assumption is that most fields are not changed!
+ (Which is why we don't test if all bits are set for some bytes in bitmap)
+*/
+
+void copy_not_changed_fields(MARIA_HA *info, MY_BITMAP *changed_fields,
+ uchar *to, uchar *from)
+{
+ MARIA_COLUMNDEF *column, *end_column;
+ uchar *bitmap= (uchar*) changed_fields->bitmap;
+ MARIA_SHARE *share= info->s;
+ uint bit= 1;
+
+ for (column= share->columndef, end_column= column+ share->base.fields;
+ column < end_column; column++)
+ {
+ if (!(*bitmap & bit))
+ {
+ uint field_length= column->length;
+ if (column->type == FIELD_VARCHAR)
+ {
+ if (column->fill_length == 1)
+ field_length= (uint) from[column->offset] + 1;
+ else
+ field_length= uint2korr(from + column->offset) + 2;
+ }
+ memcpy(to + column->offset, from + column->offset, field_length);
+ }
+ if ((bit= (bit << 1)) == 256)
+ {
+ bitmap++;
+ bit= 1;
+ }
+ }
+}
+
+#ifdef NOT_YET_NEEDED
+/* Calculate empty space on a page */
+
+static uint empty_space_on_page(uchar *buff, uint block_size)
+{
+ enum en_page_type;
+ page_type= (enum en_page_type) (buff[PAGE_TYPE_OFFSET] &
+ ~(uchar) PAGE_CAN_BE_COMPACTED);
+ if (page_type == UNALLOCATED_PAGE)
+ return block_size;
+ if ((uint) page_type <= TAIL_PAGE)
+ return uint2korr(buff+EMPTY_SPACE_OFFSET);
+ return 0; /* Blob page */
+}
+#endif
+
+/*
+ Find free position in directory
+
+ SYNOPSIS
+ find_free_position()
+ buff Page
+ block_size Size of page
+ res_rownr Store index to free position here
+ res_length Store length of found segment here
+ empty_space Store length of empty space on disk here. This is
+ all empty space, including the found block.
+
+ NOTES
+ If there is a free directory entry (entry with position == 0),
+ then use it and change it to be the size of the empty block
+ after the previous entry. This guarantees that all row entries
+ are stored on disk in inverse directory order, which makes life easier for
+ 'compact_page()' and to know if there is free space after any block.
+
+ If there is no free entry (entry with position == 0), then we create
+ a new one. If there is not space for the directory entry (because
+ the last block overlapps with the directory), we compact the page.
+
+ We will update the offset and the length of the found dir entry to
+ match the position and empty space found.
+
+ buff[EMPTY_SPACE_OFFSET] is NOT updated but left up to the caller
+
+ See start of file for description of how free directory entires are linked
+
+ RETURN
+ 0 Error (directory full or last block goes over directory)
+ # Pointer to directory entry on page
+*/
+
+static uchar *find_free_position(uchar *buff, uint block_size, uint *res_rownr,
+ uint *res_length, uint *empty_space)
+{
+ uint max_entry, free_entry;
+ uint length, first_pos;
+ uchar *dir, *first_dir;
+ DBUG_ENTER("find_free_position");
+
+ max_entry= (uint) buff[DIR_COUNT_OFFSET];
+ free_entry= (uint) buff[DIR_FREE_OFFSET];
+ *empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
+
+ DBUG_PRINT("info", ("max_entry: %u free_entry: %u", max_entry, free_entry));
+
+ first_dir= dir_entry_pos(buff, block_size, max_entry - 1);
+
+ /* Search after first free position */
+ if (free_entry != END_OF_DIR_FREE_LIST)
+ {
+ if (free_entry >= max_entry)
+ DBUG_RETURN(0);
+ dir= dir_entry_pos(buff, block_size, free_entry);
+ DBUG_ASSERT(uint2korr(dir) == 0 && dir[2] == END_OF_DIR_FREE_LIST);
+ /* Relink free list */
+ if ((buff[DIR_FREE_OFFSET]= dir[3]) != END_OF_DIR_FREE_LIST)
+ {
+ uchar *next_entry= dir_entry_pos(buff, block_size, (uint) dir[3]);
+ DBUG_ASSERT((uint) next_entry[2] == free_entry &&
+ uint2korr(next_entry) == 0);
+ next_entry[2]= END_OF_DIR_FREE_LIST; /* Backlink */
+ }
+
+ first_pos= end_of_previous_entry(dir, buff + block_size -
+ PAGE_SUFFIX_SIZE);
+ length= start_of_next_entry(dir) - first_pos;
+ int2store(dir, first_pos); /* Update dir entry */
+ int2store(dir + 2, length);
+ *res_rownr= free_entry;
+ *res_length= length;
+ DBUG_RETURN(dir);
+ }
+ /* No free places in dir; create a new one */
+
+ /* Check if there is place for the directory entry */
+ if (max_entry == MAX_ROWS_PER_PAGE)
+ DBUG_RETURN(0);
+ dir= first_dir - DIR_ENTRY_SIZE;
+ /* Last used place on page */
+ first_pos= uint2korr(first_dir) + uint2korr(first_dir + 2);
+ /* Check if there is place for the directory entry on the page */
+ if ((uint) (dir - buff) < first_pos)
+ {
+ /* Create place for directory */
+ compact_page(buff, block_size, max_entry-1, 0);
+ first_pos= (uint2korr(first_dir) + uint2korr(first_dir + 2));
+ *empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
+ DBUG_ASSERT(*empty_space > DIR_ENTRY_SIZE);
+ }
+ buff[DIR_COUNT_OFFSET]= (uchar) max_entry+1;
+ length= (uint) (dir - buff - first_pos);
+ DBUG_ASSERT(length <= *empty_space - DIR_ENTRY_SIZE);
+ int2store(dir, first_pos);
+ int2store(dir+2, length); /* Max length of region */
+ *res_rownr= max_entry;
+ *res_length= length;
+
+ /* Reduce directory entry size from free space size */
+ (*empty_space)-= DIR_ENTRY_SIZE;
+ DBUG_RETURN(dir);
+}
+
+
+/****************************************************************************
+ Updating records
+****************************************************************************/
+
+/*
+ Calculate length of all the different field parts
+
+ SYNOPSIS
+ calc_record_size()
+ info Maria handler
+ record Row to store
+ row Store statistics about row here
+
+ NOTES
+ The statistics is used to find out how much space a row will need
+ and also where we can split a row when we need to split it into several
+ extents.
+*/
+
+static void calc_record_size(MARIA_HA *info, const uchar *record,
+ MARIA_ROW *row)
+{
+ MARIA_SHARE *share= info->s;
+ uchar *field_length_data;
+ MARIA_COLUMNDEF *column, *end_column;
+ uint *null_field_lengths= row->null_field_lengths;
+ ulong *blob_lengths= row->blob_lengths;
+ DBUG_ENTER("calc_record_size");
+
+ row->normal_length= row->char_length= row->varchar_length=
+ row->blob_length= row->extents_count= 0;
+
+ /* Create empty bitmap and calculate length of each varlength/char field */
+ bzero(row->empty_bits, share->base.pack_bytes);
+ field_length_data= row->field_lengths;
+ for (column= share->columndef + share->base.fixed_not_null_fields,
+ end_column= share->columndef + share->base.fields;
+ column < end_column; column++, null_field_lengths++)
+ {
+ if ((record[column->null_pos] & column->null_bit))
+ {
+ if (column->type != FIELD_BLOB)
+ *null_field_lengths= 0;
+ else
+ *blob_lengths++= 0;
+ continue;
+ }
+ switch (column->type) {
+ case FIELD_CHECK:
+ case FIELD_NORMAL: /* Fixed length field */
+ case FIELD_ZERO:
+ DBUG_ASSERT(column->empty_bit == 0);
+ /* fall through */
+ case FIELD_SKIP_PRESPACE: /* Not packed */
+ row->normal_length+= column->length;
+ *null_field_lengths= column->length;
+ break;
+ case FIELD_SKIP_ZERO: /* Fixed length field */
+ if (memcmp(record+ column->offset, maria_zero_string,
+ column->length) == 0)
+ {
+ row->empty_bits[column->empty_pos] |= column->empty_bit;
+ *null_field_lengths= 0;
+ }
+ else
+ {
+ row->normal_length+= column->length;
+ *null_field_lengths= column->length;
+ }
+ break;
+ case FIELD_SKIP_ENDSPACE: /* CHAR */
+ {
+ const char *pos, *end;
+ for (pos= record + column->offset, end= pos + column->length;
+ end > pos && end[-1] == ' '; end--)
+ ;
+ if (pos == end) /* If empty string */
+ {
+ row->empty_bits[column->empty_pos]|= column->empty_bit;
+ *null_field_lengths= 0;
+ }
+ else
+ {
+ uint length= (end - pos);
+ if (column->length <= 255)
+ *field_length_data++= (uchar) length;
+ else
+ {
+ int2store(field_length_data, length);
+ field_length_data+= 2;
+ }
+ row->char_length+= length;
+ *null_field_lengths= length;
+ }
+ break;
+ }
+ case FIELD_VARCHAR:
+ {
+ uint length, field_length_data_length;
+ const uchar *field_pos= record + column->offset;
+
+ /* 256 is correct as this includes the length uchar */
+ field_length_data[0]= field_pos[0];
+ if (column->length <= 256)
+ {
+ length= (uint) (uchar) *field_pos;
+ field_length_data_length= 1;
+ }
+ else
+ {
+ length= uint2korr(field_pos);
+ field_length_data[1]= field_pos[1];
+ field_length_data_length= 2;
+ }
+ *null_field_lengths= length;
+ if (!length)
+ {
+ row->empty_bits[column->empty_pos]|= column->empty_bit;
+ break;
+ }
+ row->varchar_length+= length;
+ *null_field_lengths= length;
+ field_length_data+= field_length_data_length;
+ break;
+ }
+ case FIELD_BLOB:
+ {
+ const uchar *field_pos= record + column->offset;
+ uint size_length= column->length - portable_sizeof_char_ptr;
+ ulong blob_length= _ma_calc_blob_length(size_length, field_pos);
+
+ *blob_lengths++= blob_length;
+ if (!blob_length)
+ row->empty_bits[column->empty_pos]|= column->empty_bit;
+ else
+ {
+ row->blob_length+= blob_length;
+ memcpy(field_length_data, field_pos, size_length);
+ field_length_data+= size_length;
+ }
+ break;
+ }
+ default:
+ DBUG_ASSERT(0);
+ }
+ }
+ row->field_lengths_length= (uint) (field_length_data - row->field_lengths);
+ row->head_length= (row->base_length +
+ share->base.fixed_not_null_fields_length +
+ row->field_lengths_length +
+ size_to_store_key_length(row->field_lengths_length) +
+ row->normal_length +
+ row->char_length + row->varchar_length);
+ row->total_length= (row->head_length + row->blob_length);
+ if (row->total_length < share->base.min_block_length)
+ row->total_length= share->base.min_block_length;
+ DBUG_PRINT("exit", ("head_length: %lu total_length: %lu",
+ (ulong) row->head_length, (ulong) row->total_length));
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Compact page by removing all space between rows
+
+ IMPLEMENTATION
+ Move up all rows to start of page.
+ Move blocks that are directly after each other with one memmove.
+
+ TODO LATER
+ Remove TRANSID from rows that are visible to all transactions
+
+ SYNOPSIS
+ compact_page()
+ buff Page to compact
+ block_size Size of page
+ rownr Put empty data after this row
+ extend_block If 1, extend the block at 'rownr' to cover the
+ whole block.
+*/
+
+
+static void compact_page(uchar *buff, uint block_size, uint rownr,
+ my_bool extend_block)
+{
+ uint max_entry= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET];
+ uint page_pos, next_free_pos, start_of_found_block, diff, end_of_found_block;
+ uchar *dir, *end;
+ DBUG_ENTER("compact_page");
+ DBUG_PRINT("enter", ("rownr: %u", rownr));
+ DBUG_ASSERT(max_entry > 0 &&
+ max_entry < (block_size - PAGE_HEADER_SIZE -
+ PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE);
+
+ /* Move all entries before and including rownr up to start of page */
+ dir= dir_entry_pos(buff, block_size, rownr);
+ end= dir_entry_pos(buff, block_size, 0);
+ page_pos= next_free_pos= start_of_found_block= PAGE_HEADER_SIZE;
+ diff= 0;
+ for (; dir <= end ; end-= DIR_ENTRY_SIZE)
+ {
+ uint offset= uint2korr(end);
+
+ if (offset)
+ {
+ uint row_length= uint2korr(end + 2);
+ DBUG_ASSERT(offset >= page_pos);
+ DBUG_ASSERT(buff + offset + row_length <= dir);
+
+ if (offset != next_free_pos)
+ {
+ uint length= (next_free_pos - start_of_found_block);
+ /*
+ There was empty space before this and prev block
+ Check if we have to move previous block up to page start
+ */
+ if (page_pos != start_of_found_block)
+ {
+ /* move up previous block */
+ memmove(buff + page_pos, buff + start_of_found_block, length);
+ }
+ page_pos+= length;
+ /* next continuous block starts here */
+ start_of_found_block= offset;
+ diff= offset - page_pos;
+ }
+ int2store(end, offset - diff); /* correct current pos */
+ next_free_pos= offset + row_length;
+ }
+ }
+ if (page_pos != start_of_found_block)
+ {
+ uint length= (next_free_pos - start_of_found_block);
+ memmove(buff + page_pos, buff + start_of_found_block, length);
+ }
+ start_of_found_block= uint2korr(dir);
+
+ if (rownr != max_entry - 1)
+ {
+ /* Move all entries after rownr to end of page */
+ uint rownr_length;
+ next_free_pos= end_of_found_block= page_pos=
+ block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE;
+ diff= 0;
+ /* End points to entry before 'rownr' */
+ for (dir= buff + end_of_found_block ; dir <= end ; dir+= DIR_ENTRY_SIZE)
+ {
+ uint offset= uint2korr(dir);
+ uint row_length= uint2korr(dir + 2);
+ uint row_end= offset + row_length;
+ if (!offset)
+ continue;
+ DBUG_ASSERT(offset >= start_of_found_block && row_end <= next_free_pos);
+
+ if (row_end != next_free_pos)
+ {
+ uint length= (end_of_found_block - next_free_pos);
+ if (page_pos != end_of_found_block)
+ {
+ /* move next block down */
+ memmove(buff + page_pos - length, buff + next_free_pos, length);
+ }
+ page_pos-= length;
+ /* next continuous block starts here */
+ end_of_found_block= row_end;
+ diff= page_pos - row_end;
+ }
+ int2store(dir, offset + diff); /* correct current pos */
+ next_free_pos= offset;
+ }
+ if (page_pos != end_of_found_block)
+ {
+ uint length= (end_of_found_block - next_free_pos);
+ memmove(buff + page_pos - length, buff + next_free_pos, length);
+ next_free_pos= page_pos- length;
+ }
+ /* Extend rownr block to cover hole */
+ rownr_length= next_free_pos - start_of_found_block;
+ int2store(dir+2, rownr_length);
+ }
+ else
+ {
+ if (extend_block)
+ {
+ /* Extend last block cover whole page */
+ uint length= ((uint) (dir - buff) - start_of_found_block);
+ int2store(dir+2, length);
+ }
+ else
+ {
+ /*
+ TODO:
+ Update (buff + EMPTY_SPACE_OFFSET) if we remove transid from rows
+ */
+ }
+ buff[PAGE_TYPE_OFFSET]&= ~(uchar) PAGE_CAN_BE_COMPACTED;
+ }
+ DBUG_EXECUTE("directory", _ma_print_directory(buff, block_size););
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Create an empty tail or head page
+
+ SYNOPSIS
+ make_empty_page()
+ buff Page buffer
+ block_size Block size
+ page_type HEAD_PAGE or TAIL_PAGE
+
+ NOTES
+ EMPTY_SPACE is not updated
+*/
+
+static void make_empty_page(MARIA_HA *info, uchar *buff, uint page_type)
+{
+ uint block_size= info->s->block_size;
+ DBUG_ENTER("make_empty_page");
+
+ bzero(buff, PAGE_HEADER_SIZE);
+
+#if !defined(DONT_ZERO_PAGE_BLOCKS) || defined(HAVE_purify)
+ /*
+ We zero the rest of the block to avoid getting old memory information
+ to disk and to allow the file to be compressed better if archived.
+ The code does not assume the block is zeroed.
+ */
+ if (page_type != BLOB_PAGE)
+ bzero(buff+ PAGE_HEADER_SIZE, block_size - PAGE_HEADER_SIZE);
+#endif
+ buff[PAGE_TYPE_OFFSET]= (uchar) page_type;
+ buff[DIR_COUNT_OFFSET]= 1;
+ buff[DIR_FREE_OFFSET]= END_OF_DIR_FREE_LIST;
+ int2store(buff + block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE,
+ PAGE_HEADER_SIZE);
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Read or initialize new head or tail page
+
+ SYNOPSIS
+ get_head_or_tail_page()
+ info Maria handler
+ block Block to read
+ buff Suggest this buffer to key cache
+ length Minimum space needed
+ page_type HEAD_PAGE || TAIL_PAGE
+ res Store result position here
+
+ NOTES
+ We don't decremented buff[EMPTY_SPACE_OFFSET] with the allocated data
+ as we don't know how much data the caller will actually use.
+
+ RETURN
+ 0 ok All slots in 'res' are updated
+ 1 error my_errno is set
+*/
+
+struct st_row_pos_info
+{
+ uchar *buff; /* page buffer */
+ uchar *data; /* Place for data */
+ uchar *dir; /* Directory */
+ uint length; /* Length for data */
+ uint rownr; /* Offset in directory */
+ uint empty_space; /* Space left on page */
+};
+
+
+static my_bool get_head_or_tail_page(MARIA_HA *info,
+ MARIA_BITMAP_BLOCK *block,
+ uchar *buff, uint length, uint page_type,
+ enum pagecache_page_lock lock,
+ struct st_row_pos_info *res)
+{
+ uint block_size;
+ MARIA_PINNED_PAGE page_link;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("get_head_or_tail_page");
+ DBUG_PRINT("enter", ("length: %u", length));
+
+ block_size= share->block_size;
+ if (block->org_bitmap_value == 0) /* Empty block */
+ {
+ /* New page */
+ make_empty_page(info, buff, page_type);
+ res->buff= buff;
+ res->empty_space= res->length= (block_size - PAGE_OVERHEAD_SIZE);
+ res->data= (buff + PAGE_HEADER_SIZE);
+ res->dir= res->data + res->length;
+ res->rownr= 0;
+ DBUG_ASSERT(length <= res->length);
+ }
+ else
+ {
+ uchar *dir;
+ /* Read old page */
+ DBUG_ASSERT(share->pagecache->block_size == block_size);
+ if (!(res->buff= pagecache_read(share->pagecache,
+ &info->dfile,
+ (my_off_t) block->page, 0,
+ buff, share->page_type,
+ lock, &page_link.link)))
+ DBUG_RETURN(1);
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+
+ DBUG_ASSERT((res->buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == page_type);
+ if (!(dir= find_free_position(res->buff, block_size, &res->rownr,
+ &res->length, &res->empty_space)))
+ goto crashed;
+
+ if (res->length < length)
+ {
+ if (res->empty_space + res->length >= length)
+ {
+ compact_page(res->buff, block_size, res->rownr, 1);
+ /* All empty space are now after current position */
+ dir= dir_entry_pos(res->buff, block_size, res->rownr);
+ res->length= res->empty_space= uint2korr(dir+2);
+ }
+ if (res->length < length)
+ {
+ DBUG_PRINT("error", ("length: %u res->length: %u empty_space: %u",
+ length, res->length, res->empty_space));
+ goto crashed; /* Wrong bitmap information */
+ }
+ }
+ res->dir= dir;
+ res->data= res->buff + uint2korr(dir);
+ }
+ DBUG_RETURN(0);
+
+crashed:
+ my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Write tail for head data or blob
+
+ SYNOPSIS
+ write_tail()
+ info Maria handler
+ block Block to tail page
+ row_part Data to write to page
+ length Length of data
+
+ NOTES
+ block->page_count is updated to the directory offset for the tail
+ so that we can store the position in the row extent information
+
+ RETURN
+ 0 ok
+ block->page_count is set to point (dir entry + TAIL_BIT)
+
+ 1 error; In this case my_errno is set to the error
+*/
+
+static my_bool write_tail(MARIA_HA *info,
+ MARIA_BITMAP_BLOCK *block,
+ uchar *row_part, uint length)
+{
+ MARIA_SHARE *share= info->s;
+ MARIA_PINNED_PAGE page_link;
+ uint block_size= share->block_size, empty_space;
+ struct st_row_pos_info row_pos;
+ my_off_t position;
+ my_bool res, block_is_read;
+ DBUG_ENTER("write_tail");
+ DBUG_PRINT("enter", ("page: %lu length: %u",
+ (ulong) block->page, length));
+
+ info->keyread_buff_used= 1;
+
+ /* page will be pinned & locked by get_head_or_tail_page */
+ if (get_head_or_tail_page(info, block, info->keyread_buff, length,
+ TAIL_PAGE, PAGECACHE_LOCK_WRITE,
+ &row_pos))
+ DBUG_RETURN(1);
+ block_is_read= block->org_bitmap_value != 0;
+
+ memcpy(row_pos.data, row_part, length);
+
+ if (share->now_transactional)
+ {
+ /* Log changes in tail block */
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE];
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ LSN lsn;
+
+ /* Log REDO changes of tail page */
+ page_store(log_data + FILEID_STORE_SIZE, block->page);
+ dirpos_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE,
+ row_pos.rownr);
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) row_pos.data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= length;
+ if (translog_write_record(&lsn, LOGREC_REDO_INSERT_ROW_TAIL,
+ info->trn, info, sizeof(log_data) + length,
+ TRANSLOG_INTERNAL_PARTS + 2, log_array,
+ log_data, NULL))
+ DBUG_RETURN(1);
+ }
+
+ /*
+ Don't allocate smaller block than MIN_TAIL_SIZE (we want to give rows
+ some place to grow in the future)
+ */
+ if (length < MIN_TAIL_SIZE)
+ length= MIN_TAIL_SIZE;
+ int2store(row_pos.dir + 2, length);
+ empty_space= row_pos.empty_space - length;
+ int2store(row_pos.buff + EMPTY_SPACE_OFFSET, empty_space);
+ block->page_count= row_pos.rownr + TAIL_BIT;
+ /*
+ If there is less directory entries free than number of possible tails
+ we can write for a row, we mark the page full to ensure that we don't
+ during _ma_bitmap_find_place() allocate more entries on the tail page
+ than it can hold
+ */
+ block->empty_space= ((uint) (row_pos.buff)[DIR_COUNT_OFFSET] <=
+ MAX_ROWS_PER_PAGE - 1 - share->base.blobs ?
+ empty_space : 0);
+ block->used= BLOCKUSED_USED | BLOCKUSED_TAIL;
+
+ /* Increase data file size, if extended */
+ position= (my_off_t) block->page * block_size;
+ if (info->state->data_file_length <= position)
+ {
+ /*
+ We are modifying a state member before writing the UNDO; this is a WAL
+ violation. But for data_file_length this is ok, as long as we change
+ data_file_length after writing any log record (FILE_ID/REDO/UNDO) (see
+ collect_tables()).
+ */
+ info->state->data_file_length= position + block_size;
+ }
+
+ DBUG_ASSERT(share->pagecache->block_size == block_size);
+ if (!(res= pagecache_write(share->pagecache,
+ &info->dfile, block->page, 0,
+ row_pos.buff,share->page_type,
+ block_is_read ? PAGECACHE_LOCK_WRITE_TO_READ :
+ PAGECACHE_LOCK_READ,
+ block_is_read ? PAGECACHE_PIN_LEFT_PINNED :
+ PAGECACHE_PIN,
+ PAGECACHE_WRITE_DELAY, &page_link.link,
+ LSN_IMPOSSIBLE)))
+ {
+ page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK;
+ page_link.changed= 1;
+ if (block_is_read)
+ {
+ /* Change the lock used when we read the page */
+ set_dynamic(&info->pinned_pages, (void*) &page_link,
+ info->pinned_pages.elements-1);
+ }
+ else
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+ }
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Write full pages
+
+ SYNOPSIS
+ write_full_pages()
+ info Maria handler
+ lsn LSN for the undo record
+ block Where to write data
+ data Data to write
+ length Length of data
+
+ NOTES
+ Logging of the changes to the full pages are done in the caller
+ write_block_record().
+
+ RETURN
+ 0 ok
+ 1 error on write
+*/
+
+static my_bool write_full_pages(MARIA_HA *info,
+ LSN lsn,
+ MARIA_BITMAP_BLOCK *block,
+ uchar *data, ulong length)
+{
+ my_off_t page;
+ MARIA_SHARE *share= info->s;
+ uint block_size= share->block_size;
+ uint data_size= FULL_PAGE_SIZE(block_size);
+ uchar *buff= info->keyread_buff;
+ uint page_count;
+ my_off_t position;
+ DBUG_ENTER("write_full_pages");
+ DBUG_PRINT("enter", ("length: %lu page: %lu page_count: %lu",
+ (ulong) length, (ulong) block->page,
+ (ulong) block->page_count));
+ DBUG_ASSERT((block->page_count & TAIL_BIT) == 0);
+
+ info->keyread_buff_used= 1;
+ page= block->page;
+ page_count= block->page_count;
+
+ position= (my_off_t) (page + page_count) * block_size;
+ if (info->state->data_file_length < position)
+ info->state->data_file_length= position;
+
+ /* Increase data file size, if extended */
+
+ for (; length; data+= data_size)
+ {
+ uint copy_length;
+ if (!page_count--)
+ {
+ block++;
+ page= block->page;
+ page_count= block->page_count - 1;
+ DBUG_PRINT("info", ("page: %lu page_count: %lu",
+ (ulong) block->page, (ulong) block->page_count));
+
+ position= (page + page_count + 1) * block_size;
+ if (info->state->data_file_length < position)
+ info->state->data_file_length= position;
+ }
+ lsn_store(buff, lsn);
+ buff[PAGE_TYPE_OFFSET]= (uchar) BLOB_PAGE;
+ copy_length= min(data_size, length);
+ memcpy(buff + LSN_SIZE + PAGE_TYPE_SIZE, data, copy_length);
+ length-= copy_length;
+
+#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
+ if (copy_length != data_size)
+ bzero(buff + block_size - PAGE_SUFFIX_SIZE - (data_size - copy_length),
+ (data_size - copy_length) + PAGE_SUFFIX_SIZE);
+#endif
+
+ DBUG_ASSERT(share->pagecache->block_size == block_size);
+ if (pagecache_write(share->pagecache,
+ &info->dfile, page, 0,
+ buff, share->page_type,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0, info->trn->rec_lsn))
+ DBUG_RETURN(1);
+ page++;
+ DBUG_ASSERT(block->used & BLOCKUSED_USED);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Store ranges of full pages in compact format for logging
+
+ SYNOPSIS
+ store_page_range()
+ to Store data here
+ block Where pages are to be written
+ block_size block size
+ length Length of data to be written
+ Normally this is full pages, except for the last
+ tail block that may only partly fit the last page.
+ tot_ranges Add here the number of ranges used
+
+ NOTES
+ The format of one entry is:
+
+ Ranges SUB_RANGE_SIZE
+ Empty bytes at end of last byte BLOCK_FILLER_SIZE
+ For each range
+ Page number PAGE_STORE_SIZE
+ Number of pages PAGERANGE_STORE_SIZE
+
+ RETURN
+ # end position for 'to'
+*/
+
+static uchar *store_page_range(uchar *to, MARIA_BITMAP_BLOCK *block,
+ uint block_size, ulong length,
+ uint *tot_ranges)
+{
+ uint data_size= FULL_PAGE_SIZE(block_size);
+ ulong pages_left= (length + data_size -1) / data_size;
+ uint page_count, ranges, empty_space;
+ uchar *to_start;
+ DBUG_ENTER("store_page_range");
+
+ to_start= to;
+ to+= SUB_RANGE_SIZE;
+
+ /* Store number of unused bytes at last page */
+ empty_space= pages_left * data_size - length;
+ int2store(to, empty_space);
+ to+= BLOCK_FILLER_SIZE;
+
+ ranges= 0;
+ do
+ {
+ ulonglong page;
+ page= block->page;
+ page_count= block->page_count;
+ block++;
+ if (page_count > pages_left)
+ page_count= pages_left;
+
+ page_store(to, page);
+ to+= PAGE_STORE_SIZE;
+ pagerange_store(to, page_count);
+ to+= PAGERANGE_STORE_SIZE;
+ ranges++;
+ } while ((pages_left-= page_count));
+ /* Store number of ranges for this block */
+ int2store(to_start, ranges);
+ (*tot_ranges)+= ranges;
+
+ DBUG_RETURN(to);
+}
+
+
+/*
+ Store packed extent data
+
+ SYNOPSIS
+ store_extent_info()
+ to Store first packed data here
+ row_extents_second_part Store rest here
+ first_block First block to store
+ count Number of blocks
+
+ NOTES
+ We don't have to store the position for the head block
+*/
+
+static void store_extent_info(uchar *to,
+ uchar *row_extents_second_part,
+ MARIA_BITMAP_BLOCK *first_block,
+ uint count)
+{
+ MARIA_BITMAP_BLOCK *block, *end_block;
+ uint copy_length;
+ my_bool first_found= 0;
+
+ for (block= first_block, end_block= first_block+count ;
+ block < end_block; block++)
+ {
+ /* The following is only false for marker blocks */
+ if (likely(block->used & BLOCKUSED_USED))
+ {
+ DBUG_ASSERT(block->page_count != 0);
+ page_store(to, block->page);
+ pagerange_store(to + PAGE_STORE_SIZE, block->page_count);
+ to+= ROW_EXTENT_SIZE;
+ if (!first_found)
+ {
+ first_found= 1;
+ to= row_extents_second_part;
+ }
+ }
+ }
+ copy_length= (count - 1) * ROW_EXTENT_SIZE;
+ /*
+ In some unlikely cases we have allocated to many blocks. Clear this
+ data.
+ */
+ bzero(to, (size_t) (row_extents_second_part + copy_length - to));
+}
+
+
+/*
+ Free regions of pages with logging
+
+ NOTES
+ We are removing filler events and tail page events from
+ row->extents to get smaller log.
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool free_full_pages(MARIA_HA *info, MARIA_ROW *row)
+{
+ uchar log_data[FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE];
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ LSN lsn;
+ size_t extents_length;
+ uchar *extents= row->extents;
+ DBUG_ENTER("free_full_pages");
+
+ if (info->s->now_transactional)
+ {
+ /* Compact events by removing filler and tail events */
+ uchar *start= extents;
+ uchar *new_block= 0;
+ uchar *end;
+
+ for (end= extents + row->extents_count * ROW_EXTENT_SIZE ;
+ extents < end ;
+ extents+= ROW_EXTENT_SIZE)
+ {
+ uint page_count= uint2korr(extents + ROW_EXTENT_PAGE_SIZE);
+ if (! (page_count & TAIL_BIT) && page_count != 0)
+ {
+ /* Found correct extent */
+ if (!new_block)
+ new_block= extents; /* First extent in range */
+ continue;
+ }
+ /* Found extent to remove, move everything found up */
+ if (new_block)
+ {
+ if (new_block == start)
+ start= extents;
+ else
+ {
+ size_t length= (size_t) (extents - new_block);
+ memmove(start, new_block, length);
+ start+= length;
+ }
+ }
+ new_block= 0;
+ }
+ if (new_block)
+ {
+ if (new_block == start)
+ start= extents; /* Nothing to delete */
+ else
+ {
+ /* Move rest down */
+ size_t length= (size_t) (extents - new_block);
+ memmove(start, new_block, length);
+ start+= length;
+ }
+ }
+
+ if (!unlikely(extents_length= (start - row->extents)))
+ {
+ /*
+ No ranges. This happens in the rear case when we have a allocated
+ place for a blob on a tail page but it did fit into the main page.
+ */
+ DBUG_RETURN(0);
+ }
+ row->extents_count= extents_length / ROW_EXTENT_SIZE;
+
+ pagerange_store(log_data + FILEID_STORE_SIZE,
+ row->extents_count);
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= row->extents;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= extents_length;
+ if (translog_write_record(&lsn, LOGREC_REDO_FREE_BLOCKS, info->trn,
+ info, sizeof(log_data) + extents_length,
+ TRANSLOG_INTERNAL_PARTS + 2, log_array,
+ log_data, NULL))
+ DBUG_RETURN(1);
+ }
+
+ DBUG_RETURN(_ma_bitmap_free_full_pages(info, row->extents,
+ row->extents_count));
+}
+
+
+/*
+ Free one page range
+
+ NOTES
+ This is very similar to free_full_pages()
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool free_full_page_range(MARIA_HA *info, ulonglong page, uint count)
+{
+ my_bool res= 0;
+ DBUG_ENTER("free_full_page_range");
+
+ if (pagecache_delete_pages(info->s->pagecache, &info->dfile,
+ page, count, PAGECACHE_LOCK_WRITE, 0))
+ res= 1;
+
+ if (info->s->now_transactional)
+ {
+ LSN lsn;
+ /** @todo unify log_data's shape with delete_head_or_tail() */
+ uchar log_data[FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE +
+ ROW_EXTENT_SIZE];
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
+ DBUG_ASSERT(info->trn->rec_lsn);
+ pagerange_store(log_data + FILEID_STORE_SIZE, 1);
+ page_store(log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE,
+ page);
+ int2store(log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE +
+ PAGE_STORE_SIZE, count);
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+
+ if (translog_write_record(&lsn, LOGREC_REDO_FREE_BLOCKS,
+ info->trn, info, sizeof(log_data),
+ TRANSLOG_INTERNAL_PARTS + 1, log_array,
+ log_data, NULL))
+ res= 1;
+ }
+ pthread_mutex_lock(&info->s->bitmap.bitmap_lock);
+ if (_ma_bitmap_reset_full_page_bits(info, &info->s->bitmap, page, count))
+ res= 1;
+ pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
+ DBUG_RETURN(res);
+}
+
+
+/**
+ @brief Write a record to a (set of) pages
+
+ @fn write_block_record()
+ @param info Maria handler
+ @param old_record Original record in case of update; NULL in case of
+ insert
+ @param record Record we should write
+ @param row Statistics about record (calculated by
+ calc_record_size())
+ @param map_blocks On which pages the record should be stored
+ @param row_pos Position on head page where to put head part of
+ record
+ @param undo_lsn <> LSN_ERROR if we are executing an UNDO
+ @param old_record_checksum Checksum of old_record: ignored if table does
+ not have live checksum; otherwise if
+ old_record==NULL it must be 0.
+
+ @note
+ On return all pinned pages are released.
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool write_block_record(MARIA_HA *info,
+ const uchar *old_record, const uchar *record,
+ MARIA_ROW *row,
+ MARIA_BITMAP_BLOCKS *bitmap_blocks,
+ my_bool head_block_is_read,
+ struct st_row_pos_info *row_pos,
+ LSN undo_lsn,
+ ha_checksum old_record_checksum)
+{
+ uchar *data, *end_of_data, *tmp_data_used, *tmp_data;
+ uchar *row_extents_first_part, *row_extents_second_part;
+ uchar *field_length_data;
+ uchar *page_buff;
+ MARIA_BITMAP_BLOCK *block, *head_block;
+ MARIA_SHARE *share= info->s;
+ MARIA_COLUMNDEF *column, *end_column;
+ MARIA_PINNED_PAGE page_link;
+ uint block_size, flag;
+ ulong *blob_lengths;
+ my_bool row_extents_in_use, blob_full_pages_exists;
+ LSN lsn;
+ my_off_t position;
+ DBUG_ENTER("write_block_record");
+
+ LINT_INIT(row_extents_first_part);
+ LINT_INIT(row_extents_second_part);
+
+ head_block= bitmap_blocks->block;
+ block_size= share->block_size;
+
+ page_buff= row_pos->buff;
+ /* Position on head page where we should store the head part */
+ data= row_pos->data;
+ end_of_data= data + row_pos->length;
+
+ /* Write header */
+ flag= share->base.default_row_flag;
+ row_extents_in_use= 0;
+ if (unlikely(row->total_length > row_pos->length))
+ {
+ /* Need extent */
+ if (bitmap_blocks->count <= 1)
+ goto crashed; /* Wrong in bitmap */
+ flag|= ROW_FLAG_EXTENTS;
+ row_extents_in_use= 1;
+ }
+ /* For now we have only a minimum header */
+ *data++= (uchar) flag;
+ if (unlikely(flag & ROW_FLAG_NULLS_EXTENDED))
+ *data++= (uchar) (share->base.null_bytes -
+ share->base.original_null_bytes);
+ if (row_extents_in_use)
+ {
+ /* Store first extent in header */
+ store_key_length_inc(data, bitmap_blocks->count - 1);
+ row_extents_first_part= data;
+ data+= ROW_EXTENT_SIZE;
+ }
+ if (share->base.max_field_lengths)
+ store_key_length_inc(data, row->field_lengths_length);
+ if (share->calc_checksum)
+ {
+ *(data++)= (uchar) (row->checksum); /* store least significant byte */
+ DBUG_ASSERT(!((old_record_checksum != 0) && (old_record == NULL)));
+ }
+ memcpy(data, record, share->base.null_bytes);
+ data+= share->base.null_bytes;
+ memcpy(data, row->empty_bits, share->base.pack_bytes);
+ data+= share->base.pack_bytes;
+
+ /*
+ Allocate a buffer of rest of data (except blobs)
+
+ To avoid double copying of data, we copy as many columns that fits into
+ the page. The rest goes into info->packed_row.
+
+ Using an extra buffer, instead of doing continuous writes to different
+ pages, uses less code and we don't need to have to do a complex call
+ for every data segment we want to store.
+ */
+ if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ row->head_length))
+ DBUG_RETURN(1);
+
+ tmp_data_used= 0; /* Either 0 or last used uchar in 'data' */
+ tmp_data= data;
+
+ if (row_extents_in_use)
+ {
+ uint copy_length= (bitmap_blocks->count - 2) * ROW_EXTENT_SIZE;
+ if (!tmp_data_used && tmp_data + copy_length > end_of_data)
+ {
+ tmp_data_used= tmp_data;
+ tmp_data= info->rec_buff;
+ }
+ row_extents_second_part= tmp_data;
+ /*
+ We will copy the extents here when we have figured out the tail
+ positions.
+ */
+ tmp_data+= copy_length;
+ }
+
+ /* Copy fields that has fixed lengths (primary key etc) */
+ for (column= share->columndef,
+ end_column= column + share->base.fixed_not_null_fields;
+ column < end_column; column++)
+ {
+ if (!tmp_data_used && tmp_data + column->length > end_of_data)
+ {
+ tmp_data_used= tmp_data;
+ tmp_data= info->rec_buff;
+ }
+ memcpy(tmp_data, record + column->offset, column->length);
+ tmp_data+= column->length;
+ }
+
+ /* Copy length of data for variable length fields */
+ if (!tmp_data_used && tmp_data + row->field_lengths_length > end_of_data)
+ {
+ tmp_data_used= tmp_data;
+ tmp_data= info->rec_buff;
+ }
+ field_length_data= row->field_lengths;
+ memcpy(tmp_data, field_length_data, row->field_lengths_length);
+ tmp_data+= row->field_lengths_length;
+
+ /* Copy variable length fields and fields with null/zero */
+ for (end_column= share->columndef + share->base.fields - share->base.blobs;
+ column < end_column ;
+ column++)
+ {
+ const uchar *field_pos;
+ ulong length;
+ if ((record[column->null_pos] & column->null_bit) ||
+ (row->empty_bits[column->empty_pos] & column->empty_bit))
+ continue;
+
+ field_pos= record + column->offset;
+ switch (column->type) {
+ case FIELD_NORMAL: /* Fixed length field */
+ case FIELD_SKIP_PRESPACE:
+ case FIELD_SKIP_ZERO: /* Fixed length field */
+ length= column->length;
+ break;
+ case FIELD_SKIP_ENDSPACE: /* CHAR */
+ /* Char that is space filled */
+ if (column->length <= 255)
+ length= (uint) (uchar) *field_length_data++;
+ else
+ {
+ length= uint2korr(field_length_data);
+ field_length_data+= 2;
+ }
+ break;
+ case FIELD_VARCHAR:
+ if (column->length <= 256)
+ {
+ length= (uint) (uchar) *field_length_data++;
+ field_pos++; /* Skip length uchar */
+ }
+ else
+ {
+ length= uint2korr(field_length_data);
+ field_length_data+= 2;
+ field_pos+= 2;
+ }
+ break;
+ default: /* Wrong data */
+ DBUG_ASSERT(0);
+ length=0;
+ break;
+ }
+ if (!tmp_data_used && tmp_data + length > end_of_data)
+ {
+ /* Data didn't fit in page; Change to use tmp buffer */
+ tmp_data_used= tmp_data;
+ tmp_data= info->rec_buff;
+ }
+ memcpy((char*) tmp_data, (char*) field_pos, length);
+ tmp_data+= length;
+ }
+
+ block= head_block + head_block->sub_blocks; /* Point to first blob data */
+
+ end_column= column + share->base.blobs;
+ blob_lengths= row->blob_lengths;
+ if (!tmp_data_used)
+ {
+ /* Still room on page; Copy as many blobs we can into this page */
+ data= tmp_data;
+ for (; column < end_column &&
+ *blob_lengths <= (ulong)(end_of_data - data);
+ column++, blob_lengths++)
+ {
+ uchar *tmp_pos;
+ uint length;
+ if (!*blob_lengths) /* Null or "" */
+ continue;
+ length= column->length - portable_sizeof_char_ptr;
+ memcpy_fixed((uchar*) &tmp_pos, record + column->offset + length,
+ sizeof(char*));
+ memcpy(data, tmp_pos, *blob_lengths);
+ data+= *blob_lengths;
+ /* Skip over tail page that was to be used to store blob */
+ block++;
+ bitmap_blocks->tail_page_skipped= 1;
+ }
+ if (head_block->sub_blocks > 1)
+ {
+ /* We have allocated pages that where not used */
+ bitmap_blocks->page_skipped= 1;
+ }
+ }
+ else
+ data= tmp_data_used; /* Get last used on page */
+
+ {
+ /* Update page directory */
+ uint length= (uint) (data - row_pos->data);
+ DBUG_PRINT("info", ("Used head length on page: %u", length));
+ DBUG_ASSERT(data <= end_of_data);
+ if (length < info->s->base.min_block_length)
+ {
+ /* Extend row to be of size min_block_length */
+ uint diff_length= info->s->base.min_block_length - length;
+ bzero(data, diff_length);
+ data+= diff_length;
+ length= info->s->base.min_block_length;
+ }
+ int2store(row_pos->dir + 2, length);
+ /* update empty space at start of block */
+ row_pos->empty_space-= length;
+ int2store(page_buff + EMPTY_SPACE_OFFSET, row_pos->empty_space);
+ /* Mark in bitmaps how the current page was actually used */
+ head_block->empty_space= row_pos->empty_space;
+ if (page_buff[DIR_COUNT_OFFSET] == MAX_ROWS_PER_PAGE)
+ head_block->empty_space= 0; /* Page is full */
+ head_block->used= BLOCKUSED_USED;
+ }
+
+ /*
+ Now we have to write tail pages, as we need to store the position
+ to them in the row extent header.
+
+ We first write out all blob tails, to be able to store them in
+ the current page or 'tmp_data'.
+
+ Then we write the tail of the non-blob fields (The position to the
+ tail page is stored either in row header, the extents in the head
+ page or in the first full page of the non-blob data. It's never in
+ the tail page of the non-blob data)
+ */
+
+ blob_full_pages_exists= 0;
+ if (row_extents_in_use)
+ {
+ if (column != end_column) /* If blob fields */
+ {
+ MARIA_COLUMNDEF *save_column= column;
+ MARIA_BITMAP_BLOCK *save_block= block;
+ MARIA_BITMAP_BLOCK *end_block;
+ ulong *save_blob_lengths= blob_lengths;
+
+ for (; column < end_column; column++, blob_lengths++)
+ {
+ uchar *blob_pos;
+ if (!*blob_lengths) /* Null or "" */
+ continue;
+ if (block[block->sub_blocks - 1].used & BLOCKUSED_TAIL)
+ {
+ uint length;
+ length= column->length - portable_sizeof_char_ptr;
+ memcpy_fixed((uchar *) &blob_pos, record + column->offset + length,
+ sizeof(char*));
+ length= *blob_lengths % FULL_PAGE_SIZE(block_size); /* tail size */
+ if (length != *blob_lengths)
+ blob_full_pages_exists= 1;
+ if (write_tail(info, block + block->sub_blocks-1,
+ blob_pos + *blob_lengths - length,
+ length))
+ goto disk_err;
+ }
+ else
+ blob_full_pages_exists= 1;
+
+ for (end_block= block + block->sub_blocks; block < end_block; block++)
+ {
+ /*
+ Set only a bit, to not cause bitmap code to believe a block is full
+ when there is still a lot of entries in it
+ */
+ block->used|= BLOCKUSED_USED;
+ }
+ }
+ column= save_column;
+ block= save_block;
+ blob_lengths= save_blob_lengths;
+ }
+
+ if (tmp_data_used) /* non blob data overflows */
+ {
+ MARIA_BITMAP_BLOCK *cur_block, *end_block, *last_head_block;
+ MARIA_BITMAP_BLOCK *head_tail_block= 0;
+ ulong length;
+ ulong data_length= (tmp_data - info->rec_buff);
+
+#ifdef SANITY_CHECKS
+ if (head_block->sub_blocks == 1)
+ goto crashed; /* no reserved full or tails */
+#endif
+ /*
+ Find out where to write tail for non-blob fields.
+
+ Problem here is that the bitmap code may have allocated more
+ space than we need. We have to handle the following cases:
+
+ - Bitmap code allocated a tail page we don't need.
+ - The last full page allocated needs to be changed to a tail page
+ (Because we where able to put more data on the head page than
+ the bitmap allocation assumed)
+
+ The reserved pages in bitmap_blocks for the main page has one of
+ the following allocations:
+ - Full pages, with following blocks:
+ # * full pages
+ empty page ; To be used if we change last full to tail page. This
+ has 'count' = 0.
+ tail page (optional, if last full page was part full)
+ - One tail page
+ */
+
+ cur_block= head_block + 1;
+ end_block= head_block + head_block->sub_blocks;
+ /*
+ Loop until we have find a block bigger than we need or
+ we find the empty page block.
+ */
+ while (data_length >= (length= (cur_block->page_count *
+ FULL_PAGE_SIZE(block_size))) &&
+ cur_block->page_count)
+ {
+#ifdef SANITY_CHECKS
+ if ((cur_block == end_block) || (cur_block->used & BLOCKUSED_USED))
+ goto crashed;
+#endif
+ data_length-= length;
+ (cur_block++)->used= BLOCKUSED_USED;
+ }
+ last_head_block= cur_block;
+ if (data_length)
+ {
+ if (cur_block->page_count == 0)
+ {
+ /* Skip empty filler block */
+ cur_block++;
+ }
+#ifdef SANITY_CHECKS
+ if ((cur_block >= end_block))
+ goto crashed;
+#endif
+ if (cur_block->used & BLOCKUSED_TAIL)
+ {
+ DBUG_ASSERT(data_length < MAX_TAIL_SIZE(block_size));
+ /* tail written to full tail page */
+ cur_block->used= BLOCKUSED_USED;
+ head_tail_block= cur_block;
+ }
+ else if (data_length > length - MAX_TAIL_SIZE(block_size))
+ {
+ /* tail written to full page */
+ cur_block->used= BLOCKUSED_USED;
+ if ((cur_block != end_block - 1) &&
+ (end_block[-1].used & BLOCKUSED_TAIL))
+ bitmap_blocks->tail_page_skipped= 1;
+ }
+ else
+ {
+ /*
+ cur_block is a full block, followed by an empty and optional
+ tail block. Change cur_block to a tail block or split it
+ into full blocks and tail blocks.
+
+ TODO:
+ If there is enough space on the following tail block, use
+ this instead of creating a new tail block.
+ */
+ DBUG_ASSERT(cur_block[1].page_count == 0);
+ if (cur_block->page_count == 1)
+ {
+ /* convert full block to tail block */
+ cur_block->used= BLOCKUSED_USED | BLOCKUSED_TAIL;
+ head_tail_block= cur_block;
+ }
+ else
+ {
+ DBUG_ASSERT(data_length < length - FULL_PAGE_SIZE(block_size));
+ DBUG_PRINT("info", ("Splitting blocks into full and tail"));
+ cur_block[1].page= (cur_block->page + cur_block->page_count - 1);
+ cur_block[1].page_count= 1; /* Avoid DBUG_ASSERT */
+ cur_block[1].used= BLOCKUSED_USED | BLOCKUSED_TAIL;
+ cur_block->page_count--;
+ cur_block->used= BLOCKUSED_USED;
+ last_head_block= head_tail_block= cur_block+1;
+ }
+ if (end_block[-1].used & BLOCKUSED_TAIL)
+ bitmap_blocks->tail_page_skipped= 1;
+ }
+ }
+ else
+ {
+ /* Must be an empty or tail page */
+ DBUG_ASSERT(cur_block->page_count == 0 ||
+ cur_block->used & BLOCKUSED_TAIL);
+ if (end_block[-1].used & BLOCKUSED_TAIL)
+ bitmap_blocks->tail_page_skipped= 1;
+ }
+
+ /*
+ Write all extents into page or tmp_data
+
+ Note that we still don't have a correct position for the tail
+ of the non-blob fields.
+ */
+ store_extent_info(row_extents_first_part,
+ row_extents_second_part,
+ head_block+1, bitmap_blocks->count - 1);
+ if (head_tail_block)
+ {
+ ulong block_length= (tmp_data - info->rec_buff);
+ uchar *extent_data;
+
+ length= (uint) (block_length % FULL_PAGE_SIZE(block_size));
+ if (write_tail(info, head_tail_block,
+ info->rec_buff + block_length - length,
+ length))
+ goto disk_err;
+ tmp_data-= length; /* Remove the tail */
+ if (tmp_data == info->rec_buff)
+ {
+ /* We have no full blocks to write for the head part */
+ tmp_data_used= 0;
+ }
+
+ /* Store the tail position for the non-blob fields */
+ if (head_tail_block == head_block + 1)
+ {
+ /*
+ We had a head block + tail block, which means that the
+ tail block is the first extent
+ */
+ extent_data= row_extents_first_part;
+ }
+ else
+ {
+ /*
+ We have a head block + some full blocks + tail block
+ last_head_block is pointing after the last used extent
+ for the head block.
+ */
+ extent_data= row_extents_second_part +
+ ((last_head_block - head_block) - 2) * ROW_EXTENT_SIZE;
+ }
+ DBUG_ASSERT(uint2korr(extent_data+5) & TAIL_BIT);
+ page_store(extent_data, head_tail_block->page);
+ int2store(extent_data + PAGE_STORE_SIZE, head_tail_block->page_count);
+ }
+ }
+ else
+ store_extent_info(row_extents_first_part,
+ row_extents_second_part,
+ head_block+1, bitmap_blocks->count - 1);
+ }
+
+ if (share->now_transactional)
+ {
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE];
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ size_t block_length= (size_t) (data - row_pos->data);
+
+ /* Log REDO changes of head page */
+ page_store(log_data + FILEID_STORE_SIZE, head_block->page);
+ dirpos_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE,
+ row_pos->rownr);
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) row_pos->data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= block_length;
+ if (translog_write_record(&lsn, LOGREC_REDO_INSERT_ROW_HEAD, info->trn,
+ info, sizeof(log_data) + block_length,
+ TRANSLOG_INTERNAL_PARTS + 2, log_array,
+ log_data, NULL))
+ goto disk_err;
+ }
+
+#ifdef RECOVERY_EXTRA_DEBUG
+ if (info->trn->undo_lsn != LSN_IMPOSSIBLE)
+ {
+ /* Stop right after the REDO; testing incomplete log record groups */
+ DBUG_EXECUTE_IF("maria_flush_whole_log",
+ {
+ DBUG_PRINT("maria_flush_whole_log", ("now"));
+ translog_flush(translog_get_horizon());
+ });
+ DBUG_EXECUTE_IF("maria_crash",
+ {
+ DBUG_PRINT("maria_crash", ("now"));
+ fflush(DBUG_FILE);
+ abort();
+ });
+ }
+#endif
+
+ /* Increase data file size, if extended */
+ position= (my_off_t) head_block->page * block_size;
+ if (info->state->data_file_length <= position)
+ info->state->data_file_length= position + block_size;
+
+ DBUG_ASSERT(share->pagecache->block_size == block_size);
+ if (pagecache_write(share->pagecache,
+ &info->dfile, head_block->page, 0,
+ page_buff, share->page_type,
+ head_block_is_read ? PAGECACHE_LOCK_WRITE_TO_READ :
+ PAGECACHE_LOCK_READ,
+ head_block_is_read ? PAGECACHE_PIN_LEFT_PINNED :
+ PAGECACHE_PIN,
+ PAGECACHE_WRITE_DELAY, &page_link.link,
+ LSN_IMPOSSIBLE))
+ goto disk_err;
+ page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK;
+ page_link.changed= 1;
+ if (head_block_is_read)
+ {
+ /* Head page is always the first pinned page */
+ set_dynamic(&info->pinned_pages, (void*) &page_link, 0);
+ }
+ else
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+
+ if (share->now_transactional && (tmp_data_used || blob_full_pages_exists))
+ {
+ /*
+ Log REDO writes for all full pages (head part and all blobs)
+ We write all here to be able to generate the UNDO record early
+ so that we can write the LSN for the UNDO record to all full pages.
+ */
+ uchar tmp_log_data[FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE +
+ (ROW_EXTENT_SIZE + BLOCK_FILLER_SIZE + SUB_RANGE_SIZE) *
+ ROW_EXTENTS_ON_STACK];
+ uchar *log_data, *log_pos;
+ LEX_STRING tmp_log_array[TRANSLOG_INTERNAL_PARTS + 2 +
+ ROW_EXTENTS_ON_STACK];
+ LEX_STRING *log_array_pos, *log_array;
+ int error;
+ ulong log_entry_length= 0;
+ uint ext_length, extents= 0, sub_extents= 0;
+
+ /* If few extents, then allocate things on stack to avoid a malloc call */
+ if (bitmap_blocks->count < ROW_EXTENTS_ON_STACK)
+ {
+ log_array= tmp_log_array;
+ log_data= tmp_log_data;
+ }
+ else
+ {
+ if (my_multi_malloc(MY_WME, &log_array,
+ (uint) ((bitmap_blocks->count +
+ TRANSLOG_INTERNAL_PARTS + 2) *
+ sizeof(*log_array)),
+ &log_data, FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE +
+ bitmap_blocks->count * (ROW_EXTENT_SIZE +
+ BLOCK_FILLER_SIZE +
+ SUB_RANGE_SIZE),
+ NullS))
+ goto disk_err;
+ }
+ log_pos= log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE * 2;
+ log_array_pos= log_array+ TRANSLOG_INTERNAL_PARTS+1;
+
+ if (tmp_data_used)
+ {
+ /* Full head page */
+ size_t block_length= (ulong) (tmp_data - info->rec_buff);
+ log_pos= store_page_range(log_pos, head_block+1, block_size,
+ block_length, &extents);
+ log_array_pos->str= (char*) info->rec_buff;
+ log_array_pos->length= block_length;
+ log_entry_length+= block_length;
+ log_array_pos++;
+ sub_extents++;
+ }
+ if (blob_full_pages_exists)
+ {
+ MARIA_COLUMNDEF *tmp_column= column;
+ ulong *tmp_blob_lengths= blob_lengths;
+ MARIA_BITMAP_BLOCK *tmp_block= block;
+
+ /* Full blob pages */
+ for (; tmp_column < end_column; tmp_column++, tmp_blob_lengths++)
+ {
+ ulong blob_length;
+ uint length;
+
+ if (!*tmp_blob_lengths) /* Null or "" */
+ continue;
+ length= tmp_column->length - portable_sizeof_char_ptr;
+ blob_length= *tmp_blob_lengths;
+ /*
+ If last part of blog was on tail page, change blob_length to
+ reflect this
+ */
+ if (tmp_block[tmp_block->sub_blocks - 1].used & BLOCKUSED_TAIL)
+ blob_length-= (blob_length % FULL_PAGE_SIZE(block_size));
+ if (blob_length)
+ {
+ memcpy_fixed((uchar*) &log_array_pos->str,
+ record + column->offset + length,
+ sizeof(uchar*));
+ log_array_pos->length= blob_length;
+ log_entry_length+= blob_length;
+ log_array_pos++;
+ sub_extents++;
+
+ log_pos= store_page_range(log_pos, tmp_block, block_size,
+ blob_length, &extents);
+ tmp_block+= tmp_block->sub_blocks;
+ }
+ }
+ }
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ ext_length= (uint) (log_pos - log_data);
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= ext_length;
+ pagerange_store(log_data+ FILEID_STORE_SIZE, extents);
+ pagerange_store(log_data+ FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE,
+ sub_extents);
+
+ log_entry_length+= ext_length;
+ /* trn->rec_lsn is already set earlier in this function */
+ error= translog_write_record(&lsn, LOGREC_REDO_INSERT_ROW_BLOBS,
+ info->trn, info, log_entry_length,
+ (uint) (log_array_pos - log_array),
+ log_array, log_data, NULL);
+ if (log_array != tmp_log_array)
+ my_free(log_array, MYF(0));
+ if (error)
+ goto disk_err;
+ }
+
+ /* Write UNDO or CLR record */
+ lsn= LSN_IMPOSSIBLE;
+ if (share->now_transactional)
+ {
+ LEX_STRING *log_array= info->log_row_parts;
+
+ if (undo_lsn != LSN_ERROR)
+ {
+ /*
+ Store if this CLR is about UNDO_DELETE or UNDO_UPDATE;
+ in the first case, Recovery, when it sees the CLR_END in the
+ REDO phase, may decrement the records' count.
+ */
+ if (_ma_write_clr(info, undo_lsn,
+ old_record ? LOGREC_UNDO_ROW_UPDATE :
+ LOGREC_UNDO_ROW_DELETE,
+ share->calc_checksum != 0,
+ row->checksum - old_record_checksum,
+ &lsn, (void*) 0))
+ goto disk_err;
+ }
+ else
+ {
+ uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ PAGE_STORE_SIZE + DIRPOS_STORE_SIZE +
+ HA_CHECKSUM_STORE_SIZE];
+ ha_checksum checksum_delta;
+
+ /* LOGREC_UNDO_ROW_INSERT & LOGREC_UNDO_ROW_UPDATE share same header */
+ lsn_store(log_data, info->trn->undo_lsn);
+ page_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE,
+ head_block->page);
+ dirpos_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ PAGE_STORE_SIZE,
+ row_pos->rownr);
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length=
+ sizeof(log_data) - HA_CHECKSUM_STORE_SIZE;
+ store_checksum_in_rec(share, checksum_delta,
+ row->checksum - old_record_checksum,
+ log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length);
+ compile_time_assert(sizeof(ha_checksum) == HA_CHECKSUM_STORE_SIZE);
+
+ if (!old_record)
+ {
+ /* Store undo_lsn in case we are aborting the insert */
+ row->orig_undo_lsn= info->trn->undo_lsn;
+ /* Write UNDO log record for the INSERT */
+ if (translog_write_record(&lsn, LOGREC_UNDO_ROW_INSERT,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length,
+ TRANSLOG_INTERNAL_PARTS + 1, log_array,
+ log_data + LSN_STORE_SIZE, &checksum_delta))
+ goto disk_err;
+ }
+ else
+ {
+ /* Write UNDO log record for the UPDATE */
+ size_t row_length;
+ uint row_parts_count;
+ row_length= fill_update_undo_parts(info, old_record, record,
+ log_array +
+ TRANSLOG_INTERNAL_PARTS + 1,
+ &row_parts_count);
+ if (translog_write_record(&lsn, LOGREC_UNDO_ROW_UPDATE, info->trn,
+ info, log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length + row_length,
+ TRANSLOG_INTERNAL_PARTS + 1 +
+ row_parts_count, log_array,
+ log_data + LSN_STORE_SIZE, &checksum_delta))
+ goto disk_err;
+ }
+ }
+ }
+ /* Release not used space in used pages */
+ if (_ma_bitmap_release_unused(info, bitmap_blocks))
+ goto disk_err;
+ _ma_unpin_all_pages(info, lsn);
+
+ if (tmp_data_used)
+ {
+ /*
+ Write data stored in info->rec_buff to pages
+ This is the char/varchar data that didn't fit into the head page.
+ */
+ DBUG_ASSERT(bitmap_blocks->count != 0);
+ if (write_full_pages(info, info->trn->undo_lsn, head_block + 1,
+ info->rec_buff, (ulong) (tmp_data - info->rec_buff)))
+ goto disk_err;
+ }
+
+ /* Write rest of blobs (data, but no tails as they are already written) */
+ for (; column < end_column; column++, blob_lengths++)
+ {
+ uchar *blob_pos;
+ uint length;
+ ulong blob_length;
+ if (!*blob_lengths) /* Null or "" */
+ continue;
+ length= column->length - portable_sizeof_char_ptr;
+ memcpy_fixed((uchar*) &blob_pos, record + column->offset + length,
+ sizeof(char*));
+ /* remove tail part */
+ blob_length= *blob_lengths;
+ if (block[block->sub_blocks - 1].used & BLOCKUSED_TAIL)
+ blob_length-= (blob_length % FULL_PAGE_SIZE(block_size));
+
+ if (blob_length && write_full_pages(info, info->trn->undo_lsn, block,
+ blob_pos, blob_length))
+ goto disk_err;
+ block+= block->sub_blocks;
+ }
+
+ _ma_finalize_row(info);
+ DBUG_RETURN(0);
+
+crashed:
+ /* Something was wrong with data on page */
+ my_errno= HA_ERR_WRONG_IN_RECORD;
+
+disk_err:
+ /**
+ @todo RECOVERY we are going to let dirty pages go to disk while we have
+ logged UNDO, this violates WAL. We must mark the table corrupted!
+
+ @todo RECOVERY we have written some REDOs without a closing UNDO,
+ it's possible that a next operation by this transaction succeeds and then
+ Recovery would glue the "orphan REDOs" to the succeeded operation and
+ execute the failed REDOs. We need some mark "abort this group" in the
+ log, or mark the table corrupted (then user will repair it and thus REDOs
+ will be skipped).
+
+ @todo RECOVERY to not let write errors go unnoticed, pagecache_write()
+ should take a MARIA_HA* in argument, and it it
+ fails when flushing a page to disk it should call
+ (*the_maria_ha->write_error_func)(the_maria_ha)
+ and this hook will mark the table corrupted.
+ Maybe hook should be stored in the pagecache's block structure, or in a
+ hash "file->maria_ha*".
+
+ @todo RECOVERY we should distinguish below between log write error and
+ table write error. The former should stop Maria immediately, the latter
+ should mark the table corrupted.
+ */
+ /*
+ Unpin all pinned pages to not cause problems for disk cache. This is
+ safe to call even if we already called _ma_unpin_all_pages() above.
+ */
+ _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
+
+ DBUG_RETURN(1);
+}
+
+
+/*
+ @brief Write a record
+
+ @fn allocate_and_write_block_record()
+ @param info Maria handler
+ @param record Record to write
+ @param row Information about fields in 'record'
+ @param undo_lsn <> LSN_ERROR if we are executing an UNDO
+
+ @return
+ @retval 0 ok
+ @retval 1 Error
+*/
+
+static my_bool allocate_and_write_block_record(MARIA_HA *info,
+ const uchar *record,
+ MARIA_ROW *row,
+ LSN undo_lsn)
+{
+ struct st_row_pos_info row_pos;
+ MARIA_BITMAP_BLOCKS *blocks= &row->insert_blocks;
+ DBUG_ENTER("allocate_and_write_block_record");
+
+ _ma_bitmap_flushable(info->s, 1);
+ if (_ma_bitmap_find_place(info, row, blocks))
+ goto err; /* Error reading bitmap */
+
+ /*
+ Sleep; a checkpoint will happen and should not send this over-allocated
+ bitmap to disk but rather wait.
+ */
+ DBUG_EXECUTE_IF("maria_over_alloc_bitmap", sleep(10););
+
+ /* page will be pinned & locked by get_head_or_tail_page */
+ if (get_head_or_tail_page(info, blocks->block, info->buff,
+ row->space_on_head_page, HEAD_PAGE,
+ PAGECACHE_LOCK_WRITE, &row_pos))
+ goto err;
+ row->lastpos= ma_recordpos(blocks->block->page, row_pos.rownr);
+ if (info->s->calc_checksum)
+ {
+ if (undo_lsn == LSN_ERROR)
+ row->checksum= (info->s->calc_checksum)(info, record);
+ else
+ {
+ /* _ma_apply_undo_row_delete() already set row's checksum. Verify it. */
+ DBUG_ASSERT(row->checksum == (info->s->calc_checksum)(info, record));
+ }
+ }
+ if (write_block_record(info, (uchar*) 0, record, row,
+ blocks, blocks->block->org_bitmap_value != 0,
+ &row_pos, undo_lsn, 0))
+ goto err; /* Error reading bitmap */
+ DBUG_PRINT("exit", ("Rowid: %lu (%lu:%u)", (ulong) row->lastpos,
+ (ulong) ma_recordpos_to_page(row->lastpos),
+ ma_recordpos_to_dir_entry(row->lastpos)));
+ /* Now let checkpoint happen but don't commit */
+ DBUG_EXECUTE_IF("maria_over_alloc_bitmap", sleep(1000););
+ DBUG_RETURN(0);
+err:
+ _ma_bitmap_flushable(info->s, -1);
+ _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Write a record and return rowid for it
+
+ SYNOPSIS
+ _ma_write_init_block_record()
+ info Maria handler
+ record Record to write
+
+ NOTES
+ This is done BEFORE we write the keys to the row!
+
+ RETURN
+ HA_OFFSET_ERROR Something went wrong
+ # Rowid for row
+*/
+
+MARIA_RECORD_POS _ma_write_init_block_record(MARIA_HA *info,
+ const uchar *record)
+{
+ DBUG_ENTER("_ma_write_init_block_record");
+
+ calc_record_size(info, record, &info->cur_row);
+ if (allocate_and_write_block_record(info, record,
+ &info->cur_row, LSN_ERROR))
+ DBUG_RETURN(HA_OFFSET_ERROR);
+ DBUG_RETURN(info->cur_row.lastpos);
+}
+
+
+/*
+ Dummy function for (*info->s->write_record)()
+
+ Nothing to do here, as we already wrote the record in
+ _ma_write_init_block_record()
+*/
+
+my_bool _ma_write_block_record(MARIA_HA *info __attribute__ ((unused)),
+ const uchar *record __attribute__ ((unused)))
+{
+ return 0; /* Row already written */
+}
+
+
+/**
+ @brief Remove row written by _ma_write_block_record() and log undo
+
+ @param info Maria handler
+
+ @note
+ This is called in case we got a duplicate unique key while
+ writing keys.
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool _ma_write_abort_block_record(MARIA_HA *info)
+{
+ my_bool res= 0;
+ MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks;
+ MARIA_BITMAP_BLOCK *block, *end;
+ LSN lsn= LSN_IMPOSSIBLE;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_write_abort_block_record");
+
+ _ma_bitmap_flushable(share, 1);
+ if (delete_head_or_tail(info,
+ ma_recordpos_to_page(info->cur_row.lastpos),
+ ma_recordpos_to_dir_entry(info->cur_row.lastpos), 1,
+ 0))
+ res= 1;
+ for (block= blocks->block + 1, end= block + blocks->count - 1; block < end;
+ block++)
+ {
+ if (block->used & BLOCKUSED_TAIL)
+ {
+ /*
+ block->page_count is set to the tail directory entry number in
+ write_block_record()
+ */
+ if (delete_head_or_tail(info, block->page, block->page_count & ~TAIL_BIT,
+ 0, 0))
+ res= 1;
+ }
+ else if (block->used & BLOCKUSED_USED)
+ {
+ if (free_full_page_range(info, block->page, block->page_count))
+ res= 1;
+ }
+ }
+
+ if (share->now_transactional)
+ {
+ if (_ma_write_clr(info, info->cur_row.orig_undo_lsn,
+ LOGREC_UNDO_ROW_INSERT,
+ share->calc_checksum != 0,
+ -info->cur_row.checksum,
+ &lsn, (void*) 0))
+ res= 1;
+ }
+ _ma_bitmap_flushable(share, -1);
+ _ma_unpin_all_pages_and_finalize_row(info, lsn);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Update a record
+
+ NOTES
+ For the moment, we assume that info->curr_row.extents is always updated
+ when a row is read. In the future we may decide to read this on demand
+ for rows split into many extents.
+*/
+
+static my_bool _ma_update_block_record2(MARIA_HA *info,
+ MARIA_RECORD_POS record_pos,
+ const uchar *oldrec,
+ const uchar *record,
+ LSN undo_lsn)
+{
+ MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks;
+ uchar *buff;
+ MARIA_ROW *cur_row= &info->cur_row, *new_row= &info->new_row;
+ MARIA_PINNED_PAGE page_link;
+ uint rownr, org_empty_size, head_length;
+ uint block_size= info->s->block_size;
+ uchar *dir;
+ ulonglong page;
+ struct st_row_pos_info row_pos;
+ my_bool res;
+ ha_checksum old_checksum;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_update_block_record2");
+ DBUG_PRINT("enter", ("rowid: %lu", (long) record_pos));
+
+#ifdef ENABLE_IF_PROBLEM_WITH_UPDATE
+ DBUG_DUMP("oldrec", oldrec, share->base.reclength);
+ DBUG_DUMP("newrec", record, share->base.reclength);
+#endif
+
+ /*
+ Checksums of new and old rows were computed by callers already; new
+ row's was put into cur_row, old row's was put into new_row.
+ */
+ old_checksum= new_row->checksum;
+ new_row->checksum= cur_row->checksum;
+ calc_record_size(info, record, new_row);
+ page= ma_recordpos_to_page(record_pos);
+
+ _ma_bitmap_flushable(share, 1);
+ DBUG_ASSERT(share->pagecache->block_size == block_size);
+ if (!(buff= pagecache_read(share->pagecache,
+ &info->dfile, (pgcache_page_no_t) page, 0,
+ info->buff, share->page_type,
+ PAGECACHE_LOCK_WRITE, &page_link.link)))
+ goto err;
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+
+ org_empty_size= uint2korr(buff + EMPTY_SPACE_OFFSET);
+ rownr= ma_recordpos_to_dir_entry(record_pos);
+ dir= dir_entry_pos(buff, block_size, rownr);
+
+ if ((org_empty_size + cur_row->head_length) >= new_row->total_length)
+ {
+ uint rec_offset, length;
+ MARIA_BITMAP_BLOCK block;
+
+ /*
+ We can fit the new row in the same page as the original head part
+ of the row
+ */
+ block.org_bitmap_value= _ma_free_size_to_head_pattern(&share->bitmap,
+ org_empty_size);
+
+ if (extend_area_on_page(buff, dir, rownr, share->block_size,
+ new_row->total_length, &org_empty_size,
+ &rec_offset, &length))
+ goto err;
+
+ row_pos.buff= buff;
+ row_pos.rownr= rownr;
+ row_pos.empty_space= org_empty_size;
+ row_pos.dir= dir;
+ row_pos.data= buff + rec_offset;
+ row_pos.length= length;
+ blocks->block= &block;
+ blocks->count= 1;
+ block.page= page;
+ block.sub_blocks= 1;
+ block.used= BLOCKUSED_USED | BLOCKUSED_USE_ORG_BITMAP;
+ block.empty_space= row_pos.empty_space;
+ /* Update cur_row, if someone calls update at once again */
+ cur_row->head_length= new_row->total_length;
+
+ if (*cur_row->tail_positions &&
+ delete_tails(info, cur_row->tail_positions))
+ goto err;
+ if (cur_row->extents_count && free_full_pages(info, cur_row))
+ goto err;
+ res= write_block_record(info, oldrec, record, new_row, blocks,
+ 1, &row_pos, undo_lsn, old_checksum);
+ DBUG_RETURN(res);
+ }
+ /*
+ Allocate all size in block for record
+ TODO:
+ Need to improve this to do compact if we can fit one more blob into
+ the head page
+ */
+ head_length= uint2korr(dir + 2);
+ if ((buff[PAGE_TYPE_OFFSET] & PAGE_CAN_BE_COMPACTED) && org_empty_size &&
+ (head_length < new_row->head_length ||
+ (new_row->total_length <= head_length &&
+ org_empty_size + head_length >= new_row->total_length)))
+ {
+ compact_page(buff, share->block_size, rownr, 1);
+ org_empty_size= 0;
+ head_length= uint2korr(dir + 2);
+ }
+
+ /* Delete old row */
+ if (*cur_row->tail_positions && delete_tails(info, cur_row->tail_positions))
+ goto err;
+ if (cur_row->extents_count && free_full_pages(info, cur_row))
+ goto err;
+ if (_ma_bitmap_find_new_place(info, new_row, page, head_length, blocks))
+ goto err;
+
+ row_pos.buff= buff;
+ row_pos.rownr= rownr;
+ row_pos.empty_space= org_empty_size + head_length;
+ row_pos.dir= dir;
+ row_pos.data= buff + uint2korr(dir);
+ row_pos.length= head_length;
+ res= write_block_record(info, oldrec, record, new_row, blocks, 1,
+ &row_pos, undo_lsn, old_checksum);
+ DBUG_RETURN(res);
+
+err:
+ _ma_bitmap_flushable(share, -1);
+ _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
+ DBUG_RETURN(1);
+}
+
+
+/* Wrapper for _ma_update_block_record2() used by ma_update() */
+
+my_bool _ma_update_block_record(MARIA_HA *info, MARIA_RECORD_POS record_pos,
+ const uchar *orig_rec, const uchar *new_rec)
+{
+ return _ma_update_block_record2(info, record_pos, orig_rec, new_rec,
+ LSN_ERROR);
+}
+
+
+/*
+ Delete a directory entry
+
+ SYNOPSIS
+ delete_dir_entry()
+ buff Page buffer
+ block_size Block size
+ record_number Record number to delete
+ empty_space Empty space on page after delete
+
+ RETURN
+ -1 Error on page
+ 0 ok
+ 1 Page is now empty
+*/
+
+static int delete_dir_entry(uchar *buff, uint block_size, uint record_number,
+ uint *empty_space_res)
+{
+ uint number_of_records= (uint) buff[DIR_COUNT_OFFSET];
+ uint length, empty_space;
+ uchar *dir;
+ DBUG_ENTER("delete_dir_entry");
+
+#ifdef SANITY_CHECKS
+ if (record_number >= number_of_records ||
+ record_number > ((block_size - LSN_SIZE - PAGE_TYPE_SIZE - 1 -
+ PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE))
+ {
+ DBUG_PRINT("error", ("record_number: %u number_of_records: %u",
+ record_number, number_of_records));
+
+ DBUG_RETURN(-1);
+ }
+#endif
+
+ empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
+ dir= dir_entry_pos(buff, block_size, record_number);
+ length= uint2korr(dir + 2);
+
+ if (record_number == number_of_records - 1)
+ {
+ /* Delete this entry and all following free directory entries */
+ uchar *end= buff + block_size - PAGE_SUFFIX_SIZE;
+ number_of_records--;
+ dir+= DIR_ENTRY_SIZE;
+ empty_space+= DIR_ENTRY_SIZE;
+
+ /* Unlink and free the next empty ones */
+ while (dir < end && dir[0] == 0 && dir[1] == 0)
+ {
+ number_of_records--;
+ if (dir[2] == END_OF_DIR_FREE_LIST)
+ buff[DIR_FREE_OFFSET]= dir[3];
+ else
+ {
+ uchar *prev_entry= dir_entry_pos(buff, block_size, (uint) dir[2]);
+ DBUG_ASSERT(uint2korr(prev_entry) == 0 && prev_entry[3] ==
+ number_of_records);
+ prev_entry[3]= dir[3];
+ }
+ if (dir[3] != END_OF_DIR_FREE_LIST)
+ {
+ uchar *next_entry= dir_entry_pos(buff, block_size, (uint) dir[3]);
+ DBUG_ASSERT(uint2korr(next_entry) == 0 && next_entry[2] ==
+ number_of_records);
+ next_entry[2]= dir[2];
+ }
+ dir+= DIR_ENTRY_SIZE;
+ empty_space+= DIR_ENTRY_SIZE;
+ }
+
+ if (number_of_records == 0)
+ {
+ /* All entries on page deleted */
+ DBUG_PRINT("info", ("Page marked as unallocated"));
+ buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE;
+#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
+ {
+ dir= dir_entry_pos(buff, block_size, record_number);
+ bzero(dir, (record_number+1) * DIR_ENTRY_SIZE);
+ }
+#endif
+ *empty_space_res= block_size;
+ DBUG_RETURN(1);
+ }
+ buff[DIR_COUNT_OFFSET]= (uchar) number_of_records;
+ }
+ else
+ {
+ /* Update directory */
+ dir[0]= dir[1]= 0;
+ dir[2]= END_OF_DIR_FREE_LIST;
+ if ((dir[3]= buff[DIR_FREE_OFFSET]) != END_OF_DIR_FREE_LIST)
+ {
+ /* Relink next entry to point to newly freed entry */
+ uchar *next_entry= dir_entry_pos(buff, block_size, (uint) dir[3]);
+ DBUG_ASSERT(uint2korr(next_entry) == 0 &&
+ next_entry[2] == END_OF_DIR_FREE_LIST);
+ next_entry[2]= record_number;
+ }
+ buff[DIR_FREE_OFFSET]= record_number;
+ }
+ empty_space+= length;
+
+ int2store(buff + EMPTY_SPACE_OFFSET, empty_space);
+ buff[PAGE_TYPE_OFFSET]|= (uchar) PAGE_CAN_BE_COMPACTED;
+
+ *empty_space_res= empty_space;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Delete a head a tail part
+
+ SYNOPSIS
+ delete_head_or_tail()
+ info Maria handler
+ page Page (not file offset!) on which the row is
+ head 1 if this is a head page
+ from_update 1 if we are called from update. In this case we
+ leave the page as write locked as we may put
+ the new row into the old position.
+
+ NOTES
+ Uses info->keyread_buff
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool delete_head_or_tail(MARIA_HA *info,
+ ulonglong page, uint record_number,
+ my_bool head, my_bool from_update)
+{
+ MARIA_SHARE *share= info->s;
+ uint empty_space;
+ uint block_size= share->block_size;
+ uchar *buff;
+ LSN lsn;
+ MARIA_PINNED_PAGE page_link;
+ int res;
+ enum pagecache_page_lock lock_at_write, lock_at_unpin;
+ DBUG_ENTER("delete_head_or_tail");
+
+ info->keyread_buff_used= 1;
+ DBUG_ASSERT(info->s->pagecache->block_size == block_size);
+ if (!(buff= pagecache_read(share->pagecache,
+ &info->dfile, page, 0,
+ info->keyread_buff,
+ info->s->page_type,
+ PAGECACHE_LOCK_WRITE, &page_link.link)))
+ DBUG_RETURN(1);
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+
+ if (from_update)
+ {
+ lock_at_write= PAGECACHE_LOCK_LEFT_WRITELOCKED;
+ lock_at_unpin= PAGECACHE_LOCK_WRITE_UNLOCK;
+ }
+ else
+ {
+ lock_at_write= PAGECACHE_LOCK_WRITE_TO_READ;
+ lock_at_unpin= PAGECACHE_LOCK_READ_UNLOCK;
+ }
+
+ res= delete_dir_entry(buff, block_size, record_number, &empty_space);
+ if (res < 0)
+ DBUG_RETURN(1);
+ if (res == 0) /* after our deletion, page is still not empty */
+ {
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE];
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
+ if (info->s->now_transactional)
+ {
+ /* Log REDO data */
+ page_store(log_data + FILEID_STORE_SIZE, page);
+ dirpos_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE,
+ record_number);
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+ if (translog_write_record(&lsn, (head ? LOGREC_REDO_PURGE_ROW_HEAD :
+ LOGREC_REDO_PURGE_ROW_TAIL),
+ info->trn, info, sizeof(log_data),
+ TRANSLOG_INTERNAL_PARTS + 1, log_array,
+ log_data, NULL))
+ DBUG_RETURN(1);
+ }
+ if (pagecache_write(share->pagecache,
+ &info->dfile, page, 0,
+ buff, share->page_type,
+ lock_at_write,
+ PAGECACHE_PIN_LEFT_PINNED,
+ PAGECACHE_WRITE_DELAY, &page_link.link,
+ LSN_IMPOSSIBLE))
+ DBUG_RETURN(1);
+ }
+ else /* page is now empty */
+ {
+ if (info->s->now_transactional)
+ {
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE];
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
+ page_store(log_data + FILEID_STORE_SIZE, page);
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+ if (translog_write_record(&lsn, LOGREC_REDO_FREE_HEAD_OR_TAIL,
+ info->trn, info, sizeof(log_data),
+ TRANSLOG_INTERNAL_PARTS + 1, log_array,
+ log_data, NULL))
+ DBUG_RETURN(1);
+ }
+ /* Write the empty page (needed only for REPAIR to work) */
+ if (pagecache_write(share->pagecache,
+ &info->dfile, page, 0,
+ buff, share->page_type,
+ lock_at_write,
+ PAGECACHE_PIN_LEFT_PINNED,
+ PAGECACHE_WRITE_DELAY, &page_link.link,
+ LSN_IMPOSSIBLE))
+ DBUG_RETURN(1);
+
+ DBUG_ASSERT(empty_space >= info->s->bitmap.sizes[0]);
+ }
+ /* The page is pinned with a read lock */
+ page_link.unlock= lock_at_unpin;
+ page_link.changed= 1;
+ set_dynamic(&info->pinned_pages, (void*) &page_link,
+ info->pinned_pages.elements-1);
+
+ DBUG_PRINT("info", ("empty_space: %u", empty_space));
+ DBUG_RETURN(_ma_bitmap_set(info, page, head, empty_space));
+}
+
+
+/*
+ delete all tails
+
+ SYNOPSIS
+ delete_tails()
+ info Handler
+ tails Pointer to vector of tail positions, ending with 0
+
+ NOTES
+ Uses info->keyread_buff
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails)
+{
+ my_bool res= 0;
+ DBUG_ENTER("delete_tails");
+ for (; *tails; tails++)
+ {
+ if (delete_head_or_tail(info,
+ ma_recordpos_to_page(*tails),
+ ma_recordpos_to_dir_entry(*tails), 0, 1))
+ res= 1;
+ }
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Delete a record
+
+ NOTES
+ For the moment, we assume that info->cur_row.extents is always updated
+ when a row is read. In the future we may decide to read this on demand
+ for rows with many splits.
+*/
+
+my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record)
+{
+ ulonglong page;
+ uint record_number;
+ MARIA_SHARE *share= info->s;
+ LSN lsn= LSN_IMPOSSIBLE;
+ DBUG_ENTER("_ma_delete_block_record");
+
+ page= ma_recordpos_to_page(info->cur_row.lastpos);
+ record_number= ma_recordpos_to_dir_entry(info->cur_row.lastpos);
+ DBUG_PRINT("enter", ("Rowid: %lu (%lu:%u)", (ulong) info->cur_row.lastpos,
+ (ulong) page, record_number));
+
+ _ma_bitmap_flushable(share, 1);
+ if (delete_head_or_tail(info, page, record_number, 1, 0) ||
+ delete_tails(info, info->cur_row.tail_positions))
+ goto err;
+
+ if (info->cur_row.extents_count && free_full_pages(info, &info->cur_row))
+ goto err;
+
+ if (share->now_transactional)
+ {
+ uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE +
+ DIRPOS_STORE_SIZE + HA_CHECKSUM_STORE_SIZE];
+ size_t row_length;
+ uint row_parts_count;
+ ha_checksum checksum_delta;
+
+ /* Write UNDO record */
+ lsn_store(log_data, info->trn->undo_lsn);
+ page_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE, page);
+ dirpos_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ PAGE_STORE_SIZE, record_number);
+
+ info->log_row_parts[TRANSLOG_INTERNAL_PARTS].str= (char*) log_data;
+ info->log_row_parts[TRANSLOG_INTERNAL_PARTS].length=
+ sizeof(log_data) - HA_CHECKSUM_STORE_SIZE;
+ store_checksum_in_rec(share, checksum_delta,
+ - info->cur_row.checksum,
+ log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
+ info->log_row_parts[TRANSLOG_INTERNAL_PARTS +
+ 0].length);
+
+ row_length= fill_insert_undo_parts(info, record, info->log_row_parts +
+ TRANSLOG_INTERNAL_PARTS + 1,
+ &row_parts_count);
+
+ if (translog_write_record(&lsn, LOGREC_UNDO_ROW_DELETE, info->trn,
+ info,
+ info->log_row_parts[TRANSLOG_INTERNAL_PARTS +
+ 0].length + row_length,
+ TRANSLOG_INTERNAL_PARTS + 1 + row_parts_count,
+ info->log_row_parts, log_data + LSN_STORE_SIZE,
+ &checksum_delta))
+ goto err;
+
+ }
+
+ _ma_bitmap_flushable(share, -1);
+ _ma_unpin_all_pages_and_finalize_row(info, lsn);
+ DBUG_RETURN(0);
+
+err:
+ _ma_bitmap_flushable(share, -1);
+ _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
+ DBUG_RETURN(1);
+}
+
+
+/****************************************************************************
+ Reading of records
+****************************************************************************/
+
+/*
+ Read position to record from record directory at end of page
+
+ SYNOPSIS
+ get_record_position()
+ buff page buffer
+ block_size block size for page
+ record_number Record number in index
+ end_of_data pointer to end of data for record
+
+ RETURN
+ 0 Error in data
+ # Pointer to start of record.
+ In this case *end_of_data is set.
+*/
+
+static uchar *get_record_position(uchar *buff, uint block_size,
+ uint record_number, uchar **end_of_data)
+{
+ uint number_of_records= (uint) buff[DIR_COUNT_OFFSET];
+ uchar *dir;
+ uchar *data;
+ uint offset, length;
+
+#ifdef SANITY_CHECKS
+ if (record_number >= number_of_records ||
+ record_number > ((block_size - PAGE_HEADER_SIZE - PAGE_SUFFIX_SIZE) /
+ DIR_ENTRY_SIZE))
+ {
+ DBUG_PRINT("error",
+ ("Wrong row number: record_number: %u number_of_records: %u",
+ record_number, number_of_records));
+ return 0;
+ }
+#endif
+
+ dir= dir_entry_pos(buff, block_size, record_number);
+ offset= uint2korr(dir);
+ length= uint2korr(dir + 2);
+#ifdef SANITY_CHECKS
+ if (offset < PAGE_HEADER_SIZE ||
+ offset + length > (block_size -
+ number_of_records * DIR_ENTRY_SIZE -
+ PAGE_SUFFIX_SIZE))
+ {
+ DBUG_PRINT("error",
+ ("Wrong row position: record_number: %u offset: %u "
+ "length: %u number_of_records: %u",
+ record_number, offset, length, number_of_records));
+ return 0;
+ }
+#endif
+ data= buff + offset;
+ *end_of_data= data + length;
+ return data;
+}
+
+
+/*
+ Init extent
+
+ NOTES
+ extent is a cursor over which pages to read
+*/
+
+static void init_extent(MARIA_EXTENT_CURSOR *extent, uchar *extent_info,
+ uint extents, MARIA_RECORD_POS *tail_positions)
+{
+ uint page_count;
+ extent->extent= extent_info;
+ extent->extent_count= extents;
+ extent->page= page_korr(extent_info); /* First extent */
+ page_count= uint2korr(extent_info + ROW_EXTENT_PAGE_SIZE);
+ extent->tail= page_count & TAIL_BIT;
+ if (extent->tail)
+ {
+ extent->page_count= 1;
+ extent->tail_row_nr= page_count & ~TAIL_BIT;
+ }
+ else
+ extent->page_count= page_count;
+ extent->tail_positions= tail_positions;
+ extent->lock_for_tail_pages= PAGECACHE_LOCK_LEFT_UNLOCKED;
+}
+
+
+/*
+ Read next extent
+
+ SYNOPSIS
+ read_next_extent()
+ info Maria handler
+ extent Pointer to current extent (this is updated to point
+ to next)
+ end_of_data Pointer to end of data in read block (out)
+
+ NOTES
+ New block is read into info->buff
+
+ RETURN
+ 0 Error; my_errno is set
+ # Pointer to start of data in read block
+ In this case end_of_data is updated to point to end of data.
+*/
+
+static uchar *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent,
+ uchar **end_of_data)
+{
+ MARIA_SHARE *share= info->s;
+ uchar *buff, *data;
+ MARIA_PINNED_PAGE page_link;
+ enum pagecache_page_lock lock;
+ DBUG_ENTER("read_next_extent");
+
+ if (!extent->page_count)
+ {
+ uint page_count;
+ if (!--extent->extent_count)
+ goto crashed;
+ extent->extent+= ROW_EXTENT_SIZE;
+ extent->page= page_korr(extent->extent);
+ page_count= uint2korr(extent->extent+ROW_EXTENT_PAGE_SIZE);
+ if (!page_count)
+ goto crashed;
+ extent->tail= page_count & TAIL_BIT;
+ if (extent->tail)
+ extent->tail_row_nr= page_count & ~TAIL_BIT;
+ else
+ extent->page_count= page_count;
+ DBUG_PRINT("info",("New extent. Page: %lu page_count: %u tail_flag: %d",
+ (ulong) extent->page, extent->page_count,
+ extent->tail != 0));
+ }
+ extent->first_extent= 0;
+
+ lock= PAGECACHE_LOCK_LEFT_UNLOCKED;
+ if (extent->tail)
+ lock= extent->lock_for_tail_pages;
+
+ DBUG_ASSERT(share->pagecache->block_size == share->block_size);
+ if (!(buff= pagecache_read(share->pagecache,
+ &info->dfile, extent->page, 0,
+ info->buff, share->page_type,
+ lock, &page_link.link)))
+ {
+ /* check if we tried to read over end of file (ie: bad data in record) */
+ if ((extent->page + 1) * share->block_size > info->state->data_file_length)
+ goto crashed;
+ DBUG_RETURN(0);
+ }
+
+ if (!extent->tail)
+ {
+ /* Full data page */
+ if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != BLOB_PAGE)
+ goto crashed;
+ extent->page++; /* point to next page */
+ extent->page_count--;
+ *end_of_data= buff + share->block_size - PAGE_SUFFIX_SIZE;
+ info->cur_row.full_page_count++; /* For maria_chk */
+ DBUG_RETURN(extent->data_start= buff + LSN_SIZE + PAGE_TYPE_SIZE);
+ }
+
+ /* Found tail */
+ if (lock != PAGECACHE_LOCK_LEFT_UNLOCKED)
+ {
+ /* Read during redo */
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+ }
+
+ if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != TAIL_PAGE)
+ goto crashed;
+ *(extent->tail_positions++)= ma_recordpos(extent->page,
+ extent->tail_row_nr);
+ info->cur_row.tail_count++; /* For maria_chk */
+
+ if (!(data= get_record_position(buff, share->block_size,
+ extent->tail_row_nr,
+ end_of_data)))
+ goto crashed;
+ extent->data_start= data;
+ extent->page_count= 0; /* No more data in extent */
+ DBUG_RETURN(data);
+
+
+crashed:
+ my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */
+ DBUG_PRINT("error", ("wrong extent information"));
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Read data that may be split over many blocks
+
+ SYNOPSIS
+ read_long_data()
+ info Maria handler
+ to Store result string here (this is allocated)
+ extent Pointer to current extent position
+ data Current position in buffer
+ end_of_data End of data in buffer
+
+ NOTES
+ When we have to read a new buffer, it's read into info->buff
+
+ This loop is implemented by goto's instead of a for() loop as
+ the code is notable smaller and faster this way (and it's not nice
+ to jump into a for loop() or into a 'then' clause)
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool read_long_data(MARIA_HA *info, uchar *to, ulong length,
+ MARIA_EXTENT_CURSOR *extent,
+ uchar **data, uchar **end_of_data)
+{
+ DBUG_ENTER("read_long_data");
+ DBUG_PRINT("enter", ("length: %lu left_length: %u",
+ length, (uint) (*end_of_data - *data)));
+ DBUG_ASSERT(*data <= *end_of_data);
+
+ /*
+ Fields are never split in middle. This means that if length > rest-of-data
+ we should start reading from the next extent. The reason we may have
+ data left on the page is that if the fixed part of the row was less than
+ min_block_length the head block was extended to min_block_length.
+
+ This may change in the future, which is why we have the loop written
+ the way it's written.
+ */
+ if (extent->first_extent && length > (ulong) (*end_of_data - *data))
+ *end_of_data= *data;
+
+ for(;;)
+ {
+ uint left_length;
+ left_length= (uint) (*end_of_data - *data);
+ if (likely(left_length >= length))
+ {
+ memcpy(to, *data, length);
+ (*data)+= length;
+ DBUG_PRINT("info", ("left_length: %u", left_length - (uint) length));
+ DBUG_RETURN(0);
+ }
+ memcpy(to, *data, left_length);
+ to+= left_length;
+ length-= left_length;
+ if (!(*data= read_next_extent(info, extent, end_of_data)))
+ break;
+ }
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Read a record from page (helper function for _ma_read_block_record())
+
+ SYNOPSIS
+ _ma_read_block_record2()
+ info Maria handler
+ record Store record here
+ data Start of head data for row
+ end_of_data End of data for row
+
+ NOTES
+ The head page is already read by caller
+ Following data is update in info->cur_row:
+
+ cur_row.head_length is set to size of entry in head block
+ cur_row.tail_positions is set to point to all tail blocks
+ cur_row.extents points to extents data
+ cur_row.extents_counts contains number of extents
+ cur_row.empty_bits is set to empty bits
+ cur_row.field_lengths contains packed length of all fields
+ cur_row.blob_length contains total length of all blobs
+ cur_row.checksum contains checksum of read record.
+
+ RETURN
+ 0 ok
+ # Error code
+*/
+
+int _ma_read_block_record2(MARIA_HA *info, uchar *record,
+ uchar *data, uchar *end_of_data)
+{
+ MARIA_SHARE *share= info->s;
+ uchar *field_length_data, *blob_buffer, *start_of_data;
+ uint flag, null_bytes, cur_null_bytes, row_extents, field_lengths;
+ my_bool found_blob= 0;
+ MARIA_EXTENT_CURSOR extent;
+ MARIA_COLUMNDEF *column, *end_column;
+ MARIA_ROW *cur_row= &info->cur_row;
+ DBUG_ENTER("_ma_read_block_record2");
+
+ LINT_INIT(field_length_data);
+ LINT_INIT(blob_buffer);
+
+ start_of_data= data;
+ flag= (uint) (uchar) data[0];
+ cur_null_bytes= share->base.original_null_bytes;
+ null_bytes= share->base.null_bytes;
+ cur_row->head_length= (uint) (end_of_data - data);
+ cur_row->full_page_count= cur_row->tail_count= 0;
+ cur_row->blob_length= 0;
+
+ /* Skip trans header (for now, until we have MVCC csupport) */
+ data+= total_header_size[(flag & PRECALC_HEADER_BITMASK)];
+ if (flag & ROW_FLAG_NULLS_EXTENDED)
+ cur_null_bytes+= data[-1];
+
+ row_extents= 0;
+ if (flag & ROW_FLAG_EXTENTS)
+ {
+ uint row_extent_size;
+ /*
+ Record is split over many data pages.
+ Get number of extents and first extent
+ */
+ get_key_length(row_extents, data);
+ cur_row->extents_count= row_extents;
+ row_extent_size= row_extents * ROW_EXTENT_SIZE;
+ if (cur_row->extents_buffer_length < row_extent_size &&
+ _ma_alloc_buffer(&cur_row->extents,
+ &cur_row->extents_buffer_length,
+ row_extent_size))
+ DBUG_RETURN(my_errno);
+ memcpy(cur_row->extents, data, ROW_EXTENT_SIZE);
+ data+= ROW_EXTENT_SIZE;
+ init_extent(&extent, cur_row->extents, row_extents,
+ cur_row->tail_positions);
+ }
+ else
+ {
+ cur_row->extents_count= 0;
+ (*cur_row->tail_positions)= 0;
+ extent.page_count= 0;
+ extent.extent_count= 1;
+ }
+ extent.first_extent= 1;
+
+ field_lengths= 0;
+ if (share->base.max_field_lengths)
+ {
+ get_key_length(field_lengths, data);
+ cur_row->field_lengths_length= field_lengths;
+#ifdef SANITY_CHECKS
+ if (field_lengths > share->base.max_field_lengths)
+ goto err;
+#endif
+ }
+
+ if (share->calc_checksum)
+ cur_row->checksum= (uint) (uchar) *data++;
+ /* data now points on null bits */
+ memcpy(record, data, cur_null_bytes);
+ if (unlikely(cur_null_bytes != null_bytes))
+ {
+ /*
+ This only happens if we have added more NULL columns with
+ ALTER TABLE and are fetching an old, not yet modified old row
+ */
+ bzero(record + cur_null_bytes, (uint) (null_bytes - cur_null_bytes));
+ }
+ data+= null_bytes;
+ /* We copy the empty bits to be able to use them for delete/update */
+ memcpy(cur_row->empty_bits, data, share->base.pack_bytes);
+ data+= share->base.pack_bytes;
+
+ /* TODO: Use field offsets, instead of just skipping them */
+ data+= share->base.field_offsets * FIELD_OFFSET_SIZE;
+
+ /*
+ Read row extents (note that first extent was already read into
+ cur_row->extents above)
+ */
+ if (row_extents > 1)
+ {
+ if (read_long_data(info, cur_row->extents + ROW_EXTENT_SIZE,
+ (row_extents - 1) * ROW_EXTENT_SIZE,
+ &extent, &data, &end_of_data))
+ DBUG_RETURN(my_errno);
+ }
+
+ /*
+ Data now points to start of fixed length field data that can't be null
+ or 'empty'. Note that these fields can't be split over blocks.
+ */
+ for (column= share->columndef,
+ end_column= column + share->base.fixed_not_null_fields;
+ column < end_column; column++)
+ {
+ uint column_length= column->length;
+ if (data + column_length > end_of_data &&
+ !(data= read_next_extent(info, &extent, &end_of_data)))
+ goto err;
+ memcpy(record + column->offset, data, column_length);
+ data+= column_length;
+ }
+
+ /* Read array of field lengths. This may be stored in several extents */
+ if (field_lengths)
+ {
+ field_length_data= cur_row->field_lengths;
+ if (read_long_data(info, field_length_data, field_lengths, &extent,
+ &data, &end_of_data))
+ DBUG_RETURN(my_errno);
+ }
+
+ /* Read variable length data. Each of these may be split over many extents */
+ for (end_column= share->columndef + share->base.fields;
+ column < end_column; column++)
+ {
+ enum en_fieldtype type= column->type;
+ uchar *field_pos= record + column->offset;
+ /* First check if field is present in record */
+ if ((record[column->null_pos] & column->null_bit) ||
+ (cur_row->empty_bits[column->empty_pos] & column->empty_bit))
+ {
+ bfill(record + column->offset, column->fill_length,
+ type == FIELD_SKIP_ENDSPACE ? ' ' : 0);
+ continue;
+ }
+ switch (type) {
+ case FIELD_NORMAL: /* Fixed length field */
+ case FIELD_SKIP_PRESPACE:
+ case FIELD_SKIP_ZERO: /* Fixed length field */
+ if (data + column->length > end_of_data &&
+ !(data= read_next_extent(info, &extent, &end_of_data)))
+ goto err;
+ memcpy(field_pos, data, column->length);
+ data+= column->length;
+ break;
+ case FIELD_SKIP_ENDSPACE: /* CHAR */
+ {
+ /* Char that is space filled */
+ uint length;
+ if (column->length <= 255)
+ length= (uint) (uchar) *field_length_data++;
+ else
+ {
+ length= uint2korr(field_length_data);
+ field_length_data+= 2;
+ }
+#ifdef SANITY_CHECKS
+ if (length > column->length)
+ goto err;
+#endif
+ if (read_long_data(info, field_pos, length, &extent, &data,
+ &end_of_data))
+ DBUG_RETURN(my_errno);
+ bfill(field_pos + length, column->length - length, ' ');
+ break;
+ }
+ case FIELD_VARCHAR:
+ {
+ ulong length;
+ if (column->length <= 256)
+ {
+ length= (uint) (uchar) (*field_pos++= *field_length_data++);
+ }
+ else
+ {
+ length= uint2korr(field_length_data);
+ field_pos[0]= field_length_data[0];
+ field_pos[1]= field_length_data[1];
+ field_pos+= 2;
+ field_length_data+= 2;
+ }
+ if (read_long_data(info, field_pos, length, &extent, &data,
+ &end_of_data))
+ DBUG_RETURN(my_errno);
+ break;
+ }
+ case FIELD_BLOB:
+ {
+ uint column_size_length= column->length - portable_sizeof_char_ptr;
+ ulong blob_length= _ma_calc_blob_length(column_size_length,
+ field_length_data);
+
+ if (!found_blob)
+ {
+ /* Calculate total length for all blobs */
+ ulong blob_lengths= 0;
+ uchar *length_data= field_length_data;
+ MARIA_COLUMNDEF *blob_field= column;
+
+ found_blob= 1;
+ for (; blob_field < end_column; blob_field++)
+ {
+ uint size_length;
+ if ((record[blob_field->null_pos] & blob_field->null_bit) ||
+ (cur_row->empty_bits[blob_field->empty_pos] &
+ blob_field->empty_bit))
+ continue;
+ size_length= blob_field->length - portable_sizeof_char_ptr;
+ blob_lengths+= _ma_calc_blob_length(size_length, length_data);
+ length_data+= size_length;
+ }
+ cur_row->blob_length= blob_lengths;
+ DBUG_PRINT("info", ("Total blob length: %lu", blob_lengths));
+ if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ blob_lengths))
+ DBUG_RETURN(my_errno);
+ blob_buffer= info->rec_buff;
+ }
+
+ memcpy(field_pos, field_length_data, column_size_length);
+ memcpy_fixed(field_pos + column_size_length, (uchar *) &blob_buffer,
+ sizeof(char*));
+ field_length_data+= column_size_length;
+
+ /*
+ After we have read one extent, then each blob is in it's own extent
+ */
+ if (!extent.first_extent || (ulong) (end_of_data - data) < blob_length)
+ end_of_data= data; /* Force read of next extent */
+
+ if (read_long_data(info, blob_buffer, blob_length, &extent, &data,
+ &end_of_data))
+ DBUG_RETURN(my_errno);
+ blob_buffer+= blob_length;
+ break;
+ }
+ default:
+#ifdef EXTRA_DEBUG
+ DBUG_ASSERT(0); /* purecov: deadcode */
+#endif
+ goto err;
+ }
+ continue;
+ }
+
+ if (row_extents)
+ {
+ DBUG_PRINT("info", ("Row read: page_count: %u extent_count: %u",
+ extent.page_count, extent.extent_count));
+ *extent.tail_positions= 0; /* End marker */
+ if (extent.page_count)
+ goto err;
+ if (extent.extent_count > 1)
+ {
+ if (_ma_check_if_zero(extent.extent + ROW_EXTENT_SIZE,
+ (extent.extent_count-1) * ROW_EXTENT_SIZE))
+ {
+ DBUG_PRINT("error", ("Data in extent is not zero"));
+ DBUG_DUMP("extent", extent.extent + ROW_EXTENT_SIZE,
+ (extent.extent_count-1) * ROW_EXTENT_SIZE);
+ goto err;
+ }
+ }
+ }
+ else
+ {
+ DBUG_PRINT("info", ("Row read"));
+ /*
+ data should normally point to end_of_date. The only exception is if
+ the row is very short in which case we allocated 'min_block_length' data
+ for allowing the row to expand.
+ */
+ if (data != end_of_data && (uint) (end_of_data - start_of_data) >
+ info->s->base.min_block_length)
+ goto err;
+ }
+
+ info->update|= HA_STATE_AKTIV; /* We have an active record */
+ DBUG_RETURN(0);
+
+err:
+ /* Something was wrong with data on record */
+ DBUG_PRINT("error", ("Found record with wrong data"));
+ DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD));
+}
+
+
+/** @brief Read positions to tail blocks and full blocks
+
+ @fn read_row_extent_info()
+ @param info Handler
+
+ @notes
+ This function is a simpler version of _ma_read_block_record2()
+ The data about the used pages is stored in info->cur_row.
+
+ @return Status
+ @retval 0 ok
+ @retval 1 Error. my_errno contains error number
+*/
+
+static my_bool read_row_extent_info(MARIA_HA *info, uchar *buff,
+ uint record_number)
+{
+ MARIA_SHARE *share= info->s;
+ uchar *data, *end_of_data;
+ uint flag, row_extents, field_lengths;
+ MARIA_EXTENT_CURSOR extent;
+ DBUG_ENTER("read_row_extent_info");
+
+ if (!(data= get_record_position(buff, share->block_size,
+ record_number, &end_of_data)))
+ DBUG_RETURN(1); /* Wrong in record */
+
+ flag= (uint) (uchar) data[0];
+ /* Skip trans header */
+ data+= total_header_size[(flag & PRECALC_HEADER_BITMASK)];
+
+ row_extents= 0;
+ if (flag & ROW_FLAG_EXTENTS)
+ {
+ uint row_extent_size;
+ /*
+ Record is split over many data pages.
+ Get number of extents and first extent
+ */
+ get_key_length(row_extents, data);
+ row_extent_size= row_extents * ROW_EXTENT_SIZE;
+ if (info->cur_row.extents_buffer_length < row_extent_size &&
+ _ma_alloc_buffer(&info->cur_row.extents,
+ &info->cur_row.extents_buffer_length,
+ row_extent_size))
+ DBUG_RETURN(1);
+ memcpy(info->cur_row.extents, data, ROW_EXTENT_SIZE);
+ data+= ROW_EXTENT_SIZE;
+ init_extent(&extent, info->cur_row.extents, row_extents,
+ info->cur_row.tail_positions);
+ extent.first_extent= 1;
+ }
+ else
+ (*info->cur_row.tail_positions)= 0;
+ info->cur_row.extents_count= row_extents;
+
+ if (share->base.max_field_lengths)
+ get_key_length(field_lengths, data);
+
+ if (share->calc_checksum)
+ info->cur_row.checksum= (uint) (uchar) *data++;
+ if (row_extents > 1)
+ {
+ MARIA_RECORD_POS *tail_pos;
+ uchar *extents, *end;
+
+ data+= share->base.null_bytes;
+ data+= share->base.pack_bytes;
+ data+= share->base.field_offsets * FIELD_OFFSET_SIZE;
+
+ /*
+ Read row extents (note that first extent was already read into
+ info->cur_row.extents above)
+ Lock tails with write lock as we will delete them later.
+ */
+ extent.lock_for_tail_pages= PAGECACHE_LOCK_LEFT_WRITELOCKED;
+ if (read_long_data(info, info->cur_row.extents + ROW_EXTENT_SIZE,
+ (row_extents - 1) * ROW_EXTENT_SIZE,
+ &extent, &data, &end_of_data))
+ DBUG_RETURN(1);
+
+ /* Update tail_positions with pointer to tails */
+ tail_pos= info->cur_row.tail_positions;
+ for (extents= info->cur_row.extents, end= extents+ row_extents;
+ extents < end;
+ extents += ROW_EXTENT_SIZE)
+ {
+ ulonglong page= uint5korr(extents);
+ uint page_count= uint2korr(extents + ROW_EXTENT_PAGE_SIZE);
+ if (page_count & TAIL_BIT)
+ *(tail_pos++)= ma_recordpos(page, (page_count & ~TAIL_BIT));
+ }
+ *tail_pos= 0; /* End marker */
+ }
+ DBUG_RETURN(0);
+}
+
+
+
+/*
+ Read a record based on record position
+
+ @fn _ma_read_block_record()
+ @param info Maria handler
+ @param record Store record here
+ @param record_pos Record position
+
+ @return Status
+ @retval 0 ok
+ @retval # Error number
+*/
+
+int _ma_read_block_record(MARIA_HA *info, uchar *record,
+ MARIA_RECORD_POS record_pos)
+{
+ uchar *data, *end_of_data, *buff;
+ uint offset;
+ uint block_size= info->s->block_size;
+ DBUG_ENTER("_ma_read_block_record");
+ DBUG_PRINT("enter", ("rowid: %lu", (long) record_pos));
+
+ offset= ma_recordpos_to_dir_entry(record_pos);
+
+ DBUG_ASSERT(info->s->pagecache->block_size == block_size);
+ if (!(buff= pagecache_read(info->s->pagecache,
+ &info->dfile, ma_recordpos_to_page(record_pos), 0,
+ info->buff, info->s->page_type,
+ PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
+ DBUG_RETURN(my_errno);
+ DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == HEAD_PAGE);
+ if (!(data= get_record_position(buff, block_size, offset, &end_of_data)))
+ {
+ DBUG_PRINT("error", ("Wrong directory entry in data block"));
+ my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */
+ DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
+ }
+ DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data));
+}
+
+
+/* compare unique constraint between stored rows */
+
+my_bool _ma_cmp_block_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ const uchar *record, MARIA_RECORD_POS pos)
+{
+ uchar *org_rec_buff, *old_record;
+ size_t org_rec_buff_size;
+ int error;
+ DBUG_ENTER("_ma_cmp_block_unique");
+
+ if (!(old_record= my_alloca(info->s->base.reclength)))
+ DBUG_RETURN(1);
+
+ /* Don't let the compare destroy blobs that may be in use */
+ org_rec_buff= info->rec_buff;
+ org_rec_buff_size= info->rec_buff_size;
+ if (info->s->base.blobs)
+ {
+ /* Force realloc of record buffer*/
+ info->rec_buff= 0;
+ info->rec_buff_size= 0;
+ }
+ error= _ma_read_block_record(info, old_record, pos);
+ if (!error)
+ error= _ma_unique_comp(def, record, old_record, def->null_are_equal);
+ if (info->s->base.blobs)
+ {
+ my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR));
+ info->rec_buff= org_rec_buff;
+ info->rec_buff_size= org_rec_buff_size;
+ }
+ DBUG_PRINT("exit", ("result: %d", error));
+ my_afree(old_record);
+ DBUG_RETURN(error != 0);
+}
+
+
+/****************************************************************************
+ Table scan
+****************************************************************************/
+
+/*
+ Allocate buffers for table scan
+
+ SYNOPSIS
+ _ma_scan_init_block_record(MARIA_HA *info)
+
+ IMPLEMENTATION
+ We allocate one buffer for the current bitmap and one buffer for the
+ current page
+
+ RETURN
+ 0 ok
+ 1 error (couldn't allocate memory or disk error)
+*/
+
+my_bool _ma_scan_init_block_record(MARIA_HA *info)
+{
+ DBUG_ENTER("_ma_scan_init_block_record");
+ /*
+ bitmap_buff may already be allocated if this is the second call to
+ rnd_init() without a rnd_end() in between, see sql/handler.h
+ */
+ if (!(info->scan.bitmap_buff ||
+ ((info->scan.bitmap_buff=
+ (uchar *) my_malloc(info->s->block_size * 2, MYF(MY_WME))))))
+ DBUG_RETURN(1);
+ info->scan.page_buff= info->scan.bitmap_buff + info->s->block_size;
+ info->scan.bitmap_end= info->scan.bitmap_buff + info->s->bitmap.total_size;
+
+ /* Set scan variables to get _ma_scan_block() to start with reading bitmap */
+ info->scan.number_of_rows= 0;
+ info->scan.bitmap_pos= info->scan.bitmap_end;
+ info->scan.bitmap_page= (ulong) - (long) info->s->bitmap.pages_covered;
+ /*
+ We have to flush bitmap as we will read the bitmap from the page cache
+ while scanning rows
+ */
+ DBUG_RETURN(_ma_bitmap_flush(info->s));
+}
+
+
+/* Free buffers allocated by _ma_scan_block_init() */
+
+void _ma_scan_end_block_record(MARIA_HA *info)
+{
+ DBUG_ENTER("_ma_scan_end_block_record");
+ my_free(info->scan.bitmap_buff, MYF(MY_ALLOW_ZERO_PTR));
+ info->scan.bitmap_buff= 0;
+ if (info->scan_save)
+ {
+ my_free(info->scan_save, MYF(0));
+ info->scan_save= 0;
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ @brief Save current scan position
+
+ @note
+ For the moment we can only remember one position, but this is
+ good enough for MySQL usage
+
+ @Warning
+ When this function is called, we assume that the thread is not deleting
+ or updating the current row before ma_scan_restore_block_record()
+ is called!
+
+ @return
+ @retval 0 ok
+ @retval HA_ERR_WRONG_IN_RECORD Could not allocate memory to hold position
+*/
+
+int _ma_scan_remember_block_record(MARIA_HA *info,
+ MARIA_RECORD_POS *lastpos)
+{
+ uchar *bitmap_buff;
+ DBUG_ENTER("_ma_scan_remember_block_record");
+ if (!(info->scan_save))
+ {
+ if (!(info->scan_save= my_malloc(ALIGN_SIZE(sizeof(*info->scan_save)) +
+ info->s->block_size * 2,
+ MYF(MY_WME))))
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+ info->scan_save->bitmap_buff= ((uchar*) info->scan_save +
+ ALIGN_SIZE(sizeof(*info->scan_save)));
+ }
+ /* Point to the last read row */
+ *lastpos= info->cur_row.nextpos - 1;
+ info->scan.dir+= DIR_ENTRY_SIZE;
+
+ /* Remember used bitmap and used head page */
+ bitmap_buff= info->scan_save->bitmap_buff;
+ memcpy(info->scan_save, &info->scan, sizeof(*info->scan_save));
+ info->scan_save->bitmap_buff= bitmap_buff;
+ memcpy(bitmap_buff, info->scan.bitmap_buff, info->s->block_size * 2);
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief restore scan block it's original values
+
+ @note
+ In theory we could swap bitmap buffers instead of copy them.
+ For the moment we don't do that because there are variables pointing
+ inside the buffers and it's a bit of hassle to either make them relative
+ or repoint them.
+*/
+
+void _ma_scan_restore_block_record(MARIA_HA *info,
+ MARIA_RECORD_POS lastpos)
+{
+ uchar *bitmap_buff;
+ DBUG_ENTER("_ma_scan_restore_block_record");
+
+ info->cur_row.nextpos= lastpos;
+ bitmap_buff= info->scan.bitmap_buff;
+ memcpy(&info->scan, info->scan_save, sizeof(*info->scan_save));
+ info->scan.bitmap_buff= bitmap_buff;
+ memcpy(bitmap_buff, info->scan_save->bitmap_buff, info->s->block_size * 2);
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Read next record while scanning table
+
+ SYNOPSIS
+ _ma_scan_block_record()
+ info Maria handler
+ record Store found here
+ record_pos Value stored in info->cur_row.next_pos after last call
+ skip_deleted
+
+ NOTES
+ - One must have called mi_scan() before this
+ - In this version, we don't actually need record_pos, we as easily
+ use a variable in info->scan
+
+ IMPLEMENTATION
+ Current code uses a lot of goto's to separate the different kind of
+ states we may be in. This gives us a minimum of executed if's for
+ the normal cases. I tried several different ways to code this, but
+ the current one was in the end the most readable and fastest.
+
+ RETURN
+ 0 ok
+ # Error code
+*/
+
+int _ma_scan_block_record(MARIA_HA *info, uchar *record,
+ MARIA_RECORD_POS record_pos,
+ my_bool skip_deleted __attribute__ ((unused)))
+{
+ uint block_size;
+ my_off_t filepos;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_scan_block_record");
+
+restart_record_read:
+ /* Find next row in current page */
+ if (likely(record_pos < info->scan.number_of_rows))
+ {
+ uint length, offset;
+ uchar *data, *end_of_data;
+
+ while (!(offset= uint2korr(info->scan.dir)))
+ {
+ info->scan.dir-= DIR_ENTRY_SIZE;
+ record_pos++;
+#ifdef SANITY_CHECKS
+ if (info->scan.dir < info->scan.dir_end)
+ {
+ DBUG_ASSERT(0);
+ goto err;
+ }
+#endif
+ }
+ /* found row */
+ info->cur_row.lastpos= info->scan.row_base_page + record_pos;
+ info->cur_row.nextpos= record_pos + 1;
+ data= info->scan.page_buff + offset;
+ length= uint2korr(info->scan.dir + 2);
+ end_of_data= data + length;
+ info->scan.dir-= DIR_ENTRY_SIZE; /* Point to previous row */
+#ifdef SANITY_CHECKS
+ if (end_of_data > info->scan.dir_end ||
+ offset < PAGE_HEADER_SIZE || length < share->base.min_block_length)
+ {
+ DBUG_ASSERT(0);
+ goto err;
+ }
+#endif
+ DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos));
+ DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data));
+ }
+
+ /* Find next head page in current bitmap */
+restart_bitmap_scan:
+ block_size= share->block_size;
+ if (likely(info->scan.bitmap_pos < info->scan.bitmap_end))
+ {
+ uchar *data= info->scan.bitmap_pos;
+ longlong bits= info->scan.bits;
+ uint bit_pos= info->scan.bit_pos;
+
+ do
+ {
+ while (likely(bits))
+ {
+ uint pattern= bits & 7;
+ bits >>= 3;
+ bit_pos++;
+ if (pattern > 0 && pattern <= 4)
+ {
+ /* Found head page; Read it */
+ ulong page;
+ info->scan.bitmap_pos= data;
+ info->scan.bits= bits;
+ info->scan.bit_pos= bit_pos;
+ page= (info->scan.bitmap_page + 1 +
+ (data - info->scan.bitmap_buff) / 6 * 16 + bit_pos - 1);
+ info->scan.row_base_page= ma_recordpos(page, 0);
+ if (!(pagecache_read(share->pagecache,
+ &info->dfile,
+ page, 0, info->scan.page_buff,
+ share->page_type,
+ PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
+ DBUG_RETURN(my_errno);
+ if (((info->scan.page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) !=
+ HEAD_PAGE))
+ {
+ /*
+ This may happen if someone has been deleting all rows
+ from a page since we read the bitmap, so it may be ok.
+ Print warning in debug log and continue.
+ */
+ DBUG_PRINT("warning",
+ ("Found page of type %d when expecting head page",
+ (info->scan.page_buff[PAGE_TYPE_OFFSET] &
+ PAGE_TYPE_MASK)));
+ continue;
+ }
+ if ((info->scan.number_of_rows=
+ (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) == 0)
+ {
+ DBUG_PRINT("error", ("Wrong page header"));
+ DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD));
+ }
+ DBUG_PRINT("info", ("Page %lu has %u rows",
+ (ulong) page, info->scan.number_of_rows));
+ info->scan.dir= (info->scan.page_buff + block_size -
+ PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE);
+ info->scan.dir_end= (info->scan.dir -
+ (info->scan.number_of_rows - 1) *
+ DIR_ENTRY_SIZE);
+ record_pos= 0;
+ goto restart_record_read;
+ }
+ }
+ for (data+= 6; data < info->scan.bitmap_end; data+= 6)
+ {
+ bits= uint6korr(data);
+ /* Skip not allocated pages and blob / full tail pages */
+ if (bits && bits != LL(07777777777777777))
+ break;
+ }
+ bit_pos= 0;
+ } while (data < info->scan.bitmap_end);
+ }
+
+ /* Read next bitmap */
+ info->scan.bitmap_page+= share->bitmap.pages_covered;
+ filepos= (my_off_t) info->scan.bitmap_page * block_size;
+ if (unlikely(filepos >= info->state->data_file_length))
+ {
+ DBUG_PRINT("info", ("Found end of file"));
+ DBUG_RETURN((my_errno= HA_ERR_END_OF_FILE));
+ }
+ DBUG_PRINT("info", ("Reading bitmap at %lu",
+ (ulong) info->scan.bitmap_page));
+ if (!(pagecache_read(share->pagecache, &info->s->bitmap.file,
+ info->scan.bitmap_page,
+ 0, info->scan.bitmap_buff, PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
+ DBUG_RETURN(my_errno);
+ /* Skip scanning 'bits' in bitmap scan code */
+ info->scan.bitmap_pos= info->scan.bitmap_buff - 6;
+ info->scan.bits= 0;
+ goto restart_bitmap_scan;
+
+err:
+ DBUG_PRINT("error", ("Wrong data on page"));
+ DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD));
+}
+
+
+/*
+ Compare a row against a stored one
+
+ NOTES
+ Not implemented, as block record is not supposed to be used in a shared
+ global environment
+*/
+
+my_bool _ma_compare_block_record(MARIA_HA *info __attribute__ ((unused)),
+ const uchar *record __attribute__ ((unused)))
+{
+ return 0;
+}
+
+
+#ifndef DBUG_OFF
+
+static void _ma_print_directory(uchar *buff, uint block_size)
+{
+ uint max_entry= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET], row= 0;
+ uint end_of_prev_row= PAGE_HEADER_SIZE;
+ uchar *dir, *end;
+
+ dir= dir_entry_pos(buff, block_size, max_entry-1);
+ end= dir_entry_pos(buff, block_size, 0);
+
+ DBUG_LOCK_FILE;
+ fprintf(DBUG_FILE,"Directory dump (pos:length):\n");
+
+ for (row= 1; dir <= end ; end-= DIR_ENTRY_SIZE, row++)
+ {
+ uint offset= uint2korr(end);
+ uint length= uint2korr(end+2);
+ fprintf(DBUG_FILE, " %4u:%4u", offset, offset ? length : 0);
+ if (!(row % (80/12)))
+ fputc('\n', DBUG_FILE);
+ if (offset)
+ {
+ DBUG_ASSERT(offset >= end_of_prev_row);
+ end_of_prev_row= offset + length;
+ }
+ }
+ fputc('\n', DBUG_FILE);
+ fflush(DBUG_FILE);
+ DBUG_UNLOCK_FILE;
+}
+#endif /* DBUG_OFF */
+
+
+/*
+ Store an integer with simple packing
+
+ SYNOPSIS
+ ma_store_integer()
+ to Store the packed integer here
+ nr Integer to store
+
+ NOTES
+ This is mostly used to store field numbers and lengths of strings.
+ We have to cast the result for the LL() becasue of a bug in Forte CC
+ compiler.
+
+ Packing used is:
+ nr < 251 is stored as is (in 1 byte)
+ Numbers that require 1-4 bytes are stored as char(250+byte_length), data
+ Bigger numbers are stored as 255, data as ulonglong (not yet done).
+
+ RETURN
+ Position in 'to' after the packed length
+*/
+
+uchar *ma_store_length(uchar *to, ulong nr)
+{
+ if (nr < 251)
+ {
+ *to=(uchar) nr;
+ return to+1;
+ }
+ if (nr < 65536)
+ {
+ if (nr <= 255)
+ {
+ to[0]= (uchar) 251;
+ to[1]= (uchar) nr;
+ return to+2;
+ }
+ to[0]= (uchar) 252;
+ int2store(to+1, nr);
+ return to+3;
+ }
+ if (nr < 16777216)
+ {
+ *to++= (uchar) 253;
+ int3store(to, nr);
+ return to+3;
+ }
+ *to++= (uchar) 254;
+ int4store(to, nr);
+ return to+4;
+}
+
+
+/* Calculate how many bytes needed to store a number */
+
+uint ma_calc_length_for_store_length(ulong nr)
+{
+ if (nr < 251)
+ return 1;
+ if (nr < 65536)
+ {
+ if (nr <= 255)
+ return 2;
+ return 3;
+ }
+ if (nr < 16777216)
+ return 4;
+ return 5;
+}
+
+
+/* Retrive a stored number */
+
+static ulong ma_get_length(uchar **packet)
+{
+ reg1 uchar *pos= *packet;
+ if (*pos < 251)
+ {
+ (*packet)++;
+ return (ulong) *pos;
+ }
+ if (*pos == 251)
+ {
+ (*packet)+= 2;
+ return (ulong) pos[1];
+ }
+ if (*pos == 252)
+ {
+ (*packet)+= 3;
+ return (ulong) uint2korr(pos+1);
+ }
+ if (*pos == 253)
+ {
+ (*packet)+= 4;
+ return (ulong) uint3korr(pos+1);
+ }
+ DBUG_ASSERT(*pos == 254);
+ (*packet)+= 5;
+ return (ulong) uint4korr(pos+1);
+}
+
+
+/*
+ Fill array with pointers to field parts to be stored in log for insert
+
+ SYNOPSIS
+ fill_insert_undo_parts()
+ info Maria handler
+ record Inserted row
+ log_parts Store pointers to changed memory areas here
+ log_parts_count See RETURN
+
+ NOTES
+ We have information in info->cur_row about the read row.
+
+ RETURN
+ length of data in log_parts.
+ log_parts_count contains number of used log_parts
+*/
+
+static size_t fill_insert_undo_parts(MARIA_HA *info, const uchar *record,
+ LEX_STRING *log_parts,
+ uint *log_parts_count)
+{
+ MARIA_SHARE *share= info->s;
+ MARIA_COLUMNDEF *column, *end_column;
+ uchar *field_lengths= info->cur_row.field_lengths;
+ size_t row_length;
+ MARIA_ROW *cur_row= &info->cur_row;
+ LEX_STRING *start_log_parts;
+ DBUG_ENTER("fill_insert_undo_parts");
+
+ start_log_parts= log_parts;
+
+ /* Store null bits */
+ log_parts->str= (char*) record;
+ log_parts->length= share->base.null_bytes;
+ row_length= log_parts->length;
+ log_parts++;
+
+ /* Stored bitmap over packed (zero length or all-zero fields) */
+ log_parts->str= info->cur_row.empty_bits;
+ log_parts->length= share->base.pack_bytes;
+ row_length+= log_parts->length;
+ log_parts++;
+
+ if (share->base.max_field_lengths)
+ {
+ /* Store length of all not empty char, varchar and blob fields */
+ log_parts->str= field_lengths-2;
+ log_parts->length= info->cur_row.field_lengths_length+2;
+ int2store(log_parts->str, info->cur_row.field_lengths_length);
+ row_length+= log_parts->length;
+ log_parts++;
+ }
+
+ if (share->base.blobs)
+ {
+ /* Store total blob length to make buffer allocation easier during undo */
+ log_parts->str= info->length_buff;
+ log_parts->length= (uint) (ma_store_length(log_parts->str,
+ info->cur_row.blob_length) -
+ (uchar*) log_parts->str);
+ row_length+= log_parts->length;
+ log_parts++;
+ }
+
+ /* Handle constant length fields that are always present */
+ for (column= share->columndef,
+ end_column= column+ share->base.fixed_not_null_fields;
+ column < end_column;
+ column++)
+ {
+ log_parts->str= (char*) record + column->offset;
+ log_parts->length= column->length;
+ row_length+= log_parts->length;
+ log_parts++;
+ }
+
+ /* Handle NULL fields and CHAR/VARCHAR fields */
+ for (end_column= share->columndef + share->base.fields - share->base.blobs;
+ column < end_column;
+ column++)
+ {
+ const uchar *column_pos;
+ size_t column_length;
+ if ((record[column->null_pos] & column->null_bit) ||
+ cur_row->empty_bits[column->empty_pos] & column->empty_bit)
+ continue;
+
+ column_pos= record+ column->offset;
+ column_length= column->length;
+
+ switch (column->type) {
+ case FIELD_CHECK:
+ case FIELD_NORMAL: /* Fixed length field */
+ case FIELD_ZERO:
+ case FIELD_SKIP_PRESPACE: /* Not packed */
+ case FIELD_SKIP_ZERO: /* Fixed length field */
+ break;
+ case FIELD_SKIP_ENDSPACE: /* CHAR */
+ {
+ if (column->length <= 255)
+ column_length= *field_lengths++;
+ else
+ {
+ column_length= uint2korr(field_lengths);
+ field_lengths+= 2;
+ }
+ break;
+ }
+ case FIELD_VARCHAR:
+ {
+ if (column->fill_length == 1)
+ column_length= *field_lengths;
+ else
+ column_length= uint2korr(field_lengths);
+ field_lengths+= column->fill_length;
+ column_pos+= column->fill_length;
+ break;
+ }
+ default:
+ DBUG_ASSERT(0);
+ }
+ log_parts->str= (char*) column_pos;
+ log_parts->length= column_length;
+ row_length+= log_parts->length;
+ log_parts++;
+ }
+
+ /* Add blobs */
+ for (end_column+= share->base.blobs; column < end_column; column++)
+ {
+ const uchar *field_pos= record + column->offset;
+ uint size_length= column->length - portable_sizeof_char_ptr;
+ ulong blob_length= _ma_calc_blob_length(size_length, field_pos);
+
+ /*
+ We don't have to check for null, as blob_length is guranteed to be 0
+ if the blob is null
+ */
+ if (blob_length)
+ {
+ char *blob_pos;
+ memcpy_fixed((uchar*) &blob_pos, record + column->offset + size_length,
+ sizeof(blob_pos));
+ log_parts->str= blob_pos;
+ log_parts->length= blob_length;
+ row_length+= log_parts->length;
+ log_parts++;
+ }
+ }
+ *log_parts_count= (log_parts - start_log_parts);
+ DBUG_RETURN(row_length);
+}
+
+
+/*
+ Fill array with pointers to field parts to be stored in log for update
+
+ SYNOPSIS
+ fill_update_undo_parts()
+ info Maria handler
+ oldrec Original row
+ newrec New row
+ log_parts Store pointers to changed memory areas here
+ log_parts_count See RETURN
+
+ IMPLEMENTATION
+ Format of undo record:
+
+ Fields are stored in same order as the field array.
+
+ Offset to changed field data (packed)
+
+ For each changed field
+ Fieldnumber (packed)
+ Length, if variable length field (packed)
+
+ For each changed field
+ Data
+
+ Packing is using ma_store_integer()
+
+ The reason we store field numbers & length separated from data (ie, not
+ after each other) is to get better cpu caching when we loop over
+ fields (as we probably don't have to access data for each field when we
+ want to read and old row through the undo log record).
+
+ As a special case, we use '255' for the field number of the null bitmap.
+
+ RETURN
+ length of data in log_parts.
+ log_parts_count contains number of used log_parts
+*/
+
+static size_t fill_update_undo_parts(MARIA_HA *info, const uchar *oldrec,
+ const uchar *newrec,
+ LEX_STRING *log_parts,
+ uint *log_parts_count)
+{
+ MARIA_SHARE *share= info->s;
+ MARIA_COLUMNDEF *column, *end_column;
+ MARIA_ROW *old_row= &info->cur_row, *new_row= &info->new_row;
+ uchar *field_data, *start_field_data;
+ uchar *old_field_lengths= old_row->field_lengths;
+ uchar *new_field_lengths= new_row->field_lengths;
+ size_t row_length= 0;
+ uint field_lengths;
+ LEX_STRING *start_log_parts;
+ my_bool new_column_is_empty;
+ DBUG_ENTER("fill_update_undo_parts");
+
+ start_log_parts= log_parts;
+
+ /*
+ First log part is for number of fields, field numbers and lengths
+ The +4 is to reserve place for the number of changed fields.
+ */
+ start_field_data= field_data= info->update_field_data + 4;
+ log_parts++;
+
+ if (memcmp(oldrec, newrec, share->base.null_bytes))
+ {
+ /* Store changed null bits */
+ *field_data++= (uchar) 255; /* Special case */
+ log_parts->str= (char*) oldrec;
+ log_parts->length= share->base.null_bytes;
+ row_length= log_parts->length;
+ log_parts++;
+ }
+
+ /* Handle constant length fields */
+ for (column= share->columndef,
+ end_column= column+ share->base.fixed_not_null_fields;
+ column < end_column;
+ column++)
+ {
+ if (memcmp(oldrec + column->offset, newrec + column->offset,
+ column->length))
+ {
+ field_data= ma_store_length(field_data,
+ (uint) (column - share->columndef));
+ log_parts->str= (char*) oldrec + column->offset;
+ log_parts->length= column->length;
+ row_length+= column->length;
+ log_parts++;
+ }
+ }
+
+ /* Handle the rest: NULL fields and CHAR/VARCHAR fields and BLOB's */
+ for (end_column= share->columndef + share->base.fields;
+ column < end_column;
+ column++)
+ {
+ const uchar *new_column_pos, *old_column_pos;
+ size_t new_column_length, old_column_length;
+
+ /* First check if old column is null or empty */
+ if (oldrec[column->null_pos] & column->null_bit)
+ {
+ /*
+ It's safe to skip this one as either the new column is also null
+ (no change) or the new_column is not null, in which case the null-bit
+ maps differed and we have already stored the null bitmap.
+ */
+ continue;
+ }
+ if (old_row->empty_bits[column->empty_pos] & column->empty_bit)
+ {
+ if (new_row->empty_bits[column->empty_pos] & column->empty_bit)
+ continue; /* Both are empty; skip */
+
+ /* Store null length column */
+ field_data= ma_store_length(field_data,
+ (uint) (column - share->columndef));
+ field_data= ma_store_length(field_data, 0);
+ continue;
+ }
+ /*
+ Remember if the 'new' value is empty (as in this case we must always
+ log the original value
+ */
+ new_column_is_empty= ((newrec[column->null_pos] & column->null_bit) ||
+ (new_row->empty_bits[column->empty_pos] &
+ column->empty_bit));
+
+ old_column_pos= oldrec + column->offset;
+ new_column_pos= newrec + column->offset;
+ old_column_length= new_column_length= column->length;
+
+ switch (column->type) {
+ case FIELD_CHECK:
+ case FIELD_NORMAL: /* Fixed length field */
+ case FIELD_ZERO:
+ case FIELD_SKIP_PRESPACE: /* Not packed */
+ case FIELD_SKIP_ZERO: /* Fixed length field */
+ break;
+ case FIELD_VARCHAR:
+ new_column_length--; /* Skip length prefix */
+ old_column_pos+= column->fill_length;
+ new_column_pos+= column->fill_length;
+ /* Fall through */
+ case FIELD_SKIP_ENDSPACE: /* CHAR */
+ {
+ if (new_column_length <= 255)
+ {
+ old_column_length= *old_field_lengths++;
+ if (!new_column_is_empty)
+ new_column_length= *new_field_lengths++;
+ }
+ else
+ {
+ old_column_length= uint2korr(old_field_lengths);
+ old_field_lengths+= 2;
+ if (!new_column_is_empty)
+ {
+ new_column_length= uint2korr(new_field_lengths);
+ new_field_lengths+= 2;
+ }
+ }
+ break;
+ }
+ case FIELD_BLOB:
+ {
+ uint size_length= column->length - portable_sizeof_char_ptr;
+ old_column_length= _ma_calc_blob_length(size_length, old_column_pos);
+ memcpy_fixed((uchar*) &old_column_pos,
+ oldrec + column->offset + size_length,
+ sizeof(old_column_pos));
+ if (!new_column_is_empty)
+ {
+ new_column_length= _ma_calc_blob_length(size_length, new_column_pos);
+ memcpy_fixed((uchar*) &new_column_pos,
+ newrec + column->offset + size_length,
+ sizeof(old_column_pos));
+ }
+ break;
+ }
+ default:
+ DBUG_ASSERT(0);
+ }
+
+ if (new_column_is_empty || new_column_length != old_column_length ||
+ memcmp(old_column_pos, new_column_pos, new_column_length))
+ {
+ field_data= ma_store_length(field_data,
+ (uint) (column - share->columndef));
+ field_data= ma_store_length(field_data, old_column_length);
+
+ log_parts->str= (char*) old_column_pos;
+ log_parts->length= old_column_length;
+ row_length+= old_column_length;
+ log_parts++;
+ }
+ }
+
+ *log_parts_count= (log_parts - start_log_parts);
+
+ /* Store length of field length data before the field/field_lengths */
+ field_lengths= (field_data - start_field_data);
+ start_log_parts->str= ((char*)
+ (start_field_data -
+ ma_calc_length_for_store_length(field_lengths)));
+ ma_store_length(start_log_parts->str, field_lengths);
+ start_log_parts->length= (size_t) ((char*) field_data -
+ start_log_parts->str);
+ row_length+= start_log_parts->length;
+ DBUG_RETURN(row_length);
+}
+
+/***************************************************************************
+ In-write hooks called under log's lock when log record is written
+***************************************************************************/
+
+/**
+ @brief Sets transaction's rec_lsn if needed
+
+ A transaction sometimes writes a REDO even before the page is in the
+ pagecache (example: brand new head or tail pages; full pages). So, if
+ Checkpoint happens just after the REDO write, it needs to know that the
+ REDO phase must start before this REDO. Scanning the pagecache cannot
+ tell that as the page is not in the cache. So, transaction sets its rec_lsn
+ to the REDO's LSN or somewhere before, and Checkpoint reads the
+ transaction's rec_lsn.
+
+ @return Operation status, always 0 (success)
+*/
+
+my_bool write_hook_for_redo(enum translog_record_type type
+ __attribute__ ((unused)),
+ TRN *trn, MARIA_HA *tbl_info
+ __attribute__ ((unused)),
+ LSN *lsn, void *hook_arg
+ __attribute__ ((unused)))
+{
+ /*
+ Users of dummy_transaction_object must keep this TRN clean as it
+ is used by many threads (like those manipulating non-transactional
+ tables). It might be dangerous if one user sets rec_lsn or some other
+ member and it is picked up by another user (like putting this rec_lsn into
+ a page of a non-transactional table); it's safer if all members stay 0. So
+ non-transactional log records (REPAIR, CREATE, RENAME, DROP) should not
+ call this hook; we trust them but verify ;)
+ */
+ DBUG_ASSERT(trn->trid != 0);
+ /*
+ If the hook stays so simple, it would be faster to pass
+ !trn->rec_lsn ? trn->rec_lsn : some_dummy_lsn
+ to translog_write_record(), like Monty did in his original code, and not
+ have a hook. For now we keep it like this.
+ */
+ if (trn->rec_lsn == 0)
+ trn->rec_lsn= *lsn;
+ return 0;
+}
+
+
+/**
+ @brief Sets transaction's undo_lsn, first_undo_lsn if needed
+
+ @return Operation status, always 0 (success)
+*/
+
+my_bool write_hook_for_undo(enum translog_record_type type
+ __attribute__ ((unused)),
+ TRN *trn, MARIA_HA *tbl_info
+ __attribute__ ((unused)),
+ LSN *lsn, void *hook_arg
+ __attribute__ ((unused)))
+{
+ DBUG_ASSERT(trn->trid != 0);
+ trn->undo_lsn= *lsn;
+ if (unlikely(LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn) == 0))
+ trn->first_undo_lsn=
+ trn->undo_lsn | LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn);
+ DBUG_ASSERT(tbl_info->state == &tbl_info->s->state.state);
+ return 0;
+ /*
+ when we implement purging, we will specialize this hook: UNDO_PURGE
+ records will additionally set trn->undo_purge_lsn
+ */
+}
+
+
+/**
+ @brief Sets the table's records count and checksum to 0, then calls the
+ generic REDO hook.
+
+ @return Operation status, always 0 (success)
+*/
+
+my_bool write_hook_for_redo_delete_all(enum translog_record_type type
+ __attribute__ ((unused)),
+ TRN *trn, MARIA_HA *tbl_info
+ __attribute__ ((unused)),
+ LSN *lsn, void *hook_arg)
+{
+ MARIA_SHARE *share= tbl_info->s;
+ DBUG_ASSERT(tbl_info->state == &tbl_info->s->state.state);
+ share->state.state.records= share->state.state.checksum= 0;
+ return write_hook_for_redo(type, trn, tbl_info, lsn, hook_arg);
+}
+
+
+/**
+ @brief Upates "records" and "checksum" and calls the generic UNDO hook
+
+ @return Operation status, always 0 (success)
+*/
+
+my_bool write_hook_for_undo_row_insert(enum translog_record_type type
+ __attribute__ ((unused)),
+ TRN *trn, MARIA_HA *tbl_info,
+ LSN *lsn, void *hook_arg)
+{
+ MARIA_SHARE *share= tbl_info->s;
+ share->state.state.records++;
+ share->state.state.checksum+= *(ha_checksum *)hook_arg;
+ return write_hook_for_undo(type, trn, tbl_info, lsn, hook_arg);
+}
+
+
+/**
+ @brief Upates "records" and calls the generic UNDO hook
+
+ @return Operation status, always 0 (success)
+*/
+
+my_bool write_hook_for_undo_row_delete(enum translog_record_type type
+ __attribute__ ((unused)),
+ TRN *trn, MARIA_HA *tbl_info,
+ LSN *lsn, void *hook_arg)
+{
+ MARIA_SHARE *share= tbl_info->s;
+ share->state.state.records--;
+ share->state.state.checksum+= *(ha_checksum *)hook_arg;
+ return write_hook_for_undo(type, trn, tbl_info, lsn, hook_arg);
+}
+
+
+/**
+ @brief Upates "records" and "checksum" and calls the generic UNDO hook
+
+ @return Operation status, always 0 (success)
+*/
+
+my_bool write_hook_for_undo_row_update(enum translog_record_type type
+ __attribute__ ((unused)),
+ TRN *trn, MARIA_HA *tbl_info,
+ LSN *lsn, void *hook_arg)
+{
+ MARIA_SHARE *share= tbl_info->s;
+ share->state.state.checksum+= *(ha_checksum *)hook_arg;
+ return write_hook_for_undo(type, trn, tbl_info, lsn, hook_arg);
+}
+
+
+/**
+ @brief Updates table's lsn_of_file_id.
+
+ @return Operation status, always 0 (success)
+*/
+
+my_bool write_hook_for_file_id(enum translog_record_type type
+ __attribute__ ((unused)),
+ TRN *trn
+ __attribute__ ((unused)),
+ MARIA_HA *tbl_info,
+ LSN *lsn __attribute__ ((unused)),
+ void *hook_arg
+ __attribute__ ((unused)))
+{
+ DBUG_ASSERT(cmp_translog_addr(tbl_info->s->lsn_of_file_id, *lsn) < 0);
+ tbl_info->s->lsn_of_file_id= *lsn;
+ return 0;
+}
+
+/***************************************************************************
+ Applying of REDO log records
+***************************************************************************/
+
+/*
+ Apply LOGREC_REDO_INSERT_ROW_HEAD & LOGREC_REDO_INSERT_ROW_TAIL
+
+ SYNOPSIS
+ _ma_apply_redo_insert_row_head_or_tail()
+ info Maria handler
+ lsn LSN to put on page
+ page_type HEAD_PAGE or TAIL_PAGE
+ header Header (without FILEID)
+ data Data to be put on page
+ data_length Length of data
+
+ RETURN
+ 0 ok
+ # Error number
+*/
+
+uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn,
+ uint page_type,
+ const uchar *header,
+ const uchar *data,
+ size_t data_length)
+{
+ MARIA_SHARE *share= info->s;
+ ulonglong page;
+ uint rownr, empty_space;
+ uint block_size= share->block_size;
+ uint rec_offset;
+ uchar *buff, *dir;
+ uint result;
+ MARIA_PINNED_PAGE page_link;
+ enum pagecache_page_lock unlock_method;
+ enum pagecache_page_pin unpin_method;
+ my_off_t end_of_page;
+ DBUG_ENTER("_ma_apply_redo_insert_row_head_or_tail");
+
+ page= page_korr(header);
+ rownr= dirpos_korr(header + PAGE_STORE_SIZE);
+
+ DBUG_PRINT("enter", ("rowid: %lu page: %lu rownr: %u data_length: %u",
+ (ulong) ma_recordpos(page, rownr),
+ (ulong) page, rownr, (uint) data_length));
+
+ end_of_page= (page + 1) * info->s->block_size;
+ if (end_of_page > info->state->data_file_length)
+ {
+ DBUG_PRINT("info", ("Enlarging data file from %lu to %lu",
+ (ulong) info->state->data_file_length,
+ (ulong) end_of_page));
+ /*
+ New page at end of file. Note that the test above is also positive if
+ data_file_length is not a multiple of block_size (system crashed while
+ writing the last page): in this case we just extend the last page and
+ fill it entirely with zeroes, then the REDO will put correct data on
+ it.
+ */
+ DBUG_ASSERT(rownr == 0);
+ if (rownr != 0)
+ goto err;
+ unlock_method= PAGECACHE_LOCK_WRITE;
+ unpin_method= PAGECACHE_PIN;
+
+ buff= info->keyread_buff;
+ info->keyread_buff_used= 1;
+ make_empty_page(info, buff, page_type);
+ empty_space= (block_size - PAGE_OVERHEAD_SIZE);
+ rec_offset= PAGE_HEADER_SIZE;
+ dir= buff+ block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE;
+ }
+ else
+ {
+ share->pagecache->readwrite_flags&= ~MY_WME;
+ buff= pagecache_read(share->pagecache, &info->dfile,
+ page, 0, 0,
+ PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
+ &page_link.link);
+ share->pagecache->readwrite_flags= share->pagecache->org_readwrite_flags;
+ if (!buff)
+ {
+ /* Skip errors when reading outside of file and uninitialized pages */
+ if (my_errno != HA_ERR_FILE_TOO_SHORT &&
+ my_errno != HA_ERR_WRONG_CRC)
+ {
+ /* Fatal disk error when reading page */
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ DBUG_RETURN(my_errno);
+ }
+ /* Create new page */
+ buff= pagecache_block_link_to_buffer(page_link.link);
+ buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE;
+ }
+ else if (lsn_korr(buff) >= lsn) /* Test if already applied */
+ {
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ /* Fix bitmap, just in case */
+ empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
+ if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space))
+ DBUG_RETURN(my_errno);
+ DBUG_RETURN(0);
+ }
+ unlock_method= PAGECACHE_LOCK_LEFT_WRITELOCKED;
+ unpin_method= PAGECACHE_PIN_LEFT_PINNED;
+
+ if (((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != page_type))
+ {
+ /*
+ This is a page that has been freed before and now should be
+ changed to new type.
+ */
+ DBUG_ASSERT(rownr == 0);
+ if (((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != BLOB_PAGE &&
+ (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != UNALLOCATED_PAGE) ||
+ rownr != 0)
+ goto err;
+ make_empty_page(info, buff, page_type);
+ empty_space= (block_size - PAGE_OVERHEAD_SIZE);
+ rec_offset= PAGE_HEADER_SIZE;
+ dir= buff+ block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE;
+ }
+ else
+ {
+ uint max_entry= (uint) buff[DIR_COUNT_OFFSET];
+
+ dir= dir_entry_pos(buff, block_size, rownr);
+ empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
+
+ if (max_entry <= rownr)
+ {
+ /* Add directory entry first in directory and data last on page */
+ DBUG_ASSERT(max_entry == rownr);
+ if (max_entry != rownr)
+ goto err;
+ rec_offset= (uint2korr(dir + DIR_ENTRY_SIZE) +
+ uint2korr(dir + DIR_ENTRY_SIZE +2));
+ if ((uint) (dir - buff) < rec_offset + data_length)
+ {
+ /* Create place for directory & data */
+ compact_page(buff, block_size, max_entry - 1, 0);
+ rec_offset= (uint2korr(dir + DIR_ENTRY_SIZE) +
+ uint2korr(dir + DIR_ENTRY_SIZE + 2));
+ empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
+ DBUG_ASSERT(!((uint) (dir - buff) < rec_offset + data_length));
+ if ((uint) (dir - buff) < rec_offset + data_length)
+ goto err;
+ }
+ buff[DIR_COUNT_OFFSET]= (uchar) max_entry+1;
+ int2store(dir, rec_offset);
+ empty_space-= DIR_ENTRY_SIZE;
+ }
+ else
+ {
+ uint length;
+ /*
+ Reuse old entry. This is empty if the command was an insert and
+ possible used if the command was an update.
+ */
+ if (extend_area_on_page(buff, dir, rownr, block_size,
+ data_length, &empty_space,
+ &rec_offset, &length))
+ goto err;
+ }
+ }
+ }
+ /* Copy data */
+ int2store(dir+2, data_length);
+ memcpy(buff + rec_offset, data, data_length);
+ empty_space-= data_length;
+ int2store(buff + EMPTY_SPACE_OFFSET, empty_space);
+
+ /*
+ If page was not read before, write it but keep it pinned.
+ We don't update its LSN When we have processed all REDOs for this page
+ in the current REDO's group, we will stamp page with UNDO's LSN
+ (if we stamped it now, a next REDO, in
+ this group, for this page, would be skipped) and unpin then.
+ */
+ result= 0;
+ if (unlock_method == PAGECACHE_LOCK_WRITE &&
+ pagecache_write(share->pagecache,
+ &info->dfile, page, 0,
+ buff, PAGECACHE_PLAIN_PAGE,
+ unlock_method, unpin_method,
+ PAGECACHE_WRITE_DELAY, &page_link.link,
+ LSN_IMPOSSIBLE))
+ result= my_errno;
+
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+
+ /* Fix bitmap */
+ if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space))
+ result= my_errno;
+
+ /*
+ Data page and bitmap page are in place, we can update data_file_length in
+ case we extended the file. We could not do it earlier: bitmap code tests
+ data_file_length to know if it has to create a new page or not.
+ */
+ set_if_bigger(info->state->data_file_length, end_of_page);
+ DBUG_RETURN(result);
+
+err:
+ if (unlock_method == PAGECACHE_LOCK_LEFT_WRITELOCKED)
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
+}
+
+
+/*
+ Apply LOGREC_REDO_PURGE_ROW_HEAD & LOGREC_REDO_PURGE_ROW_TAIL
+
+ SYNOPSIS
+ _ma_apply_redo_purge_row_head_or_tail()
+ info Maria handler
+ lsn LSN to put on page
+ page_type HEAD_PAGE or TAIL_PAGE
+ header Header (without FILEID)
+
+ NOTES
+ This function is very similar to delete_head_or_tail()
+
+ RETURN
+ 0 ok
+ # Error number
+*/
+
+uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn,
+ uint page_type,
+ const uchar *header)
+{
+ MARIA_SHARE *share= info->s;
+ ulonglong page;
+ uint rownr, empty_space;
+ uint block_size= share->block_size;
+ uchar *buff= info->keyread_buff;
+ int result;
+ MARIA_PINNED_PAGE page_link;
+ DBUG_ENTER("_ma_apply_redo_purge_row_head_or_tail");
+
+ page= page_korr(header);
+ rownr= dirpos_korr(header+PAGE_STORE_SIZE);
+ DBUG_PRINT("enter", ("rowid: %lu page: %lu rownr: %u",
+ (ulong) ma_recordpos(page, rownr),
+ (ulong) page, rownr));
+
+ info->keyread_buff_used= 1;
+
+ if (!(buff= pagecache_read(share->pagecache, &info->dfile,
+ page, 0, 0,
+ PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
+ &page_link.link)))
+ {
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ DBUG_RETURN(my_errno);
+ }
+
+ if (lsn_korr(buff) >= lsn)
+ {
+ /*
+ Already applied
+ Note that in case the page is not anymore a head or tail page
+ a future redo will fix the bitmap.
+ */
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+
+ if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == page_type)
+ {
+ empty_space= uint2korr(buff+EMPTY_SPACE_OFFSET);
+ if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE,
+ empty_space))
+ DBUG_RETURN(my_errno);
+ }
+ DBUG_RETURN(0);
+ }
+
+ DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == (uchar) page_type);
+
+ if (delete_dir_entry(buff, block_size, rownr, &empty_space) < 0)
+ goto err;
+
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+
+ result= 0;
+ /* This will work even if the page was marked as UNALLOCATED_PAGE */
+ if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space))
+ result= my_errno;
+
+ DBUG_RETURN(result);
+
+err:
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
+
+}
+
+
+/**
+ @brief Apply LOGREC_REDO_FREE_BLOCKS
+
+ @param info Maria handler
+ @param header Header (without FILEID)
+
+ @note It marks the pages free in the bitmap
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+uint _ma_apply_redo_free_blocks(MARIA_HA *info,
+ LSN lsn __attribute__((unused)),
+ const uchar *header)
+{
+ MARIA_SHARE *share= info->s;
+ uint ranges;
+ DBUG_ENTER("_ma_apply_redo_free_blocks");
+
+ ranges= pagerange_korr(header);
+ header+= PAGERANGE_STORE_SIZE;
+ DBUG_ASSERT(ranges > 0);
+
+ while (ranges--)
+ {
+ my_bool res;
+ uint page_range;
+ ulonglong page, start_page;
+
+ start_page= page= page_korr(header);
+ header+= PAGE_STORE_SIZE;
+ /* Page range may have this bit set to indicate a tail page */
+ page_range= pagerange_korr(header) & ~TAIL_BIT;
+ DBUG_ASSERT(page_range > 0);
+
+ header+= PAGERANGE_STORE_SIZE;
+
+ DBUG_PRINT("info", ("page: %lu pages: %u", (long) page, page_range));
+ DBUG_ASSERT((page_range & TAIL_BIT) == 0);
+
+ /** @todo leave bitmap lock to the bitmap code... */
+ pthread_mutex_lock(&share->bitmap.bitmap_lock);
+ res= _ma_bitmap_reset_full_page_bits(info, &share->bitmap, start_page,
+ page_range);
+ pthread_mutex_unlock(&share->bitmap.bitmap_lock);
+ if (res)
+ DBUG_RETURN(res);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Apply LOGREC_REDO_FREE_HEAD_OR_TAIL
+
+ @param info Maria handler
+ @param header Header (without FILEID)
+
+ @note It marks the page free in the bitmap, and sets the directory's count
+ to 0.
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+uint _ma_apply_redo_free_head_or_tail(MARIA_HA *info, LSN lsn,
+ const uchar *header)
+{
+ MARIA_SHARE *share= info->s;
+ uchar *buff;
+ ulonglong page;
+ MARIA_PINNED_PAGE page_link;
+ my_bool res;
+ DBUG_ENTER("_ma_apply_redo_free_head_or_tail");
+
+ page= page_korr(header);
+
+ if (!(buff= pagecache_read(share->pagecache,
+ &info->dfile,
+ page, 0, 0,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE, &page_link.link)))
+ {
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ DBUG_RETURN(1);
+ }
+ if (lsn_korr(buff) >= lsn)
+ {
+ /* Already applied */
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ }
+ else
+ {
+ buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE;
+#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
+ {
+ uint number_of_records= (uint) buff[DIR_COUNT_OFFSET];
+ uchar *dir= dir_entry_pos(buff, info->s->block_size,
+ number_of_records-1);
+ buff[DIR_FREE_OFFSET]= END_OF_DIR_FREE_LIST;
+ bzero(dir, number_of_records * DIR_ENTRY_SIZE);
+ }
+#endif
+
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+ }
+ /** @todo leave bitmap lock to the bitmap code... */
+ pthread_mutex_lock(&share->bitmap.bitmap_lock);
+ res= _ma_bitmap_reset_full_page_bits(info, &share->bitmap, page, 1);
+ pthread_mutex_unlock(&share->bitmap.bitmap_lock);
+ if (res)
+ DBUG_RETURN(res);
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Apply LOGREC_REDO_INSERT_ROW_BLOBS
+
+ @param info Maria handler
+ @param header Header (without FILEID)
+
+ @note Write full pages (full head & blob pages)
+
+ @return Operation status
+ @retval 0 OK
+ @retval !=0 Error
+*/
+
+uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info,
+ LSN lsn, const uchar *header)
+{
+ MARIA_SHARE *share= info->s;
+ const uchar *data;
+ uint data_size= FULL_PAGE_SIZE(info->s->block_size);
+ uint blob_count, ranges;
+ DBUG_ENTER("_ma_apply_redo_insert_row_blobs");
+
+ ranges= pagerange_korr(header);
+ header+= PAGERANGE_STORE_SIZE;
+ blob_count= pagerange_korr(header);
+ header+= PAGERANGE_STORE_SIZE;
+ DBUG_ASSERT(ranges >= blob_count);
+
+ data= (header + ranges * ROW_EXTENT_SIZE +
+ blob_count * (SUB_RANGE_SIZE + BLOCK_FILLER_SIZE));
+
+ while (blob_count--)
+ {
+ uint sub_ranges, empty_space;
+
+ sub_ranges= uint2korr(header);
+ header+= SUB_RANGE_SIZE;
+ empty_space= uint2korr(header);
+ header+= BLOCK_FILLER_SIZE;
+ DBUG_ASSERT(sub_ranges <= blob_count + 1 && empty_space < data_size);
+
+ while (sub_ranges--)
+ {
+ uint i;
+ uint res;
+ uint page_range;
+ ulonglong page, start_page;
+ uchar *buff;
+
+ start_page= page= page_korr(header);
+ header+= PAGE_STORE_SIZE;
+ page_range= pagerange_korr(header);
+ header+= PAGERANGE_STORE_SIZE;
+
+ for (i= page_range; i-- > 0 ; page++)
+ {
+ MARIA_PINNED_PAGE page_link;
+ enum pagecache_page_lock unlock_method;
+ enum pagecache_page_pin unpin_method;
+ uint length;
+
+ if (((page + 1) * info->s->block_size) >
+ info->state->data_file_length)
+ {
+ /* New page or half written page at end of file */
+ DBUG_PRINT("info", ("Enlarging data file from %lu to %lu",
+ (ulong) info->state->data_file_length,
+ (ulong) ((page + 1 ) * info->s->block_size)));
+ info->state->data_file_length= (page + 1) * info->s->block_size;
+ buff= info->keyread_buff;
+ info->keyread_buff_used= 1;
+ make_empty_page(info, buff, BLOB_PAGE);
+ unlock_method= PAGECACHE_LOCK_LEFT_UNLOCKED;
+ unpin_method= PAGECACHE_PIN_LEFT_UNPINNED;
+ }
+ else
+ {
+ share->pagecache->readwrite_flags&= ~MY_WME;
+ buff= pagecache_read(share->pagecache,
+ &info->dfile,
+ page, 0, 0,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE, &page_link.link);
+ share->pagecache->readwrite_flags= share->pagecache->
+ org_readwrite_flags;
+ if (!buff)
+ {
+ if (my_errno != HA_ERR_FILE_TOO_SHORT &&
+ my_errno != HA_ERR_WRONG_CRC)
+ {
+ /* If not read outside of file */
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ DBUG_RETURN(my_errno);
+ }
+ /*
+ Physical file was too short, create new page. It can be that
+ recovery started with a file with N pages, wrote page N+2 into
+ pagecache (increased data_file_length but not physical file
+ length), now reads page N+1: the read fails.
+ */
+ buff= pagecache_block_link_to_buffer(page_link.link);
+ make_empty_page(info, buff, BLOB_PAGE);
+ }
+ else
+ {
+ if (lsn_korr(buff) >= lsn)
+ {
+ /* Already applied */
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ continue;
+ }
+ }
+ unlock_method= PAGECACHE_LOCK_WRITE_UNLOCK;
+ unpin_method= PAGECACHE_UNPIN;
+ }
+
+ /*
+ Blob pages are never updated twice in same redo-undo chain, so
+ it's safe to update lsn for them here
+ */
+ lsn_store(buff, lsn);
+ buff[PAGE_TYPE_OFFSET]= BLOB_PAGE;
+
+ length= data_size;
+ if (i == 0 && sub_ranges == 0)
+ {
+ /* Last page may be only partly filled. */
+ length-= empty_space;
+#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
+ bzero(buff + info->s->block_size - PAGE_SUFFIX_SIZE - empty_space,
+ empty_space);
+#endif
+ }
+ memcpy(buff+ PAGE_TYPE_OFFSET + 1, data, length);
+ data+= length;
+ if (pagecache_write(share->pagecache,
+ &info->dfile, page, 0,
+ buff, PAGECACHE_PLAIN_PAGE,
+ unlock_method, unpin_method,
+ PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE))
+ DBUG_RETURN(my_errno);
+ }
+ /** @todo leave bitmap lock to the bitmap code... */
+ pthread_mutex_lock(&share->bitmap.bitmap_lock);
+ res= _ma_bitmap_set_full_page_bits(info, &share->bitmap, start_page,
+ page_range);
+ pthread_mutex_unlock(&share->bitmap.bitmap_lock);
+ if (res)
+ DBUG_RETURN(res);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/****************************************************************************
+ Applying of UNDO entries
+****************************************************************************/
+
+my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn,
+ const uchar *header)
+{
+ ulonglong page;
+ uint rownr;
+ uchar *buff;
+ my_bool res= 1;
+ MARIA_PINNED_PAGE page_link;
+ MARIA_SHARE *share= info->s;
+ ha_checksum checksum;
+ LSN lsn;
+ DBUG_ENTER("_ma_apply_undo_row_insert");
+
+ page= page_korr(header);
+ header+= PAGE_STORE_SIZE;
+ rownr= dirpos_korr(header);
+ header+= DIRPOS_STORE_SIZE;
+ DBUG_PRINT("enter", ("Page: %lu rownr: %u", (ulong) page, rownr));
+
+ if (!(buff= pagecache_read(share->pagecache,
+ &info->dfile, page, 0,
+ info->buff, share->page_type,
+ PAGECACHE_LOCK_WRITE,
+ &page_link.link)))
+ DBUG_RETURN(1);
+
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+
+ if (read_row_extent_info(info, buff, rownr))
+ DBUG_RETURN(1);
+
+ _ma_bitmap_flushable(share, 1);
+ if (delete_head_or_tail(info, page, rownr, 1, 1) ||
+ delete_tails(info, info->cur_row.tail_positions))
+ goto err;
+
+ if (info->cur_row.extents_count && free_full_pages(info, &info->cur_row))
+ goto err;
+
+ checksum= 0;
+ if (share->calc_checksum)
+ checksum= -ha_checksum_korr(header);
+ if (_ma_write_clr(info, undo_lsn, LOGREC_UNDO_ROW_INSERT,
+ share->calc_checksum != 0, checksum, &lsn, (void*) 0))
+ goto err;
+
+ res= 0;
+err:
+ _ma_bitmap_flushable(share, -1);
+ _ma_unpin_all_pages_and_finalize_row(info, lsn);
+ DBUG_RETURN(res);
+}
+
+
+/* Execute undo of a row delete (insert the row back somewhere) */
+
+my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn,
+ const uchar *header,
+ size_t header_length __attribute__((unused)))
+{
+ uchar *record;
+ const uchar *null_bits, *field_length_data;
+ MARIA_SHARE *share= info->s;
+ MARIA_ROW row;
+ uint *null_field_lengths;
+ ulong *blob_lengths;
+ MARIA_COLUMNDEF *column, *end_column;
+ my_bool res;
+ DBUG_ENTER("_ma_apply_undo_row_delete");
+
+ /*
+ Use cur row as a base; We need to make a copy as we will change
+ some buffers to point directly to 'header'
+ */
+ memcpy(&row, &info->cur_row, sizeof(row));
+ if (share->calc_checksum)
+ {
+ /*
+ We extract the checksum delta here, saving a recomputation in
+ allocate_and_write_block_record(). It's only an optimization.
+ */
+ row.checksum= - ha_checksum_korr(header);
+ header+= HA_CHECKSUM_STORE_SIZE;
+ }
+
+ null_field_lengths= row.null_field_lengths;
+ blob_lengths= row.blob_lengths;
+
+ /*
+ Fill in info->cur_row with information about the row, like in
+ calc_record_size(), to be used by write_block_record()
+ */
+
+ row.normal_length= row.char_length= row.varchar_length=
+ row.blob_length= row.extents_count= row.field_lengths_length= 0;
+
+ null_bits= header;
+ header+= share->base.null_bytes;
+ row.empty_bits= (uchar*) header;
+ header+= share->base.pack_bytes;
+ if (share->base.max_field_lengths)
+ {
+ row.field_lengths_length= uint2korr(header);
+ row.field_lengths= (uchar*) header + 2 ;
+ header+= 2 + row.field_lengths_length;
+ }
+ if (share->base.blobs)
+ row.blob_length= ma_get_length((uchar**) &header);
+
+ /* We need to build up a record (without blobs) in rec_buff */
+ if (!(record= my_malloc(share->base.reclength, MYF(MY_WME))))
+ DBUG_RETURN(1);
+
+ memcpy(record, null_bits, share->base.null_bytes);
+
+ /* Copy field information from header to record */
+
+ /* Handle constant length fields that are always present */
+ for (column= share->columndef,
+ end_column= column+ share->base.fixed_not_null_fields;
+ column < end_column;
+ column++)
+ {
+ memcpy(record + column->offset, header, column->length);
+ header+= column->length;
+ }
+
+ /* Handle NULL fields and CHAR/VARCHAR fields */
+ field_length_data= row.field_lengths;
+ for (end_column= share->columndef + share->base.fields;
+ column < end_column;
+ column++, null_field_lengths++)
+ {
+ if ((record[column->null_pos] & column->null_bit) ||
+ row.empty_bits[column->empty_pos] & column->empty_bit)
+ {
+ if (column->type != FIELD_BLOB)
+ *null_field_lengths= 0;
+ else
+ *blob_lengths++= 0;
+ if (share->calc_checksum)
+ bfill(record + column->offset, column->fill_length,
+ column->type == FIELD_SKIP_ENDSPACE ? ' ' : 0);
+ continue;
+ }
+ switch (column->type) {
+ case FIELD_CHECK:
+ case FIELD_NORMAL: /* Fixed length field */
+ case FIELD_ZERO:
+ case FIELD_SKIP_PRESPACE: /* Not packed */
+ case FIELD_SKIP_ZERO: /* Fixed length field */
+ row.normal_length+= column->length;
+ *null_field_lengths= column->length;
+ memcpy(record + column->offset, header, column->length);
+ header+= column->length;
+ break;
+ case FIELD_SKIP_ENDSPACE: /* CHAR */
+ {
+ uint length;
+ if (column->length <= 255)
+ length= (uint) *field_length_data++;
+ else
+ {
+ length= uint2korr(field_length_data);
+ field_length_data+= 2;
+ }
+ row.char_length+= length;
+ *null_field_lengths= length;
+ memcpy(record + column->offset, header, length);
+ if (share->calc_checksum)
+ bfill(record + column->offset + length, (column->length - length),
+ ' ');
+ header+= length;
+ break;
+ }
+ case FIELD_VARCHAR:
+ {
+ uint length;
+ uchar *field_pos= record + column->offset;
+
+ /* 256 is correct as this includes the length uchar */
+ if (column->fill_length == 1)
+ {
+ field_pos[0]= *field_length_data;
+ length= (uint) *field_length_data;
+ }
+ else
+ {
+ field_pos[0]= field_length_data[0];
+ field_pos[1]= field_length_data[1];
+ length= uint2korr(field_length_data);
+ }
+ field_length_data+= column->fill_length;
+ field_pos+= column->fill_length;
+ row.varchar_length+= length;
+ *null_field_lengths= length;
+ memcpy(field_pos, header, length);
+ header+= length;
+ break;
+ }
+ case FIELD_BLOB:
+ {
+ /* Copy length of blob and pointer to blob data to record */
+ uchar *field_pos= record + column->offset;
+ uint size_length= column->length - portable_sizeof_char_ptr;
+ ulong blob_length= _ma_calc_blob_length(size_length, field_length_data);
+
+ memcpy(field_pos, field_length_data, size_length);
+ field_length_data+= size_length;
+ memcpy(field_pos + size_length, &header, sizeof(&header));
+ header+= blob_length;
+ *blob_lengths++= blob_length;
+ row.blob_length+= blob_length;
+ break;
+ }
+ default:
+ DBUG_ASSERT(0);
+ }
+ }
+ row.head_length= (row.base_length +
+ share->base.fixed_not_null_fields_length +
+ row.field_lengths_length +
+ size_to_store_key_length(row.field_lengths_length) +
+ row.normal_length +
+ row.char_length + row.varchar_length);
+ row.total_length= (row.head_length + row.blob_length);
+ if (row.total_length < share->base.min_block_length)
+ row.total_length= share->base.min_block_length;
+
+ /* Row is now up to date. Time to insert the record */
+
+ res= allocate_and_write_block_record(info, record, &row, undo_lsn);
+ info->cur_row.lastpos= row.lastpos;
+ my_free(record, MYF(0));
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Execute undo of a row update
+
+ @fn _ma_apply_undo_row_update()
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool _ma_apply_undo_row_update(MARIA_HA *info, LSN undo_lsn,
+ const uchar *header,
+ size_t header_length __attribute__((unused)))
+{
+ ulonglong page;
+ uint rownr, field_length_header;
+ MARIA_SHARE *share= info->s;
+ const uchar *field_length_data, *field_length_data_end;
+ uchar *current_record, *orig_record;
+ int error= 1;
+ MARIA_RECORD_POS record_pos;
+ ha_checksum checksum_delta;
+ DBUG_ENTER("_ma_apply_undo_row_update");
+ LINT_INIT(checksum_delta);
+
+ page= page_korr(header);
+ header+= PAGE_STORE_SIZE;
+ rownr= dirpos_korr(header);
+ header+= DIRPOS_STORE_SIZE;
+ record_pos= ma_recordpos(page, rownr);
+ info->cur_row.lastpos= record_pos; /* For key insert */
+ DBUG_PRINT("enter", ("Page: %lu rownr: %u", (ulong) page, rownr));
+
+ if (share->calc_checksum)
+ {
+ checksum_delta= ha_checksum_korr(header);
+ header+= HA_CHECKSUM_STORE_SIZE;
+ }
+ /*
+ Set header to point to old field values, generated by
+ fill_update_undo_parts()
+ */
+ field_length_header= ma_get_length((uchar**) &header);
+ field_length_data= header;
+ header+= field_length_header;
+ field_length_data_end= header;
+
+ /* Allocate buffer for current row & original row */
+ if (!(current_record= my_malloc(share->base.reclength * 2, MYF(MY_WME))))
+ DBUG_RETURN(1);
+ orig_record= current_record+ share->base.reclength;
+
+ /* Read current record */
+ if (_ma_read_block_record(info, current_record, record_pos))
+ goto err;
+
+ if (*field_length_data == 255)
+ {
+ /* Bitmap changed */
+ field_length_data++;
+ memcpy(orig_record, header, share->base.null_bytes);
+ header+= share->base.null_bytes;
+ }
+ else
+ memcpy(orig_record, current_record, share->base.null_bytes);
+ bitmap_clear_all(&info->changed_fields);
+
+ while (field_length_data < field_length_data_end)
+ {
+ uint field_nr= ma_get_length((uchar**) &field_length_data), field_length;
+ MARIA_COLUMNDEF *column= share->columndef + field_nr;
+ uchar *orig_field_pos= orig_record + column->offset;
+
+ bitmap_set_bit(&info->changed_fields, field_nr);
+ if (field_nr >= share->base.fixed_not_null_fields)
+ {
+ if (!(field_length= ma_get_length((uchar**) &field_length_data)))
+ {
+ /* Null field or empty field */
+ bfill(orig_field_pos, column->fill_length,
+ column->type == FIELD_SKIP_ENDSPACE ? ' ' : 0);
+ continue;
+ }
+ }
+ else
+ field_length= column->length;
+
+ switch (column->type) {
+ case FIELD_CHECK:
+ case FIELD_NORMAL: /* Fixed length field */
+ case FIELD_ZERO:
+ case FIELD_SKIP_PRESPACE: /* Not packed */
+ memcpy(orig_field_pos, header, column->length);
+ header+= column->length;
+ break;
+ case FIELD_SKIP_ZERO: /* Number */
+ case FIELD_SKIP_ENDSPACE: /* CHAR */
+ {
+ uint diff;
+ memcpy(orig_field_pos, header, field_length);
+ if ((diff= (column->length - field_length)))
+ bfill(orig_field_pos + column->length - diff, diff,
+ column->type == FIELD_SKIP_ENDSPACE ? ' ' : 0);
+ header+= field_length;
+ }
+ break;
+ case FIELD_VARCHAR:
+ if (column->length <= 256)
+ {
+ *orig_field_pos++= (uchar) field_length;
+ }
+ else
+ {
+ int2store(orig_field_pos, field_length);
+ orig_field_pos+= 2;
+ }
+ memcpy(orig_field_pos, header, field_length);
+ header+= field_length;
+ break;
+ case FIELD_BLOB:
+ {
+ uint size_length= column->length - portable_sizeof_char_ptr;
+ _ma_store_blob_length(orig_field_pos, size_length, field_length);
+ memcpy_fixed(orig_field_pos + size_length, &header, sizeof(header));
+ header+= field_length;
+ break;
+ }
+ default:
+ DBUG_ASSERT(0);
+ }
+ }
+ copy_not_changed_fields(info, &info->changed_fields,
+ orig_record, current_record);
+
+ if (share->calc_checksum)
+ {
+ info->new_row.checksum= checksum_delta +
+ (info->cur_row.checksum= (*share->calc_checksum)(info, orig_record));
+ /* verify that record's content is sane */
+ DBUG_ASSERT(info->new_row.checksum ==
+ (*share->calc_checksum)(info, current_record));
+ }
+
+ /* Now records are up to date, execute the update to original values */
+ if (_ma_update_block_record2(info, record_pos, current_record, orig_record,
+ undo_lsn))
+ goto err;
+
+ error= 0;
+err:
+ my_free(current_record, MYF(0));
+ DBUG_RETURN(error);
+}
diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h
new file mode 100644
index 00000000000..a834b4788df
--- /dev/null
+++ b/storage/maria/ma_blockrec.h
@@ -0,0 +1,253 @@
+/* Copyright (C) 2007 Michael Widenius
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Storage of records in block
+*/
+
+#define LSN_SIZE 7
+#define DIR_COUNT_SIZE 1 /* Stores number of rows on page */
+#define DIR_FREE_SIZE 1 /* Pointer to first free dir entry */
+#define EMPTY_SPACE_SIZE 2 /* Stores empty space on page */
+#define PAGE_TYPE_SIZE 1
+#define PAGE_SUFFIX_SIZE 4 /* Bytes for checksum */
+#define PAGE_HEADER_SIZE (LSN_SIZE + DIR_COUNT_SIZE + DIR_FREE_SIZE +\
+ EMPTY_SPACE_SIZE + PAGE_TYPE_SIZE)
+#define PAGE_OVERHEAD_SIZE (PAGE_HEADER_SIZE + DIR_ENTRY_SIZE + \
+ PAGE_SUFFIX_SIZE)
+#define BLOCK_RECORD_POINTER_SIZE 6
+
+#define FULL_PAGE_SIZE(block_size) ((block_size) - LSN_SIZE - \
+ PAGE_TYPE_SIZE - PAGE_SUFFIX_SIZE)
+
+#define ROW_EXTENT_PAGE_SIZE 5
+#define ROW_EXTENT_COUNT_SIZE 2
+#define SUB_RANGE_SIZE 2
+#define BLOCK_FILLER_SIZE 2
+#define ROW_EXTENT_SIZE (ROW_EXTENT_PAGE_SIZE + ROW_EXTENT_COUNT_SIZE)
+#define TAIL_BIT 0x8000 /* Bit in page_count to signify tail */
+/* Number of extents reserved MARIA_BITMAP_BLOCKS to store head part */
+#define ELEMENTS_RESERVED_FOR_MAIN_PART 4
+/* This is just used to prealloc a dynamic array */
+#define AVERAGE_BLOB_SIZE 1024L*1024L
+/* Number of pages to store continuous blob parts */
+#define BLOB_SEGMENT_MIN_SIZE 128
+
+/* Fields before 'row->null_field_lengths' used by find_where_to_split_row */
+#define EXTRA_LENGTH_FIELDS 3
+
+/* Size for the different parts in the row header (and head page) */
+#define FLAG_SIZE 1
+#define VERPTR_SIZE 7
+#define DIR_ENTRY_SIZE 4
+#define FIELD_OFFSET_SIZE 2 /* size of pointers to field starts */
+
+/* Minimum header size needed for a new row */
+#define BASE_ROW_HEADER_SIZE FLAG_SIZE
+#define TRANS_ROW_EXTRA_HEADER_SIZE TRANSID_SIZE
+
+#define PAGE_TYPE_MASK 7
+enum en_page_type { UNALLOCATED_PAGE, HEAD_PAGE, TAIL_PAGE, BLOB_PAGE, MAX_PAGE_TYPE };
+
+#define PAGE_TYPE_OFFSET LSN_SIZE
+#define DIR_COUNT_OFFSET (LSN_SIZE+PAGE_TYPE_SIZE)
+#define DIR_FREE_OFFSET (DIR_COUNT_OFFSET+DIR_COUNT_SIZE)
+#define EMPTY_SPACE_OFFSET (DIR_FREE_OFFSET+DIR_FREE_SIZE)
+
+#define PAGE_CAN_BE_COMPACTED 128 /* Bit in PAGE_TYPE */
+
+/* Bits used for flag uchar (one byte, first in record) */
+#define ROW_FLAG_TRANSID 1
+#define ROW_FLAG_VER_PTR 2
+#define ROW_FLAG_DELETE_TRANSID 4
+#define ROW_FLAG_NULLS_EXTENDED 8
+#define ROW_FLAG_EXTENTS 128
+#define ROW_FLAG_ALL (1+2+4+8+128)
+
+/******** Variables that affects how data pages are utilized ********/
+
+/* Minium size of tail segment */
+#define MIN_TAIL_SIZE 32
+
+/*
+ Fixed length part of Max possible header size; See row data structure
+ table in ma_blockrec.c.
+*/
+#define MAX_FIXED_HEADER_SIZE (FLAG_SIZE + 3 + ROW_EXTENT_SIZE + 3)
+#define TRANS_MAX_FIXED_HEADER_SIZE (MAX_FIXED_HEADER_SIZE + \
+ TRANSID_SIZE + VERPTR_SIZE + \
+ TRANSID_SIZE)
+
+/* We use 1 uchar in record header to store number of directory entries */
+#define MAX_ROWS_PER_PAGE 255
+#define END_OF_DIR_FREE_LIST ((uchar) 255)
+
+/* Bits for MARIA_BITMAP_BLOCKS->used */
+/* We stored data on disk in the block */
+#define BLOCKUSED_USED 1
+/* Bitmap on disk is block->org_bitmap_value ; Happens only on update */
+#define BLOCKUSED_USE_ORG_BITMAP 2
+/* We stored tail data on disk for the block */
+#define BLOCKUSED_TAIL 4
+
+/******* defines that affects allocation (density) of data *******/
+
+/*
+ If the tail part (from the main block or a blob) would use more than 75 % of
+ the size of page, store the tail on a full page instead of a shared
+ tail page.
+*/
+#define MAX_TAIL_SIZE(block_size) ((block_size) *3 / 4)
+
+/* Don't allocate memory for too many row extents on the stack */
+#define ROW_EXTENTS_ON_STACK 32
+
+/* Functions to convert MARIA_RECORD_POS to/from page:offset */
+
+static inline MARIA_RECORD_POS ma_recordpos(ulonglong page, uint dir_entry)
+{
+ DBUG_ASSERT(dir_entry <= 255);
+ return (MARIA_RECORD_POS) ((page << 8) | dir_entry);
+}
+
+static inline my_off_t ma_recordpos_to_page(MARIA_RECORD_POS record_pos)
+{
+ return record_pos >> 8;
+}
+
+static inline uint ma_recordpos_to_dir_entry(MARIA_RECORD_POS record_pos)
+{
+ return (uint) (record_pos & 255);
+}
+
+static inline uchar *dir_entry_pos(uchar *buff, uint block_size, uint pos)
+{
+ return (buff + block_size - DIR_ENTRY_SIZE * pos - PAGE_SUFFIX_SIZE -
+ DIR_ENTRY_SIZE);
+}
+
+/* ma_blockrec.c */
+void _ma_init_block_record_data(void);
+my_bool _ma_once_init_block_record(MARIA_SHARE *share, File dfile);
+my_bool _ma_once_end_block_record(MARIA_SHARE *share);
+my_bool _ma_init_block_record(MARIA_HA *info);
+void _ma_end_block_record(MARIA_HA *info);
+my_bool _ma_check_if_zero(uchar *pos, uint length);
+
+my_bool _ma_update_block_record(MARIA_HA *info, MARIA_RECORD_POS pos,
+ const uchar *oldrec, const uchar *newrec);
+my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record);
+int _ma_read_block_record(MARIA_HA *info, uchar *record,
+ MARIA_RECORD_POS record_pos);
+int _ma_read_block_record2(MARIA_HA *info, uchar *record,
+ uchar *data, uchar *end_of_data);
+int _ma_scan_block_record(MARIA_HA *info, uchar *record,
+ MARIA_RECORD_POS, my_bool);
+my_bool _ma_cmp_block_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ const uchar *record, MARIA_RECORD_POS pos);
+my_bool _ma_scan_init_block_record(MARIA_HA *info);
+void _ma_scan_end_block_record(MARIA_HA *info);
+int _ma_scan_remember_block_record(MARIA_HA *info,
+ MARIA_RECORD_POS *lastpos);
+void _ma_scan_restore_block_record(MARIA_HA *info,
+ MARIA_RECORD_POS lastpos);
+
+MARIA_RECORD_POS _ma_write_init_block_record(MARIA_HA *info,
+ const uchar *record);
+my_bool _ma_write_block_record(MARIA_HA *info, const uchar *record);
+my_bool _ma_write_abort_block_record(MARIA_HA *info);
+my_bool _ma_compare_block_record(register MARIA_HA *info,
+ register const uchar *record);
+
+/* ma_bitmap.c */
+my_bool _ma_bitmap_init(MARIA_SHARE *share, File file);
+my_bool _ma_bitmap_end(MARIA_SHARE *share);
+my_bool _ma_bitmap_flush(MARIA_SHARE *share);
+my_bool _ma_bitmap_flush_all(MARIA_SHARE *share);
+void _ma_bitmap_reset_cache(MARIA_SHARE *share);
+my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row,
+ MARIA_BITMAP_BLOCKS *result_blocks);
+my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks);
+my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const uchar *extents,
+ uint count);
+my_bool _ma_bitmap_set(MARIA_HA *info, ulonglong pos, my_bool head,
+ uint empty_space);
+my_bool _ma_bitmap_reset_full_page_bits(MARIA_HA *info,
+ MARIA_FILE_BITMAP *bitmap,
+ ulonglong page, uint page_count);
+my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info,
+ MARIA_FILE_BITMAP *bitmap,
+ ulonglong page, uint page_count);
+uint _ma_free_size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size);
+my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *new_row,
+ ulonglong page, uint free_size,
+ MARIA_BITMAP_BLOCKS *result_blocks);
+my_bool _ma_check_bitmap_data(MARIA_HA *info,
+ enum en_page_type page_type, ulonglong page,
+ uint empty_space, uint *bitmap_pattern);
+my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info,
+ enum en_page_type page_type,
+ ulonglong page,
+ uint *bitmap_pattern);
+void _ma_bitmap_delete_all(MARIA_SHARE *share);
+int _ma_bitmap_create_first(MARIA_SHARE *share);
+void _ma_bitmap_flushable(MARIA_SHARE *share, int non_flushable_inc);
+#ifndef DBUG_OFF
+void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data,
+ ulonglong page);
+#endif
+
+uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn,
+ uint page_type,
+ const uchar *header,
+ const uchar *data,
+ size_t data_length);
+uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn,
+ uint page_type,
+ const uchar *header);
+uint _ma_apply_redo_free_blocks(MARIA_HA *info, LSN lsn,
+ const uchar *header);
+uint _ma_apply_redo_free_head_or_tail(MARIA_HA *info, LSN lsn,
+ const uchar *header);
+uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info,
+ LSN lsn, const uchar *header);
+my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn,
+ const uchar *header);
+my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn,
+ const uchar *header, size_t length);
+my_bool _ma_apply_undo_row_update(MARIA_HA *info, LSN undo_lsn,
+ const uchar *header, size_t length);
+
+my_bool write_hook_for_redo(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info, LSN *lsn,
+ void *hook_arg);
+my_bool write_hook_for_undo(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info, LSN *lsn,
+ void *hook_arg);
+my_bool write_hook_for_redo_delete_all(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info,
+ LSN *lsn, void *hook_arg);
+my_bool write_hook_for_undo_row_insert(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info,
+ LSN *lsn, void *hook_arg);
+my_bool write_hook_for_undo_row_delete(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info,
+ LSN *lsn, void *hook_arg);
+my_bool write_hook_for_undo_row_update(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info,
+ LSN *lsn, void *hook_arg);
+my_bool write_hook_for_file_id(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info, LSN *lsn,
+ void *hook_arg);
diff --git a/storage/maria/ma_cache.c b/storage/maria/ma_cache.c
new file mode 100644
index 00000000000..0cd4a356f70
--- /dev/null
+++ b/storage/maria/ma_cache.c
@@ -0,0 +1,107 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Functions for read record cacheing with maria
+ Used for reading dynamic/compressed records from datafile.
+
+ Can fetch data directly from file (outside cache),
+ if reading a small chunk straight before the cached part (with possible
+ overlap).
+
+ Can be explicitly asked not to use cache (by not setting READING_NEXT in
+ flag) - useful for occasional out-of-cache reads, when the next read is
+ expected to hit the cache again.
+
+ Allows "partial read" errors in the record header (when READING_HEADER flag
+ is set) - unread part is bzero'ed
+
+ Note: out-of-cache reads are enabled for shared IO_CACHE's too,
+ as these reads will be cached by OS cache (and my_pread is always atomic)
+*/
+
+
+#include "maria_def.h"
+
+int _ma_read_cache(IO_CACHE *info, uchar *buff, my_off_t pos, uint length,
+ int flag)
+{
+ uint read_length,in_buff_length;
+ my_off_t offset;
+ uchar *in_buff_pos;
+ DBUG_ENTER("_ma_read_cache");
+
+ if (pos < info->pos_in_file)
+ {
+ read_length=length;
+ if ((my_off_t) read_length > (my_off_t) (info->pos_in_file-pos))
+ read_length=(uint) (info->pos_in_file-pos);
+ info->seek_not_done=1;
+ if (my_pread(info->file,buff,read_length,pos,MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ if (!(length-=read_length))
+ DBUG_RETURN(0);
+ pos+=read_length;
+ buff+=read_length;
+ }
+ if (pos >= info->pos_in_file &&
+ (offset= (my_off_t) (pos - info->pos_in_file)) <
+ (my_off_t) (info->read_end - info->request_pos))
+ {
+ in_buff_pos=info->request_pos+(uint) offset;
+ in_buff_length= min(length,(size_t) (info->read_end-in_buff_pos));
+ memcpy(buff,info->request_pos+(uint) offset,(size_t) in_buff_length);
+ if (!(length-=in_buff_length))
+ DBUG_RETURN(0);
+ pos+=in_buff_length;
+ buff+=in_buff_length;
+ }
+ else
+ in_buff_length=0;
+ if (flag & READING_NEXT)
+ {
+ if (pos != (info->pos_in_file +
+ (uint) (info->read_end - info->request_pos)))
+ {
+ info->pos_in_file=pos; /* Force start here */
+ info->read_pos=info->read_end=info->request_pos; /* Everything used */
+ info->seek_not_done=1;
+ }
+ else
+ info->read_pos=info->read_end; /* All block used */
+ if (!(*info->read_function)(info,buff,length))
+ DBUG_RETURN(0);
+ read_length=info->error;
+ }
+ else
+ {
+ info->seek_not_done=1;
+ if ((read_length=my_pread(info->file,buff,length,pos,MYF(0))) == length)
+ DBUG_RETURN(0);
+ }
+ if (!(flag & READING_HEADER) || (int) read_length == -1 ||
+ read_length+in_buff_length < 3)
+ {
+ DBUG_PRINT("error",
+ ("Error %d reading next-multi-part block (Got %d bytes)",
+ my_errno, (int) read_length));
+ if (!my_errno || my_errno == HA_ERR_FILE_TOO_SHORT)
+ my_errno= HA_ERR_WRONG_IN_RECORD;
+ DBUG_RETURN(1);
+ }
+ bzero(buff+read_length,MARIA_BLOCK_INFO_HEADER_LENGTH - in_buff_length -
+ read_length);
+ DBUG_RETURN(0);
+} /* _ma_read_cache */
diff --git a/storage/maria/ma_changed.c b/storage/maria/ma_changed.c
new file mode 100644
index 00000000000..4d0964581f6
--- /dev/null
+++ b/storage/maria/ma_changed.c
@@ -0,0 +1,33 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Check if somebody has changed table since last check. */
+
+#include "maria_def.h"
+
+ /* Return 0 if table isn't changed */
+
+int maria_is_changed(MARIA_HA *info)
+{
+ int result;
+ DBUG_ENTER("maria_is_changed");
+ if (fast_ma_readinfo(info))
+ DBUG_RETURN(-1);
+ VOID(_ma_writeinfo(info,0));
+ result=(int) info->data_changed;
+ info->data_changed=0;
+ DBUG_PRINT("exit",("result: %d",result));
+ DBUG_RETURN(result);
+}
diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c
new file mode 100644
index 00000000000..940d021a049
--- /dev/null
+++ b/storage/maria/ma_check.c
@@ -0,0 +1,5959 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Describe, check and repair of MARIA tables */
+
+/*
+ About checksum calculation.
+
+ There are two types of checksums. Table checksum and row checksum.
+
+ Row checksum is an additional uchar at the end of dynamic length
+ records. It must be calculated if the table is configured for them.
+ Otherwise they must not be used. The variable
+ MYISAM_SHARE::calc_checksum determines if row checksums are used.
+ MI_INFO::checksum is used as temporary storage during row handling.
+ For parallel repair we must assure that only one thread can use this
+ variable. There is no problem on the write side as this is done by one
+ thread only. But when checking a record after read this could go
+ wrong. But since all threads read through a common read buffer, it is
+ sufficient if only one thread checks it.
+
+ Table checksum is an eight uchar value in the header of the index file.
+ It can be calculated even if row checksums are not used. The variable
+ MI_CHECK::glob_crc is calculated over all records.
+ MI_SORT_PARAM::calc_checksum determines if this should be done. This
+ variable is not part of MI_CHECK because it must be set per thread for
+ parallel repair. The global glob_crc must be changed by one thread
+ only. And it is sufficient to calculate the checksum once only.
+*/
+
+#include "ma_ftdefs.h"
+#include <myisamchk.h>
+#include <stdarg.h>
+#include <my_getopt.h>
+#ifdef HAVE_SYS_VADVISE_H
+#include <sys/vadvise.h>
+#endif
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+#include "ma_rt_index.h"
+#include "ma_blockrec.h"
+#include "trnman_public.h"
+
+/* Functions defined in this file */
+
+static int check_k_link(HA_CHECK *param, MARIA_HA *info, my_off_t next_link);
+static int chk_index(HA_CHECK *param, MARIA_HA *info,MARIA_KEYDEF *keyinfo,
+ my_off_t page, uchar *buff, ha_rows *keys,
+ ha_checksum *key_checksum, uint level);
+static uint isam_key_length(MARIA_HA *info,MARIA_KEYDEF *keyinfo);
+static ha_checksum calc_checksum(ha_rows count);
+static int writekeys(MARIA_SORT_PARAM *sort_param);
+static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
+ MARIA_KEYDEF *keyinfo,
+ my_off_t pagepos, File new_file);
+static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
+static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
+static int sort_get_next_record(MARIA_SORT_PARAM *sort_param);
+static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
+ const void *b);
+static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
+ const uchar *a);
+static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a);
+static my_off_t get_record_for_key(MARIA_HA *info,MARIA_KEYDEF *keyinfo,
+ const uchar *key);
+static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
+ reg1 SORT_KEY_BLOCKS *key_block,
+ const uchar *key, my_off_t prev_block);
+static int sort_delete_record(MARIA_SORT_PARAM *sort_param);
+/*static int _ma_flush_pending_blocks(HA_CHECK *param);*/
+static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
+ uint buffer_length);
+static ha_checksum maria_byte_checksum(const uchar *buf, uint length);
+static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share);
+static void restore_data_file_type(MARIA_SHARE *share);
+static void change_data_file_descriptor(MARIA_HA *info, File new_file);
+static void unuse_data_file_descriptor(MARIA_HA *info);
+static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
+ MARIA_HA *info, uchar *record);
+static void copy_data_file_state(MARIA_STATE_INFO *to,
+ MARIA_STATE_INFO *from);
+static int write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info);
+static void report_keypage_fault(HA_CHECK *param, my_off_t position);
+my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file);
+
+
+void maria_chk_init(HA_CHECK *param)
+{
+ bzero((uchar*) param,sizeof(*param));
+ param->opt_follow_links=1;
+ param->keys_in_use= ~(ulonglong) 0;
+ param->search_after_block=HA_OFFSET_ERROR;
+ param->auto_increment_value= 0;
+ param->use_buffers=USE_BUFFER_INIT;
+ param->read_buffer_length=READ_BUFFER_INIT;
+ param->write_buffer_length=READ_BUFFER_INIT;
+ param->sort_buffer_length=SORT_BUFFER_INIT;
+ param->sort_key_blocks=BUFFERS_WHEN_SORTING;
+ param->tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
+ param->myf_rw=MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL);
+ param->start_check_pos=0;
+ param->max_record_length= LONGLONG_MAX;
+ param->pagecache_block_size= KEY_CACHE_BLOCK_SIZE;
+ param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
+}
+
+ /* Check the status flags for the table */
+
+int maria_chk_status(HA_CHECK *param, register MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+
+ if (maria_is_crashed_on_repair(info))
+ _ma_check_print_warning(param,
+ "Table is marked as crashed and last repair failed");
+ else if (maria_is_crashed(info))
+ _ma_check_print_warning(param,
+ "Table is marked as crashed");
+ if (share->state.open_count != (uint) (share->global_changed ? 1 : 0))
+ {
+ /* Don't count this as a real warning, as check can correct this ! */
+ uint save=param->warning_printed;
+ _ma_check_print_warning(param,
+ share->state.open_count==1 ?
+ "%d client is using or hasn't closed the table properly" :
+ "%d clients are using or haven't closed the table properly",
+ share->state.open_count);
+ /* If this will be fixed by the check, forget the warning */
+ if (param->testflag & T_UPDATE_STATE)
+ param->warning_printed=save;
+ }
+ return 0;
+}
+
+/*
+ Check delete links in row data
+*/
+
+int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, uint test_flag)
+{
+ MARIA_SHARE *share= info->s;
+ reg2 ha_rows i;
+ uint delete_link_length;
+ my_off_t empty,next_link,old_link;
+ char buff[22],buff2[22];
+ DBUG_ENTER("maria_chk_del");
+
+ LINT_INIT(old_link);
+
+ if (share->data_file_type == BLOCK_RECORD)
+ DBUG_RETURN(0); /* No delete links here */
+
+ param->record_checksum=0;
+ delete_link_length=((share->options & HA_OPTION_PACK_RECORD) ? 20 :
+ share->rec_reflength+1);
+
+ if (!(test_flag & T_SILENT))
+ puts("- check record delete-chain");
+
+ next_link=share->state.dellink;
+ if (info->state->del == 0)
+ {
+ if (test_flag & T_VERBOSE)
+ {
+ puts("No recordlinks");
+ }
+ }
+ else
+ {
+ if (test_flag & T_VERBOSE)
+ printf("Recordlinks: ");
+ empty=0;
+ for (i= info->state->del ; i > 0L && next_link != HA_OFFSET_ERROR ; i--)
+ {
+ if (*_ma_killed_ptr(param))
+ DBUG_RETURN(1);
+ if (test_flag & T_VERBOSE)
+ printf(" %9s",llstr(next_link,buff));
+ if (next_link >= info->state->data_file_length)
+ goto wrong;
+ if (my_pread(info->dfile.file, (uchar*) buff, delete_link_length,
+ next_link,MYF(MY_NABP)))
+ {
+ if (test_flag & T_VERBOSE) puts("");
+ _ma_check_print_error(param,"Can't read delete-link at filepos: %s",
+ llstr(next_link,buff));
+ DBUG_RETURN(1);
+ }
+ if (*buff != '\0')
+ {
+ if (test_flag & T_VERBOSE) puts("");
+ _ma_check_print_error(param,"Record at pos: %s is not remove-marked",
+ llstr(next_link,buff));
+ goto wrong;
+ }
+ if (share->options & HA_OPTION_PACK_RECORD)
+ {
+ my_off_t prev_link=mi_sizekorr(buff+12);
+ if (empty && prev_link != old_link)
+ {
+ if (test_flag & T_VERBOSE) puts("");
+ _ma_check_print_error(param,"Deleted block at %s doesn't point back at previous delete link",llstr(next_link,buff2));
+ goto wrong;
+ }
+ old_link=next_link;
+ next_link=mi_sizekorr(buff+4);
+ empty+=mi_uint3korr(buff+1);
+ }
+ else
+ {
+ param->record_checksum+=(ha_checksum) next_link;
+ next_link= _ma_rec_pos(info, buff+1);
+ empty+=share->base.pack_reclength;
+ }
+ }
+ if (test_flag & T_VERBOSE)
+ puts("\n");
+ if (empty != info->state->empty)
+ {
+ _ma_check_print_warning(param,
+ "Found %s deleted space in delete link chain. Should be %s",
+ llstr(empty,buff2),
+ llstr(info->state->empty,buff));
+ }
+ if (next_link != HA_OFFSET_ERROR)
+ {
+ _ma_check_print_error(param,
+ "Found more than the expected %s deleted rows in delete link chain",
+ llstr(info->state->del, buff));
+ goto wrong;
+ }
+ if (i != 0)
+ {
+ _ma_check_print_error(param,
+ "Found %s deleted rows in delete link chain. Should be %s",
+ llstr(info->state->del - i, buff2),
+ llstr(info->state->del, buff));
+ goto wrong;
+ }
+ }
+ DBUG_RETURN(0);
+
+wrong:
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ if (test_flag & T_VERBOSE) puts("");
+ _ma_check_print_error(param,"record delete-link-chain corrupted");
+ DBUG_RETURN(1);
+} /* maria_chk_del */
+
+
+ /* Check delete links in index file */
+
+static int check_k_link(HA_CHECK *param, register MARIA_HA *info,
+ my_off_t next_link)
+{
+ MARIA_SHARE *share= info->s;
+ uint block_size= share->block_size;
+ ha_rows records;
+ char llbuff[21], llbuff2[21];
+ uchar *buff;
+ DBUG_ENTER("check_k_link");
+
+ records= (ha_rows) (info->state->key_file_length / block_size);
+ while (next_link != HA_OFFSET_ERROR && records > 0)
+ {
+ if (*_ma_killed_ptr(param))
+ DBUG_RETURN(1);
+ if (param->testflag & T_VERBOSE)
+ printf("%16s",llstr(next_link,llbuff));
+
+ /* Key blocks must lay within the key file length entirely. */
+ if (next_link + block_size > info->state->key_file_length)
+ {
+ /* purecov: begin tested */
+ _ma_check_print_error(param, "Invalid key block position: %s "
+ "key block size: %u file_length: %s",
+ llstr(next_link, llbuff), block_size,
+ llstr(info->state->key_file_length, llbuff2));
+ DBUG_RETURN(1);
+ /* purecov: end */
+ }
+
+ /* Key blocks must be aligned at block_size */
+ if (next_link & (block_size -1))
+ {
+ /* purecov: begin tested */
+ _ma_check_print_error(param, "Mis-aligned key block: %s "
+ "minimum key block length: %u",
+ llstr(next_link, llbuff),
+ block_size);
+ DBUG_RETURN(1);
+ /* purecov: end */
+ }
+
+ DBUG_ASSERT(share->pagecache->block_size == block_size);
+ if (!(buff= pagecache_read(share->pagecache,
+ &share->kfile, next_link/block_size,
+ DFLT_INIT_HITS,
+ (uchar*) info->buff,
+ PAGECACHE_READ_UNKNOWN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
+ {
+ /* purecov: begin tested */
+ _ma_check_print_error(param, "key cache read error for block: %s",
+ llstr(next_link,llbuff));
+ DBUG_RETURN(1);
+ /* purecov: end */
+ }
+ if (_ma_get_keynr(info->s, buff) != MARIA_DELETE_KEY_NR)
+ _ma_check_print_error(param, "Page at %s is not delete marked",
+ llstr(next_link, llbuff));
+
+ next_link= mi_sizekorr(buff + share->keypage_header);
+ records--;
+ param->key_file_blocks+=block_size;
+ }
+ if (param->testflag & T_VERBOSE)
+ {
+ if (next_link != HA_OFFSET_ERROR)
+ printf("%16s\n",llstr(next_link,llbuff));
+ else
+ puts("");
+ }
+ DBUG_RETURN (next_link != HA_OFFSET_ERROR);
+} /* check_k_link */
+
+
+ /* Check sizes of files */
+
+int maria_chk_size(HA_CHECK *param, register MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+ int error;
+ register my_off_t skr,size;
+ char buff[22],buff2[22];
+ DBUG_ENTER("maria_chk_size");
+
+ if (!(param->testflag & T_SILENT))
+ puts("- check file-size");
+
+ /*
+ The following is needed if called externally (not from maria_chk).
+ To get a correct physical size we need to flush them.
+ */
+ if ((error= _ma_flush_table_files(info,
+ MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
+ FLUSH_FORCE_WRITE, FLUSH_FORCE_WRITE)))
+ _ma_check_print_error(param, "Failed to flush data or index file");
+
+ size= my_seek(share->kfile.file, 0L, MY_SEEK_END, MYF(MY_THREADSAFE));
+ if ((skr=(my_off_t) info->state->key_file_length) != size)
+ {
+ /* Don't give error if file generated by mariapack */
+ if (skr > size && maria_is_any_key_active(share->state.key_map))
+ {
+ error=1;
+ _ma_check_print_error(param,
+ "Size of indexfile is: %-8s Should be: %s",
+ llstr(size,buff), llstr(skr,buff2));
+ }
+ else if (!(param->testflag & T_VERY_SILENT))
+ _ma_check_print_warning(param,
+ "Size of indexfile is: %-8s Should be: %s",
+ llstr(size,buff), llstr(skr,buff2));
+ }
+ if (!(param->testflag & T_VERY_SILENT) &&
+ ! (share->options & HA_OPTION_COMPRESS_RECORD) &&
+ ulonglong2double(info->state->key_file_length) >
+ ulonglong2double(share->base.margin_key_file_length)*0.9)
+ _ma_check_print_warning(param,"Keyfile is almost full, %10s of %10s used",
+ llstr(info->state->key_file_length,buff),
+ llstr(share->base.max_key_file_length-1,buff));
+
+ size= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
+ skr=(my_off_t) info->state->data_file_length;
+ if (share->options & HA_OPTION_COMPRESS_RECORD)
+ skr+= MEMMAP_EXTRA_MARGIN;
+#ifdef USE_RELOC
+ if (info->data_file_type == STATIC_RECORD &&
+ skr < (my_off_t) share->base.reloc*share->base.min_pack_length)
+ skr=(my_off_t) share->base.reloc*share->base.min_pack_length;
+#endif
+ if (skr != size)
+ {
+ info->state->data_file_length=size; /* Skip other errors */
+ if (skr > size && skr != size + MEMMAP_EXTRA_MARGIN)
+ {
+ error=1;
+ _ma_check_print_error(param,"Size of datafile is: %-9s Should be: %s",
+ llstr(size,buff), llstr(skr,buff2));
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ }
+ else
+ {
+ _ma_check_print_warning(param,
+ "Size of datafile is: %-9s Should be: %s",
+ llstr(size,buff), llstr(skr,buff2));
+ }
+ }
+ if (!(param->testflag & T_VERY_SILENT) &&
+ !(share->options & HA_OPTION_COMPRESS_RECORD) &&
+ ulonglong2double(info->state->data_file_length) >
+ (ulonglong2double(share->base.max_data_file_length)*0.9))
+ _ma_check_print_warning(param, "Datafile is almost full, %10s of %10s used",
+ llstr(info->state->data_file_length,buff),
+ llstr(share->base.max_data_file_length-1,buff2));
+ DBUG_RETURN(error);
+} /* maria_chk_size */
+
+
+/* Check keys */
+
+int maria_chk_key(HA_CHECK *param, register MARIA_HA *info)
+{
+ uint key,found_keys=0,full_text_keys=0,result=0;
+ ha_rows keys;
+ ha_checksum old_record_checksum,init_checksum;
+ my_off_t all_keydata,all_totaldata,key_totlength,length;
+ double *rec_per_key_part;
+ MARIA_SHARE *share= info->s;
+ MARIA_KEYDEF *keyinfo;
+ char buff[22],buff2[22];
+ DBUG_ENTER("maria_chk_key");
+
+ if (!(param->testflag & T_SILENT))
+ puts("- check key delete-chain");
+
+ param->key_file_blocks=share->base.keystart;
+ if (check_k_link(param, info, share->state.key_del))
+ {
+ if (param->testflag & T_VERBOSE) puts("");
+ _ma_check_print_error(param,"key delete-link-chain corrupted");
+ DBUG_RETURN(-1);
+ }
+
+ if (!(param->testflag & T_SILENT)) puts("- check index reference");
+
+ all_keydata=all_totaldata=key_totlength=0;
+ init_checksum=param->record_checksum;
+ old_record_checksum=0;
+ if (share->data_file_type == STATIC_RECORD)
+ old_record_checksum= (calc_checksum(info->state->records +
+ info->state->del-1) *
+ share->base.pack_reclength);
+ rec_per_key_part= param->new_rec_per_key_part;
+ for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
+ rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++)
+ {
+ param->key_crc[key]=0;
+ if (! maria_is_key_active(share->state.key_map, key))
+ {
+ /* Remember old statistics for key */
+ memcpy((char*) rec_per_key_part,
+ (char*) (share->state.rec_per_key_part +
+ (uint) (rec_per_key_part - param->new_rec_per_key_part)),
+ keyinfo->keysegs*sizeof(*rec_per_key_part));
+ continue;
+ }
+ found_keys++;
+
+ param->record_checksum=init_checksum;
+
+ bzero((char*) &param->unique_count,sizeof(param->unique_count));
+ bzero((char*) &param->notnull_count,sizeof(param->notnull_count));
+
+ if ((!(param->testflag & T_SILENT)))
+ printf ("- check data record references index: %d\n",key+1);
+ if (keyinfo->flag & HA_FULLTEXT)
+ full_text_keys++;
+ if (share->state.key_root[key] == HA_OFFSET_ERROR)
+ {
+ if (info->state->records != 0 && !(keyinfo->flag & HA_FULLTEXT))
+ _ma_check_print_error(param, "Key tree %u is empty", key + 1);
+ goto do_stat;
+ }
+ if (!_ma_fetch_keypage(info, keyinfo, share->state.key_root[key],
+ PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
+ info->buff, 0, 0))
+ {
+ report_keypage_fault(param, share->state.key_root[key]);
+ if (!(param->testflag & T_INFO))
+ DBUG_RETURN(-1);
+ result= -1;
+ continue;
+ }
+ param->key_file_blocks+=keyinfo->block_length;
+ keys=0;
+ param->keydata=param->totaldata=0;
+ param->key_blocks=0;
+ param->max_level=0;
+ if (chk_index(param,info,keyinfo,share->state.key_root[key],info->buff,
+ &keys, param->key_crc+key,1))
+ DBUG_RETURN(-1);
+ if(!(keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL)))
+ {
+ if (keys != info->state->records)
+ {
+ _ma_check_print_error(param,"Found %s keys of %s",llstr(keys,buff),
+ llstr(info->state->records,buff2));
+ if (!(param->testflag & T_INFO))
+ DBUG_RETURN(-1);
+ result= -1;
+ continue;
+ }
+ if ((found_keys - full_text_keys == 1 &&
+ !(share->data_file_type == STATIC_RECORD)) ||
+ (param->testflag & T_DONT_CHECK_CHECKSUM))
+ old_record_checksum= param->record_checksum;
+ else if (old_record_checksum != param->record_checksum)
+ {
+ if (key)
+ _ma_check_print_error(param,
+ "Key %u doesn't point at same records as "
+ "key 1",
+ key+1);
+ else
+ _ma_check_print_error(param,"Key 1 doesn't point at all records");
+ if (!(param->testflag & T_INFO))
+ DBUG_RETURN(-1);
+ result= -1;
+ continue;
+ }
+ }
+ if ((uint) share->base.auto_key -1 == key)
+ {
+ /* Check that auto_increment key is bigger than max key value */
+ ulonglong auto_increment;
+ const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
+ info->lastinx=key;
+ _ma_read_key_record(info, info->rec_buff, 0);
+ auto_increment=
+ ma_retrieve_auto_increment(info->rec_buff + keyseg->start,
+ keyseg->type);
+ if (auto_increment > share->state.auto_increment)
+ {
+ _ma_check_print_warning(param, "Auto-increment value: %s is smaller "
+ "than max used value: %s",
+ llstr(share->state.auto_increment,buff2),
+ llstr(auto_increment, buff));
+ }
+ if (param->testflag & T_AUTO_INC)
+ {
+ set_if_bigger(share->state.auto_increment,
+ auto_increment);
+ set_if_bigger(share->state.auto_increment,
+ param->auto_increment_value);
+ }
+
+ /* Check that there isn't a row with auto_increment = 0 in the table */
+ maria_extra(info,HA_EXTRA_KEYREAD,0);
+ bzero(info->lastkey,keyinfo->seg->length);
+ if (!maria_rkey(info, info->rec_buff, key, (const uchar*) info->lastkey,
+ (key_part_map)1, HA_READ_KEY_EXACT))
+ {
+ /* Don't count this as a real warning, as maria_chk can't correct it */
+ uint save=param->warning_printed;
+ _ma_check_print_warning(param, "Found row where the auto_increment "
+ "column has the value 0");
+ param->warning_printed=save;
+ }
+ maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
+ }
+
+ length=(my_off_t) isam_key_length(info,keyinfo)*keys + param->key_blocks*2;
+ if (param->testflag & T_INFO && param->totaldata != 0L && keys != 0L)
+ printf("Key: %2d: Keyblocks used: %3d%% Packed: %4d%% Max levels: %2d\n",
+ key+1,
+ (int) (my_off_t2double(param->keydata)*100.0/my_off_t2double(param->totaldata)),
+ (int) ((my_off_t2double(length) - my_off_t2double(param->keydata))*100.0/
+ my_off_t2double(length)),
+ param->max_level);
+ all_keydata+=param->keydata; all_totaldata+=param->totaldata; key_totlength+=length;
+
+do_stat:
+ if (param->testflag & T_STATISTICS)
+ maria_update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
+ param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+ param->notnull_count: NULL,
+ (ulonglong)info->state->records);
+ }
+ if (param->testflag & T_INFO)
+ {
+ if (all_totaldata != 0L && found_keys > 0)
+ printf("Total: Keyblocks used: %3d%% Packed: %4d%%\n\n",
+ (int) (my_off_t2double(all_keydata)*100.0/
+ my_off_t2double(all_totaldata)),
+ (int) ((my_off_t2double(key_totlength) -
+ my_off_t2double(all_keydata))*100.0/
+ my_off_t2double(key_totlength)));
+ else if (all_totaldata != 0L && maria_is_any_key_active(share->state.key_map))
+ puts("");
+ }
+ if (param->key_file_blocks != info->state->key_file_length &&
+ share->state.key_map == ~(ulonglong) 0)
+ _ma_check_print_warning(param, "Some data are unreferenced in keyfile");
+ if (found_keys != full_text_keys)
+ param->record_checksum=old_record_checksum-init_checksum; /* Remove delete links */
+ else
+ param->record_checksum=0;
+ DBUG_RETURN(result);
+} /* maria_chk_key */
+
+
+
+static int chk_index_down(HA_CHECK *param, MARIA_HA *info,
+ MARIA_KEYDEF *keyinfo,
+ my_off_t page, uchar *buff, ha_rows *keys,
+ ha_checksum *key_checksum, uint level)
+{
+ char llbuff[22],llbuff2[22];
+ DBUG_ENTER("chk_index_down");
+
+ /* Key blocks must lay within the key file length entirely. */
+ if (page + keyinfo->block_length > info->state->key_file_length)
+ {
+ /* purecov: begin tested */
+ /* Give it a chance to fit in the real file size. */
+ my_off_t max_length= my_seek(info->s->kfile.file, 0L, MY_SEEK_END,
+ MYF(MY_THREADSAFE));
+ _ma_check_print_error(param, "Invalid key block position: %s "
+ "key block size: %u file_length: %s",
+ llstr(page, llbuff), keyinfo->block_length,
+ llstr(info->state->key_file_length, llbuff2));
+ if (page + keyinfo->block_length > max_length)
+ goto err;
+ /* Fix the remembered key file length. */
+ info->state->key_file_length= (max_length &
+ ~ (my_off_t) (keyinfo->block_length - 1));
+ /* purecov: end */
+ }
+
+ /* Key blocks must be aligned at block length */
+ if (page & (info->s->block_size -1))
+ {
+ /* purecov: begin tested */
+ _ma_check_print_error(param, "Mis-aligned key block: %s "
+ "key block length: %u",
+ llstr(page, llbuff), info->s->block_size);
+ goto err;
+ /* purecov: end */
+ }
+
+ if (!_ma_fetch_keypage(info, keyinfo, page, PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, buff, 0, 0))
+ {
+ report_keypage_fault(param, page);
+ goto err;
+ }
+ param->key_file_blocks+=keyinfo->block_length;
+ if (chk_index(param,info,keyinfo,page,buff,keys,key_checksum,level))
+ goto err;
+
+ DBUG_RETURN(0);
+
+ /* purecov: begin tested */
+err:
+ DBUG_RETURN(1);
+ /* purecov: end */
+}
+
+
+/*
+ "Ignore NULLs" statistics collection method: process first index tuple.
+
+ SYNOPSIS
+ maria_collect_stats_nonulls_first()
+ keyseg IN Array of key part descriptions
+ notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
+ tuples that don't contain NULLs)
+ key IN Key values tuple
+
+ DESCRIPTION
+ Process the first index tuple - find out which prefix tuples don't
+ contain NULLs, and update the array of notnull counters accordingly.
+*/
+
+static
+void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
+ const uchar *key)
+{
+ uint first_null, kp;
+ first_null= ha_find_null(keyseg, (uchar*) key) - keyseg;
+ /*
+ All prefix tuples that don't include keypart_{first_null} are not-null
+ tuples (and all others aren't), increment counters for them.
+ */
+ for (kp= 0; kp < first_null; kp++)
+ notnull[kp]++;
+}
+
+
+/*
+ "Ignore NULLs" statistics collection method: process next index tuple.
+
+ SYNOPSIS
+ maria_collect_stats_nonulls_next()
+ keyseg IN Array of key part descriptions
+ notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
+ tuples that don't contain NULLs)
+ prev_key IN Previous key values tuple
+ last_key IN Next key values tuple
+
+ DESCRIPTION
+ Process the next index tuple:
+ 1. Find out which prefix tuples of last_key don't contain NULLs, and
+ update the array of notnull counters accordingly.
+ 2. Find the first keypart number where the prev_key and last_key tuples
+ are different(A), or last_key has NULL value(B), and return it, so the
+ caller can count number of unique tuples for each key prefix. We don't
+ need (B) to be counted, and that is compensated back in
+ maria_update_key_parts().
+
+ RETURN
+ 1 + number of first keypart where values differ or last_key tuple has NULL
+*/
+
+static
+int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
+ const uchar *prev_key,
+ const uchar *last_key)
+{
+ uint diffs[2];
+ uint first_null_seg, kp;
+ HA_KEYSEG *seg;
+
+ /*
+ Find the first keypart where values are different or either of them is
+ NULL. We get results in diffs array:
+ diffs[0]= 1 + number of first different keypart
+ diffs[1]=offset: (last_key + diffs[1]) points to first value in
+ last_key that is NULL or different from corresponding
+ value in prev_key.
+ */
+ ha_key_cmp(keyseg, (uchar*) prev_key, (uchar*) last_key, USE_WHOLE_KEY,
+ SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diffs);
+ seg= keyseg + diffs[0] - 1;
+
+ /* Find first NULL in last_key */
+ first_null_seg= ha_find_null(seg, (uchar*) last_key + diffs[1]) - keyseg;
+ for (kp= 0; kp < first_null_seg; kp++)
+ notnull[kp]++;
+
+ /*
+ Return 1+ number of first key part where values differ. Don't care if
+ these were NULLs and not .... We compensate for that in
+ maria_update_key_parts.
+ */
+ return diffs[0];
+}
+
+
+/* Check if index is ok */
+
+static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ my_off_t page, uchar *buff, ha_rows *keys,
+ ha_checksum *key_checksum, uint level)
+{
+ int flag;
+ uint used_length,comp_flag,nod_flag,key_length=0;
+ uchar key[HA_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos;
+ my_off_t next_page,record;
+ MARIA_SHARE *share= info->s;
+ char llbuff[22];
+ uint diff_pos[2];
+ DBUG_ENTER("chk_index");
+ DBUG_DUMP("buff", buff, _ma_get_page_used(share, buff));
+
+ /* TODO: implement appropriate check for RTree keys */
+ if (keyinfo->flag & HA_SPATIAL)
+ DBUG_RETURN(0);
+
+ if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length)))
+ {
+ _ma_check_print_error(param,"Not enough memory for keyblock");
+ DBUG_RETURN(-1);
+ }
+
+ if (keyinfo->flag & HA_NOSAME)
+ comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* Not real duplicates */
+ else
+ comp_flag=SEARCH_SAME; /* Keys in positionorder */
+
+ _ma_get_used_and_nod(share, buff, used_length, nod_flag);
+ keypos= buff + share->keypage_header + nod_flag;
+ endpos= buff + used_length;
+
+ param->keydata+= used_length;
+ param->totaldata+= keyinfo->block_length; /* INFO */
+ param->key_blocks++;
+ if (level > param->max_level)
+ param->max_level=level;
+
+ if (_ma_get_keynr(share, buff) != (uint) (keyinfo - share->keyinfo))
+ _ma_check_print_error(param, "Page at %s is not marked for index %u",
+ llstr(page, llbuff),
+ (uint) (keyinfo - share->keyinfo));
+
+ if (used_length > keyinfo->block_length)
+ {
+ _ma_check_print_error(param,"Wrong pageinfo at page: %s",
+ llstr(page,llbuff));
+ goto err;
+ }
+ for ( ;; )
+ {
+ if (*_ma_killed_ptr(param))
+ goto err;
+ memcpy(info->lastkey, key, key_length);
+ info->lastkey_length= key_length;
+ if (nod_flag)
+ {
+ next_page= _ma_kpos(nod_flag,keypos);
+ if (chk_index_down(param,info,keyinfo,next_page,
+ temp_buff,keys,key_checksum,level+1))
+ goto err;
+ }
+ old_keypos=keypos;
+ if (keypos >= endpos ||
+ (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,key)) == 0)
+ break;
+ if (keypos > endpos)
+ {
+ _ma_check_print_error(param,"Wrong key block length at page: %s",
+ llstr(page,llbuff));
+ goto err;
+ }
+ if ((*keys)++ &&
+ (flag=ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key,
+ key_length, comp_flag, diff_pos)) >=0)
+ {
+ DBUG_DUMP("old", info->lastkey, info->lastkey_length);
+ DBUG_DUMP("new", key, key_length);
+ DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos));
+
+ if (comp_flag & SEARCH_FIND && flag == 0)
+ _ma_check_print_error(param,"Found duplicated key at page %s",
+ llstr(page,llbuff));
+ else
+ _ma_check_print_error(param,"Key in wrong position at page %s",
+ llstr(page,llbuff));
+ goto err;
+ }
+ if (param->testflag & T_STATISTICS)
+ {
+ if (*keys != 1L) /* not first_key */
+ {
+ if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
+ ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key,
+ USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
+ diff_pos);
+ else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ {
+ diff_pos[0]= maria_collect_stats_nonulls_next(keyinfo->seg,
+ param->notnull_count,
+ info->lastkey, key);
+ }
+ param->unique_count[diff_pos[0]-1]++;
+ }
+ else
+ {
+ if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ maria_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
+ key);
+ }
+ }
+ (*key_checksum)+= maria_byte_checksum((uchar*) key,
+ key_length- share->rec_reflength);
+ record= _ma_dpos(info,0,key+key_length);
+ if (keyinfo->flag & HA_FULLTEXT) /* special handling for ft2 */
+ {
+ uint off;
+ int subkeys;
+ get_key_full_length_rdonly(off, key);
+ subkeys=ft_sintXkorr(key+off);
+ if (subkeys < 0)
+ {
+ ha_rows tmp_keys=0;
+ if (chk_index_down(param,info,&share->ft2_keyinfo,record,
+ temp_buff,&tmp_keys,key_checksum,1))
+ goto err;
+ if (tmp_keys + subkeys)
+ {
+ _ma_check_print_error(param,
+ "Number of words in the 2nd level tree "
+ "does not match the number in the header. "
+ "Parent word in on the page %s, offset %u",
+ llstr(page,llbuff), (uint) (old_keypos-buff));
+ goto err;
+ }
+ (*keys)+=tmp_keys-1;
+ continue;
+ }
+ /* fall through */
+ }
+ if (record >= info->state->data_file_length)
+ {
+#ifndef DBUG_OFF
+ char llbuff2[22], llbuff3[22];
+#endif
+ _ma_check_print_error(param,"Found key at page %s that points to record outside datafile",llstr(page,llbuff));
+ DBUG_PRINT("test",("page: %s record: %s filelength: %s",
+ llstr(page,llbuff),llstr(record,llbuff2),
+ llstr(info->state->data_file_length,llbuff3)));
+ DBUG_DUMP("key",(uchar*) key,key_length);
+ DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos));
+ goto err;
+ }
+ param->record_checksum+= (ha_checksum) record;
+ }
+ if (keypos != endpos)
+ {
+ _ma_check_print_error(param,
+ "Keyblock size at page %s is not correct. "
+ "Block length: %u key length: %u",
+ llstr(page, llbuff), used_length,
+ (uint) (keypos - buff));
+ goto err;
+ }
+ my_afree((uchar*) temp_buff);
+ DBUG_RETURN(0);
+ err:
+ my_afree((uchar*) temp_buff);
+ DBUG_RETURN(1);
+} /* chk_index */
+
+
+ /* Calculate a checksum of 1+2+3+4...N = N*(N+1)/2 without overflow */
+
+static ha_checksum calc_checksum(ha_rows count)
+{
+ ulonglong sum,a,b;
+ DBUG_ENTER("calc_checksum");
+
+ sum=0;
+ a=count; b=count+1;
+ if (a & 1)
+ b>>=1;
+ else
+ a>>=1;
+ while (b)
+ {
+ if (b & 1)
+ sum+=a;
+ a<<=1; b>>=1;
+ }
+ DBUG_PRINT("exit",("sum: %lx",(ulong) sum));
+ DBUG_RETURN((ha_checksum) sum);
+} /* calc_checksum */
+
+
+ /* Calc length of key in normal isam */
+
+static uint isam_key_length(MARIA_HA *info, register MARIA_KEYDEF *keyinfo)
+{
+ uint length;
+ HA_KEYSEG *keyseg;
+ DBUG_ENTER("isam_key_length");
+
+ length= info->s->rec_reflength;
+ for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++)
+ length+= keyseg->length;
+
+ DBUG_PRINT("exit",("length: %d",length));
+ DBUG_RETURN(length);
+} /* key_length */
+
+
+
+static void record_pos_to_txt(MARIA_HA *info, my_off_t recpos,
+ char *buff)
+{
+ if (info->s->data_file_type != BLOCK_RECORD)
+ llstr(recpos, buff);
+ else
+ {
+ my_off_t page= ma_recordpos_to_page(recpos);
+ uint row= ma_recordpos_to_dir_entry(recpos);
+ char *end= longlong10_to_str(page, buff, 10);
+ *(end++)= ':';
+ longlong10_to_str(row, end, 10);
+ }
+}
+
+
+/*
+ Check that keys in records exist in index tree
+
+ SYNOPSIS
+ check_keys_in_record()
+ param Check paramenter
+ info Maria handler
+ extend Type of check (extended or normal)
+ start_recpos Position to row
+ record Record buffer
+
+ NOTES
+ This function also calculates record checksum & number of rows
+*/
+
+static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend,
+ my_off_t start_recpos, uchar *record)
+{
+ MARIA_SHARE *share= info->s;
+ MARIA_KEYDEF *keyinfo;
+ char llbuff[22+4];
+ uint key;
+
+ param->tmp_record_checksum+= (ha_checksum) start_recpos;
+ param->records++;
+ if (param->testflag & T_WRITE_LOOP && param->records % WRITE_COUNT == 0)
+ {
+ printf("%s\r", llstr(param->records, llbuff));
+ VOID(fflush(stdout));
+ }
+
+ /* Check if keys match the record */
+ for (key=0, keyinfo= share->keyinfo; key < share->base.keys;
+ key++,keyinfo++)
+ {
+ if (maria_is_key_active(share->state.key_map, key))
+ {
+ if(!(keyinfo->flag & HA_FULLTEXT))
+ {
+ uint key_length= _ma_make_key(info,key,info->lastkey,record,
+ start_recpos);
+ if (extend)
+ {
+ /* We don't need to lock the key tree here as we don't allow
+ concurrent threads when running maria_chk
+ */
+ int search_result=
+#ifdef HAVE_RTREE_KEYS
+ (keyinfo->flag & HA_SPATIAL) ?
+ maria_rtree_find_first(info, key, info->lastkey, key_length,
+ MBR_EQUAL | MBR_DATA) :
+#endif
+ _ma_search(info,keyinfo,info->lastkey,key_length,
+ SEARCH_SAME, share->state.key_root[key]);
+ if (search_result)
+ {
+ record_pos_to_txt(info, start_recpos, llbuff);
+ _ma_check_print_error(param,
+ "Record at: %14s "
+ "Can't find key for index: %2d",
+ llbuff, key+1);
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ return -1;
+ }
+ }
+ else
+ param->tmp_key_crc[key]+=
+ maria_byte_checksum((uchar*) info->lastkey, key_length);
+ }
+ }
+ }
+ return 0;
+}
+
+
+/*
+ Functions to loop through all rows and check if they are ok
+
+ NOTES
+ One function for each record format
+
+ RESULT
+ 0 ok
+ -1 Interrupted by user
+ 1 Error
+*/
+
+static int check_static_record(HA_CHECK *param, MARIA_HA *info, int extend,
+ uchar *record)
+{
+ MARIA_SHARE *share= info->s;
+ my_off_t start_recpos, pos;
+ char llbuff[22];
+
+ pos= 0;
+ while (pos < info->state->data_file_length)
+ {
+ if (*_ma_killed_ptr(param))
+ return -1;
+ if (my_b_read(&param->read_cache,(uchar*) record,
+ share->base.pack_reclength))
+ {
+ _ma_check_print_error(param,
+ "got error: %d when reading datafile at position: %s",
+ my_errno, llstr(pos, llbuff));
+ return 1;
+ }
+ start_recpos= pos;
+ pos+= share->base.pack_reclength;
+ param->splits++;
+ if (*record == '\0')
+ {
+ param->del_blocks++;
+ param->del_length+= share->base.pack_reclength;
+ continue; /* Record removed */
+ }
+ param->glob_crc+= _ma_static_checksum(info,record);
+ param->used+= share->base.pack_reclength;
+ if (check_keys_in_record(param, info, extend, start_recpos, record))
+ return 1;
+ }
+ return 0;
+}
+
+
+static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
+ uchar *record)
+{
+ MARIA_BLOCK_INFO block_info;
+ MARIA_SHARE *share= info->s;
+ my_off_t start_recpos, start_block, pos;
+ uchar *to;
+ ulong left_length;
+ uint b_type;
+ char llbuff[22],llbuff2[22],llbuff3[22];
+ DBUG_ENTER("check_dynamic_record");
+
+ LINT_INIT(left_length);
+ LINT_INIT(start_recpos);
+ LINT_INIT(to);
+
+ pos= 0;
+ while (pos < info->state->data_file_length)
+ {
+ my_bool got_error= 0;
+ int flag;
+ if (*_ma_killed_ptr(param))
+ DBUG_RETURN(-1);
+
+ flag= block_info.second_read=0;
+ block_info.next_filepos=pos;
+ do
+ {
+ if (_ma_read_cache(&param->read_cache,(uchar*) block_info.header,
+ (start_block=block_info.next_filepos),
+ sizeof(block_info.header),
+ (flag ? 0 : READING_NEXT) | READING_HEADER))
+ {
+ _ma_check_print_error(param,
+ "got error: %d when reading datafile at "
+ "position: %s",
+ my_errno, llstr(start_block, llbuff));
+ DBUG_RETURN(1);
+ }
+
+ if (start_block & (MARIA_DYN_ALIGN_SIZE-1))
+ {
+ _ma_check_print_error(param,"Wrong aligned block at %s",
+ llstr(start_block,llbuff));
+ DBUG_RETURN(1);
+ }
+ b_type= _ma_get_block_info(&block_info,-1,start_block);
+ if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR))
+ {
+ if (b_type & BLOCK_SYNC_ERROR)
+ {
+ if (flag)
+ {
+ _ma_check_print_error(param,"Unexpected byte: %d at link: %s",
+ (int) block_info.header[0],
+ llstr(start_block,llbuff));
+ DBUG_RETURN(1);
+ }
+ pos=block_info.filepos+block_info.block_len;
+ goto next;
+ }
+ if (b_type & BLOCK_DELETED)
+ {
+ if (block_info.block_len < share->base.min_block_length)
+ {
+ _ma_check_print_error(param,
+ "Deleted block with impossible length %lu at %s",
+ block_info.block_len,llstr(pos,llbuff));
+ DBUG_RETURN(1);
+ }
+ if ((block_info.next_filepos != HA_OFFSET_ERROR &&
+ block_info.next_filepos >= info->state->data_file_length) ||
+ (block_info.prev_filepos != HA_OFFSET_ERROR &&
+ block_info.prev_filepos >= info->state->data_file_length))
+ {
+ _ma_check_print_error(param,"Delete link points outside datafile at %s",
+ llstr(pos,llbuff));
+ DBUG_RETURN(1);
+ }
+ param->del_blocks++;
+ param->del_length+= block_info.block_len;
+ param->splits++;
+ pos= block_info.filepos+block_info.block_len;
+ goto next;
+ }
+ _ma_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s",
+ block_info.header[0],block_info.header[1],
+ block_info.header[2],
+ llstr(start_block,llbuff));
+ DBUG_RETURN(1);
+ }
+ if (info->state->data_file_length < block_info.filepos+
+ block_info.block_len)
+ {
+ _ma_check_print_error(param,
+ "Recordlink that points outside datafile at %s",
+ llstr(pos,llbuff));
+ got_error=1;
+ break;
+ }
+ param->splits++;
+ if (!flag++) /* First block */
+ {
+ start_recpos=pos;
+ pos=block_info.filepos+block_info.block_len;
+ if (block_info.rec_len > (uint) share->base.max_pack_length)
+ {
+ _ma_check_print_error(param,"Found too long record (%lu) at %s",
+ (ulong) block_info.rec_len,
+ llstr(start_recpos,llbuff));
+ got_error=1;
+ break;
+ }
+ if (share->base.blobs)
+ {
+ if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ block_info.rec_len +
+ share->base.extra_rec_buff_size))
+
+ {
+ _ma_check_print_error(param,
+ "Not enough memory (%lu) for blob at %s",
+ (ulong) block_info.rec_len,
+ llstr(start_recpos,llbuff));
+ got_error=1;
+ break;
+ }
+ }
+ to= info->rec_buff;
+ left_length= block_info.rec_len;
+ }
+ if (left_length < block_info.data_len)
+ {
+ _ma_check_print_error(param,"Found too long record (%lu) at %s",
+ (ulong) block_info.data_len,
+ llstr(start_recpos,llbuff));
+ got_error=1;
+ break;
+ }
+ if (_ma_read_cache(&param->read_cache,(uchar*) to,block_info.filepos,
+ (uint) block_info.data_len,
+ flag == 1 ? READING_NEXT : 0))
+ {
+ _ma_check_print_error(param,
+ "got error: %d when reading datafile at position: %s", my_errno, llstr(block_info.filepos, llbuff));
+
+ DBUG_RETURN(1);
+ }
+ to+=block_info.data_len;
+ param->link_used+= block_info.filepos-start_block;
+ param->used+= block_info.filepos - start_block + block_info.data_len;
+ param->empty+= block_info.block_len-block_info.data_len;
+ left_length-= block_info.data_len;
+ if (left_length)
+ {
+ if (b_type & BLOCK_LAST)
+ {
+ _ma_check_print_error(param,
+ "Wrong record length %s of %s at %s",
+ llstr(block_info.rec_len-left_length,llbuff),
+ llstr(block_info.rec_len, llbuff2),
+ llstr(start_recpos,llbuff3));
+ got_error=1;
+ break;
+ }
+ if (info->state->data_file_length < block_info.next_filepos)
+ {
+ _ma_check_print_error(param,
+ "Found next-recordlink that points outside datafile at %s",
+ llstr(block_info.filepos,llbuff));
+ got_error=1;
+ break;
+ }
+ }
+ } while (left_length);
+
+ if (! got_error)
+ {
+ if (_ma_rec_unpack(info,record,info->rec_buff,block_info.rec_len) ==
+ MY_FILE_ERROR)
+ {
+ _ma_check_print_error(param,"Found wrong record at %s",
+ llstr(start_recpos,llbuff));
+ got_error=1;
+ }
+ else
+ {
+ ha_checksum checksum= 0;
+ if (share->calc_checksum)
+ checksum= (*share->calc_checksum)(info, record);
+
+ if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE))
+ {
+ if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len,
+ test(share->calc_checksum), checksum))
+ {
+ _ma_check_print_error(param,"Found wrong packed record at %s",
+ llstr(start_recpos,llbuff));
+ got_error= 1;
+ }
+ }
+ param->glob_crc+= checksum;
+ }
+
+ if (! got_error)
+ {
+ if (check_keys_in_record(param, info, extend, start_recpos, record))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ DBUG_RETURN(1);
+ }
+ }
+ else if (!flag)
+ pos= block_info.filepos+block_info.block_len;
+next:;
+ }
+ DBUG_RETURN(0);
+}
+
+
+static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend,
+ uchar *record)
+{
+ MARIA_BLOCK_INFO block_info;
+ MARIA_SHARE *share= info->s;
+ my_off_t start_recpos, pos;
+ char llbuff[22];
+ bool got_error= 0;
+ DBUG_ENTER("check_compressed_record");
+
+ pos= share->pack.header_length; /* Skip header */
+ while (pos < info->state->data_file_length)
+ {
+ if (*_ma_killed_ptr(param))
+ DBUG_RETURN(-1);
+
+ if (_ma_read_cache(&param->read_cache,(uchar*) block_info.header, pos,
+ share->pack.ref_length, READING_NEXT))
+ {
+ _ma_check_print_error(param,
+ "got error: %d when reading datafile at position: %s",
+ my_errno, llstr(pos, llbuff));
+ DBUG_RETURN(1);
+ }
+
+ start_recpos= pos;
+ param->splits++;
+ VOID(_ma_pack_get_block_info(info, &info->bit_buff, &block_info,
+ &info->rec_buff, &info->rec_buff_size, -1,
+ start_recpos));
+ pos=block_info.filepos+block_info.rec_len;
+ if (block_info.rec_len < (uint) share->min_pack_length ||
+ block_info.rec_len > (uint) share->max_pack_length)
+ {
+ _ma_check_print_error(param,
+ "Found block with wrong recordlength: %lu at %s",
+ block_info.rec_len, llstr(start_recpos,llbuff));
+ got_error=1;
+ goto end;
+ }
+ if (_ma_read_cache(&param->read_cache,(uchar*) info->rec_buff,
+ block_info.filepos, block_info.rec_len, READING_NEXT))
+ {
+ _ma_check_print_error(param,
+ "got error: %d when reading datafile at position: %s",
+ my_errno, llstr(block_info.filepos, llbuff));
+ DBUG_RETURN(1);
+ }
+ if (_ma_pack_rec_unpack(info, &info->bit_buff, record,
+ info->rec_buff, block_info.rec_len))
+ {
+ _ma_check_print_error(param,"Found wrong record at %s",
+ llstr(start_recpos,llbuff));
+ got_error=1;
+ goto end;
+ }
+ param->glob_crc+= (*share->calc_checksum)(info,record);
+ param->link_used+= (block_info.filepos - start_recpos);
+ param->used+= (pos-start_recpos);
+
+end:
+ if (! got_error)
+ {
+ if (check_keys_in_record(param, info, extend, start_recpos, record))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ got_error= 0; /* Reset for next loop */
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ DBUG_RETURN(1);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Check if layout on head or tail page is ok
+
+ NOTES
+ This is for rows-in-block format.
+*/
+
+static int check_page_layout(HA_CHECK *param, MARIA_HA *info,
+ my_off_t page_pos, uchar *page,
+ uint row_count, uint head_empty,
+ uint *real_rows_found)
+{
+ uint empty, last_row_end, row, first_dir_entry, free_entry, block_size;
+ uint free_entries, prev_free_entry;
+ uchar *dir_entry;
+ char llbuff[22];
+ my_bool error_in_free_list= 0;
+ DBUG_ENTER("check_page_layout");
+
+ block_size= info->s->block_size;
+ empty= 0;
+ last_row_end= PAGE_HEADER_SIZE;
+ *real_rows_found= 0;
+
+ /* Check free directory list */
+ free_entry= (uint) page[DIR_FREE_OFFSET];
+ free_entries= 0;
+ prev_free_entry= END_OF_DIR_FREE_LIST;
+ while (free_entry != END_OF_DIR_FREE_LIST)
+ {
+ uchar *dir;
+ if (free_entry > row_count)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Directory free entry points outside "
+ "directory",
+ llstr(page_pos, llbuff));
+ error_in_free_list= 1;
+ break;
+ }
+ dir= dir_entry_pos(page, block_size, free_entry);
+ if (uint2korr(dir) != 0)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Directory free entry points to "
+ "not deleted entry",
+ llstr(page_pos, llbuff));
+ error_in_free_list= 1;
+ break;
+ }
+ if (dir[2] != prev_free_entry)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Directory free list back pointer "
+ "points to wrong entry",
+ llstr(page_pos, llbuff));
+ error_in_free_list= 1;
+ break;
+ }
+ prev_free_entry= free_entry;
+ free_entry= dir[3];
+ free_entries++;
+ }
+
+ /* Check directry */
+ dir_entry= page+ block_size - PAGE_SUFFIX_SIZE;
+ first_dir_entry= (block_size - row_count * DIR_ENTRY_SIZE -
+ PAGE_SUFFIX_SIZE);
+ for (row= 0 ; row < row_count ; row++)
+ {
+ uint pos, length;
+ dir_entry-= DIR_ENTRY_SIZE;
+ pos= uint2korr(dir_entry);
+ if (!pos)
+ {
+ free_entries--;
+ if (row == row_count -1)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: First entry in directory is 0",
+ llstr(page_pos, llbuff));
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ DBUG_RETURN(1);
+ }
+ continue; /* Deleted row */
+ }
+ (*real_rows_found)++;
+ length= uint2korr(dir_entry+2);
+ param->used+= length;
+ if (pos < last_row_end)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Row %3u overlapps with previous row",
+ llstr(page_pos, llbuff), row);
+ DBUG_RETURN(1);
+ }
+ empty+= (pos - last_row_end);
+ last_row_end= pos + length;
+ if (last_row_end > first_dir_entry)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Row %3u overlapps with directory",
+ llstr(page_pos, llbuff), row);
+ DBUG_RETURN(1);
+ }
+ }
+ empty+= (first_dir_entry - last_row_end);
+
+ if (empty != head_empty)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Wrong empty size. Stored: %5u Actual: %5u",
+ llstr(page_pos, llbuff), head_empty, empty);
+ param->err_count++;
+ }
+ if (free_entries != 0 && !error_in_free_list)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Directory free link don't include "
+ "all free entries",
+ llstr(page_pos, llbuff));
+ param->err_count++;
+ }
+ DBUG_RETURN(param->err_count &&
+ (param->err_count >= MAXERR || !(param->testflag & T_VERBOSE)));
+}
+
+
+/*
+ Check all rows on head page
+
+ NOTES
+ This is for rows-in-block format.
+
+ Before this, we have already called check_page_layout(), so
+ we know the block is logicaly correct (even if the rows may not be that)
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+
+static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, uchar *record,
+ int extend, my_off_t page_pos, uchar *page_buff,
+ uint row_count)
+{
+ MARIA_SHARE *share= info->s;
+ uchar *dir_entry;
+ uint row;
+ char llbuff[22], llbuff2[22];
+ DBUG_ENTER("check_head_page");
+
+ dir_entry= page_buff+ share->block_size - PAGE_SUFFIX_SIZE;
+ for (row= 0 ; row < row_count ; row++)
+ {
+ uint pos, length, flag;
+ dir_entry-= DIR_ENTRY_SIZE;
+ pos= uint2korr(dir_entry);
+ if (!pos)
+ continue;
+ length= uint2korr(dir_entry+2);
+ if (length < share->base.min_block_length)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Row %3u is too short (%d bytes)",
+ llstr(page_pos, llbuff), row, length);
+ DBUG_RETURN(1);
+ }
+ flag= (uint) (uchar) page_buff[pos];
+ if (flag & ~(ROW_FLAG_ALL))
+ _ma_check_print_error(param,
+ "Page %9s: Row %3u has wrong flag: %d",
+ llstr(page_pos, llbuff), row, flag);
+
+ DBUG_PRINT("info", ("rowid: %s page: %lu row: %u",
+ llstr(ma_recordpos(page_pos/share->block_size, row),
+ llbuff),
+ (ulong) (page_pos / share->block_size), row));
+ if (_ma_read_block_record2(info, record, page_buff+pos,
+ page_buff+pos+length))
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Row %3d is crashed",
+ llstr(page_pos, llbuff), row);
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ DBUG_RETURN(1);
+ continue;
+ }
+ if (share->calc_checksum)
+ {
+ ha_checksum checksum= (*share->calc_checksum)(info, record);
+ if (info->cur_row.checksum != (checksum & 255))
+ _ma_check_print_error(param, "Page %9s: Row %3d has wrong checksum",
+ llstr(page_pos, llbuff), row);
+ param->glob_crc+= checksum;
+ }
+ if (info->cur_row.extents_count)
+ {
+ uchar *extents= info->cur_row.extents;
+ uint i;
+ /* Check that bitmap has the right marker for the found extents */
+ for (i= 0 ; i < info->cur_row.extents_count ; i++)
+ {
+ uint page, page_count, page_type;
+ page= uint5korr(extents);
+ page_count= uint2korr(extents+5);
+ extents+= ROW_EXTENT_SIZE;
+ page_type= BLOB_PAGE;
+ if (page_count & TAIL_BIT)
+ {
+ page_count= 1;
+ page_type= TAIL_PAGE;
+ }
+ for ( ; page_count--; page++)
+ {
+ uint bitmap_pattern;
+ if (_ma_check_if_right_bitmap_type(info, page_type, page,
+ &bitmap_pattern))
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Row: %3d has an extent with wrong information in bitmap: Page %9s Page_type: %d Bitmap: %d",
+ llstr(page_pos, llbuff), row,
+ llstr(page * share->bitmap.block_size,
+ llbuff2),
+ page_type,
+ bitmap_pattern);
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ DBUG_RETURN(1);
+ }
+ }
+ }
+ }
+ param->full_page_count+= info->cur_row.full_page_count;
+ param->tail_count+= info->cur_row.tail_count;
+ if (check_keys_in_record(param, info, extend,
+ ma_recordpos(page_pos/share->block_size, row),
+ record))
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Check if rows-in-block data file is consistent
+*/
+
+static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
+ uchar *record)
+{
+ MARIA_SHARE *share= info->s;
+ my_off_t pos;
+ uchar *page_buff, *bitmap_buff, *data;
+ char llbuff[22], llbuff2[22];
+ uint block_size= share->block_size;
+ ha_rows full_page_count, tail_count;
+ my_bool full_dir;
+ uint offset_page, offset;
+
+ LINT_INIT(full_dir);
+
+ if (_ma_scan_init_block_record(info))
+ {
+ _ma_check_print_error(param, "got error %d when initializing scan",
+ my_errno);
+ return 1;
+ }
+ bitmap_buff= info->scan.bitmap_buff;
+ page_buff= info->scan.page_buff;
+ full_page_count= tail_count= 0;
+ param->full_page_count= param->tail_count= 0;
+ param->used= param->link_used= 0;
+
+ for (pos= 0;
+ pos < info->state->data_file_length;
+ pos+= block_size)
+ {
+ uint row_count, real_row_count, empty_space, page_type, bitmap_pattern;
+ LINT_INIT(row_count);
+ LINT_INIT(empty_space);
+
+ if (*_ma_killed_ptr(param))
+ {
+ _ma_scan_end_block_record(info);
+ return -1;
+ }
+ if (((pos / block_size) % share->bitmap.pages_covered) == 0)
+ {
+ /* Bitmap page */
+ if (pagecache_read(share->pagecache,
+ &info->s->bitmap.file,
+ (pos / block_size), 1,
+ bitmap_buff,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Got error: %d when reading datafile",
+ llstr(pos, llbuff), my_errno);
+ goto err;
+ }
+ param->used+= block_size;
+ param->link_used+= block_size;
+ continue;
+ }
+ /* Skip pages marked as empty in bitmap */
+ offset_page= (((pos / block_size) % share->bitmap.pages_covered) -1) * 3;
+ offset= offset_page & 7;
+ data= bitmap_buff + offset_page / 8;
+ bitmap_pattern= uint2korr(data);
+ param->splits++;
+ if (!((bitmap_pattern >> offset) & 7))
+ {
+ param->empty+= block_size;
+ param->del_blocks++;
+ continue;
+ }
+
+ if (pagecache_read(share->pagecache,
+ &info->dfile,
+ (pos / block_size), 1,
+ page_buff,
+ share->page_type,
+ PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Got error: %d when reading datafile",
+ llstr(pos, llbuff), my_errno);
+ goto err;
+ }
+ page_type= page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK;
+ if (page_type == UNALLOCATED_PAGE || page_type >= MAX_PAGE_TYPE)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Found wrong page type %d",
+ llstr(pos, llbuff), page_type);
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ goto err;
+ continue;
+ }
+ switch ((enum en_page_type) page_type) {
+ case UNALLOCATED_PAGE:
+ case MAX_PAGE_TYPE:
+ default:
+ DBUG_ASSERT(0); /* Impossible */
+ break;
+ case HEAD_PAGE:
+ row_count= ((uchar*) page_buff)[DIR_COUNT_OFFSET];
+ empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
+ param->used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE +
+ row_count * DIR_ENTRY_SIZE);
+ param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE +
+ row_count * DIR_ENTRY_SIZE);
+ full_dir= row_count == MAX_ROWS_PER_PAGE;
+ break;
+ case TAIL_PAGE:
+ row_count= ((uchar*) page_buff)[DIR_COUNT_OFFSET];
+ empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
+ param->used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE +
+ row_count * DIR_ENTRY_SIZE);
+ param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE +
+ row_count * DIR_ENTRY_SIZE);
+ full_dir= row_count == MAX_ROWS_PER_PAGE;
+ break;
+ case BLOB_PAGE:
+ full_page_count++;
+ full_dir= 0;
+ empty_space= block_size; /* for error reporting */
+ param->link_used+= (LSN_SIZE + PAGE_TYPE_SIZE);
+ param->used+= block_size;
+ break;
+ }
+ if (_ma_check_bitmap_data(info, page_type, pos / block_size,
+ full_dir ? 0 : empty_space,
+ &bitmap_pattern))
+ {
+ if (bitmap_pattern == ~(uint) 0)
+ _ma_check_print_error(param,
+ "Page: %9s: Wrong bitmap for data on page",
+ llstr(pos, llbuff));
+ else
+ _ma_check_print_error(param,
+ "Page %9s: Wrong data in bitmap. Page_type: %d empty_space: %u Bitmap-bits: %d",
+ llstr(pos, llbuff), page_type, empty_space,
+ bitmap_pattern);
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ goto err;
+ }
+ if ((enum en_page_type) page_type == BLOB_PAGE)
+ continue;
+ param->empty+= empty_space;
+ if (check_page_layout(param, info, pos, page_buff, row_count,
+ empty_space, &real_row_count))
+ goto err;
+ if ((enum en_page_type) page_type == TAIL_PAGE)
+ {
+ tail_count+= real_row_count;
+ continue;
+ }
+ if (check_head_page(param, info, record, extend, pos, page_buff,
+ row_count))
+ goto err;
+ }
+
+ /* Verify that rest of bitmap is zero */
+
+ if ((pos / block_size) % share->bitmap.pages_covered)
+ {
+ /* Not at end of bitmap */
+ uint bitmap_pattern;
+ offset_page= (((pos / block_size) % share->bitmap.pages_covered) -1) * 3;
+ offset= offset_page & 7;
+ data= bitmap_buff + offset_page / 8;
+ bitmap_pattern= uint2korr(data);
+ if (((bitmap_pattern >> offset)) ||
+ (data + 2 < bitmap_buff + share->bitmap.total_size &&
+ _ma_check_if_zero(data+2, bitmap_buff + share->bitmap.total_size -
+ data - 2)))
+ {
+ ulonglong bitmap_page;
+ bitmap_page= pos / block_size / share->bitmap.pages_covered;
+ bitmap_page*= share->bitmap.pages_covered;
+
+ _ma_check_print_error(param, "Bitmap at %s has pages reserved outside of data file length",
+ llstr(bitmap_page, llbuff));
+ DBUG_EXECUTE("bitmap", _ma_print_bitmap(&share->bitmap, bitmap_buff,
+ bitmap_page););
+ }
+ }
+
+ _ma_scan_end_block_record(info);
+
+ if (full_page_count != param->full_page_count)
+ _ma_check_print_error(param, "Full page count read through records was %s but we found %s pages while scanning table",
+ llstr(param->full_page_count, llbuff),
+ llstr(full_page_count, llbuff2));
+ if (tail_count != param->tail_count)
+ _ma_check_print_error(param, "Tail count read through records was %s but we found %s tails while scanning table",
+ llstr(param->tail_count, llbuff),
+ llstr(tail_count, llbuff2));
+
+ /* Update splits to avoid warning */
+ share->state.split= param->splits;
+ info->state->del= param->del_blocks;
+ return param->error_printed != 0;
+
+err:
+ _ma_scan_end_block_record(info);
+ return 1;
+}
+
+
+/* Check that record-link is ok */
+
+int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend)
+{
+ MARIA_SHARE *share= info->s;
+ int error;
+ uchar *record;
+ char llbuff[22],llbuff2[22],llbuff3[22];
+ DBUG_ENTER("maria_chk_data_link");
+
+ if (!(param->testflag & T_SILENT))
+ {
+ if (extend)
+ puts("- check records and index references");
+ else
+ puts("- check record links");
+ }
+
+ if (!(record= (uchar*) my_malloc(share->base.pack_reclength,MYF(0))))
+ {
+ _ma_check_print_error(param,"Not enough memory for record");
+ DBUG_RETURN(-1);
+ }
+ param->records= param->del_blocks= 0;
+ param->used= param->link_used= param->splits= param->del_length= 0;
+ param->tmp_record_checksum= param->glob_crc= 0;
+ param->err_count= 0;
+
+ error= 0;
+ param->empty= share->pack.header_length;
+
+ bzero((char*) param->tmp_key_crc,
+ share->base.keys * sizeof(param->tmp_key_crc[0]));
+
+ switch (share->data_file_type) {
+ case BLOCK_RECORD:
+ error= check_block_record(param, info, extend, record);
+ break;
+ case STATIC_RECORD:
+ error= check_static_record(param, info, extend, record);
+ break;
+ case DYNAMIC_RECORD:
+ error= check_dynamic_record(param, info, extend, record);
+ break;
+ case COMPRESSED_RECORD:
+ error= check_compressed_record(param, info, extend, record);
+ break;
+ } /* switch */
+
+ if (error)
+ goto err;
+
+ if (param->testflag & T_WRITE_LOOP)
+ {
+ VOID(fputs(" \r",stdout)); VOID(fflush(stdout));
+ }
+ if (param->records != info->state->records)
+ {
+ _ma_check_print_error(param,
+ "Record-count is not ok; found %-10s Should be: %s",
+ llstr(param->records,llbuff),
+ llstr(info->state->records,llbuff2));
+ error=1;
+ }
+ else if (param->record_checksum &&
+ param->record_checksum != param->tmp_record_checksum)
+ {
+ _ma_check_print_error(param,
+ "Key pointers and record positions doesn't match");
+ error=1;
+ }
+ else if (param->glob_crc != info->state->checksum &&
+ (share->options &
+ (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)))
+ {
+ _ma_check_print_warning(param,
+ "Record checksum is not the same as checksum stored in the index file");
+ error=1;
+ }
+ else if (!extend)
+ {
+ uint key;
+ for (key=0 ; key < share->base.keys; key++)
+ {
+ if (param->tmp_key_crc[key] != param->key_crc[key] &&
+ !(share->keyinfo[key].flag & (HA_FULLTEXT | HA_SPATIAL)))
+ {
+ _ma_check_print_error(param,"Checksum for key: %2d doesn't match checksum for records",
+ key+1);
+ error=1;
+ }
+ }
+ }
+
+ if (param->del_length != info->state->empty)
+ {
+ _ma_check_print_warning(param,
+ "Found %s deleted space. Should be %s",
+ llstr(param->del_length,llbuff2),
+ llstr(info->state->empty,llbuff));
+ }
+ if (param->used + param->empty + param->del_length !=
+ info->state->data_file_length)
+ {
+ _ma_check_print_warning(param,
+ "Found %s record data and %s unused data and %s deleted data",
+ llstr(param->used, llbuff),
+ llstr(param->empty,llbuff2),
+ llstr(param->del_length,llbuff3));
+ _ma_check_print_warning(param,
+ "Total %s Should be: %s",
+ llstr((param->used+param->empty+param->del_length),
+ llbuff),
+ llstr(info->state->data_file_length,llbuff2));
+ }
+ if (param->del_blocks != info->state->del)
+ {
+ _ma_check_print_warning(param,
+ "Found %10s deleted blocks Should be: %s",
+ llstr(param->del_blocks,llbuff),
+ llstr(info->state->del,llbuff2));
+ }
+ if (param->splits != share->state.split)
+ {
+ _ma_check_print_warning(param,
+ "Found %10s parts Should be: %s parts",
+ llstr(param->splits, llbuff),
+ llstr(share->state.split,llbuff2));
+ }
+ if (param->testflag & T_INFO)
+ {
+ if (param->warning_printed || param->error_printed)
+ puts("");
+ if (param->used != 0 && ! param->error_printed)
+ {
+ if (param->records)
+ {
+ printf("Records:%18s M.recordlength:%9lu Packed:%14.0f%%\n",
+ llstr(param->records,llbuff),
+ (long)((param->used - param->link_used)/param->records),
+ (share->base.blobs ? 0.0 :
+ (ulonglong2double((ulonglong) share->base.reclength *
+ param->records)-
+ my_off_t2double(param->used))/
+ ulonglong2double((ulonglong) share->base.reclength *
+ param->records)*100.0));
+ printf("Recordspace used:%9.0f%% Empty space:%12d%% Blocks/Record: %6.2f\n",
+ (ulonglong2double(param->used - param->link_used)/
+ ulonglong2double(param->used-param->link_used+param->empty)*100.0),
+ (!param->records ? 100 :
+ (int) (ulonglong2double(param->del_length+param->empty)/
+ my_off_t2double(param->used)*100.0)),
+ ulonglong2double(param->splits - param->del_blocks) /
+ param->records);
+ }
+ else
+ printf("Records:%18s\n", "0");
+ }
+ printf("Record blocks:%12s Delete blocks:%10s\n",
+ llstr(param->splits - param->del_blocks, llbuff),
+ llstr(param->del_blocks, llbuff2));
+ printf("Record data: %12s Deleted data: %10s\n",
+ llstr(param->used - param->link_used,llbuff),
+ llstr(param->del_length, llbuff2));
+ printf("Lost space: %12s Linkdata: %10s\n",
+ llstr(param->empty, llbuff),llstr(param->link_used, llbuff2));
+ }
+ my_free((uchar*) record,MYF(0));
+ DBUG_RETURN (error);
+
+ err:
+ my_free((uchar*) record,MYF(0));
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ DBUG_RETURN(1);
+} /* maria_chk_data_link */
+
+
+/**
+ @brief Initialize variables for repair
+*/
+
+static int initialize_variables_for_repair(HA_CHECK *param,
+ MARIA_SORT_INFO *sort_info,
+ MARIA_SORT_PARAM *sort_param,
+ MARIA_HA *info,
+ uint rep_quick)
+{
+ MARIA_SHARE *share= info->s;
+
+ bzero((char*) sort_info, sizeof(*sort_info));
+ bzero((char*) sort_param, sizeof(*sort_param));
+
+ param->testflag|= T_REP; /* for easy checking */
+ if (share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
+ param->testflag|= T_CALC_CHECKSUM;
+ param->glob_crc= 0;
+ if (rep_quick)
+ param->testflag|= T_QUICK;
+ else
+ param->testflag&= ~T_QUICK;
+ param->org_key_map= share->state.key_map;
+
+ sort_param->sort_info= sort_info;
+ sort_param->fix_datafile= (my_bool) (! rep_quick);
+ sort_param->calc_checksum= test(param->testflag & T_CALC_CHECKSUM);
+ sort_info->info= sort_info->new_info= info;
+ sort_info->param= param;
+ set_data_file_type(sort_info, info->s);
+ sort_info->org_data_file_type= share->data_file_type;
+
+ bzero(&info->rec_cache, sizeof(info->rec_cache));
+ info->rec_cache.file= info->dfile.file;
+ info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+
+ /* calculate max_records */
+ /*
+ The physical size of the data file is sometimes used during repair (see
+ sort_info.filelength further below); We need to flush to have it exact.
+ We flush the state because our maria_open(HA_OPEN_COPY) will want to read
+ it from disk. Index file will be recreated.
+ */
+ if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
+ FLUSH_FORCE_WRITE,
+ (param->testflag & T_CREATE_MISSING_KEYS) ?
+ FLUSH_FORCE_WRITE : FLUSH_IGNORE_CHANGED) ||
+ (share->changed && _ma_state_info_write(share, 1|2|4)))
+ return(1);
+
+ sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
+ if ((param->testflag & T_CREATE_MISSING_KEYS) ||
+ sort_info->org_data_file_type == COMPRESSED_RECORD)
+ sort_info->max_records= info->state->records;
+ else
+ {
+ ulong rec_length;
+ rec_length= max(share->base.min_pack_length,
+ share->base.min_block_length);
+ sort_info->max_records= (ha_rows) (sort_info->filelength / rec_length);
+ }
+ return 0;
+}
+
+
+/*
+ Recover old table by reading each record and writing all keys
+
+ NOTES
+ Save new datafile-name in temp_filename.
+ We overwrite the index file as we go (writekeys() for example), so if we
+ crash during this the table is unusable and user (or Recovery in the
+ future) must repeat the REPAIR/OPTIMIZE operation. We could use a
+ temporary index file in the future (drawback: more disk space).
+
+ IMPLEMENTATION (for hard repair with block format)
+ - Create new, unrelated MARIA_HA of the table
+ - Create new datafile and associate it with new handler
+ - Reset all statistic information in new handler
+ - Copy all data to new handler with normal write operations
+ - Move state of new handler to old handler
+ - Close new handler
+ - Close data file in old handler
+ - Rename old data file to new data file.
+ - Reopen data file in old handler
+*/
+
+int maria_repair(HA_CHECK *param, register MARIA_HA *info,
+ char *name, uint rep_quick)
+{
+ int error, got_error;
+ uint i;
+ ha_rows start_records,new_header_length;
+ my_off_t del;
+ File new_file;
+ MARIA_SHARE *share= info->s;
+ char llbuff[22],llbuff2[22];
+ MARIA_SORT_INFO sort_info;
+ MARIA_SORT_PARAM sort_param;
+ my_bool block_record, scan_inited= 0;
+ enum data_file_type org_data_file_type= share->data_file_type;
+ myf sync_dir= ((share->now_transactional && !share->temporary) ?
+ MY_SYNC_DIR : 0);
+ DBUG_ENTER("maria_repair");
+
+ got_error= 1;
+ new_file= -1;
+ start_records= info->state->records;
+ if (!(param->testflag & T_SILENT))
+ {
+ printf("- recovering (with keycache) MARIA-table '%s'\n",name);
+ printf("Data records: %s\n", llstr(start_records, llbuff));
+ }
+
+ if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
+ rep_quick))
+ goto err;
+
+ new_header_length= ((param->testflag & T_UNPACK) ? 0L :
+ share->pack.header_length);
+
+ if (!rep_quick)
+ {
+ /* Get real path for data file */
+ if ((new_file= my_create(fn_format(param->temp_filename,
+ share->data_file_name, "",
+ DATA_TMP_EXT, 2+4),
+ 0,param->tmpfile_createflag,
+ MYF(0))) < 0)
+ {
+ _ma_check_print_error(param,"Can't create new tempfile: '%s'",
+ param->temp_filename);
+ goto err;
+ }
+ if (new_header_length &&
+ maria_filecopy(param, new_file, info->dfile.file, 0L,
+ new_header_length, "datafile-header"))
+ goto err;
+ share->state.dellink= HA_OFFSET_ERROR;
+ info->rec_cache.file= new_file; /* For sort_delete_record */
+ if (share->data_file_type == BLOCK_RECORD ||
+ (param->testflag & T_UNPACK))
+ {
+ if (create_new_data_handle(&sort_param, new_file))
+ goto err;
+ sort_info.new_info->rec_cache.file= new_file;
+ }
+ }
+
+ block_record= sort_info.new_info->s->data_file_type == BLOCK_RECORD;
+
+ if (org_data_file_type != BLOCK_RECORD)
+ {
+ /* We need a read buffer to read rows in big blocks */
+ if (init_io_cache(&param->read_cache, info->dfile.file,
+ (uint) param->read_buffer_length,
+ READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)))
+ goto err;
+ }
+ if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
+ {
+ /* When writing to not block records, we need a write buffer */
+ if (!rep_quick)
+ {
+ if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
+ (uint) param->write_buffer_length,
+ WRITE_CACHE, new_header_length, 1,
+ MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
+ goto err;
+ sort_info.new_info->opt_flag|=WRITE_CACHE_USED;
+ }
+ }
+ else if (block_record)
+ {
+ scan_inited= 1;
+ if (maria_scan_init(sort_info.info))
+ goto err;
+ }
+
+ if (!(sort_param.record=(uchar*) my_malloc((uint) share->base.pack_reclength,
+ MYF(0))) ||
+ _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
+ share->base.default_rec_buff_size))
+ {
+ _ma_check_print_error(param, "Not enough memory for extra record");
+ goto err;
+ }
+
+ sort_param.read_cache=param->read_cache;
+ sort_param.pos=sort_param.max_pos=share->pack.header_length;
+ sort_param.filepos=new_header_length;
+ param->read_cache.end_of_file= sort_info.filelength;
+ sort_param.master=1;
+ sort_info.max_records= ~(ha_rows) 0;
+
+ del=info->state->del;
+ info->state->records=info->state->del=share->state.split=0;
+ info->state->empty=0;
+
+ /*
+ Clear all keys. Note that all key blocks allocated until now remain
+ "dead" parts of the key file. (Bug #4692)
+ */
+ for (i=0 ; i < share->base.keys ; i++)
+ share->state.key_root[i]= HA_OFFSET_ERROR;
+
+ /* Drop the delete chain. */
+ share->state.key_del= HA_OFFSET_ERROR;
+
+ /*
+ If requested, activate (enable) all keys in key_map. In this case,
+ all indexes will be (re-)built.
+ */
+ if (param->testflag & T_CREATE_MISSING_KEYS)
+ maria_set_all_keys_active(share->state.key_map, share->base.keys);
+
+ info->state->key_file_length=share->base.keystart;
+
+ maria_lock_memory(param); /* Everything is alloced */
+
+ /* Re-create all keys, which are set in key_map. */
+ while (!(error=sort_get_next_record(&sort_param)))
+ {
+ if (block_record && _ma_sort_write_record(&sort_param))
+ goto err;
+
+ if (writekeys(&sort_param))
+ {
+ if (my_errno != HA_ERR_FOUND_DUPP_KEY)
+ goto err;
+ DBUG_DUMP("record",(uchar*) sort_param.record,share->base.pack_reclength);
+ _ma_check_print_info(param,
+ "Duplicate key %2d for record at %10s against new record at %10s",
+ info->errkey+1,
+ llstr(sort_param.start_recpos,llbuff),
+ llstr(info->dup_key_pos,llbuff2));
+ if (param->testflag & T_VERBOSE)
+ {
+ VOID(_ma_make_key(info,(uint) info->errkey,info->lastkey,
+ sort_param.record,0L));
+ _ma_print_key(stdout,share->keyinfo[info->errkey].seg,info->lastkey,
+ USE_WHOLE_KEY);
+ }
+ sort_info.dupp++;
+ if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
+ {
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ param->error_printed=1;
+ goto err;
+ }
+ /* purecov: begin tested */
+ if (block_record)
+ {
+ sort_info.new_info->state->records--;
+ if ((*sort_info.new_info->s->write_record_abort)(sort_info.new_info))
+ {
+ _ma_check_print_error(param,"Couldn't delete duplicate row");
+ goto err;
+ }
+ continue;
+ }
+ /* purecov: end */
+ }
+ if (!block_record)
+ {
+ if (_ma_sort_write_record(&sort_param))
+ goto err;
+ /* Filepos is pointer to where next row will be stored */
+ sort_param.current_filepos= sort_param.filepos;
+ }
+ }
+ if (error > 0 || maria_write_data_suffix(&sort_info, (my_bool)!rep_quick) ||
+ flush_io_cache(&sort_info.new_info->rec_cache) ||
+ param->read_cache.error < 0)
+ goto err;
+
+ if (param->testflag & T_WRITE_LOOP)
+ {
+ VOID(fputs(" \r",stdout)); VOID(fflush(stdout));
+ }
+ if (my_chsize(share->kfile.file, info->state->key_file_length, 0, MYF(0)))
+ {
+ _ma_check_print_warning(param,
+ "Can't change size of indexfile, error: %d",
+ my_errno);
+ goto err;
+ }
+
+ if (rep_quick && del+sort_info.dupp != info->state->del)
+ {
+ _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records");
+ _ma_check_print_error(param,"Run recovery again without -q");
+ param->retry_repair=1;
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ goto err;
+ }
+
+ if (param->testflag & T_SAFE_REPAIR)
+ {
+ /* Don't repair if we loosed more than one row */
+ if (sort_info.new_info->state->records+1 < start_records)
+ {
+ info->state->records=start_records;
+ goto err;
+ }
+ }
+
+ VOID(end_io_cache(&sort_info.new_info->rec_cache));
+ info->opt_flag&= ~WRITE_CACHE_USED;
+ if (_ma_flush_table_files_after_repair(param, info))
+ goto err;
+
+ if (!rep_quick)
+ {
+ sort_info.new_info->state->data_file_length= sort_param.filepos;
+ if (sort_info.new_info != sort_info.info)
+ {
+ MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
+ if (maria_close(sort_info.new_info))
+ {
+ _ma_check_print_error(param, "Got error %d on close", my_errno);
+ goto err;
+ }
+ copy_data_file_state(&share->state, &save_state);
+ new_file= -1;
+ sort_info.new_info= info;
+ }
+ share->state.version=(ulong) time((time_t*) 0); /* Force reopen */
+
+ /* Replace the actual file with the temporary file */
+ if (new_file >= 0)
+ my_close(new_file, MYF(MY_WME));
+ new_file= -1;
+ change_data_file_descriptor(info, -1);
+ if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT,
+ DATA_TMP_EXT,
+ (param->testflag & T_BACKUP_DATA ?
+ MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
+ sync_dir) ||
+ _ma_open_datafile(info, share, -1))
+ {
+ goto err;
+ }
+ }
+ else
+ {
+ info->state->data_file_length= sort_param.max_pos;
+ }
+ if (param->testflag & T_CALC_CHECKSUM)
+ info->state->checksum= param->glob_crc;
+
+ if (!(param->testflag & T_SILENT))
+ {
+ if (start_records != info->state->records)
+ printf("Data records: %s\n", llstr(info->state->records,llbuff));
+ if (sort_info.dupp)
+ _ma_check_print_warning(param,
+ "%s records have been removed",
+ llstr(sort_info.dupp,llbuff));
+ }
+
+ got_error= 0;
+ /* If invoked by external program that uses thr_lock */
+ if (&share->state.state != info->state)
+ memcpy( &share->state.state, info->state, sizeof(*info->state));
+
+err:
+ if (scan_inited)
+ maria_scan_end(sort_info.info);
+
+ VOID(end_io_cache(&param->read_cache));
+ VOID(end_io_cache(&sort_info.new_info->rec_cache));
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ /* this below could fail, shouldn't we detect error? */
+ if (got_error)
+ {
+ if (! param->error_printed)
+ _ma_check_print_error(param,"%d for record at pos %s",my_errno,
+ llstr(sort_param.start_recpos,llbuff));
+ (void) _ma_flush_table_files_after_repair(param, info);
+ if (sort_info.new_info && sort_info.new_info != sort_info.info)
+ {
+ unuse_data_file_descriptor(sort_info.new_info);
+ maria_close(sort_info.new_info);
+ }
+ if (new_file >= 0)
+ {
+ VOID(my_close(new_file,MYF(0)));
+ VOID(my_delete(param->temp_filename, MYF(MY_WME)));
+ }
+ maria_mark_crashed_on_repair(info);
+ }
+ else if (sync_dir)
+ {
+ /*
+ Now that we have flushed and forced everything, we can bump
+ create_rename_lsn:
+ */
+ write_log_record_for_repair(param, info);
+ }
+ my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR));
+ my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR));
+ my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR));
+ if (!got_error && (param->testflag & T_UNPACK))
+ restore_data_file_type(share);
+ share->state.changed|= (STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES |
+ STATE_NOT_ANALYZED);
+ share->state.changed&= ~STATE_NOT_OPTIMIZED_ROWS;
+ DBUG_RETURN(got_error);
+}
+
+
+/* Uppdate keyfile when doing repair */
+
+static int writekeys(MARIA_SORT_PARAM *sort_param)
+{
+ uint i;
+ uchar *key;
+ MARIA_HA *info= sort_param->sort_info->info;
+ MARIA_SHARE *share= info->s;
+ uchar *buff= sort_param->record;
+ my_off_t filepos= sort_param->current_filepos;
+ DBUG_ENTER("writekeys");
+
+ key= info->lastkey+share->base.max_key_length;
+ for (i=0 ; i < share->base.keys ; i++)
+ {
+ if (maria_is_key_active(share->state.key_map, i))
+ {
+ if (share->keyinfo[i].flag & HA_FULLTEXT )
+ {
+ if (_ma_ft_add(info,i, key,buff,filepos))
+ goto err;
+ }
+#ifdef HAVE_SPATIAL
+ else if (share->keyinfo[i].flag & HA_SPATIAL)
+ {
+ uint key_length= _ma_make_key(info,i,key,buff,filepos);
+ if (maria_rtree_insert(info, i, key, key_length))
+ goto err;
+ }
+#endif /*HAVE_SPATIAL*/
+ else
+ {
+ uint key_length= _ma_make_key(info,i,key,buff,filepos);
+ if (_ma_ck_write(info,i,key,key_length))
+ goto err;
+ }
+ }
+ }
+ DBUG_RETURN(0);
+
+ err:
+ if (my_errno == HA_ERR_FOUND_DUPP_KEY)
+ {
+ info->errkey=(int) i; /* This key was found */
+ while ( i-- > 0 )
+ {
+ if (maria_is_key_active(share->state.key_map, i))
+ {
+ if (share->keyinfo[i].flag & HA_FULLTEXT)
+ {
+ if (_ma_ft_del(info,i,key,buff,filepos))
+ break;
+ }
+ else
+ {
+ uint key_length= _ma_make_key(info,i,key,buff,filepos);
+ if (_ma_ck_delete(info,i,key,key_length))
+ break;
+ }
+ }
+ }
+ }
+ /* Remove checksum that was added to glob_crc in sort_get_next_record */
+ if (sort_param->calc_checksum)
+ sort_param->sort_info->param->glob_crc-= info->cur_row.checksum;
+ DBUG_PRINT("error",("errno: %d",my_errno));
+ DBUG_RETURN(-1);
+} /* writekeys */
+
+
+ /* Change all key-pointers that points to a records */
+
+int maria_movepoint(register MARIA_HA *info, uchar *record,
+ MARIA_RECORD_POS oldpos, MARIA_RECORD_POS newpos,
+ uint prot_key)
+{
+ register uint i;
+ uchar *key;
+ uint key_length;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("maria_movepoint");
+
+ key= info->lastkey+share->base.max_key_length;
+ for (i=0 ; i < share->base.keys; i++)
+ {
+ if (i != prot_key && maria_is_key_active(share->state.key_map, i))
+ {
+ key_length= _ma_make_key(info,i,key,record,oldpos);
+ if (share->keyinfo[i].flag & HA_NOSAME)
+ { /* Change pointer direct */
+ uint nod_flag;
+ MARIA_KEYDEF *keyinfo;
+ keyinfo=share->keyinfo+i;
+ if (_ma_search(info,keyinfo,key,USE_WHOLE_KEY,
+ (uint) (SEARCH_SAME | SEARCH_SAVE_BUFF),
+ share->state.key_root[i]))
+ DBUG_RETURN(-1);
+ nod_flag= _ma_test_if_nod(share, info->buff);
+ _ma_dpointer(info,info->int_keypos-nod_flag-
+ share->rec_reflength,newpos);
+ if (_ma_write_keypage(info, keyinfo, info->last_keypage,
+ PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
+ info->buff))
+ DBUG_RETURN(-1);
+ }
+ else
+ { /* Change old key to new */
+ if (_ma_ck_delete(info,i,key,key_length))
+ DBUG_RETURN(-1);
+ key_length= _ma_make_key(info,i,key,record,newpos);
+ if (_ma_ck_write(info,i,key,key_length))
+ DBUG_RETURN(-1);
+ }
+ }
+ }
+ DBUG_RETURN(0);
+} /* maria_movepoint */
+
+
+ /* Tell system that we want all memory for our cache */
+
+void maria_lock_memory(HA_CHECK *param __attribute__((unused)))
+{
+#ifdef SUN_OS /* Key-cacheing thrases on sun 4.1 */
+ if (param->opt_maria_lock_memory)
+ {
+ int success = mlockall(MCL_CURRENT); /* or plock(DATLOCK); */
+ if (geteuid() == 0 && success != 0)
+ _ma_check_print_warning(param,
+ "Failed to lock memory. errno %d",my_errno);
+ }
+#endif
+} /* maria_lock_memory */
+
+
+/**
+ Flush all changed blocks to disk so that we can say "at the end of repair,
+ the table is fully ok on disk".
+
+ It is a requirement for transactional tables.
+ We release blocks as it's unlikely that they would all be needed soon.
+
+ @param param description of the repair operation
+ @param info table
+*/
+
+int _ma_flush_table_files_after_repair(HA_CHECK *param, MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+ if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
+ FLUSH_RELEASE, FLUSH_RELEASE) ||
+ _ma_state_info_write(share, 1|4) ||
+ (share->base.born_transactional && _ma_sync_table_files(info)))
+ {
+ _ma_check_print_error(param,"%d when trying to write bufferts",my_errno);
+ return 1;
+ }
+ return 0;
+} /* _ma_flush_table_files_after_repair */
+
+
+ /* Sort index for more efficent reads */
+
+int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, char *name)
+{
+ reg2 uint key;
+ reg1 MARIA_KEYDEF *keyinfo;
+ File new_file;
+ my_off_t index_pos[HA_MAX_POSSIBLE_KEY];
+ uint r_locks,w_locks;
+ int old_lock;
+ MARIA_SHARE *share= info->s;
+ MARIA_STATE_INFO old_state;
+ myf sync_dir= (share->now_transactional && !share->temporary) ?
+ MY_SYNC_DIR : 0;
+ DBUG_ENTER("maria_sort_index");
+
+ /* cannot sort index files with R-tree indexes */
+ for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
+ key++,keyinfo++)
+ if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
+ DBUG_RETURN(0);
+
+ if (!(param->testflag & T_SILENT))
+ printf("- Sorting index for MARIA-table '%s'\n",name);
+
+ /* Get real path for index file */
+ fn_format(param->temp_filename,name,"", MARIA_NAME_IEXT,2+4+32);
+ if ((new_file=my_create(fn_format(param->temp_filename,param->temp_filename,
+ "", INDEX_TMP_EXT,2+4),
+ 0,param->tmpfile_createflag,MYF(0))) <= 0)
+ {
+ _ma_check_print_error(param,"Can't create new tempfile: '%s'",
+ param->temp_filename);
+ DBUG_RETURN(-1);
+ }
+ if (maria_filecopy(param, new_file, share->kfile.file, 0L,
+ (ulong) share->base.keystart, "headerblock"))
+ goto err;
+
+ param->new_file_pos=share->base.keystart;
+ for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
+ key++,keyinfo++)
+ {
+ if (! maria_is_key_active(share->state.key_map, key))
+ continue;
+
+ if (share->state.key_root[key] != HA_OFFSET_ERROR)
+ {
+ index_pos[key]=param->new_file_pos; /* Write first block here */
+ if (sort_one_index(param,info,keyinfo,share->state.key_root[key],
+ new_file))
+ goto err;
+ }
+ else
+ index_pos[key]= HA_OFFSET_ERROR; /* No blocks */
+ }
+
+ /* Flush key cache for this file if we are calling this outside maria_chk */
+ flush_pagecache_blocks(share->pagecache, &share->kfile,
+ FLUSH_IGNORE_CHANGED);
+
+ share->state.version=(ulong) time((time_t*) 0);
+ old_state= share->state; /* save state if not stored */
+ r_locks= share->r_locks;
+ w_locks= share->w_locks;
+ old_lock= info->lock_type;
+
+ /* Put same locks as old file */
+ share->r_locks= share->w_locks= share->tot_locks= 0;
+ (void) _ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE);
+ pthread_mutex_lock(&share->intern_lock);
+ VOID(my_close(share->kfile.file, MYF(MY_WME)));
+ share->kfile.file = -1;
+ pthread_mutex_unlock(&share->intern_lock);
+ VOID(my_close(new_file,MYF(MY_WME)));
+ if (maria_change_to_newfile(share->index_file_name, MARIA_NAME_IEXT,
+ INDEX_TMP_EXT, sync_dir) ||
+ _ma_open_keyfile(share))
+ goto err2;
+ info->lock_type= F_UNLCK; /* Force maria_readinfo to lock */
+ _ma_readinfo(info,F_WRLCK,0); /* Will lock the table */
+ info->lock_type= old_lock;
+ share->r_locks= r_locks;
+ share->w_locks= w_locks;
+ share->tot_locks= r_locks+w_locks;
+ share->state= old_state; /* Restore old state */
+
+ info->state->key_file_length=param->new_file_pos;
+ info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ for (key=0 ; key < share->base.keys ; key++)
+ share->state.key_root[key]=index_pos[key];
+ share->state.key_del= HA_OFFSET_ERROR;
+
+ share->state.changed&= ~STATE_NOT_SORTED_PAGES;
+ DBUG_RETURN(0);
+
+err:
+ VOID(my_close(new_file,MYF(MY_WME)));
+err2:
+ VOID(my_delete(param->temp_filename,MYF(MY_WME)));
+ DBUG_RETURN(-1);
+} /* maria_sort_index */
+
+
+/**
+ @brief put CRC on the page
+
+ @param buff reference on the page buffer.
+ @param pos position of the page in the file.
+ @param length length of the page
+*/
+
+static void put_crc(char *buff, my_off_t pos, MARIA_SHARE *share)
+{
+ maria_page_crc_set_index(buff, pos / share->block_size, (uchar*) share);
+}
+
+
+ /* Sort records recursive using one index */
+
+static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
+ MARIA_KEYDEF *keyinfo,
+ my_off_t pagepos, File new_file)
+{
+ uint length,nod_flag,used_length, key_length;
+ uchar *buff,*keypos,*endpos;
+ uchar key[HA_MAX_POSSIBLE_KEY_BUFF];
+ my_off_t new_page_pos,next_page;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("sort_one_index");
+
+ /* cannot walk over R-tree indices */
+ DBUG_ASSERT(keyinfo->key_alg != HA_KEY_ALG_RTREE);
+ new_page_pos=param->new_file_pos;
+ param->new_file_pos+=keyinfo->block_length;
+
+ if (!(buff= (uchar*) my_alloca((uint) keyinfo->block_length)))
+ {
+ _ma_check_print_error(param,"Not enough memory for key block");
+ DBUG_RETURN(-1);
+ }
+ if (!_ma_fetch_keypage(info, keyinfo, pagepos,PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, buff, 0, 0))
+ {
+ report_keypage_fault(param, pagepos);
+ goto err;
+ }
+ if ((nod_flag=_ma_test_if_nod(share, buff)) || keyinfo->flag & HA_FULLTEXT)
+ {
+ used_length= _ma_get_page_used(share, buff);
+ keypos=buff + share->keypage_header + nod_flag;
+ endpos=buff + used_length;
+ for ( ;; )
+ {
+ if (nod_flag)
+ {
+ next_page= _ma_kpos(nod_flag,keypos);
+ /* Save new pos */
+ _ma_kpointer(info,keypos-nod_flag,param->new_file_pos);
+ if (sort_one_index(param,info,keyinfo,next_page,new_file))
+ {
+ DBUG_PRINT("error",
+ ("From page: %ld, keyoffset: %lu used_length: %d",
+ (ulong) pagepos, (ulong) (keypos - buff),
+ (int) used_length));
+ DBUG_DUMP("buff",(uchar*) buff,used_length);
+ goto err;
+ }
+ }
+ if (keypos >= endpos ||
+ (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,key)) == 0)
+ break;
+ DBUG_ASSERT(keypos <= endpos);
+ if (keyinfo->flag & HA_FULLTEXT)
+ {
+ uint off;
+ int subkeys;
+ get_key_full_length_rdonly(off, key);
+ subkeys=ft_sintXkorr(key+off);
+ if (subkeys < 0)
+ {
+ next_page= _ma_dpos(info,0,key+key_length);
+ _ma_dpointer(info,keypos-nod_flag-share->rec_reflength,
+ param->new_file_pos); /* Save new pos */
+ if (sort_one_index(param,info,&share->ft2_keyinfo,
+ next_page,new_file))
+ goto err;
+ }
+ }
+ }
+ }
+
+ /* Fill block with zero and write it to the new index file */
+ length= _ma_get_page_used(share, buff);
+ bzero((uchar*) buff+length,keyinfo->block_length-length);
+ put_crc(buff, new_page_pos, share);
+ if (my_pwrite(new_file,(uchar*) buff,(uint) keyinfo->block_length,
+ new_page_pos,MYF(MY_NABP | MY_WAIT_IF_FULL)))
+ {
+ _ma_check_print_error(param,"Can't write indexblock, error: %d",my_errno);
+ goto err;
+ }
+ my_afree((uchar*) buff);
+ DBUG_RETURN(0);
+err:
+ my_afree((uchar*) buff);
+ DBUG_RETURN(1);
+} /* sort_one_index */
+
+
+ /*
+ Let temporary file replace old file.
+ This assumes that the new file was created in the same
+ directory as given by realpath(filename).
+ This will ensure that any symlinks that are used will still work.
+ Copy stats from old file to new file, deletes orignal and
+ changes new file name to old file name
+ */
+
+int maria_change_to_newfile(const char * filename, const char * old_ext,
+ const char * new_ext, myf MyFlags)
+{
+ char old_filename[FN_REFLEN],new_filename[FN_REFLEN];
+#ifdef USE_RAID
+ if (raid_chunks)
+ return my_raid_redel(fn_format(old_filename,filename,"",old_ext,2+4),
+ fn_format(new_filename,filename,"",new_ext,2+4),
+ raid_chunks,
+ MYF(MY_WME | MY_LINK_WARNING | MyFlags));
+#endif
+ /* Get real path to filename */
+ (void) fn_format(old_filename,filename,"",old_ext,2+4+32);
+ return my_redel(old_filename,
+ fn_format(new_filename,old_filename,"",new_ext,2+4),
+ MYF(MY_WME | MY_LINK_WARNING | MyFlags));
+} /* maria_change_to_newfile */
+
+
+/* Copy a block between two files */
+
+int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start,
+ my_off_t length, const char *type)
+{
+ char tmp_buff[IO_SIZE],*buff;
+ ulong buff_length;
+ DBUG_ENTER("maria_filecopy");
+
+ buff_length=(ulong) min(param->write_buffer_length,length);
+ if (!(buff=my_malloc(buff_length,MYF(0))))
+ {
+ buff=tmp_buff; buff_length=IO_SIZE;
+ }
+
+ VOID(my_seek(from,start,MY_SEEK_SET,MYF(0)));
+ while (length > buff_length)
+ {
+ if (my_read(from,(uchar*) buff,buff_length,MYF(MY_NABP)) ||
+ my_write(to,(uchar*) buff,buff_length,param->myf_rw))
+ goto err;
+ length-= buff_length;
+ }
+ if (my_read(from,(uchar*) buff,(uint) length,MYF(MY_NABP)) ||
+ my_write(to,(uchar*) buff,(uint) length,param->myf_rw))
+ goto err;
+ if (buff != tmp_buff)
+ my_free(buff,MYF(0));
+ DBUG_RETURN(0);
+err:
+ if (buff != tmp_buff)
+ my_free(buff,MYF(0));
+ _ma_check_print_error(param,"Can't copy %s to tempfile, error %d",
+ type,my_errno);
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Repair table or given index using sorting
+
+ SYNOPSIS
+ maria_repair_by_sort()
+ param Repair parameters
+ info MARIA handler to repair
+ name Name of table (for warnings)
+ rep_quick set to <> 0 if we should not change data file
+
+ RESULT
+ 0 ok
+ <>0 Error
+*/
+
+int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
+ const char * name, uint rep_quick)
+{
+ int got_error;
+ uint i;
+ ha_rows start_records;
+ my_off_t new_header_length, org_header_length, del;
+ File new_file;
+ MARIA_SORT_PARAM sort_param;
+ MARIA_SHARE *share= info->s;
+ HA_KEYSEG *keyseg;
+ double *rec_per_key_part;
+ char llbuff[22];
+ MARIA_SORT_INFO sort_info;
+ ulonglong key_map= share->state.key_map;
+ myf sync_dir= ((share->now_transactional && !share->temporary) ?
+ MY_SYNC_DIR : 0);
+ my_bool scan_inited= 0;
+ DBUG_ENTER("maria_repair_by_sort");
+
+ got_error= 1;
+ new_file= -1;
+ start_records= info->state->records;
+ if (!(param->testflag & T_SILENT))
+ {
+ printf("- recovering (with sort) MARIA-table '%s'\n",name);
+ printf("Data records: %s\n", llstr(start_records,llbuff));
+ }
+
+ if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
+ rep_quick))
+ goto err;
+
+ org_header_length= share->pack.header_length;
+ new_header_length= (param->testflag & T_UNPACK) ? 0 : org_header_length;
+ sort_param.filepos= new_header_length;
+
+ if (!rep_quick)
+ {
+ /* Get real path for data file */
+ if ((new_file=my_create(fn_format(param->temp_filename,
+ share->data_file_name, "",
+ DATA_TMP_EXT, 2+4),
+ 0,param->tmpfile_createflag,
+ MYF(0))) < 0)
+ {
+ _ma_check_print_error(param,"Can't create new tempfile: '%s'",
+ param->temp_filename);
+ goto err;
+ }
+ if (new_header_length &&
+ maria_filecopy(param, new_file, info->dfile.file, 0L,
+ new_header_length, "datafile-header"))
+ goto err;
+
+ share->state.dellink= HA_OFFSET_ERROR;
+ info->rec_cache.file= new_file; /* For sort_delete_record */
+ if (share->data_file_type == BLOCK_RECORD ||
+ (param->testflag & T_UNPACK))
+ {
+ if (create_new_data_handle(&sort_param, new_file))
+ goto err;
+ sort_info.new_info->rec_cache.file= new_file;
+ }
+ }
+
+ if (!(sort_info.key_block=
+ alloc_key_blocks(param,
+ (uint) param->sort_key_blocks,
+ share->base.max_key_block_length)))
+ goto err;
+ sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
+
+ if (share->data_file_type != BLOCK_RECORD)
+ {
+ /* We need a read buffer to read rows in big blocks */
+ if (init_io_cache(&param->read_cache, info->dfile.file,
+ (uint) param->read_buffer_length,
+ READ_CACHE, org_header_length, 1, MYF(MY_WME)))
+ goto err;
+ }
+ if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
+ {
+ /* When writing to not block records, we need a write buffer */
+ if (!rep_quick)
+ {
+ if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
+ (uint) param->write_buffer_length,
+ WRITE_CACHE, new_header_length, 1,
+ MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
+ goto err;
+ sort_info.new_info->opt_flag|= WRITE_CACHE_USED;
+ }
+ }
+
+ if (!(sort_param.record=(uchar*) my_malloc((uint) share->base.pack_reclength,
+ MYF(0))) ||
+ _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
+ share->base.default_rec_buff_size))
+ {
+ _ma_check_print_error(param, "Not enough memory for extra record");
+ goto err;
+ }
+
+ if (!(param->testflag & T_CREATE_MISSING_KEYS))
+ {
+ /* Clear the pointers to the given rows */
+ for (i=0 ; i < share->base.keys ; i++)
+ share->state.key_root[i]= HA_OFFSET_ERROR;
+ share->state.key_del= HA_OFFSET_ERROR;
+ info->state->key_file_length=share->base.keystart;
+ }
+ else
+ key_map= ~key_map; /* Create the missing keys */
+
+ param->read_cache.end_of_file= sort_info.filelength;
+ sort_param.wordlist=NULL;
+ init_alloc_root(&sort_param.wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0);
+
+ sort_param.key_cmp=sort_key_cmp;
+ sort_param.lock_in_memory=maria_lock_memory;
+ sort_param.tmpdir=param->tmpdir;
+ sort_param.master =1;
+
+ del=info->state->del;
+
+ rec_per_key_part= param->new_rec_per_key_part;
+ for (sort_param.key=0 ; sort_param.key < share->base.keys ;
+ rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++)
+ {
+ sort_param.keyinfo=share->keyinfo+sort_param.key;
+ if (! maria_is_key_active(key_map, sort_param.key))
+ {
+ /* Remember old statistics for key */
+ memcpy((char*) rec_per_key_part,
+ (char*) (share->state.rec_per_key_part +
+ (uint) (rec_per_key_part - param->new_rec_per_key_part)),
+ sort_param.keyinfo->keysegs*sizeof(*rec_per_key_part));
+ continue;
+ }
+
+ if ((!(param->testflag & T_SILENT)))
+ printf ("- Fixing index %d\n",sort_param.key+1);
+
+ sort_param.read_cache=param->read_cache;
+ sort_param.seg=sort_param.keyinfo->seg;
+ sort_param.max_pos= sort_param.pos= org_header_length;
+ keyseg=sort_param.seg;
+ bzero((char*) sort_param.unique,sizeof(sort_param.unique));
+ sort_param.key_length=share->rec_reflength;
+ for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++)
+ {
+ sort_param.key_length+=keyseg[i].length;
+ if (keyseg[i].flag & HA_SPACE_PACK)
+ sort_param.key_length+=get_pack_length(keyseg[i].length);
+ if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
+ sort_param.key_length+=2 + test(keyseg[i].length >= 127);
+ if (keyseg[i].flag & HA_NULL_PART)
+ sort_param.key_length++;
+ }
+ info->state->records=info->state->del=share->state.split=0;
+ info->state->empty=0;
+
+ if (sort_param.keyinfo->flag & HA_FULLTEXT)
+ {
+ uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
+ sort_param.keyinfo->seg->charset->mbmaxlen;
+ sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
+ /*
+ fulltext indexes may have much more entries than the
+ number of rows in the table. We estimate the number here.
+
+ Note, built-in parser is always nr. 0 - see ftparser_call_initializer()
+ */
+ if (sort_param.keyinfo->ftparser_nr == 0)
+ {
+ /*
+ for built-in parser the number of generated index entries
+ cannot be larger than the size of the data file divided
+ by the minimal word's length
+ */
+ sort_info.max_records=
+ (ha_rows) (sort_info.filelength/ft_min_word_len+1);
+ }
+ else
+ {
+ /*
+ for external plugin parser we cannot tell anything at all :(
+ so, we'll use all the sort memory and start from ~10 buffpeks.
+ (see _create_index_by_sort)
+ */
+ sort_info.max_records=
+ 10*param->sort_buffer_length/sort_param.key_length;
+ }
+
+ sort_param.key_read= sort_maria_ft_key_read;
+ sort_param.key_write= sort_maria_ft_key_write;
+ }
+ else
+ {
+ sort_param.key_read= sort_key_read;
+ sort_param.key_write= sort_key_write;
+ }
+
+ if (sort_info.new_info->s->data_file_type == BLOCK_RECORD)
+ {
+ scan_inited= 1;
+ if (maria_scan_init(sort_info.info))
+ goto err;
+ }
+ if (_ma_create_index_by_sort(&sort_param,
+ (my_bool) (!(param->testflag & T_VERBOSE)),
+ (size_t) param->sort_buffer_length))
+ {
+ param->retry_repair=1;
+ goto err;
+ }
+ if (scan_inited)
+ {
+ scan_inited= 0;
+ maria_scan_end(sort_info.info);
+ }
+
+ /* No need to calculate checksum again. */
+ sort_param.calc_checksum= 0;
+ free_root(&sort_param.wordroot, MYF(0));
+
+ /* Set for next loop */
+ sort_info.max_records= (ha_rows) sort_info.new_info->state->records;
+
+ if (param->testflag & T_STATISTICS)
+ maria_update_key_parts(sort_param.keyinfo, rec_per_key_part,
+ sort_param.unique,
+ (param->stats_method ==
+ MI_STATS_METHOD_IGNORE_NULLS ?
+ sort_param.notnull : NULL),
+ (ulonglong) info->state->records);
+ maria_set_key_active(share->state.key_map, sort_param.key);
+
+ if (sort_param.fix_datafile)
+ {
+ param->read_cache.end_of_file=sort_param.filepos;
+ if (maria_write_data_suffix(&sort_info,1) ||
+ end_io_cache(&sort_info.new_info->rec_cache))
+ goto err;
+ sort_info.new_info->opt_flag&= ~WRITE_CACHE_USED;
+
+ if (param->testflag & T_SAFE_REPAIR)
+ {
+ /* Don't repair if we loosed more than one row */
+ if (info->state->records+1 < start_records)
+ {
+ info->state->records=start_records;
+ goto err;
+ }
+ }
+
+ if (_ma_flush_table_files_after_repair(param, info))
+ goto err;
+
+ sort_info.new_info->state->data_file_length= sort_param.filepos;
+ if (sort_info.new_info != sort_info.info)
+ {
+ MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
+ if (maria_close(sort_info.new_info))
+ {
+ _ma_check_print_error(param, "Got error %d on close", my_errno);
+ goto err;
+ }
+ copy_data_file_state(&share->state, &save_state);
+ new_file= -1;
+ sort_info.new_info= info;
+ }
+
+ share->state.version=(ulong) time((time_t*) 0); /* Force reopen */
+
+ /* Replace the actual file with the temporary file */
+ if (new_file >= 0)
+ {
+ my_close(new_file, MYF(MY_WME));
+ new_file= -1;
+ }
+ change_data_file_descriptor(info, -1);
+ if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT,
+ DATA_TMP_EXT,
+ (param->testflag & T_BACKUP_DATA ?
+ MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
+ sync_dir) ||
+ _ma_open_datafile(info, share, -1))
+ {
+ goto err;
+ }
+ if (param->testflag & T_UNPACK)
+ restore_data_file_type(share);
+
+ org_header_length= share->pack.header_length;
+ sort_info.org_data_file_type= share->data_file_type;
+ sort_info.filelength= info->state->data_file_length;
+ sort_param.fix_datafile=0;
+ }
+ else
+ info->state->data_file_length=sort_param.max_pos;
+
+ param->read_cache.file= info->dfile.file; /* re-init read cache */
+ reinit_io_cache(&param->read_cache,READ_CACHE,share->pack.header_length,
+ 1,1);
+ }
+
+ if (param->testflag & T_WRITE_LOOP)
+ {
+ VOID(fputs(" \r",stdout)); VOID(fflush(stdout));
+ }
+
+ if (rep_quick && del+sort_info.dupp != info->state->del)
+ {
+ _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records");
+ _ma_check_print_error(param,"Run recovery again without -q");
+ got_error=1;
+ param->retry_repair=1;
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ goto err;
+ }
+
+ if (rep_quick & T_FORCE_UNIQUENESS)
+ {
+ my_off_t skr= (info->state->data_file_length +
+ (sort_info.org_data_file_type == COMPRESSED_RECORD) ?
+ MEMMAP_EXTRA_MARGIN : 0);
+#ifdef USE_RELOC
+ if (sort_info.org_data_file_type == STATIC_RECORD &&
+ skr < share->base.reloc*share->base.min_pack_length)
+ skr=share->base.reloc*share->base.min_pack_length;
+#endif
+ if (skr != sort_info.filelength)
+ if (my_chsize(info->dfile.file, skr, 0, MYF(0)))
+ _ma_check_print_warning(param,
+ "Can't change size of datafile, error: %d",
+ my_errno);
+ }
+
+ if (param->testflag & T_CALC_CHECKSUM)
+ info->state->checksum=param->glob_crc;
+
+ if (my_chsize(share->kfile.file, info->state->key_file_length, 0, MYF(0)))
+ _ma_check_print_warning(param,
+ "Can't change size of indexfile, error: %d",
+ my_errno);
+
+ if (!(param->testflag & T_SILENT))
+ {
+ if (start_records != info->state->records)
+ printf("Data records: %s\n", llstr(info->state->records,llbuff));
+ if (sort_info.dupp)
+ _ma_check_print_warning(param,
+ "%s records have been removed",
+ llstr(sort_info.dupp,llbuff));
+ }
+ got_error=0;
+
+ if (&share->state.state != info->state)
+ memcpy( &share->state.state, info->state, sizeof(*info->state));
+
+err:
+ if (scan_inited)
+ maria_scan_end(sort_info.info);
+
+ VOID(end_io_cache(&sort_info.new_info->rec_cache));
+ VOID(end_io_cache(&param->read_cache));
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ if (got_error)
+ {
+ if (! param->error_printed)
+ _ma_check_print_error(param,"%d when fixing table",my_errno);
+ (void) _ma_flush_table_files_after_repair(param, info);
+ if (sort_info.new_info && sort_info.new_info != sort_info.info)
+ {
+ unuse_data_file_descriptor(sort_info.new_info);
+ maria_close(sort_info.new_info);
+ }
+ if (new_file >= 0)
+ {
+ VOID(my_close(new_file,MYF(0)));
+ VOID(my_delete(param->temp_filename, MYF(MY_WME)));
+ }
+ maria_mark_crashed_on_repair(info);
+ }
+ else
+ {
+ if (key_map == share->state.key_map)
+ share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
+ /*
+ Now that we have flushed and forced everything, we can bump
+ create_rename_lsn:
+ */
+ write_log_record_for_repair(param, info);
+ }
+ share->state.changed|= STATE_NOT_SORTED_PAGES;
+ share->state.changed&= ~STATE_NOT_OPTIMIZED_ROWS;
+
+ my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR));
+ my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR));
+ my_free((uchar*) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR));
+ my_free((uchar*) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR));
+ my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR));
+ DBUG_RETURN(got_error);
+}
+
+/*
+ Threaded repair of table using sorting
+
+ SYNOPSIS
+ maria_repair_parallel()
+ param Repair parameters
+ info MARIA handler to repair
+ name Name of table (for warnings)
+ rep_quick set to <> 0 if we should not change data file
+
+ DESCRIPTION
+ Same as maria_repair_by_sort but do it multithreaded
+ Each key is handled by a separate thread.
+ TODO: make a number of threads a parameter
+
+ In parallel repair we use one thread per index. There are two modes:
+
+ Quick
+
+ Only the indexes are rebuilt. All threads share a read buffer.
+ Every thread that needs fresh data in the buffer enters the shared
+ cache lock. The last thread joining the lock reads the buffer from
+ the data file and wakes all other threads.
+
+ Non-quick
+
+ The data file is rebuilt and all indexes are rebuilt to point to
+ the new record positions. One thread is the master thread. It
+ reads from the old data file and writes to the new data file. It
+ also creates one of the indexes. The other threads read from a
+ buffer which is filled by the master. If they need fresh data,
+ they enter the shared cache lock. If the masters write buffer is
+ full, it flushes it to the new data file and enters the shared
+ cache lock too. When all threads joined in the lock, the master
+ copies its write buffer to the read buffer for the other threads
+ and wakes them.
+
+ RESULT
+ 0 ok
+ <>0 Error
+*/
+
+int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
+ const char * name, uint rep_quick)
+{
+#ifndef THREAD
+ return maria_repair_by_sort(param, info, name, rep_quick);
+#else
+ int got_error;
+ uint i,key, total_key_length, istep;
+ ha_rows start_records;
+ my_off_t new_header_length,del;
+ File new_file;
+ MARIA_SORT_PARAM *sort_param=0, tmp_sort_param;
+ MARIA_SHARE *share= info->s;
+ double *rec_per_key_part;
+ HA_KEYSEG *keyseg;
+ char llbuff[22];
+ IO_CACHE new_data_cache; /* For non-quick repair. */
+ IO_CACHE_SHARE io_share;
+ MARIA_SORT_INFO sort_info;
+ ulonglong key_map=share->state.key_map;
+ pthread_attr_t thr_attr;
+ myf sync_dir= (share->now_transactional && !share->temporary) ?
+ MY_SYNC_DIR : 0;
+ DBUG_ENTER("maria_repair_parallel");
+
+ got_error= 1;
+ new_file= -1;
+ start_records= info->state->records;
+ if (!(param->testflag & T_SILENT))
+ {
+ printf("- parallel recovering (with sort) MARIA-table '%s'\n",name);
+ printf("Data records: %s\n", llstr(start_records, llbuff));
+ }
+
+ if (initialize_variables_for_repair(param, &sort_info, &tmp_sort_param, info,
+ rep_quick))
+ goto err;
+
+ new_header_length= ((param->testflag & T_UNPACK) ? 0 :
+ share->pack.header_length);
+
+ /*
+ Quick repair (not touching data file, rebuilding indexes):
+ {
+ Read cache is (MI_CHECK *param)->read_cache using info->dfile.file.
+ }
+
+ Non-quick repair (rebuilding data file and indexes):
+ {
+ Master thread:
+
+ Read cache is (MI_CHECK *param)->read_cache using info->dfile.file.
+ Write cache is (MI_INFO *info)->rec_cache using new_file.
+
+ Slave threads:
+
+ Read cache is new_data_cache synced to master rec_cache.
+
+ The final assignment of the filedescriptor for rec_cache is done
+ after the cache creation.
+
+ Don't check file size on new_data_cache, as the resulting file size
+ is not known yet.
+
+ As rec_cache and new_data_cache are synced, write_buffer_length is
+ used for the read cache 'new_data_cache'. Both start at the same
+ position 'new_header_length'.
+ }
+ */
+ DBUG_PRINT("info", ("is quick repair: %d", rep_quick));
+
+ /* Initialize pthread structures before goto err. */
+ pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST);
+ pthread_cond_init(&sort_info.cond, 0);
+
+ if (!(sort_info.key_block=
+ alloc_key_blocks(param, (uint) param->sort_key_blocks,
+ share->base.max_key_block_length)) ||
+ init_io_cache(&param->read_cache, info->dfile.file,
+ (uint) param->read_buffer_length,
+ READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)) ||
+ (!rep_quick &&
+ (init_io_cache(&info->rec_cache, info->dfile.file,
+ (uint) param->write_buffer_length,
+ WRITE_CACHE, new_header_length, 1,
+ MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw) ||
+ init_io_cache(&new_data_cache, -1,
+ (uint) param->write_buffer_length,
+ READ_CACHE, new_header_length, 1,
+ MYF(MY_WME | MY_DONT_CHECK_FILESIZE)))))
+ goto err;
+ sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
+ info->opt_flag|=WRITE_CACHE_USED;
+ info->rec_cache.file= info->dfile.file; /* for sort_delete_record */
+
+ if (!rep_quick)
+ {
+ /* Get real path for data file */
+ if ((new_file= my_create(fn_format(param->temp_filename,
+ share->data_file_name, "",
+ DATA_TMP_EXT,
+ 2+4),
+ 0,param->tmpfile_createflag,
+ MYF(0))) < 0)
+ {
+ _ma_check_print_error(param,"Can't create new tempfile: '%s'",
+ param->temp_filename);
+ goto err;
+ }
+ if (new_header_length &&
+ maria_filecopy(param, new_file, info->dfile.file,0L,new_header_length,
+ "datafile-header"))
+ goto err;
+ if (param->testflag & T_UNPACK)
+ restore_data_file_type(share);
+ share->state.dellink= HA_OFFSET_ERROR;
+ info->rec_cache.file=new_file;
+ }
+
+ info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ if (!(param->testflag & T_CREATE_MISSING_KEYS))
+ {
+ /*
+ Flush key cache for this file if we are calling this outside
+ maria_chk
+ */
+ flush_pagecache_blocks(share->pagecache, &share->kfile,
+ FLUSH_IGNORE_CHANGED);
+ /* Clear the pointers to the given rows */
+ for (i=0 ; i < share->base.keys ; i++)
+ share->state.key_root[i]= HA_OFFSET_ERROR;
+ share->state.key_del= HA_OFFSET_ERROR;
+ info->state->key_file_length=share->base.keystart;
+ }
+ else
+ {
+ if (flush_pagecache_blocks(share->pagecache, &share->kfile,
+ FLUSH_FORCE_WRITE))
+ goto err;
+ key_map= ~key_map; /* Create the missing keys */
+ }
+
+ param->read_cache.end_of_file= sort_info.filelength;
+
+ /*
+ +1 below is required hack for parallel repair mode.
+ The info->state->records value, that is compared later
+ to sort_info.max_records and cannot exceed it, is
+ increased in sort_key_write. In maria_repair_by_sort, sort_key_write
+ is called after sort_key_read, where the comparison is performed,
+ but in parallel mode master thread can call sort_key_write
+ before some other repair thread calls sort_key_read.
+ Furthermore I'm not even sure +1 would be enough.
+ May be sort_info.max_records shold be always set to max value in
+ parallel mode.
+ */
+ sort_info.max_records++;
+
+ del=info->state->del;
+
+ if (!(sort_param=(MARIA_SORT_PARAM *)
+ my_malloc((uint) share->base.keys *
+ (sizeof(MARIA_SORT_PARAM) + share->base.pack_reclength),
+ MYF(MY_ZEROFILL))))
+ {
+ _ma_check_print_error(param,"Not enough memory for key!");
+ goto err;
+ }
+ total_key_length=0;
+ rec_per_key_part= param->new_rec_per_key_part;
+ info->state->records=info->state->del=share->state.split=0;
+ info->state->empty=0;
+
+ for (i=key=0, istep=1 ; key < share->base.keys ;
+ rec_per_key_part+=sort_param[i].keyinfo->keysegs, i+=istep, key++)
+ {
+ sort_param[i].key=key;
+ sort_param[i].keyinfo=share->keyinfo+key;
+ sort_param[i].seg=sort_param[i].keyinfo->seg;
+ if (! maria_is_key_active(key_map, key))
+ {
+ /* Remember old statistics for key */
+ memcpy((char*) rec_per_key_part,
+ (char*) (share->state.rec_per_key_part+
+ (uint) (rec_per_key_part - param->new_rec_per_key_part)),
+ sort_param[i].keyinfo->keysegs*sizeof(*rec_per_key_part));
+ istep=0;
+ continue;
+ }
+ istep=1;
+ if ((!(param->testflag & T_SILENT)))
+ printf ("- Fixing index %d\n",key+1);
+ if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
+ {
+ sort_param[i].key_read=sort_maria_ft_key_read;
+ sort_param[i].key_write=sort_maria_ft_key_write;
+ }
+ else
+ {
+ sort_param[i].key_read=sort_key_read;
+ sort_param[i].key_write=sort_key_write;
+ }
+ sort_param[i].key_cmp=sort_key_cmp;
+ sort_param[i].lock_in_memory=maria_lock_memory;
+ sort_param[i].tmpdir=param->tmpdir;
+ sort_param[i].sort_info=&sort_info;
+ sort_param[i].master=0;
+ sort_param[i].fix_datafile=0;
+ sort_param[i].calc_checksum= 0;
+
+ sort_param[i].filepos=new_header_length;
+ sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length;
+
+ sort_param[i].record= (((uchar *)(sort_param+share->base.keys))+
+ (share->base.pack_reclength * i));
+ if (_ma_alloc_buffer(&sort_param[i].rec_buff, &sort_param[i].rec_buff_size,
+ share->base.default_rec_buff_size))
+ {
+ _ma_check_print_error(param,"Not enough memory!");
+ goto err;
+ }
+ sort_param[i].key_length=share->rec_reflength;
+ for (keyseg=sort_param[i].seg; keyseg->type != HA_KEYTYPE_END;
+ keyseg++)
+ {
+ sort_param[i].key_length+=keyseg->length;
+ if (keyseg->flag & HA_SPACE_PACK)
+ sort_param[i].key_length+=get_pack_length(keyseg->length);
+ if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
+ sort_param[i].key_length+=2 + test(keyseg->length >= 127);
+ if (keyseg->flag & HA_NULL_PART)
+ sort_param[i].key_length++;
+ }
+ total_key_length+=sort_param[i].key_length;
+
+ if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
+ {
+ uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
+ sort_param[i].keyinfo->seg->charset->mbmaxlen;
+ sort_param[i].key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
+ init_alloc_root(&sort_param[i].wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0);
+ }
+ }
+ sort_info.total_keys=i;
+ sort_param[0].master= 1;
+ sort_param[0].fix_datafile= (my_bool)(! rep_quick);
+ sort_param[0].calc_checksum= test(param->testflag & T_CALC_CHECKSUM);
+
+ sort_info.got_error=0;
+ pthread_mutex_lock(&sort_info.mutex);
+
+ /*
+ Initialize the I/O cache share for use with the read caches and, in
+ case of non-quick repair, the write cache. When all threads join on
+ the cache lock, the writer copies the write cache contents to the
+ read caches.
+ */
+ if (i > 1)
+ {
+ if (rep_quick)
+ init_io_cache_share(&param->read_cache, &io_share, NULL, i);
+ else
+ init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i);
+ }
+ else
+ io_share.total_threads= 0; /* share not used */
+
+ (void) pthread_attr_init(&thr_attr);
+ (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
+
+ for (i=0 ; i < sort_info.total_keys ; i++)
+ {
+ /*
+ Copy the properly initialized IO_CACHE structure so that every
+ thread has its own copy. In quick mode param->read_cache is shared
+ for use by all threads. In non-quick mode all threads but the
+ first copy the shared new_data_cache, which is synchronized to the
+ write cache of the first thread. The first thread copies
+ param->read_cache, which is not shared.
+ */
+ sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache :
+ new_data_cache);
+ DBUG_PRINT("io_cache_share", ("thread: %u read_cache: 0x%lx",
+ i, (long) &sort_param[i].read_cache));
+
+ /*
+ two approaches: the same amount of memory for each thread
+ or the memory for the same number of keys for each thread...
+ In the second one all the threads will fill their sort_buffers
+ (and call write_keys) at the same time, putting more stress on i/o.
+ */
+ sort_param[i].sortbuff_size=
+#ifndef USING_SECOND_APPROACH
+ param->sort_buffer_length/sort_info.total_keys;
+#else
+ param->sort_buffer_length*sort_param[i].key_length/total_key_length;
+#endif
+ if (pthread_create(&sort_param[i].thr, &thr_attr,
+ _ma_thr_find_all_keys,
+ (void *) (sort_param+i)))
+ {
+ _ma_check_print_error(param,"Cannot start a repair thread");
+ /* Cleanup: Detach from the share. Avoid others to be blocked. */
+ if (io_share.total_threads)
+ remove_io_thread(&sort_param[i].read_cache);
+ DBUG_PRINT("error", ("Cannot start a repair thread"));
+ sort_info.got_error=1;
+ }
+ else
+ sort_info.threads_running++;
+ }
+ (void) pthread_attr_destroy(&thr_attr);
+
+ /* waiting for all threads to finish */
+ while (sort_info.threads_running)
+ pthread_cond_wait(&sort_info.cond, &sort_info.mutex);
+ pthread_mutex_unlock(&sort_info.mutex);
+
+ if ((got_error= _ma_thr_write_keys(sort_param)))
+ {
+ param->retry_repair=1;
+ goto err;
+ }
+ got_error=1; /* Assume the following may go wrong */
+
+ if (sort_param[0].fix_datafile)
+ {
+ /*
+ Append some nuls to the end of a memory mapped file. Destroy the
+ write cache. The master thread did already detach from the share
+ by remove_io_thread() in sort.c:thr_find_all_keys().
+ */
+ if (maria_write_data_suffix(&sort_info,1) || end_io_cache(&info->rec_cache))
+ goto err;
+ if (param->testflag & T_SAFE_REPAIR)
+ {
+ /* Don't repair if we loosed more than one row */
+ if (info->state->records+1 < start_records)
+ {
+ info->state->records=start_records;
+ goto err;
+ }
+ }
+ share->state.state.data_file_length= info->state->data_file_length=
+ sort_param->filepos;
+ /* Only whole records */
+ share->state.version=(ulong) time((time_t*) 0);
+ /*
+ Exchange the data file descriptor of the table, so that we use the
+ new file from now on.
+ */
+ my_close(info->dfile.file, MYF(0));
+ info->dfile.file= new_file;
+ share->pack.header_length=(ulong) new_header_length;
+ }
+ else
+ info->state->data_file_length=sort_param->max_pos;
+
+ if (rep_quick && del+sort_info.dupp != info->state->del)
+ {
+ _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records");
+ _ma_check_print_error(param,"Run recovery again without -q");
+ param->retry_repair=1;
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ goto err;
+ }
+
+ if (rep_quick & T_FORCE_UNIQUENESS)
+ {
+ my_off_t skr= (info->state->data_file_length +
+ (sort_info.org_data_file_type == COMPRESSED_RECORD) ?
+ MEMMAP_EXTRA_MARGIN : 0);
+#ifdef USE_RELOC
+ if (sort_info.org_data_file_type == STATIC_RECORD &&
+ skr < share->base.reloc*share->base.min_pack_length)
+ skr=share->base.reloc*share->base.min_pack_length;
+#endif
+ if (skr != sort_info.filelength)
+ if (my_chsize(info->dfile.file, skr, 0, MYF(0)))
+ _ma_check_print_warning(param,
+ "Can't change size of datafile, error: %d",
+ my_errno);
+ }
+ if (param->testflag & T_CALC_CHECKSUM)
+ info->state->checksum=param->glob_crc;
+
+ if (my_chsize(share->kfile.file, info->state->key_file_length, 0, MYF(0)))
+ _ma_check_print_warning(param,
+ "Can't change size of indexfile, error: %d", my_errno);
+
+ if (!(param->testflag & T_SILENT))
+ {
+ if (start_records != info->state->records)
+ printf("Data records: %s\n", llstr(info->state->records,llbuff));
+ if (sort_info.dupp)
+ _ma_check_print_warning(param,
+ "%s records have been removed",
+ llstr(sort_info.dupp,llbuff));
+ }
+ got_error=0;
+
+ if (&share->state.state != info->state)
+ memcpy(&share->state.state, info->state, sizeof(*info->state));
+
+err:
+ /*
+ Destroy the write cache. The master thread did already detach from
+ the share by remove_io_thread() or it was not yet started (if the
+ error happend before creating the thread).
+ */
+ VOID(end_io_cache(&sort_info.new_info->rec_cache));
+ VOID(end_io_cache(&param->read_cache));
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ /*
+ Destroy the new data cache in case of non-quick repair. All slave
+ threads did either detach from the share by remove_io_thread()
+ already or they were not yet started (if the error happend before
+ creating the threads).
+ */
+ if (!rep_quick)
+ VOID(end_io_cache(&new_data_cache));
+ got_error|= _ma_flush_table_files_after_repair(param, info);
+ if (!got_error)
+ {
+ /* Replace the actual file with the temporary file */
+ if (new_file >= 0)
+ {
+ my_close(new_file,MYF(0));
+ info->dfile.file= new_file= -1;
+ if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT,
+ DATA_TMP_EXT,
+ MYF((param->testflag & T_BACKUP_DATA ?
+ MY_REDEL_MAKE_BACKUP : 0) |
+ sync_dir)) ||
+ _ma_open_datafile(info,share,-1))
+ got_error=1;
+ }
+ }
+ if (got_error)
+ {
+ if (! param->error_printed)
+ _ma_check_print_error(param,"%d when fixing table",my_errno);
+ if (new_file >= 0)
+ {
+ VOID(my_close(new_file,MYF(0)));
+ VOID(my_delete(param->temp_filename, MYF(MY_WME)));
+ if (info->dfile.file == new_file)
+ info->dfile.file= -1;
+ }
+ maria_mark_crashed_on_repair(info);
+ }
+ else if (key_map == share->state.key_map)
+ share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
+ share->state.changed|= STATE_NOT_SORTED_PAGES;
+ share->state.changed&= ~STATE_NOT_OPTIMIZED_ROWS;
+
+ pthread_cond_destroy (&sort_info.cond);
+ pthread_mutex_destroy(&sort_info.mutex);
+
+ my_free((uchar*) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR));
+ my_free((uchar*) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR));
+ my_free((uchar*) sort_param,MYF(MY_ALLOW_ZERO_PTR));
+ my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR));
+ if (!got_error && (param->testflag & T_UNPACK))
+ restore_data_file_type(share);
+ DBUG_RETURN(got_error);
+#endif /* THREAD */
+}
+
+ /* Read next record and return next key */
+
+static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
+{
+ int error;
+ MARIA_SORT_INFO *sort_info= sort_param->sort_info;
+ MARIA_HA *info= sort_info->info;
+ DBUG_ENTER("sort_key_read");
+
+ if ((error=sort_get_next_record(sort_param)))
+ DBUG_RETURN(error);
+ if (info->state->records == sort_info->max_records)
+ {
+ _ma_check_print_error(sort_info->param,
+ "Key %d - Found too many records; Can't continue",
+ sort_param->key+1);
+ DBUG_RETURN(1);
+ }
+ if (_ma_sort_write_record(sort_param))
+ DBUG_RETURN(1);
+
+ sort_param->real_key_length=
+ (info->s->rec_reflength+
+ _ma_make_key(info, sort_param->key, key,
+ sort_param->record, sort_param->current_filepos));
+#ifdef HAVE_purify
+ bzero(key+sort_param->real_key_length,
+ (sort_param->key_length-sort_param->real_key_length));
+#endif
+ DBUG_RETURN(0);
+} /* sort_key_read */
+
+
+static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
+{
+ int error;
+ MARIA_SORT_INFO *sort_info=sort_param->sort_info;
+ MARIA_HA *info=sort_info->info;
+ FT_WORD *wptr=0;
+ DBUG_ENTER("sort_maria_ft_key_read");
+
+ if (!sort_param->wordlist)
+ {
+ for (;;)
+ {
+ free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
+ if ((error=sort_get_next_record(sort_param)))
+ DBUG_RETURN(error);
+ if ((error= _ma_sort_write_record(sort_param)))
+ DBUG_RETURN(error);
+ if (!(wptr= _ma_ft_parserecord(info,sort_param->key,sort_param->record,
+ &sort_param->wordroot)))
+
+ DBUG_RETURN(1);
+ if (wptr->pos)
+ break;
+ }
+ sort_param->wordptr=sort_param->wordlist=wptr;
+ }
+ else
+ {
+ error=0;
+ wptr=(FT_WORD*)(sort_param->wordptr);
+ }
+
+ sort_param->real_key_length=(info->s->rec_reflength+
+ _ma_ft_make_key(info, sort_param->key,
+ key, wptr++,
+ sort_param->current_filepos));
+#ifdef HAVE_purify
+ if (sort_param->key_length > sort_param->real_key_length)
+ bzero(key+sort_param->real_key_length,
+ (sort_param->key_length-sort_param->real_key_length));
+#endif
+ if (!wptr->pos)
+ {
+ free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
+ sort_param->wordlist=0;
+ }
+ else
+ sort_param->wordptr=(void*)wptr;
+
+ DBUG_RETURN(error);
+} /* sort_maria_ft_key_read */
+
+
+/*
+ Read next record from file using parameters in sort_info.
+
+ SYNOPSIS
+ sort_get_next_record()
+ sort_param Information about and for the sort process
+
+ NOTES
+ Dynamic Records With Non-Quick Parallel Repair
+
+ For non-quick parallel repair we use a synchronized read/write
+ cache. This means that one thread is the master who fixes the data
+ file by reading each record from the old data file and writing it
+ to the new data file. By doing this the records in the new data
+ file are written contiguously. Whenever the write buffer is full,
+ it is copied to the read buffer. The slaves read from the read
+ buffer, which is not associated with a file. Thus read_cache.file
+ is -1. When using _mi_read_cache(), the slaves must always set
+ flag to READING_NEXT so that the function never tries to read from
+ file. This is safe because the records are contiguous. There is no
+ need to read outside the cache. This condition is evaluated in the
+ variable 'parallel_flag' for quick reference. read_cache.file must
+ be >= 0 in every other case.
+
+ RETURN
+ -1 end of file
+ 0 ok
+ sort_param->current_filepos points to record position.
+ sort_param->record contains record
+ sort_param->max_pos contains position to last byte read
+ > 0 error
+*/
+
+static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
+{
+ int searching;
+ int parallel_flag;
+ uint found_record,b_type,left_length;
+ my_off_t pos;
+ MARIA_BLOCK_INFO block_info;
+ MARIA_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
+ MARIA_HA *info=sort_info->info;
+ MARIA_SHARE *share= info->s;
+ char llbuff[22],llbuff2[22];
+ DBUG_ENTER("sort_get_next_record");
+
+ if (*_ma_killed_ptr(param))
+ DBUG_RETURN(1);
+
+ switch (sort_info->org_data_file_type) {
+ case BLOCK_RECORD:
+ {
+ for (;;)
+ {
+ int flag;
+
+ if (info != sort_info->new_info)
+ {
+ /* Safe scanning */
+ flag= _ma_safe_scan_block_record(sort_info, info,
+ sort_param->record);
+ }
+ else
+ {
+ /*
+ Scan on clean table.
+ It requires a reliable data_file_length so we set it.
+ */
+ info->state->data_file_length= sort_info->filelength;
+ flag= _ma_scan_block_record(info, sort_param->record,
+ info->cur_row.nextpos, 1);
+ }
+ if (!flag)
+ {
+ if (sort_param->calc_checksum)
+ {
+ ha_checksum checksum;
+ checksum= (*share->calc_check_checksum)(info, sort_param->record);
+ if (share->calc_checksum &&
+ info->cur_row.checksum != (checksum & 255))
+ {
+ if (param->testflag & T_VERBOSE)
+ {
+ record_pos_to_txt(info, info->cur_row.lastpos, llbuff);
+ _ma_check_print_info(param,
+ "Found record with wrong checksum at %s",
+ llbuff);
+ }
+ continue;
+ }
+ info->cur_row.checksum= checksum;
+ param->glob_crc+= checksum;
+ }
+ sort_param->start_recpos= sort_param->current_filepos=
+ info->cur_row.lastpos;
+ DBUG_RETURN(0);
+ }
+ if (flag == HA_ERR_END_OF_FILE)
+ {
+ sort_param->max_pos= info->state->data_file_length;
+ DBUG_RETURN(-1);
+ }
+ /* Retry only if wrong record, not if disk error */
+ if (flag != HA_ERR_WRONG_IN_RECORD)
+ DBUG_RETURN(flag);
+ }
+ break;
+ }
+ case STATIC_RECORD:
+ for (;;)
+ {
+ if (my_b_read(&sort_param->read_cache,sort_param->record,
+ share->base.pack_reclength))
+ {
+ if (sort_param->read_cache.error)
+ param->out_flag |= O_DATA_LOST;
+ param->retry_repair=1;
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ DBUG_RETURN(-1);
+ }
+ sort_param->start_recpos=sort_param->pos;
+ if (!sort_param->fix_datafile)
+ {
+ sort_param->current_filepos= sort_param->pos;
+ if (sort_param->master)
+ share->state.split++;
+ }
+ sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength);
+ if (*sort_param->record)
+ {
+ if (sort_param->calc_checksum)
+ param->glob_crc+= (info->cur_row.checksum=
+ _ma_static_checksum(info,sort_param->record));
+ DBUG_RETURN(0);
+ }
+ if (!sort_param->fix_datafile && sort_param->master)
+ {
+ info->state->del++;
+ info->state->empty+=share->base.pack_reclength;
+ }
+ }
+ case DYNAMIC_RECORD:
+ {
+ uchar *to;
+ LINT_INIT(to);
+ ha_checksum checksum= 0;
+
+ pos=sort_param->pos;
+ searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND));
+ parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0;
+ for (;;)
+ {
+ found_record=block_info.second_read= 0;
+ left_length=1;
+ if (searching)
+ {
+ pos=MY_ALIGN(pos,MARIA_DYN_ALIGN_SIZE);
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ sort_param->start_recpos=pos;
+ }
+ do
+ {
+ if (pos > sort_param->max_pos)
+ sort_param->max_pos=pos;
+ if (pos & (MARIA_DYN_ALIGN_SIZE-1))
+ {
+ if ((param->testflag & T_VERBOSE) || searching == 0)
+ _ma_check_print_info(param,"Wrong aligned block at %s",
+ llstr(pos,llbuff));
+ if (searching)
+ goto try_next;
+ }
+ if (found_record && pos == param->search_after_block)
+ _ma_check_print_info(param,"Block: %s used by record at %s",
+ llstr(param->search_after_block,llbuff),
+ llstr(sort_param->start_recpos,llbuff2));
+ if (_ma_read_cache(&sort_param->read_cache,
+ (uchar*) block_info.header,pos,
+ MARIA_BLOCK_INFO_HEADER_LENGTH,
+ (! found_record ? READING_NEXT : 0) |
+ parallel_flag | READING_HEADER))
+ {
+ if (found_record)
+ {
+ _ma_check_print_info(param,
+ "Can't read whole record at %s (errno: %d)",
+ llstr(sort_param->start_recpos,llbuff),errno);
+ goto try_next;
+ }
+ DBUG_RETURN(-1);
+ }
+ if (searching && ! sort_param->fix_datafile)
+ {
+ param->error_printed=1;
+ param->retry_repair=1;
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ DBUG_RETURN(1); /* Something wrong with data */
+ }
+ b_type= _ma_get_block_info(&block_info,-1,pos);
+ if ((b_type & (BLOCK_ERROR | BLOCK_FATAL_ERROR)) ||
+ ((b_type & BLOCK_FIRST) &&
+ (block_info.rec_len < (uint) share->base.min_pack_length ||
+ block_info.rec_len > (uint) share->base.max_pack_length)))
+ {
+ uint i;
+ if (param->testflag & T_VERBOSE || searching == 0)
+ _ma_check_print_info(param,
+ "Wrong bytesec: %3d-%3d-%3d at %10s; Skipped",
+ block_info.header[0],block_info.header[1],
+ block_info.header[2],llstr(pos,llbuff));
+ if (found_record)
+ goto try_next;
+ block_info.second_read=0;
+ searching=1;
+ /* Search after block in read header string */
+ for (i=MARIA_DYN_ALIGN_SIZE ;
+ i < MARIA_BLOCK_INFO_HEADER_LENGTH ;
+ i+= MARIA_DYN_ALIGN_SIZE)
+ if (block_info.header[i] >= 1 &&
+ block_info.header[i] <= MARIA_MAX_DYN_HEADER_BYTE)
+ break;
+ pos+=(ulong) i;
+ sort_param->start_recpos=pos;
+ continue;
+ }
+ if (b_type & BLOCK_DELETED)
+ {
+ bool error=0;
+ if (block_info.block_len+ (uint) (block_info.filepos-pos) <
+ share->base.min_block_length)
+ {
+ if (!searching)
+ _ma_check_print_info(param,
+ "Deleted block with impossible length %lu "
+ "at %s",
+ block_info.block_len,llstr(pos,llbuff));
+ error=1;
+ }
+ else
+ {
+ if ((block_info.next_filepos != HA_OFFSET_ERROR &&
+ block_info.next_filepos >=
+ info->state->data_file_length) ||
+ (block_info.prev_filepos != HA_OFFSET_ERROR &&
+ block_info.prev_filepos >= info->state->data_file_length))
+ {
+ if (!searching)
+ _ma_check_print_info(param,
+ "Delete link points outside datafile at %s",
+ llstr(pos,llbuff));
+ error=1;
+ }
+ }
+ if (error)
+ {
+ if (found_record)
+ goto try_next;
+ searching=1;
+ pos+= MARIA_DYN_ALIGN_SIZE;
+ sort_param->start_recpos=pos;
+ block_info.second_read=0;
+ continue;
+ }
+ }
+ else
+ {
+ if (block_info.block_len+ (uint) (block_info.filepos-pos) <
+ share->base.min_block_length ||
+ block_info.block_len > (uint) share->base.max_pack_length+
+ MARIA_SPLIT_LENGTH)
+ {
+ if (!searching)
+ _ma_check_print_info(param,
+ "Found block with impossible length %lu "
+ "at %s; Skipped",
+ block_info.block_len+
+ (uint) (block_info.filepos-pos),
+ llstr(pos,llbuff));
+ if (found_record)
+ goto try_next;
+ searching=1;
+ pos+= MARIA_DYN_ALIGN_SIZE;
+ sort_param->start_recpos=pos;
+ block_info.second_read=0;
+ continue;
+ }
+ }
+ if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
+ {
+ if (!sort_param->fix_datafile && sort_param->master &&
+ (b_type & BLOCK_DELETED))
+ {
+ info->state->empty+=block_info.block_len;
+ info->state->del++;
+ share->state.split++;
+ }
+ if (found_record)
+ goto try_next;
+ if (searching)
+ {
+ pos+=MARIA_DYN_ALIGN_SIZE;
+ sort_param->start_recpos=pos;
+ }
+ else
+ pos=block_info.filepos+block_info.block_len;
+ block_info.second_read=0;
+ continue;
+ }
+
+ if (!sort_param->fix_datafile && sort_param->master)
+ share->state.split++;
+ if (! found_record++)
+ {
+ sort_param->find_length=left_length=block_info.rec_len;
+ sort_param->start_recpos=pos;
+ if (!sort_param->fix_datafile)
+ sort_param->current_filepos= sort_param->start_recpos;
+ if (sort_param->fix_datafile && (param->testflag & T_EXTEND))
+ sort_param->pos=block_info.filepos+1;
+ else
+ sort_param->pos=block_info.filepos+block_info.block_len;
+ if (share->base.blobs)
+ {
+ if (_ma_alloc_buffer(&sort_param->rec_buff,
+ &sort_param->rec_buff_size,
+ block_info.rec_len +
+ share->base.extra_rec_buff_size))
+
+ {
+ if (param->max_record_length >= block_info.rec_len)
+ {
+ _ma_check_print_error(param,"Not enough memory for blob at %s (need %lu)",
+ llstr(sort_param->start_recpos,llbuff),
+ (ulong) block_info.rec_len);
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ _ma_check_print_info(param,"Not enough memory for blob at %s (need %lu); Row skipped",
+ llstr(sort_param->start_recpos,llbuff),
+ (ulong) block_info.rec_len);
+ goto try_next;
+ }
+ }
+ }
+ to= sort_param->rec_buff;
+ }
+ if (left_length < block_info.data_len || ! block_info.data_len)
+ {
+ _ma_check_print_info(param,
+ "Found block with too small length at %s; "
+ "Skipped",
+ llstr(sort_param->start_recpos,llbuff));
+ goto try_next;
+ }
+ if (block_info.filepos + block_info.data_len >
+ sort_param->read_cache.end_of_file)
+ {
+ _ma_check_print_info(param,
+ "Found block that points outside data file "
+ "at %s",
+ llstr(sort_param->start_recpos,llbuff));
+ goto try_next;
+ }
+ /*
+ Copy information that is already read. Avoid accessing data
+ below the cache start. This could happen if the header
+ streched over the end of the previous buffer contents.
+ */
+ {
+ uint header_len= (uint) (block_info.filepos - pos);
+ uint prefetch_len= (MARIA_BLOCK_INFO_HEADER_LENGTH - header_len);
+
+ if (prefetch_len > block_info.data_len)
+ prefetch_len= block_info.data_len;
+ if (prefetch_len)
+ {
+ memcpy(to, block_info.header + header_len, prefetch_len);
+ block_info.filepos+= prefetch_len;
+ block_info.data_len-= prefetch_len;
+ left_length-= prefetch_len;
+ to+= prefetch_len;
+ }
+ }
+ if (block_info.data_len &&
+ _ma_read_cache(&sort_param->read_cache,to,block_info.filepos,
+ block_info.data_len,
+ (found_record == 1 ? READING_NEXT : 0) |
+ parallel_flag))
+ {
+ _ma_check_print_info(param,
+ "Read error for block at: %s (error: %d); Skipped",
+ llstr(block_info.filepos,llbuff),my_errno);
+ goto try_next;
+ }
+ left_length-=block_info.data_len;
+ to+=block_info.data_len;
+ pos=block_info.next_filepos;
+ if (pos == HA_OFFSET_ERROR && left_length)
+ {
+ _ma_check_print_info(param,"Wrong block with wrong total length starting at %s",
+ llstr(sort_param->start_recpos,llbuff));
+ goto try_next;
+ }
+ if (pos + MARIA_BLOCK_INFO_HEADER_LENGTH > sort_param->read_cache.end_of_file)
+ {
+ _ma_check_print_info(param,"Found link that points at %s (outside data file) at %s",
+ llstr(pos,llbuff2),
+ llstr(sort_param->start_recpos,llbuff));
+ goto try_next;
+ }
+ } while (left_length);
+
+ if (_ma_rec_unpack(info,sort_param->record,sort_param->rec_buff,
+ sort_param->find_length) != MY_FILE_ERROR)
+ {
+ if (sort_param->read_cache.error < 0)
+ DBUG_RETURN(1);
+ if (sort_param->calc_checksum)
+ checksum= (share->calc_check_checksum)(info, sort_param->record);
+ if ((param->testflag & (T_EXTEND | T_REP)) || searching)
+ {
+ if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff,
+ sort_param->find_length,
+ (param->testflag & T_QUICK) &&
+ sort_param->calc_checksum &&
+ test(share->calc_checksum), checksum))
+ {
+ _ma_check_print_info(param,"Found wrong packed record at %s",
+ llstr(sort_param->start_recpos,llbuff));
+ goto try_next;
+ }
+ }
+ if (sort_param->calc_checksum)
+ param->glob_crc+= checksum;
+ DBUG_RETURN(0);
+ }
+ if (!searching)
+ _ma_check_print_info(param,"Key %d - Found wrong stored record at %s",
+ sort_param->key+1,
+ llstr(sort_param->start_recpos,llbuff));
+ try_next:
+ pos=(sort_param->start_recpos+=MARIA_DYN_ALIGN_SIZE);
+ searching=1;
+ }
+ }
+ case COMPRESSED_RECORD:
+ for (searching=0 ;; searching=1, sort_param->pos++)
+ {
+ if (_ma_read_cache(&sort_param->read_cache,(uchar*) block_info.header,
+ sort_param->pos,
+ share->pack.ref_length,READING_NEXT))
+ DBUG_RETURN(-1);
+ if (searching && ! sort_param->fix_datafile)
+ {
+ param->error_printed=1;
+ param->retry_repair=1;
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ DBUG_RETURN(1); /* Something wrong with data */
+ }
+ sort_param->start_recpos=sort_param->pos;
+ if (_ma_pack_get_block_info(info, &sort_param->bit_buff, &block_info,
+ &sort_param->rec_buff,
+ &sort_param->rec_buff_size, -1,
+ sort_param->pos))
+ DBUG_RETURN(-1);
+ if (!block_info.rec_len &&
+ sort_param->pos + MEMMAP_EXTRA_MARGIN ==
+ sort_param->read_cache.end_of_file)
+ DBUG_RETURN(-1);
+ if (block_info.rec_len < (uint) share->min_pack_length ||
+ block_info.rec_len > (uint) share->max_pack_length)
+ {
+ if (! searching)
+ _ma_check_print_info(param,
+ "Found block with wrong recordlength: %lu "
+ "at %s\n",
+ block_info.rec_len,
+ llstr(sort_param->pos,llbuff));
+ continue;
+ }
+ if (_ma_read_cache(&sort_param->read_cache,(uchar*) sort_param->rec_buff,
+ block_info.filepos, block_info.rec_len,
+ READING_NEXT))
+ {
+ if (! searching)
+ _ma_check_print_info(param,"Couldn't read whole record from %s",
+ llstr(sort_param->pos,llbuff));
+ continue;
+ }
+#ifdef HAVE_purify
+ bzero(sort_param->rec_buff + block_info.rec_len,
+ share->base.extra_rec_buff_size);
+#endif
+ if (_ma_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record,
+ sort_param->rec_buff, block_info.rec_len))
+ {
+ if (! searching)
+ _ma_check_print_info(param,"Found wrong record at %s",
+ llstr(sort_param->pos,llbuff));
+ continue;
+ }
+ if (!sort_param->fix_datafile)
+ {
+ sort_param->current_filepos= sort_param->pos;
+ if (sort_param->master)
+ share->state.split++;
+ }
+ sort_param->max_pos= (sort_param->pos=block_info.filepos+
+ block_info.rec_len);
+ info->packed_length=block_info.rec_len;
+
+ if (sort_param->calc_checksum)
+ {
+ info->cur_row.checksum= (*share->calc_check_checksum)(info,
+ sort_param->
+ record);
+ param->glob_crc+= info->cur_row.checksum;
+ }
+ DBUG_RETURN(0);
+ }
+ }
+ DBUG_RETURN(1); /* Impossible */
+}
+
+
+/**
+ @brief Write record to new file.
+
+ @fn _ma_sort_write_record()
+ @param sort_param Sort parameters.
+
+ @note
+ This is only called by a master thread if parallel repair is used.
+
+ @return
+ @retval 0 OK
+ sort_param->current_filepos points to inserted record for
+ block_records and to the place for the next record for
+ other row types.
+ sort_param->filepos points to end of file
+ @retval 1 Error
+*/
+
+int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param)
+{
+ int flag;
+ uint length;
+ ulong block_length,reclength;
+ uchar *from;
+ uchar block_buff[8];
+ MARIA_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param= sort_info->param;
+ MARIA_HA *info= sort_info->new_info;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_sort_write_record");
+
+ if (sort_param->fix_datafile)
+ {
+ sort_param->current_filepos= sort_param->filepos;
+ switch (sort_info->new_data_file_type) {
+ case BLOCK_RECORD:
+ if ((sort_param->current_filepos=
+ (*share->write_record_init)(info, sort_param->record)) ==
+ HA_OFFSET_ERROR)
+ DBUG_RETURN(1);
+ /* Pointer to end of file */
+ sort_param->filepos= info->state->data_file_length;
+ break;
+ case STATIC_RECORD:
+ if (my_b_write(&info->rec_cache,sort_param->record,
+ share->base.pack_reclength))
+ {
+ _ma_check_print_error(param,"%d when writing to datafile",my_errno);
+ DBUG_RETURN(1);
+ }
+ sort_param->filepos+=share->base.pack_reclength;
+ share->state.split++;
+ break;
+ case DYNAMIC_RECORD:
+ if (! info->blobs)
+ from=sort_param->rec_buff;
+ else
+ {
+ /* must be sure that local buffer is big enough */
+ reclength=share->base.pack_reclength+
+ _ma_calc_total_blob_length(info,sort_param->record)+
+ ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
+ MARIA_DYN_DELETE_BLOCK_HEADER;
+ if (sort_info->buff_length < reclength)
+ {
+ if (!(sort_info->buff=my_realloc(sort_info->buff, (uint) reclength,
+ MYF(MY_FREE_ON_ERROR |
+ MY_ALLOW_ZERO_PTR))))
+ DBUG_RETURN(1);
+ sort_info->buff_length=reclength;
+ }
+ from=sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER);
+ }
+ /* We can use info->checksum here as only one thread calls this */
+ info->cur_row.checksum= (*share->calc_check_checksum)(info,
+ sort_param->
+ record);
+ reclength= _ma_rec_pack(info,from,sort_param->record);
+ flag=0;
+
+ do
+ {
+ block_length=reclength+ 3 + test(reclength >= (65520-3));
+ if (block_length < share->base.min_block_length)
+ block_length=share->base.min_block_length;
+ info->update|=HA_STATE_WRITE_AT_END;
+ block_length=MY_ALIGN(block_length,MARIA_DYN_ALIGN_SIZE);
+ if (block_length > MARIA_MAX_BLOCK_LENGTH)
+ block_length=MARIA_MAX_BLOCK_LENGTH;
+ if (_ma_write_part_record(info,0L,block_length,
+ sort_param->filepos+block_length,
+ &from,&reclength,&flag))
+ {
+ _ma_check_print_error(param,"%d when writing to datafile",my_errno);
+ DBUG_RETURN(1);
+ }
+ sort_param->filepos+=block_length;
+ share->state.split++;
+ } while (reclength);
+ break;
+ case COMPRESSED_RECORD:
+ reclength=info->packed_length;
+ length= _ma_save_pack_length((uint) share->pack.version, block_buff,
+ reclength);
+ if (share->base.blobs)
+ length+= _ma_save_pack_length((uint) share->pack.version,
+ block_buff + length, info->blob_length);
+ if (my_b_write(&info->rec_cache,block_buff,length) ||
+ my_b_write(&info->rec_cache,(uchar*) sort_param->rec_buff,reclength))
+ {
+ _ma_check_print_error(param,"%d when writing to datafile",my_errno);
+ DBUG_RETURN(1);
+ }
+ sort_param->filepos+=reclength+length;
+ share->state.split++;
+ break;
+ }
+ }
+ if (sort_param->master)
+ {
+ info->state->records++;
+ if ((param->testflag & T_WRITE_LOOP) &&
+ (info->state->records % WRITE_COUNT) == 0)
+ {
+ char llbuff[22];
+ printf("%s\r", llstr(info->state->records,llbuff));
+ VOID(fflush(stdout));
+ }
+ }
+ DBUG_RETURN(0);
+} /* _ma_sort_write_record */
+
+
+/* Compare two keys from _ma_create_index_by_sort */
+
+static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
+ const void *b)
+{
+ uint not_used[2];
+ return (ha_key_cmp(sort_param->seg, *((uchar**) a), *((uchar**) b),
+ USE_WHOLE_KEY, SEARCH_SAME, not_used));
+} /* sort_key_cmp */
+
+
+static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a)
+{
+ uint diff_pos[2];
+ char llbuff[22],llbuff2[22];
+ MARIA_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param= sort_info->param;
+ int cmp;
+
+ if (sort_info->key_block->inited)
+ {
+ cmp=ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey,
+ (uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE,
+ diff_pos);
+ if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
+ ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey,
+ (uchar*) a, USE_WHOLE_KEY,
+ SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos);
+ else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ {
+ diff_pos[0]= maria_collect_stats_nonulls_next(sort_param->seg,
+ sort_param->notnull,
+ sort_info->key_block->lastkey,
+ a);
+ }
+ sort_param->unique[diff_pos[0]-1]++;
+ }
+ else
+ {
+ cmp= -1;
+ if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ maria_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
+ a);
+ }
+ if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0)
+ {
+ sort_info->dupp++;
+ sort_info->info->cur_row.lastpos= get_record_for_key(sort_info->info,
+ sort_param->keyinfo,
+ a);
+ _ma_check_print_warning(param,
+ "Duplicate key for record at %10s against record at %10s",
+ llstr(sort_info->info->cur_row.lastpos, llbuff),
+ llstr(get_record_for_key(sort_info->info,
+ sort_param->keyinfo,
+ sort_info->key_block->
+ lastkey),
+ llbuff2));
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ if (sort_info->param->testflag & T_VERBOSE)
+ _ma_print_key(stdout,sort_param->seg, a, USE_WHOLE_KEY);
+ return (sort_delete_record(sort_param));
+ }
+#ifndef DBUG_OFF
+ if (cmp > 0)
+ {
+ _ma_check_print_error(param,
+ "Internal error: Keys are not in order from sort");
+ return(1);
+ }
+#endif
+ return (sort_insert_key(sort_param, sort_info->key_block,
+ a, HA_OFFSET_ERROR));
+} /* sort_key_write */
+
+
+int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param)
+{
+ MARIA_SORT_INFO *sort_info=sort_param->sort_info;
+ SORT_KEY_BLOCKS *key_block=sort_info->key_block;
+ MARIA_SHARE *share=sort_info->info->s;
+ uint val_off, val_len;
+ int error;
+ SORT_FT_BUF *maria_ft_buf=sort_info->ft_buf;
+ uchar *from, *to;
+
+ val_len=share->ft2_keyinfo.keylength;
+ get_key_full_length_rdonly(val_off, maria_ft_buf->lastkey);
+ to= maria_ft_buf->lastkey+val_off;
+
+ if (maria_ft_buf->buf)
+ {
+ /* flushing first-level tree */
+ error= sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey,
+ HA_OFFSET_ERROR);
+ for (from=to+val_len;
+ !error && from < maria_ft_buf->buf;
+ from+= val_len)
+ {
+ memcpy(to, from, val_len);
+ error= sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey,
+ HA_OFFSET_ERROR);
+ }
+ return error;
+ }
+ /* flushing second-level tree keyblocks */
+ error=_ma_flush_pending_blocks(sort_param);
+ /* updating lastkey with second-level tree info */
+ ft_intXstore(maria_ft_buf->lastkey+val_off, -maria_ft_buf->count);
+ _ma_dpointer(sort_info->info, maria_ft_buf->lastkey+val_off+HA_FT_WLEN,
+ share->state.key_root[sort_param->key]);
+ /* restoring first level tree data in sort_info/sort_param */
+ sort_info->key_block=sort_info->key_block_end- sort_info->param->sort_key_blocks;
+ sort_param->keyinfo=share->keyinfo+sort_param->key;
+ share->state.key_root[sort_param->key]=HA_OFFSET_ERROR;
+ /* writing lastkey in first-level tree */
+ return error ? error :
+ sort_insert_key(sort_param,sort_info->key_block,
+ maria_ft_buf->lastkey,HA_OFFSET_ERROR);
+}
+
+
+static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
+ const uchar *a)
+{
+ uint a_len, val_off, val_len, error;
+ MARIA_SORT_INFO *sort_info= sort_param->sort_info;
+ SORT_FT_BUF *ft_buf= sort_info->ft_buf;
+ SORT_KEY_BLOCKS *key_block= sort_info->key_block;
+ MARIA_SHARE *share= sort_info->info->s;
+
+ val_len=HA_FT_WLEN+share->base.rec_reflength;
+ get_key_full_length_rdonly(a_len, (uchar *)a);
+
+ if (!ft_buf)
+ {
+ /*
+ use two-level tree only if key_reflength fits in rec_reflength place
+ and row format is NOT static - for _ma_dpointer not to garble offsets
+ */
+ if ((share->base.key_reflength <=
+ share->base.rec_reflength) &&
+ (share->options &
+ (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)))
+ ft_buf= (SORT_FT_BUF *)my_malloc(sort_param->keyinfo->block_length +
+ sizeof(SORT_FT_BUF), MYF(MY_WME));
+
+ if (!ft_buf)
+ {
+ sort_param->key_write=sort_key_write;
+ return sort_key_write(sort_param, a);
+ }
+ sort_info->ft_buf= ft_buf;
+ goto word_init_ft_buf; /* no need to duplicate the code */
+ }
+ get_key_full_length_rdonly(val_off, ft_buf->lastkey);
+
+ if (ha_compare_text(sort_param->seg->charset,
+ ((uchar *)a)+1,a_len-1,
+ (uchar*) ft_buf->lastkey+1,val_off-1, 0, 0)==0)
+ {
+ uchar *p;
+ if (!ft_buf->buf) /* store in second-level tree */
+ {
+ ft_buf->count++;
+ return sort_insert_key(sort_param,key_block,
+ a + a_len, HA_OFFSET_ERROR);
+ }
+
+ /* storing the key in the buffer. */
+ memcpy (ft_buf->buf, (char *)a+a_len, val_len);
+ ft_buf->buf+=val_len;
+ if (ft_buf->buf < ft_buf->end)
+ return 0;
+
+ /* converting to two-level tree */
+ p=ft_buf->lastkey+val_off;
+
+ while (key_block->inited)
+ key_block++;
+ sort_info->key_block=key_block;
+ sort_param->keyinfo= &share->ft2_keyinfo;
+ ft_buf->count=(ft_buf->buf - p)/val_len;
+
+ /* flushing buffer to second-level tree */
+ for (error=0; !error && p < ft_buf->buf; p+= val_len)
+ error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR);
+ ft_buf->buf=0;
+ return error;
+ }
+
+ /* flushing buffer */
+ if ((error=_ma_sort_ft_buf_flush(sort_param)))
+ return error;
+
+word_init_ft_buf:
+ a_len+=val_len;
+ memcpy(ft_buf->lastkey, a, a_len);
+ ft_buf->buf=ft_buf->lastkey+a_len;
+ /*
+ 32 is just a safety margin here
+ (at least max(val_len, sizeof(nod_flag)) should be there).
+ May be better performance could be achieved if we'd put
+ (sort_info->keyinfo->block_length-32)/XXX
+ instead.
+ TODO: benchmark the best value for XXX.
+ */
+ ft_buf->end= ft_buf->lastkey+ (sort_param->keyinfo->block_length-32);
+ return 0;
+} /* sort_maria_ft_key_write */
+
+
+ /* get pointer to record from a key */
+
+static my_off_t get_record_for_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ const uchar *key)
+{
+ return _ma_dpos(info,0, key + _ma_keylength(keyinfo, key));
+} /* get_record_for_key */
+
+
+ /* Insert a key in sort-key-blocks */
+
+static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
+ register SORT_KEY_BLOCKS *key_block,
+ const uchar *key,
+ my_off_t prev_block)
+{
+ uint a_length,t_length,nod_flag;
+ my_off_t filepos,key_file_length;
+ uchar *anc_buff,*lastkey;
+ MARIA_KEY_PARAM s_temp;
+ MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
+ MARIA_SORT_INFO *sort_info= sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
+ MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
+ MARIA_HA *info= sort_info->info;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("sort_insert_key");
+
+ anc_buff= key_block->buff;
+ lastkey=key_block->lastkey;
+ nod_flag= (key_block == sort_info->key_block ? 0 :
+ share->base.key_reflength);
+
+ if (!key_block->inited)
+ {
+ key_block->inited=1;
+ if (key_block == sort_info->key_block_end)
+ {
+ _ma_check_print_error(param,"To many key-block-levels; Try increasing sort_key_blocks");
+ DBUG_RETURN(1);
+ }
+ a_length= share->keypage_header + nod_flag;
+ key_block->end_pos= anc_buff + share->keypage_header;
+ bzero(anc_buff, share->keypage_header);
+ _ma_store_keynr(share, anc_buff, (uint) (sort_param->keyinfo -
+ share->keyinfo));
+ lastkey=0; /* No previous key in block */
+ }
+ else
+ a_length= _ma_get_page_used(share, anc_buff);
+
+ /* Save pointer to previous block */
+ if (nod_flag)
+ {
+ _ma_store_keypage_flag(share, anc_buff, KEYPAGE_FLAG_ISNOD);
+ _ma_kpointer(info,key_block->end_pos,prev_block);
+ }
+
+ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,
+ (uchar*) 0,lastkey,lastkey,key,
+ &s_temp);
+ (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp);
+ a_length+=t_length;
+ _ma_store_page_used(share, anc_buff, a_length);
+ key_block->end_pos+=t_length;
+ if (a_length <= keyinfo->block_length)
+ {
+ VOID(_ma_move_key(keyinfo, key_block->lastkey, key));
+ key_block->last_length=a_length-t_length;
+ DBUG_RETURN(0);
+ }
+
+ /* Fill block with end-zero and write filled block */
+ _ma_store_page_used(share, anc_buff, key_block->last_length);
+ bzero(anc_buff+key_block->last_length,
+ keyinfo->block_length- key_block->last_length);
+ key_file_length=info->state->key_file_length;
+ if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) == HA_OFFSET_ERROR)
+ DBUG_RETURN(1);
+
+ /* If we read the page from the key cache, we have to write it back to it */
+ if (page_link->changed)
+ {
+ pop_dynamic(&info->pinned_pages);
+ if (_ma_write_keypage(info, keyinfo, filepos,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ DFLT_INIT_HITS, anc_buff))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ put_crc(anc_buff, filepos, share);
+ if (my_pwrite(share->kfile.file, anc_buff,
+ (uint) keyinfo->block_length, filepos, param->myf_rw))
+ DBUG_RETURN(1);
+ }
+ DBUG_DUMP("buff", anc_buff, _ma_get_page_used(share, anc_buff));
+
+ /* Write separator-key to block in next level */
+ if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos))
+ DBUG_RETURN(1);
+
+ /* clear old block and write new key in it */
+ key_block->inited=0;
+ DBUG_RETURN(sort_insert_key(sort_param, key_block,key,prev_block));
+} /* sort_insert_key */
+
+
+/* Delete record when we found a duplicated key */
+
+static int sort_delete_record(MARIA_SORT_PARAM *sort_param)
+{
+ uint i;
+ int old_file,error;
+ uchar *key;
+ MARIA_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
+ MARIA_HA *row_info= sort_info->new_info, *key_info= sort_info->info;
+ DBUG_ENTER("sort_delete_record");
+
+ if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
+ {
+ _ma_check_print_error(param,
+ "Quick-recover aborted; Run recovery without switch -q or with "
+ "switch -qq");
+ DBUG_RETURN(1);
+ }
+ if (key_info->s->options & HA_OPTION_COMPRESS_RECORD)
+ {
+ _ma_check_print_error(param,
+ "Recover aborted; Can't run standard recovery on "
+ "compressed tables with errors in data-file. "
+ "Use 'maria_chk --safe-recover' to fix it");
+ DBUG_RETURN(1);
+ }
+
+ old_file= row_info->dfile.file;
+ /* This only affects static and dynamic row formats */
+ row_info->dfile.file= row_info->rec_cache.file;
+ if (sort_info->current_key)
+ {
+ key= key_info->lastkey + key_info->s->base.max_key_length;
+ if ((error=(*row_info->s->read_record)(row_info, sort_param->record,
+ key_info->cur_row.lastpos)) &&
+ error != HA_ERR_RECORD_DELETED)
+ {
+ _ma_check_print_error(param,"Can't read record to be removed");
+ row_info->dfile.file= old_file;
+ DBUG_RETURN(1);
+ }
+
+ for (i=0 ; i < sort_info->current_key ; i++)
+ {
+ uint key_length= _ma_make_key(key_info, i, key, sort_param->record,
+ key_info->cur_row.lastpos);
+ if (_ma_ck_delete(key_info, i, key, key_length))
+ {
+ _ma_check_print_error(param,
+ "Can't delete key %d from record to be removed",
+ i+1);
+ row_info->dfile.file= old_file;
+ DBUG_RETURN(1);
+ }
+ }
+ if (sort_param->calc_checksum)
+ param->glob_crc-=(*key_info->s->calc_check_checksum)(key_info,
+ sort_param->record);
+ }
+ error= (flush_io_cache(&row_info->rec_cache) ||
+ (*row_info->s->delete_record)(row_info, sort_param->record));
+ row_info->dfile.file= old_file; /* restore actual value */
+ row_info->state->records--;
+ DBUG_RETURN(error);
+} /* sort_delete_record */
+
+
+/* Fix all pending blocks and flush everything to disk */
+
+int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param)
+{
+ uint nod_flag,length;
+ my_off_t filepos,key_file_length;
+ SORT_KEY_BLOCKS *key_block;
+ MARIA_SORT_INFO *sort_info= sort_param->sort_info;
+ myf myf_rw=sort_info->param->myf_rw;
+ MARIA_HA *info=sort_info->info;
+ MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
+ MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
+ DBUG_ENTER("_ma_flush_pending_blocks");
+
+ filepos= HA_OFFSET_ERROR; /* if empty file */
+ nod_flag=0;
+ for (key_block=sort_info->key_block ; key_block->inited ; key_block++)
+ {
+ key_block->inited=0;
+ length= _ma_get_page_used(info->s, key_block->buff);
+ if (nod_flag)
+ _ma_kpointer(info,key_block->end_pos,filepos);
+ key_file_length=info->state->key_file_length;
+ bzero(key_block->buff+length, keyinfo->block_length-length);
+ if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) ==
+ HA_OFFSET_ERROR)
+ DBUG_RETURN(1);
+
+ /* If we read the page from the key cache, we have to write it back */
+ if (page_link->changed)
+ {
+ pop_dynamic(&info->pinned_pages);
+ if (_ma_write_keypage(info, keyinfo, filepos,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ DFLT_INIT_HITS, key_block->buff))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ put_crc(key_block->buff, filepos, info->s);
+ if (my_pwrite(info->s->kfile.file, key_block->buff,
+ (uint) keyinfo->block_length,filepos, myf_rw))
+ DBUG_RETURN(1);
+ }
+ DBUG_DUMP("buff",key_block->buff,length);
+ nod_flag=1;
+ }
+ info->s->state.key_root[sort_param->key]=filepos; /* Last is root for tree */
+ DBUG_RETURN(0);
+} /* _ma_flush_pending_blocks */
+
+ /* alloc space and pointers for key_blocks */
+
+static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
+ uint buffer_length)
+{
+ reg1 uint i;
+ SORT_KEY_BLOCKS *block;
+ DBUG_ENTER("alloc_key_blocks");
+
+ if (!(block= (SORT_KEY_BLOCKS*) my_malloc((sizeof(SORT_KEY_BLOCKS)+
+ buffer_length+IO_SIZE)*blocks,
+ MYF(0))))
+ {
+ _ma_check_print_error(param,"Not enough memory for sort-key-blocks");
+ return(0);
+ }
+ for (i=0 ; i < blocks ; i++)
+ {
+ block[i].inited=0;
+ block[i].buff= (uchar*) (block+blocks)+(buffer_length+IO_SIZE)*i;
+ }
+ DBUG_RETURN(block);
+} /* alloc_key_blocks */
+
+
+ /* Check if file is almost full */
+
+int maria_test_if_almost_full(MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+
+ if (share->options & HA_OPTION_COMPRESS_RECORD)
+ return 0;
+ return my_seek(share->kfile.file, 0L, MY_SEEK_END,
+ MYF(MY_THREADSAFE))/10*9 >
+ (my_off_t) share->base.max_key_file_length ||
+ my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0)) / 10 * 9 >
+ (my_off_t) share->base.max_data_file_length;
+}
+
+ /* Recreate table with bigger more alloced record-data */
+
+int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename)
+{
+ int error;
+ MARIA_HA info;
+ MARIA_SHARE share;
+ MARIA_KEYDEF *keyinfo,*key,*key_end;
+ HA_KEYSEG *keysegs,*keyseg;
+ MARIA_COLUMNDEF *columndef,*column,*end;
+ MARIA_UNIQUEDEF *uniquedef,*u_ptr,*u_end;
+ MARIA_STATUS_INFO status_info;
+ uint unpack,key_parts;
+ ha_rows max_records;
+ ulonglong file_length,tmp_length;
+ MARIA_CREATE_INFO create_info;
+ DBUG_ENTER("maria_recreate_table");
+
+ error=1; /* Default error */
+ info= **org_info;
+ status_info= (*org_info)->state[0];
+ info.state= &status_info;
+ share= *(*org_info)->s;
+ unpack= (share.options & HA_OPTION_COMPRESS_RECORD) &&
+ (param->testflag & T_UNPACK);
+ if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF) *
+ share.base.keys)))
+ DBUG_RETURN(0);
+ memcpy((uchar*) keyinfo,(uchar*) share.keyinfo,
+ (size_t) (sizeof(MARIA_KEYDEF)*share.base.keys));
+
+ key_parts= share.base.all_key_parts;
+ if (!(keysegs=(HA_KEYSEG*) my_alloca(sizeof(HA_KEYSEG)*
+ (key_parts+share.base.keys))))
+ {
+ my_afree((uchar*) keyinfo);
+ DBUG_RETURN(1);
+ }
+ if (!(columndef=(MARIA_COLUMNDEF*)
+ my_alloca(sizeof(MARIA_COLUMNDEF)*(share.base.fields+1))))
+ {
+ my_afree((uchar*) keyinfo);
+ my_afree((uchar*) keysegs);
+ DBUG_RETURN(1);
+ }
+ if (!(uniquedef=(MARIA_UNIQUEDEF*)
+ my_alloca(sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques+1))))
+ {
+ my_afree((uchar*) columndef);
+ my_afree((uchar*) keyinfo);
+ my_afree((uchar*) keysegs);
+ DBUG_RETURN(1);
+ }
+
+ /* Copy the column definitions */
+ memcpy((uchar*) columndef,(uchar*) share.columndef,
+ (size_t) (sizeof(MARIA_COLUMNDEF)*(share.base.fields+1)));
+ for (column=columndef, end= columndef+share.base.fields;
+ column != end ;
+ column++)
+ {
+ if (unpack && !(share.options & HA_OPTION_PACK_RECORD) &&
+ column->type != FIELD_BLOB &&
+ column->type != FIELD_VARCHAR &&
+ column->type != FIELD_CHECK)
+ column->type=(int) FIELD_NORMAL;
+ }
+
+ /* Change the new key to point at the saved key segments */
+ memcpy((uchar*) keysegs,(uchar*) share.keyparts,
+ (size_t) (sizeof(HA_KEYSEG)*(key_parts+share.base.keys+
+ share.state.header.uniques)));
+ keyseg=keysegs;
+ for (key=keyinfo,key_end=keyinfo+share.base.keys; key != key_end ; key++)
+ {
+ key->seg=keyseg;
+ for (; keyseg->type ; keyseg++)
+ {
+ if (param->language)
+ keyseg->language=param->language; /* change language */
+ }
+ keyseg++; /* Skip end pointer */
+ }
+
+ /*
+ Copy the unique definitions and change them to point at the new key
+ segments
+ */
+ memcpy((uchar*) uniquedef,(uchar*) share.uniqueinfo,
+ (size_t) (sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques)));
+ for (u_ptr=uniquedef,u_end=uniquedef+share.state.header.uniques;
+ u_ptr != u_end ; u_ptr++)
+ {
+ u_ptr->seg=keyseg;
+ keyseg+=u_ptr->keysegs+1;
+ }
+ if (share.options & HA_OPTION_COMPRESS_RECORD)
+ share.base.records=max_records=info.state->records;
+ else if (share.base.min_pack_length)
+ max_records=(ha_rows) (my_seek(info.dfile.file, 0L, MY_SEEK_END,
+ MYF(0)) /
+ (ulong) share.base.min_pack_length);
+ else
+ max_records=0;
+ unpack= (share.data_file_type == COMPRESSED_RECORD) &&
+ (param->testflag & T_UNPACK);
+ share.options&= ~HA_OPTION_TEMP_COMPRESS_RECORD;
+
+ file_length=(ulonglong) my_seek(info.dfile.file, 0L, MY_SEEK_END, MYF(0));
+ tmp_length= file_length+file_length/10;
+ set_if_bigger(file_length,param->max_data_file_length);
+ set_if_bigger(file_length,tmp_length);
+ set_if_bigger(file_length,(ulonglong) share.base.max_data_file_length);
+
+ VOID(maria_close(*org_info));
+ bzero((char*) &create_info,sizeof(create_info));
+ create_info.max_rows=max(max_records,share.base.records);
+ create_info.reloc_rows=share.base.reloc;
+ create_info.old_options=(share.options |
+ (unpack ? HA_OPTION_TEMP_COMPRESS_RECORD : 0));
+
+ create_info.data_file_length=file_length;
+ create_info.auto_increment=share.state.auto_increment;
+ create_info.language = (param->language ? param->language :
+ share.state.header.language);
+ create_info.key_file_length= status_info.key_file_length;
+ create_info.org_data_file_type= ((enum data_file_type)
+ share.state.header.org_data_file_type);
+
+ /*
+ Allow for creating an auto_increment key. This has an effect only if
+ an auto_increment key exists in the original table.
+ */
+ create_info.with_auto_increment= TRUE;
+ create_info.null_bytes= share.base.null_bytes;
+ /*
+ We don't have to handle symlinks here because we are using
+ HA_DONT_TOUCH_DATA
+ */
+ if (maria_create(filename, share.data_file_type,
+ share.base.keys - share.state.header.uniques,
+ keyinfo, share.base.fields, columndef,
+ share.state.header.uniques, uniquedef,
+ &create_info,
+ HA_DONT_TOUCH_DATA))
+ {
+ _ma_check_print_error(param,
+ "Got error %d when trying to recreate indexfile",
+ my_errno);
+ goto end;
+ }
+ *org_info=maria_open(filename,O_RDWR,
+ (param->testflag & T_WAIT_FOREVER) ? HA_OPEN_WAIT_IF_LOCKED :
+ (param->testflag & T_DESCRIPT) ? HA_OPEN_IGNORE_IF_LOCKED :
+ HA_OPEN_ABORT_IF_LOCKED);
+ if (!*org_info)
+ {
+ _ma_check_print_error(param,
+ "Got error %d when trying to open re-created indexfile",
+ my_errno);
+ goto end;
+ }
+ /* We are modifing */
+ (*org_info)->s->options&= ~HA_OPTION_READ_ONLY_DATA;
+ VOID(_ma_readinfo(*org_info,F_WRLCK,0));
+ (*org_info)->state->records=info.state->records;
+ if (share.state.create_time)
+ (*org_info)->s->state.create_time=share.state.create_time;
+ (*org_info)->s->state.unique=(*org_info)->this_unique=
+ share.state.unique;
+ (*org_info)->state->checksum=info.state->checksum;
+ (*org_info)->state->del=info.state->del;
+ (*org_info)->s->state.dellink=share.state.dellink;
+ (*org_info)->state->empty=info.state->empty;
+ (*org_info)->state->data_file_length=info.state->data_file_length;
+ if (maria_update_state_info(param,*org_info,UPDATE_TIME | UPDATE_STAT |
+ UPDATE_OPEN_COUNT))
+ goto end;
+ error=0;
+end:
+ my_afree((uchar*) uniquedef);
+ my_afree((uchar*) keyinfo);
+ my_afree((uchar*) columndef);
+ my_afree((uchar*) keysegs);
+ DBUG_RETURN(error);
+}
+
+
+ /* write suffix to data file if neaded */
+
+int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile)
+{
+ MARIA_HA *info=sort_info->new_info;
+
+ if (info->s->data_file_type == COMPRESSED_RECORD && fix_datafile)
+ {
+ uchar buff[MEMMAP_EXTRA_MARGIN];
+ bzero(buff,sizeof(buff));
+ if (my_b_write(&info->rec_cache,buff,sizeof(buff)))
+ {
+ _ma_check_print_error(sort_info->param,
+ "%d when writing to datafile",my_errno);
+ return 1;
+ }
+ sort_info->param->read_cache.end_of_file+=sizeof(buff);
+ }
+ return 0;
+}
+
+
+/* Update state and maria_chk time of indexfile */
+
+int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update)
+{
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("maria_update_state_info");
+
+ if (update & UPDATE_OPEN_COUNT)
+ {
+ share->state.open_count=0;
+ share->global_changed=0;
+ }
+ if (update & UPDATE_STAT)
+ {
+ uint i, key_parts= mi_uint2korr(share->state.header.key_parts);
+ share->state.records_at_analyze= info->state->records;
+ share->state.changed&= ~STATE_NOT_ANALYZED;
+ if (info->state->records)
+ {
+ for (i=0; i<key_parts; i++)
+ {
+ if (!(share->state.rec_per_key_part[i]=param->new_rec_per_key_part[i]))
+ share->state.changed|= STATE_NOT_ANALYZED;
+ }
+ }
+ }
+ if (update & (UPDATE_STAT | UPDATE_SORT | UPDATE_TIME | UPDATE_AUTO_INC))
+ {
+ if (update & UPDATE_TIME)
+ {
+ share->state.check_time= (long) time((time_t*) 0);
+ if (!share->state.create_time)
+ share->state.create_time=share->state.check_time;
+ }
+ /*
+ When tables are locked we haven't synched the share state and the
+ real state for a while so we better do it here before synching
+ the share state to disk. Only when table is write locked is it
+ necessary to perform this synch.
+ */
+ if (info->lock_type == F_WRLCK)
+ share->state.state= *info->state;
+ if (_ma_state_info_write(share, 1|2))
+ goto err;
+ share->changed=0;
+ }
+ { /* Force update of status */
+ int error;
+ uint r_locks=share->r_locks,w_locks=share->w_locks;
+ share->r_locks= share->w_locks= share->tot_locks= 0;
+ error= _ma_writeinfo(info,WRITEINFO_NO_UNLOCK);
+ share->r_locks=r_locks;
+ share->w_locks=w_locks;
+ share->tot_locks=r_locks+w_locks;
+ if (!error)
+ DBUG_RETURN(0);
+ }
+err:
+ _ma_check_print_error(param,"%d when updating keyfile",my_errno);
+ DBUG_RETURN(1);
+}
+
+/*
+ Update auto increment value for a table
+ When setting the 'repair_only' flag we only want to change the
+ old auto_increment value if its wrong (smaller than some given key).
+ The reason is that we shouldn't change the auto_increment value
+ for a table without good reason when only doing a repair; If the
+ user have inserted and deleted rows, the auto_increment value
+ may be bigger than the biggest current row and this is ok.
+
+ If repair_only is not set, we will update the flag to the value in
+ param->auto_increment is bigger than the biggest key.
+*/
+
+void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info,
+ my_bool repair_only)
+{
+ MARIA_SHARE *share= info->s;
+ uchar *record;
+ DBUG_ENTER("update_auto_increment_key");
+
+ if (!share->base.auto_key ||
+ ! maria_is_key_active(share->state.key_map, share->base.auto_key - 1))
+ {
+ if (!(param->testflag & T_VERY_SILENT))
+ _ma_check_print_info(param,
+ "Table: %s doesn't have an auto increment key\n",
+ param->isam_file_name);
+ DBUG_VOID_RETURN;
+ }
+ if (!(param->testflag & T_SILENT) &&
+ !(param->testflag & T_REP))
+ printf("Updating MARIA file: %s\n", param->isam_file_name);
+ /*
+ We have to use an allocated buffer instead of info->rec_buff as
+ _ma_put_key_in_record() may use info->rec_buff
+ */
+ if (!(record= (uchar*) my_malloc((uint) share->base.pack_reclength,
+ MYF(0))))
+ {
+ _ma_check_print_error(param,"Not enough memory for extra record");
+ DBUG_VOID_RETURN;
+ }
+
+ maria_extra(info,HA_EXTRA_KEYREAD,0);
+ if (maria_rlast(info, record, share->base.auto_key-1))
+ {
+ if (my_errno != HA_ERR_END_OF_FILE)
+ {
+ maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
+ my_free((char*) record, MYF(0));
+ _ma_check_print_error(param,"%d when reading last record",my_errno);
+ DBUG_VOID_RETURN;
+ }
+ if (!repair_only)
+ share->state.auto_increment=param->auto_increment_value;
+ }
+ else
+ {
+ const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
+ ulonglong auto_increment=
+ ma_retrieve_auto_increment(record + keyseg->start, keyseg->type);
+ set_if_bigger(share->state.auto_increment,auto_increment);
+ if (!repair_only)
+ set_if_bigger(share->state.auto_increment, param->auto_increment_value);
+ }
+ maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
+ my_free((char*) record, MYF(0));
+ maria_update_state_info(param, info, UPDATE_AUTO_INC);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Update statistics for each part of an index
+
+ SYNOPSIS
+ maria_update_key_parts()
+ keyinfo IN Index information (only key->keysegs used)
+ rec_per_key_part OUT Store statistics here
+ unique IN Array of (#distinct tuples)
+ notnull_tuples IN Array of (#tuples), or NULL
+ records Number of records in the table
+
+ DESCRIPTION
+ This function is called produce index statistics values from unique and
+ notnull_tuples arrays after these arrays were produced with sequential
+ index scan (the scan is done in two places: chk_index() and
+ sort_key_write()).
+
+ This function handles all 3 index statistics collection methods.
+
+ Unique is an array:
+ unique[0]= (#different values of {keypart1}) - 1
+ unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1
+ ...
+
+ For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too:
+ notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL)
+ notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all
+ keypart{i} are not NULL)
+ ...
+ For all other statistics collection methods notnull_tuples==NULL.
+
+ Output is an array:
+ rec_per_key_part[k] =
+ = E(#records in the table such that keypart_1=c_1 AND ... AND
+ keypart_k=c_k for arbitrary constants c_1 ... c_k)
+
+ = {assuming that values have uniform distribution and index contains all
+ tuples from the domain (or that {c_1, ..., c_k} tuple is choosen from
+ index tuples}
+
+ = #tuples-in-the-index / #distinct-tuples-in-the-index.
+
+ The #tuples-in-the-index and #distinct-tuples-in-the-index have different
+ meaning depending on which statistics collection method is used:
+
+ MI_STATS_METHOD_* how are nulls compared? which tuples are counted?
+ NULLS_EQUAL NULL == NULL all tuples in table
+ NULLS_NOT_EQUAL NULL != NULL all tuples in table
+ IGNORE_NULLS n/a tuples that don't have NULLs
+*/
+
+void maria_update_key_parts(MARIA_KEYDEF *keyinfo, double *rec_per_key_part,
+ ulonglong *unique, ulonglong *notnull,
+ ulonglong records)
+{
+ ulonglong count=0, unique_tuples;
+ ulonglong tuples= records;
+ uint parts;
+ double tmp;
+ for (parts=0 ; parts < keyinfo->keysegs ; parts++)
+ {
+ count+=unique[parts];
+ unique_tuples= count + 1;
+ if (notnull)
+ {
+ tuples= notnull[parts];
+ /*
+ #(unique_tuples not counting tuples with NULLs) =
+ #(unique_tuples counting tuples with NULLs as different) -
+ #(tuples with NULLs)
+ */
+ unique_tuples -= (records - notnull[parts]);
+ }
+
+ if (unique_tuples == 0)
+ tmp= 1;
+ else if (count == 0)
+ tmp= ulonglong2double(tuples); /* 1 unique tuple */
+ else
+ tmp= ulonglong2double(tuples) / ulonglong2double(unique_tuples);
+
+ /*
+ for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
+ let's ensure it is not
+ */
+ set_if_bigger(tmp,1);
+
+ *rec_per_key_part++= tmp;
+ }
+}
+
+
+static ha_checksum maria_byte_checksum(const uchar *buf, uint length)
+{
+ ha_checksum crc;
+ const uchar *end=buf+length;
+ for (crc=0; buf != end; buf++)
+ crc=((crc << 1) + *((uchar*) buf)) +
+ test(crc & (((ha_checksum) 1) << (8*sizeof(ha_checksum)-1)));
+ return crc;
+}
+
+static my_bool maria_too_big_key_for_sort(MARIA_KEYDEF *key, ha_rows rows)
+{
+ uint key_maxlength=key->maxlength;
+ if (key->flag & HA_FULLTEXT)
+ {
+ uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
+ key->seg->charset->mbmaxlen;
+ key_maxlength+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
+ }
+ return (key->flag & HA_SPATIAL) ||
+ (key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT) &&
+ ((ulonglong) rows * key_maxlength >
+ (ulonglong) maria_max_temp_length));
+}
+
+/*
+ Deactivate all not unique index that can be recreated fast
+ These include packed keys on which sorting will use more temporary
+ space than the max allowed file length or for which the unpacked keys
+ will take much more space than packed keys.
+ Note that 'rows' may be zero for the case when we don't know how many
+ rows we will put into the file.
+ */
+
+void maria_disable_non_unique_index(MARIA_HA *info, ha_rows rows)
+{
+ MARIA_SHARE *share= info->s;
+ MARIA_KEYDEF *key=share->keyinfo;
+ uint i;
+
+ DBUG_ASSERT(info->state->records == 0 &&
+ (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES));
+ for (i=0 ; i < share->base.keys ; i++,key++)
+ {
+ if (!(key->flag & (HA_NOSAME | HA_SPATIAL | HA_AUTO_KEY)) &&
+ ! maria_too_big_key_for_sort(key,rows) && share->base.auto_key != i+1)
+ {
+ maria_clear_key_active(share->state.key_map, i);
+ info->update|= HA_STATE_CHANGED;
+ }
+ }
+}
+
+
+/*
+ Return TRUE if we can use repair by sorting
+ One can set the force argument to force to use sorting
+ even if the temporary file would be quite big!
+*/
+
+my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows,
+ ulonglong key_map, my_bool force)
+{
+ MARIA_SHARE *share= info->s;
+ MARIA_KEYDEF *key=share->keyinfo;
+ uint i;
+
+ /*
+ maria_repair_by_sort only works if we have at least one key. If we don't
+ have any keys, we should use the normal repair.
+ */
+ if (! maria_is_any_key_active(key_map))
+ return FALSE; /* Can't use sort */
+ for (i=0 ; i < share->base.keys ; i++,key++)
+ {
+ if (!force && maria_too_big_key_for_sort(key,rows))
+ return FALSE;
+ }
+ return TRUE;
+}
+
+
+/**
+ @brief Create a new handle for manipulation the new record file
+
+ @note
+ It's ok for Recovery to have two MARIA_SHARE on the same index file
+ because the one we create here is not transactional
+*/
+
+my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file)
+{
+
+ MARIA_SORT_INFO *sort_info= param->sort_info;
+ MARIA_HA *info= sort_info->info;
+ MARIA_HA *new_info;
+ DBUG_ENTER("create_new_data_handle");
+
+ if (!(sort_info->new_info= maria_open(info->s->open_file_name, O_RDWR,
+ HA_OPEN_COPY | HA_OPEN_FOR_REPAIR)))
+ DBUG_RETURN(1);
+
+ new_info= sort_info->new_info;
+ pagecache_file_init(new_info->s->bitmap.file, &maria_page_crc_check_bitmap,
+ (new_info->s->options & HA_OPTION_PAGE_CHECKSUM ?
+ &maria_page_crc_set_normal :
+ &maria_page_filler_set_bitmap),
+ &maria_page_write_failure, new_info->s);
+ pagecache_file_init(new_info->dfile, &maria_page_crc_check_data,
+ (new_info->s->options & HA_OPTION_PAGE_CHECKSUM ?
+ &maria_page_crc_set_normal :
+ &maria_page_filler_set_normal),
+ &maria_page_write_failure, new_info->s);
+ change_data_file_descriptor(new_info, new_file);
+ maria_lock_database(new_info, F_EXTRA_LCK);
+ if ((sort_info->param->testflag & T_UNPACK) &&
+ info->s->data_file_type == COMPRESSED_RECORD)
+ {
+ (*new_info->s->once_end)(new_info->s);
+ (*new_info->s->end)(new_info);
+ restore_data_file_type(new_info->s);
+ _ma_setup_functions(new_info->s);
+ if ((*new_info->s->once_init)(new_info->s, new_file) ||
+ (*new_info->s->init)(new_info))
+ DBUG_RETURN(1);
+ }
+ _ma_reset_status(new_info);
+ if (_ma_initialize_data_file(new_info->s, new_file))
+ DBUG_RETURN(1);
+
+ param->filepos= new_info->state->data_file_length;
+
+ /* Use new virtual functions for key generation */
+ info->s->keypos_to_recpos= new_info->s->keypos_to_recpos;
+ info->s->recpos_to_keypos= new_info->s->recpos_to_keypos;
+ DBUG_RETURN(0);
+}
+
+
+static void
+set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share)
+{
+ if ((sort_info->new_data_file_type=share->data_file_type) ==
+ COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK)
+ {
+ MARIA_SHARE tmp;
+ sort_info->new_data_file_type= share->state.header.org_data_file_type;
+ /* Set delete_function for sort_delete_record() */
+ tmp= *share;
+ tmp.state.header.data_file_type= tmp.state.header.org_data_file_type;
+ tmp.options= ~HA_OPTION_COMPRESS_RECORD;
+ _ma_setup_functions(&tmp);
+ share->delete_record=tmp.delete_record;
+ }
+}
+
+static void restore_data_file_type(MARIA_SHARE *share)
+{
+ MARIA_SHARE tmp_share;
+ share->options&= ~HA_OPTION_COMPRESS_RECORD;
+ mi_int2store(share->state.header.options,share->options);
+ share->state.header.data_file_type=
+ share->state.header.org_data_file_type;
+ share->data_file_type= share->state.header.data_file_type;
+ share->pack.header_length= 0;
+
+ /* Use new virtual functions for key generation */
+ tmp_share= *share;
+ _ma_setup_functions(&tmp_share);
+ share->keypos_to_recpos= tmp_share.keypos_to_recpos;
+ share->recpos_to_keypos= tmp_share.recpos_to_keypos;
+}
+
+
+static void change_data_file_descriptor(MARIA_HA *info, File new_file)
+{
+ my_close(info->dfile.file, MYF(MY_WME));
+ info->dfile.file= info->s->bitmap.file.file= new_file;
+ _ma_bitmap_reset_cache(info->s);
+}
+
+
+/**
+ @brief Mark the data file to not be used
+
+ @note
+ This is used in repair when we want to ensure the handler will not
+ write anything to the data file anymore
+*/
+
+static void unuse_data_file_descriptor(MARIA_HA *info)
+{
+ info->dfile.file= info->s->bitmap.file.file= -1;
+ _ma_bitmap_reset_cache(info->s);
+}
+
+
+/*
+ Copy all states that has to do with the data file
+
+ NOTES
+ This is done to copy the state from the data file generated from
+ repair to the original handler
+*/
+
+static void copy_data_file_state(MARIA_STATE_INFO *to,
+ MARIA_STATE_INFO *from)
+{
+ to->state.records= from->state.records;
+ to->state.del= from->state.del;
+ to->state.empty= from->state.empty;
+ to->state.data_file_length= from->state.data_file_length;
+ to->split= from->split;
+ to->dellink= from->dellink;
+ to->first_bitmap_with_space= from->first_bitmap_with_space;
+}
+
+
+/*
+ Read 'safely' next record while scanning table.
+
+ SYNOPSIS
+ _ma_safe_scan_block_record()
+ info Maria handler
+ record Store found here
+
+ NOTES
+ - One must have called mi_scan() before this
+
+ Differences compared to _ma_scan_block_records() are:
+ - We read all blocks, not only blocks marked by the bitmap to be safe
+ - In case of errors, next read will read next record.
+ - More sanity checks
+
+ RETURN
+ 0 ok
+ HA_ERR_END_OF_FILE End of file
+ # error number
+*/
+
+
+static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
+ MARIA_HA *info, uchar *record)
+{
+ MARIA_SHARE *share= info->s;
+ uint record_pos= info->cur_row.nextpos;
+ ulonglong page= sort_info->page;
+ DBUG_ENTER("_ma_safe_scan_block_record");
+
+ for (;;)
+ {
+ /* Find next row in current page */
+ if (likely(record_pos < info->scan.number_of_rows))
+ {
+ uint length, offset;
+ uchar *data, *end_of_data;
+ char llbuff[22];
+
+ while (!(offset= uint2korr(info->scan.dir)))
+ {
+ info->scan.dir-= DIR_ENTRY_SIZE;
+ record_pos++;
+ if (info->scan.dir < info->scan.dir_end)
+ {
+ _ma_check_print_info(sort_info->param,
+ "Wrong directory on page: %s",
+ llstr(page, llbuff));
+ goto read_next_page;
+ }
+ }
+ /* found row */
+ info->cur_row.lastpos= info->scan.row_base_page + record_pos;
+ info->cur_row.nextpos= record_pos + 1;
+ data= info->scan.page_buff + offset;
+ length= uint2korr(info->scan.dir + 2);
+ end_of_data= data + length;
+ info->scan.dir-= DIR_ENTRY_SIZE; /* Point to previous row */
+
+ if (end_of_data > info->scan.dir_end ||
+ offset < PAGE_HEADER_SIZE || length < share->base.min_block_length)
+ {
+ _ma_check_print_info(sort_info->param,
+ "Wrong directory entry %3u at page %s",
+ record_pos, llstr(page, llbuff));
+ record_pos++;
+ continue;
+ }
+ else
+ {
+ DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos));
+ DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data));
+ }
+ }
+
+read_next_page:
+ /* Read until we find next head page */
+ for (;;)
+ {
+ uint page_type;
+ char llbuff[22];
+
+ sort_info->page++; /* In case of errors */
+ page++;
+ if (!(page % share->bitmap.pages_covered))
+ page++; /* Skip bitmap */
+ if ((page + 1) * share->block_size > sort_info->filelength)
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+ if (!(pagecache_read(share->pagecache,
+ &info->dfile,
+ page, 0, info->scan.page_buff,
+ PAGECACHE_READ_UNKNOWN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
+ {
+ if (my_errno == HA_ERR_WRONG_CRC)
+ {
+ _ma_check_print_info(sort_info->param,
+ "Wrong CRC on page at %s",
+ llstr(page * share->block_size, llbuff));
+ continue;
+ }
+ DBUG_RETURN(my_errno);
+ }
+ page_type= (info->scan.page_buff[PAGE_TYPE_OFFSET] &
+ PAGE_TYPE_MASK);
+ if (page_type == HEAD_PAGE)
+ {
+ if ((info->scan.number_of_rows=
+ (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) != 0)
+ break;
+ _ma_check_print_info(sort_info->param,
+ "Wrong head page at %s",
+ llstr(page * share->block_size, llbuff));
+ }
+ else if (page_type >= MAX_PAGE_TYPE)
+ {
+ _ma_check_print_info(sort_info->param,
+ "Found wrong page type: %d at %s",
+ page_type, llstr(page * share->block_size,
+ llbuff));
+ }
+ }
+
+ /* New head page */
+ info->scan.dir= (info->scan.page_buff + share->block_size -
+ PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE);
+ info->scan.dir_end= (info->scan.dir -
+ (info->scan.number_of_rows - 1) *
+ DIR_ENTRY_SIZE);
+ info->scan.row_base_page= ma_recordpos(page, 0);
+ record_pos= 0;
+ }
+}
+
+
+/**
+ @brief Writes a LOGREC_REPAIR_TABLE record and updates create_rename_lsn
+ and is_of_horizon
+
+ REPAIR/OPTIMIZE have replaced the data/index file with a new file
+ and so, in this scenario:
+ @verbatim
+ CHECKPOINT - REDO_INSERT - COMMIT - ... - REPAIR - ... - crash
+ @endverbatim
+ we do not want Recovery to apply the REDO_INSERT to the table, as it would
+ then possibly wrongly extend the table. By updating create_rename_lsn at
+ the end of REPAIR, we know that REDO_INSERT will be skipped.
+
+ @param param description of the REPAIR operation
+ @param info table
+
+ @return Operation status
+ @retval 0 ok
+ @retval 1 error (disk problem)
+*/
+
+static int write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+ /* in case this is maria_chk or recovery... */
+ if (translog_status == TRANSLOG_OK && !maria_in_recovery &&
+ share->base.born_transactional)
+ {
+ my_bool save_now_transactional= share->now_transactional;
+
+ /*
+ For now this record is only informative. It could serve when applying
+ logs to a backup, but that needs more thought. Assume table became
+ corrupted. It is repaired, then some writes happen to it.
+ Later we restore an old backup, and want to apply this REDO_REPAIR_TABLE
+ record. For it to give the same result as originally, the table should
+ be corrupted the same way, so applying previous REDOs should produce the
+ same corruption; that's really not guaranteed (different execution paths
+ in execution of REDOs vs runtime code so not same bugs hit, temporary
+ hardware issues not repeatable etc). Corruption may not be repeatable.
+ A reasonable solution is to execute the REDO_REPAIR_TABLE record and
+ check if the checksum of the resulting table matches what it was at the
+ end of the original repair (should be stored in log record); or execute
+ the REDO_REPAIR_TABLE if the checksum of the table-before-repair matches
+ was it was at the start of the original repair (should be stored in log
+ record).
+ */
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
+ uchar log_data[FILEID_STORE_SIZE + 4 + 8];
+ LSN lsn;
+
+ /*
+ testflag gives an idea of what REPAIR did (in particular T_QUICK
+ or not: did it touch the data file or not?).
+ */
+ int4store(log_data + FILEID_STORE_SIZE, param->testflag);
+ /* org_key_map is used when recreating index after a load data infile */
+ int8store(log_data + FILEID_STORE_SIZE + 4, param->org_key_map);
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+
+ share->now_transactional= 1;
+ /**
+ @todo RECOVERY maria_chk --transaction-log may come here; to be sure
+ that ha_maria is not using the log too, we should do a my_lock() on the
+ control file when Maria starts.
+ */
+ if (unlikely(translog_write_record(&lsn, LOGREC_REDO_REPAIR_TABLE,
+ &dummy_transaction_object, info,
+ sizeof(log_data),
+ sizeof(log_array)/sizeof(log_array[0]),
+ log_array, log_data, NULL) ||
+ translog_flush(lsn)))
+ return 1;
+ /*
+ The table's existence was made durable earlier (MY_SYNC_DIR passed to
+ maria_change_to_newfile()). _ma_flush_table_files_after_repair() was
+ called earlier, flushed and forced data+index+state. Old REDOs should
+ not be applied to the table:
+ */
+ if (_ma_update_create_rename_lsn(share, lsn, TRUE))
+ return 1;
+ share->now_transactional= save_now_transactional;
+ }
+ return 0;
+}
+
+
+/* Give error message why reading of key page failed */
+
+static void report_keypage_fault(HA_CHECK *param, my_off_t position)
+{
+ char buff[11];
+
+ if (my_errno == HA_ERR_CRASHED)
+ _ma_check_print_error(param,
+ "Wrong base information on indexpage at filepos: %s",
+ llstr(position, buff));
+ else
+ _ma_check_print_error(param,
+ "Can't read indexpage from filepos: %s, "
+ "error: %d",
+ llstr(position,buff), my_errno);
+}
diff --git a/storage/maria/ma_check_standalone.h b/storage/maria/ma_check_standalone.h
new file mode 100644
index 00000000000..3874d722d6c
--- /dev/null
+++ b/storage/maria/ma_check_standalone.h
@@ -0,0 +1,106 @@
+/* Copyright (C) 2007 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ All standalone programs which need to use functions from ma_check.c
+ (like maria_repair()) must define their version of _ma_killed_ptr()
+ and _ma_check_print_info|warning|error(). Indeed, linking with ma_check.o
+ brings in the dependencies of ma_check.o which are definitions of the above
+ functions; if the program does not define them then the ones of
+ ha_maria.o are used i.e. ha_maria.o is linked into the program, and this
+ brings dependencies of ha_maria.o on mysqld.o into the program's linking
+ which thus fails, as the program is not linked with mysqld.o.
+ This file contains the versions of these functions used by maria_chk and
+ maria_read_log.
+*/
+
+/*
+ Check if check/repair operation was killed by a signal
+*/
+
+static int not_killed= 0;
+
+volatile int *_ma_killed_ptr(HA_CHECK *param __attribute__((unused)))
+{
+ return &not_killed; /* always NULL */
+}
+
+ /* print warnings and errors */
+ /* VARARGS */
+
+void _ma_check_print_info(HA_CHECK *param __attribute__((unused)),
+ const char *fmt,...)
+{
+ va_list args;
+ DBUG_ENTER("_ma_check_print_info");
+ DBUG_PRINT("enter", ("format: %s", fmt));
+
+ va_start(args,fmt);
+ VOID(vfprintf(stdout, fmt, args));
+ VOID(fputc('\n',stdout));
+ va_end(args);
+ DBUG_VOID_RETURN;
+}
+
+/* VARARGS */
+
+void _ma_check_print_warning(HA_CHECK *param, const char *fmt,...)
+{
+ va_list args;
+ DBUG_ENTER("_ma_check_print_warning");
+ DBUG_PRINT("enter", ("format: %s", fmt));
+
+ fflush(stdout);
+ if (!param->warning_printed && !param->error_printed)
+ {
+ if (param->testflag & T_SILENT)
+ fprintf(stderr,"%s: MARIA file %s\n",my_progname_short,
+ param->isam_file_name);
+ param->out_flag|= O_DATA_LOST;
+ }
+ param->warning_printed=1;
+ va_start(args,fmt);
+ fprintf(stderr,"%s: warning: ",my_progname_short);
+ VOID(vfprintf(stderr, fmt, args));
+ VOID(fputc('\n',stderr));
+ fflush(stderr);
+ va_end(args);
+ DBUG_VOID_RETURN;
+}
+
+/* VARARGS */
+
+void _ma_check_print_error(HA_CHECK *param, const char *fmt,...)
+{
+ va_list args;
+ DBUG_ENTER("_ma_check_print_error");
+ DBUG_PRINT("enter", ("format: %s", fmt));
+
+ fflush(stdout);
+ if (!param->warning_printed && !param->error_printed)
+ {
+ if (param->testflag & T_SILENT)
+ fprintf(stderr,"%s: MARIA file %s\n",my_progname_short,param->isam_file_name);
+ param->out_flag|= O_DATA_LOST;
+ }
+ param->error_printed|=1;
+ va_start(args,fmt);
+ fprintf(stderr,"%s: error: ",my_progname_short);
+ VOID(vfprintf(stderr, fmt, args));
+ VOID(fputc('\n',stderr));
+ fflush(stderr);
+ va_end(args);
+ DBUG_VOID_RETURN;
+}
diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c
new file mode 100644
index 00000000000..becaf45b9a2
--- /dev/null
+++ b/storage/maria/ma_checkpoint.c
@@ -0,0 +1,1196 @@
+/* Copyright (C) 2006,2007 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ WL#3071 Maria checkpoint
+ First version written by Guilhem Bichot on 2006-04-27.
+*/
+
+/* Here is the implementation of this module */
+
+/** @todo RECOVERY BUG this is unreviewed code */
+/*
+ Summary:
+ checkpoints are done either by a background thread (checkpoint every Nth
+ second) or by a client.
+ In ha_maria, it's not made available to clients, and will soon be done by a
+ background thread (periodically taking checkpoints and flushing dirty
+ pages).
+*/
+
+#include "maria_def.h"
+#include "ma_pagecache.h"
+#include "ma_blockrec.h"
+#include "ma_checkpoint.h"
+#include "ma_loghandler_lsn.h"
+
+
+/** @brief type of checkpoint currently running */
+static CHECKPOINT_LEVEL checkpoint_in_progress= CHECKPOINT_NONE;
+/** @brief protects checkpoint_in_progress */
+static pthread_mutex_t LOCK_checkpoint;
+/** @brief for killing the background checkpoint thread */
+static pthread_cond_t COND_checkpoint;
+/** @brief if checkpoint module was inited or not */
+static my_bool checkpoint_inited= FALSE;
+/** @brief 'kill' flag for the background checkpoint thread */
+static int checkpoint_thread_die;
+/* is ulong like pagecache->blocks_changed */
+static ulong pages_to_flush_before_next_checkpoint;
+static PAGECACHE_FILE *dfiles, /**< data files to flush in background */
+ *dfiles_end; /**< list of data files ends here */
+static PAGECACHE_FILE *kfiles, /**< index files to flush in background */
+ *kfiles_end; /**< list of index files ends here */
+/* those two statistics below could serve in SHOW GLOBAL STATUS */
+static uint checkpoints_total= 0, /**< all checkpoint requests made */
+ checkpoints_ok_total= 0; /**< all checkpoints which succeeded */
+
+struct st_filter_param
+{
+ LSN up_to_lsn; /**< only pages with rec_lsn < this LSN */
+ uint max_pages; /**< stop after flushing this number pages */
+}; /**< information to determine which dirty pages should be flushed */
+
+static enum pagecache_flush_filter_result
+filter_flush_file_medium(enum pagecache_page_type type,
+ pgcache_page_no_t page,
+ LSN rec_lsn, void *arg);
+static enum pagecache_flush_filter_result
+filter_flush_file_full(enum pagecache_page_type type,
+ pgcache_page_no_t page,
+ LSN rec_lsn, void *arg);
+static enum pagecache_flush_filter_result
+filter_flush_file_evenly(enum pagecache_page_type type,
+ pgcache_page_no_t pageno,
+ LSN rec_lsn, void *arg);
+static int really_execute_checkpoint(void);
+pthread_handler_t ma_checkpoint_background(void *arg);
+static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon);
+
+/**
+ @brief Does a checkpoint
+
+ @param level what level of checkpoint to do
+ @param no_wait if another checkpoint of same or stronger level
+ is already running, consider our job done
+
+ @note In ha_maria, there can never be two threads trying a checkpoint at
+ the same time.
+
+ @return Operation status
+ @retval 0 ok
+ @retval !=0 error
+*/
+
+int ma_checkpoint_execute(CHECKPOINT_LEVEL level, my_bool no_wait)
+{
+ int result= 0;
+ DBUG_ENTER("ma_checkpoint_execute");
+
+ if (!checkpoint_inited)
+ {
+ /*
+ If ha_maria failed to start, maria_panic_hton is called, we come here.
+ */
+ DBUG_RETURN(0);
+ }
+ DBUG_ASSERT(level > CHECKPOINT_NONE);
+
+ /* look for already running checkpoints */
+ pthread_mutex_lock(&LOCK_checkpoint);
+ while (checkpoint_in_progress != CHECKPOINT_NONE)
+ {
+ if (no_wait && (checkpoint_in_progress >= level))
+ {
+ /*
+ If we are the checkpoint background thread, we don't wait (it's
+ smarter to flush pages instead of waiting here while the other thread
+ finishes its checkpoint).
+ */
+ pthread_mutex_unlock(&LOCK_checkpoint);
+ goto end;
+ }
+ pthread_cond_wait(&COND_checkpoint, &LOCK_checkpoint);
+ }
+
+ checkpoint_in_progress= level;
+ pthread_mutex_unlock(&LOCK_checkpoint);
+ /* from then on, we are sure to be and stay the only checkpointer */
+
+ result= really_execute_checkpoint();
+ pthread_cond_broadcast(&COND_checkpoint);
+end:
+ DBUG_RETURN(result);
+}
+
+
+/**
+ @brief Does a checkpoint, really; expects no other checkpoints
+ running.
+
+ Checkpoint level requested is read from checkpoint_in_progress.
+
+ @return Operation status
+ @retval 0 ok
+ @retval !=0 error
+*/
+
+static int really_execute_checkpoint(void)
+{
+ uint i, error= 0;
+ /** @brief checkpoint_start_log_horizon will be stored there */
+ char *ptr;
+ LEX_STRING record_pieces[4]; /**< only malloc-ed pieces */
+ LSN min_page_rec_lsn, min_trn_rec_lsn, min_first_undo_lsn;
+ TRANSLOG_ADDRESS checkpoint_start_log_horizon;
+ char checkpoint_start_log_horizon_char[LSN_STORE_SIZE];
+ DBUG_ENTER("really_execute_checkpoint");
+ bzero(&record_pieces, sizeof(record_pieces));
+
+ /*
+ STEP 1: record current end-of-log position using log's lock. It is
+ critical for the correctness of Checkpoint (related to memory visibility
+ rules, the log's lock is a mutex).
+ "Horizon" is a lower bound of the LSN of the next log record.
+ */
+ checkpoint_start_log_horizon= translog_get_horizon();
+ DBUG_PRINT("info",("checkpoint_start_log_horizon (%lu,0x%lx)",
+ LSN_IN_PARTS(checkpoint_start_log_horizon)));
+ lsn_store(checkpoint_start_log_horizon_char, checkpoint_start_log_horizon);
+
+ /*
+ STEP 2: fetch information about transactions.
+ We must fetch transactions before dirty pages. Indeed, a transaction
+ first sets its rec_lsn then sets the page's rec_lsn then sets its rec_lsn
+ to 0. If we fetched pages first, we may see no dirty page yet, then we
+ fetch transactions but the transaction has already reset its rec_lsn to 0
+ so we miss rec_lsn again.
+ For a similar reason (over-allocated bitmap pages) we have to fetch
+ transactions before flushing bitmap pages.
+
+ min_trn_rec_lsn will serve to lower the starting point of the REDO phase
+ (down from checkpoint_start_log_horizon).
+ */
+ if (unlikely(trnman_collect_transactions(&record_pieces[0],
+ &record_pieces[1],
+ &min_trn_rec_lsn,
+ &min_first_undo_lsn)))
+ goto err;
+
+
+ /* STEP 3: fetch information about table files */
+ if (unlikely(collect_tables(&record_pieces[2],
+ checkpoint_start_log_horizon)))
+ goto err;
+
+
+ /* STEP 4: fetch information about dirty pages */
+ /*
+ It's better to do it _after_ having flushed some data pages (which
+ collect_tables() may have done), because those are now non-dirty and so we
+ have a more up-to-date dirty pages list to put into the checkpoint record,
+ and thus we will have less work at Recovery.
+ */
+ /* Using default pagecache for now */
+ if (unlikely(pagecache_collect_changed_blocks_with_lsn(maria_pagecache,
+ &record_pieces[3],
+ &min_page_rec_lsn)))
+ goto err;
+
+
+ /* LAST STEP: now write the checkpoint log record */
+ {
+ LSN lsn;
+ uint total_rec_length;
+ /*
+ the log handler is allowed to modify "str" and "length" (but not "*str")
+ of its argument, so we must not pass it record_pieces directly,
+ otherwise we would later not know what memory pieces to my_free().
+ */
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 5];
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str=
+ checkpoint_start_log_horizon_char;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= total_rec_length=
+ sizeof(checkpoint_start_log_horizon_char);
+ for (i= 0; i < (sizeof(record_pieces)/sizeof(record_pieces[0])); i++)
+ {
+ log_array[TRANSLOG_INTERNAL_PARTS + 1 + i]= record_pieces[i];
+ total_rec_length+= record_pieces[i].length;
+ }
+ if (unlikely(translog_write_record(&lsn, LOGREC_CHECKPOINT,
+ &dummy_transaction_object, NULL,
+ total_rec_length,
+ sizeof(log_array)/sizeof(log_array[0]),
+ log_array, NULL, NULL) ||
+ translog_flush(lsn)))
+ goto err;
+ translog_lock();
+ /*
+ This cannot be done as a inwrite_rec_hook of LOGREC_CHECKPOINT, because
+ such hook would be called before translog_flush (and we must be sure
+ that log was flushed before we write to the control file).
+ */
+ if (unlikely(ma_control_file_write_and_force(lsn, FILENO_IMPOSSIBLE,
+ CONTROL_FILE_UPDATE_ONLY_LSN)))
+ {
+ translog_unlock();
+ goto err;
+ }
+ translog_unlock();
+ }
+
+ /*
+ Note that we should not alter memory structures until we have successfully
+ written the checkpoint record and control file.
+ */
+ /* checkpoint succeeded */
+ ptr= record_pieces[3].str;
+ pages_to_flush_before_next_checkpoint= uint4korr(ptr);
+ DBUG_PRINT("checkpoint",("%u pages to flush before next checkpoint",
+ (uint)pages_to_flush_before_next_checkpoint));
+
+ /* compute log's low-water mark */
+ {
+ TRANSLOG_ADDRESS log_low_water_mark= min_page_rec_lsn;
+ set_if_smaller(log_low_water_mark, min_trn_rec_lsn);
+ set_if_smaller(log_low_water_mark, min_first_undo_lsn);
+ set_if_smaller(log_low_water_mark, checkpoint_start_log_horizon);
+ /**
+ Now purge unneeded logs.
+ As some systems have an unreliable fsync (drive lying), we could try to
+ be robust against that: remember a few previous checkpoints in the
+ control file, and not purge logs immediately... Think about it.
+ */
+ if (translog_purge(log_low_water_mark))
+ ma_message_no_user(0, "log purging failed");
+ }
+
+ goto end;
+
+err:
+ error= 1;
+ ma_message_no_user(0, "checkpoint failed");
+ /* we were possibly not able to determine what pages to flush */
+ pages_to_flush_before_next_checkpoint= 0;
+
+end:
+ for (i= 0; i < (sizeof(record_pieces)/sizeof(record_pieces[0])); i++)
+ my_free(record_pieces[i].str, MYF(MY_ALLOW_ZERO_PTR));
+ pthread_mutex_lock(&LOCK_checkpoint);
+ checkpoint_in_progress= CHECKPOINT_NONE;
+ checkpoints_total++;
+ checkpoints_ok_total+= !error;
+ pthread_mutex_unlock(&LOCK_checkpoint);
+ DBUG_RETURN(error);
+}
+
+
+/**
+ @brief Initializes the checkpoint module
+
+ @param interval If one wants the module to create a
+ thread which will periodically do
+ checkpoints, and flush dirty pages, in the
+ background, it should specify a non-zero
+ interval in seconds. The thread will then be
+ created and will take checkpoints separated by
+ approximately 'interval' second.
+
+ @note A checkpoint is taken only if there has been some significant
+ activity since the previous checkpoint. Between checkpoint N and N+1 the
+ thread flushes all dirty pages which were already dirty at the time of
+ checkpoint N.
+
+ @return Operation status
+ @retval 0 ok
+ @retval !=0 error
+*/
+
+int ma_checkpoint_init(ulong interval)
+{
+ pthread_t th;
+ int res= 0;
+ DBUG_ENTER("ma_checkpoint_init");
+ checkpoint_inited= TRUE;
+ checkpoint_thread_die= 2; /* not yet born == dead */
+ if (pthread_mutex_init(&LOCK_checkpoint, MY_MUTEX_INIT_SLOW) ||
+ pthread_cond_init(&COND_checkpoint, 0))
+ res= 1;
+ else if (interval > 0)
+ {
+ compile_time_assert(sizeof(void *) >= sizeof(ulong));
+ if (!(res= pthread_create(&th, NULL, ma_checkpoint_background,
+ (void *)interval)))
+ checkpoint_thread_die= 0; /* thread lives, will have to be killed */
+ }
+ DBUG_RETURN(res);
+}
+
+
+#ifndef DBUG_OFF
+/**
+ Function used to test recovery: flush some table pieces and then caller
+ crashes.
+
+ @param what_to_flush 0: current bitmap and all data pages
+ 1: state
+ 2: all bitmap pages
+*/
+static void flush_all_tables(int what_to_flush)
+{
+ int res= 0;
+ LIST *pos; /**< to iterate over open tables */
+ pthread_mutex_lock(&THR_LOCK_maria);
+ for (pos= maria_open_list; pos; pos= pos->next)
+ {
+ MARIA_HA *info= (MARIA_HA*)pos->data;
+ if (info->s->now_transactional)
+ {
+ switch (what_to_flush)
+ {
+ case 0:
+ res= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
+ FLUSH_KEEP, FLUSH_KEEP);
+ break;
+ case 1:
+ res= _ma_state_info_write(info->s, 1|4);
+ DBUG_PRINT("maria_flush_states",
+ ("is_of_horizon: LSN (%lu,0x%lx)",
+ LSN_IN_PARTS(info->s->state.is_of_horizon)));
+ break;
+ case 2:
+ res= _ma_bitmap_flush_all(info->s);
+ break;
+ }
+ }
+ DBUG_ASSERT(res == 0);
+ }
+ pthread_mutex_unlock(&THR_LOCK_maria);
+}
+#endif
+
+
+/**
+ @brief Destroys the checkpoint module
+*/
+
+void ma_checkpoint_end(void)
+{
+ DBUG_ENTER("ma_checkpoint_end");
+ DBUG_EXECUTE_IF("maria_flush_bitmap",
+ {
+ DBUG_PRINT("maria_flush_bitmap", ("now"));
+ flush_all_tables(2);
+ });
+ DBUG_EXECUTE_IF("maria_flush_whole_page_cache",
+ {
+ DBUG_PRINT("maria_flush_whole_page_cache", ("now"));
+ flush_all_tables(0);
+ });
+ DBUG_EXECUTE_IF("maria_flush_whole_log",
+ {
+ DBUG_PRINT("maria_flush_whole_log", ("now"));
+ translog_flush(translog_get_horizon());
+ });
+ /*
+ Note that for WAL reasons, maria_flush_states requires
+ maria_flush_whole_log.
+ */
+ DBUG_EXECUTE_IF("maria_flush_states",
+ {
+ DBUG_PRINT("maria_flush_states", ("now"));
+ flush_all_tables(1);
+ });
+ DBUG_EXECUTE_IF("maria_crash",
+ {
+ DBUG_PRINT("maria_crash", ("now"));
+ fflush(DBUG_FILE);
+ abort();
+ });
+
+ if (checkpoint_inited)
+ {
+ pthread_mutex_lock(&LOCK_checkpoint);
+ if (checkpoint_thread_die != 2) /* thread was started ok */
+ {
+ DBUG_PRINT("info",("killing Maria background checkpoint thread"));
+ checkpoint_thread_die= 1; /* kill it */
+ do /* and wait for it to be dead */
+ {
+ /* wake it up if it was in a sleep */
+ pthread_cond_broadcast(&COND_checkpoint);
+ DBUG_PRINT("info",("waiting for Maria background checkpoint thread"
+ " to die"));
+ pthread_cond_wait(&COND_checkpoint, &LOCK_checkpoint);
+ }
+ while (checkpoint_thread_die != 2);
+ }
+ pthread_mutex_unlock(&LOCK_checkpoint);
+ my_free((uchar *)dfiles, MYF(MY_ALLOW_ZERO_PTR));
+ my_free((uchar *)kfiles, MYF(MY_ALLOW_ZERO_PTR));
+ dfiles= kfiles= NULL;
+ pthread_mutex_destroy(&LOCK_checkpoint);
+ pthread_cond_destroy(&COND_checkpoint);
+ checkpoint_inited= FALSE;
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ @brief dirty-page filtering criteria for MEDIUM checkpoint.
+
+ We flush data/index pages which have been dirty since the previous
+ checkpoint (this is the two-checkpoint rule: the REDO phase will not have
+ to start from earlier than the next-to-last checkpoint).
+ Bitmap pages are handled by _ma_bitmap_flush_all().
+
+ @param type Page's type
+ @param pageno Page's number
+ @param rec_lsn Page's rec_lsn
+ @param arg filter_param
+*/
+
+static enum pagecache_flush_filter_result
+filter_flush_file_medium(enum pagecache_page_type type,
+ pgcache_page_no_t pageno __attribute__ ((unused)),
+ LSN rec_lsn, void *arg)
+{
+ struct st_filter_param *param= (struct st_filter_param *)arg;
+ return (type == PAGECACHE_LSN_PAGE) &&
+ (cmp_translog_addr(rec_lsn, param->up_to_lsn) <= 0);
+}
+
+
+/**
+ @brief dirty-page filtering criteria for FULL checkpoint.
+
+ We flush all dirty data/index pages.
+ Bitmap pages are handled by _ma_bitmap_flush_all().
+
+ @param type Page's type
+ @param pageno Page's number
+ @param rec_lsn Page's rec_lsn
+ @param arg filter_param
+*/
+
+static enum pagecache_flush_filter_result
+filter_flush_file_full(enum pagecache_page_type type,
+ pgcache_page_no_t pageno __attribute__ ((unused)),
+ LSN rec_lsn __attribute__ ((unused)),
+ void *arg __attribute__ ((unused)))
+{
+ return (type == PAGECACHE_LSN_PAGE);
+}
+
+
+/**
+ @brief dirty-page filtering criteria for background flushing thread.
+
+ We flush data/index pages which have been dirty since the previous
+ checkpoint (this is the two-checkpoint rule: the REDO phase will not have
+ to start from earlier than the next-to-last checkpoint), and no
+ bitmap pages. But we flush no more than a certain number of pages (to have
+ an even flushing, no write burst).
+ The reason to not flush bitmap pages is that they may not be in a flushable
+ state at this moment and we don't want to wait for them.
+
+ @param type Page's type
+ @param pageno Page's number
+ @param rec_lsn Page's rec_lsn
+ @param arg filter_param
+*/
+
+static enum pagecache_flush_filter_result
+filter_flush_file_evenly(enum pagecache_page_type type,
+ pgcache_page_no_t pageno __attribute__ ((unused)),
+ LSN rec_lsn, void *arg)
+{
+ struct st_filter_param *param= (struct st_filter_param *)arg;
+ if (unlikely(param->max_pages == 0)) /* all flushed already */
+ return FLUSH_FILTER_SKIP_ALL;
+ if ((type == PAGECACHE_LSN_PAGE) &&
+ (cmp_translog_addr(rec_lsn, param->up_to_lsn) <= 0))
+ {
+ param->max_pages--;
+ return FLUSH_FILTER_OK;
+ }
+ return FLUSH_FILTER_SKIP_TRY_NEXT;
+}
+
+
+/**
+ @brief Background thread which does checkpoints and flushes periodically.
+
+ Takes a checkpoint. After this, all pages dirty at the time of that
+ checkpoint are flushed evenly until it is time to take another checkpoint.
+ This ensures that the REDO phase starts at earliest (in LSN time) at the
+ next-to-last checkpoint record ("two-checkpoint rule").
+
+ @note MikaelR questioned why the same thread does two different jobs, the
+ risk could be that while a checkpoint happens no LRD flushing happens.
+*/
+
+pthread_handler_t ma_checkpoint_background(void *arg)
+{
+ /** @brief At least this of log/page bytes written between checkpoints */
+ const uint checkpoint_min_activity= 2*1024*1024;
+ /*
+ If the interval could be changed by the user while we are in this thread,
+ it could be annoying: for example it could cause "case 2" to be executed
+ right after "case 0", thus having 'dfile' unset. So the thread cares only
+ about the interval's value when it started.
+ */
+ const ulong interval= (ulong)arg;
+ uint sleeps, sleep_time;
+ TRANSLOG_ADDRESS log_horizon_at_last_checkpoint=
+ translog_get_horizon();
+ ulonglong pagecache_flushes_at_last_checkpoint=
+ maria_pagecache->global_cache_write;
+ uint pages_bunch_size;
+ struct st_filter_param filter_param;
+ PAGECACHE_FILE *dfile; /**< data file currently being flushed */
+ PAGECACHE_FILE *kfile; /**< index file currently being flushed */
+ LINT_INIT(kfile);
+ LINT_INIT(dfile);
+ LINT_INIT(pages_bunch_size);
+
+ my_thread_init();
+ DBUG_PRINT("info",("Maria background checkpoint thread starts"));
+ DBUG_ASSERT(interval > 0);
+
+ /*
+ Recovery ended with all tables closed and a checkpoint: no need to take
+ one immediately.
+ */
+ sleeps= 1;
+ pages_to_flush_before_next_checkpoint= 0;
+
+ for(;;) /* iterations of checkpoints and dirty page flushing */
+ {
+#if 0 /* good for testing, to do a lot of checkpoints, finds a lot of bugs */
+ sleeps=0;
+#endif
+ struct timespec abstime;
+ switch (sleeps % interval)
+ {
+ case 0:
+ /*
+ With background flushing evenly distributed over the time
+ between two checkpoints, we should have only little flushing to do
+ in the checkpoint.
+ */
+ /*
+ No checkpoint if little work of interest for recovery was done
+ since last checkpoint. Such work includes log writing (lengthens
+ recovery, checkpoint would shorten it), page flushing (checkpoint
+ would decrease the amount of read pages in recovery).
+ In case of one short statement per minute (very low load), we don't
+ want to checkpoint every minute, hence the positive
+ checkpoint_min_activity.
+ */
+ if (((translog_get_horizon() - log_horizon_at_last_checkpoint) +
+ (maria_pagecache->global_cache_write -
+ pagecache_flushes_at_last_checkpoint) *
+ maria_pagecache->block_size) < checkpoint_min_activity)
+ {
+ /* don't take checkpoint, so don't know what to flush */
+ pages_to_flush_before_next_checkpoint= 0;
+ sleep_time= interval;
+ break;
+ }
+ sleep_time= 1;
+ ma_checkpoint_execute(CHECKPOINT_MEDIUM, TRUE);
+ /*
+ Snapshot this kind of "state" of the engine. Note that the value below
+ is possibly greater than last_checkpoint_lsn.
+ */
+ log_horizon_at_last_checkpoint= translog_get_horizon();
+ pagecache_flushes_at_last_checkpoint=
+ maria_pagecache->global_cache_write;
+ /*
+ If the checkpoint above succeeded it has set d|kfiles and
+ d|kfiles_end. If is has failed, it has set
+ pages_to_flush_before_next_checkpoint to 0 so we will skip flushing
+ and sleep until the next checkpoint.
+ */
+ break;
+ case 1:
+ /* set up parameters for background page flushing */
+ filter_param.up_to_lsn= last_checkpoint_lsn;
+ pages_bunch_size= pages_to_flush_before_next_checkpoint / interval;
+ dfile= dfiles;
+ kfile= kfiles;
+ /* fall through */
+ default:
+ if (pages_bunch_size > 0)
+ {
+ DBUG_PRINT("checkpoint",
+ ("Maria background checkpoint thread: %u pages",
+ pages_bunch_size));
+ /* flush a bunch of dirty pages */
+ filter_param.max_pages= pages_bunch_size;
+ while (dfile != dfiles_end)
+ {
+ /*
+ We use FLUSH_KEEP_LAZY: if a file is already in flush, it's
+ smarter to move to the next file than wait for this one to be
+ completely flushed, which may take long.
+ */
+ int res=
+ flush_pagecache_blocks_with_filter(maria_pagecache,
+ dfile, FLUSH_KEEP_LAZY,
+ filter_flush_file_evenly,
+ &filter_param);
+ if (unlikely(res & PCFLUSH_ERROR))
+ ma_message_no_user(0, "background data page flush failed");
+ if (filter_param.max_pages == 0) /* bunch all flushed, sleep */
+ break; /* and we will continue with the same file */
+ dfile++; /* otherwise all this file is flushed, move to next file */
+ /*
+ MikaelR noted that he observed that Linux's file cache may never
+ fsync to disk until this cache is full, at which point it decides
+ to empty the cache, making the machine very slow. A solution was
+ to fsync after writing 2 MB. So we might want to fsync() here if
+ we wrote enough pages.
+ */
+ }
+ while (kfile != kfiles_end)
+ {
+ int res=
+ flush_pagecache_blocks_with_filter(maria_pagecache,
+ kfile, FLUSH_KEEP_LAZY,
+ filter_flush_file_evenly,
+ &filter_param);
+ if (unlikely(res & PCFLUSH_ERROR))
+ ma_message_no_user(0, "background index page flush failed");
+ if (filter_param.max_pages == 0) /* bunch all flushed, sleep */
+ break; /* and we will continue with the same file */
+ kfile++; /* otherwise all this file is flushed, move to next file */
+ }
+ sleep_time= 1;
+ }
+ else
+ {
+ /* Can directly sleep until the next checkpoint moment */
+ sleep_time= interval - (sleeps % interval);
+ }
+ }
+ pthread_mutex_lock(&LOCK_checkpoint);
+ if (checkpoint_thread_die == 1)
+ break;
+#if 0 /* good for testing, to do a lot of checkpoints, finds a lot of bugs */
+ pthread_mutex_unlock(&LOCK_checkpoint);
+ my_sleep(100000); /* a tenth of a second */
+ pthread_mutex_lock(&LOCK_checkpoint);
+#else
+ /* To have a killable sleep, we use timedwait like our SQL GET_LOCK() */
+ DBUG_PRINT("info", ("sleeping %u seconds", sleep_time));
+ set_timespec(abstime, sleep_time);
+ pthread_cond_timedwait(&COND_checkpoint, &LOCK_checkpoint, &abstime);
+#endif
+ if (checkpoint_thread_die == 1)
+ break;
+ pthread_mutex_unlock(&LOCK_checkpoint);
+ sleeps+= sleep_time;
+ }
+ pthread_mutex_unlock(&LOCK_checkpoint);
+ DBUG_PRINT("info",("Maria background checkpoint thread ends"));
+ /*
+ That's the final one, which guarantees that a clean shutdown always ends
+ with a checkpoint.
+ */
+ ma_checkpoint_execute(CHECKPOINT_FULL, FALSE);
+ pthread_mutex_lock(&LOCK_checkpoint);
+ checkpoint_thread_die= 2; /* indicate that we are dead */
+ /* wake up ma_checkpoint_end() which may be waiting for our death */
+ pthread_cond_broadcast(&COND_checkpoint);
+ /* broadcast was inside unlock because ma_checkpoint_end() destroys mutex */
+ pthread_mutex_unlock(&LOCK_checkpoint);
+ my_thread_end();
+ return 0;
+}
+
+
+/**
+ @brief Allocates buffer and stores in it some info about open tables,
+ does some flushing on those.
+
+ Does the allocation because the caller cannot know the size itself.
+ Memory freeing is to be done by the caller (if the "str" member of the
+ LEX_STRING is not NULL).
+ The caller is taking a checkpoint.
+
+ @param[out] str pointer to where the allocated buffer,
+ and its size, will be put; buffer will be filled
+ with info about open tables
+ @param checkpoint_start_log_horizon Of the in-progress checkpoint
+ record.
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
+{
+ MARIA_SHARE **distinct_shares= NULL;
+ char *ptr;
+ uint error= 1, sync_error= 0, nb, nb_stored, i;
+ my_bool unmark_tables= TRUE;
+ uint total_names_length;
+ LIST *pos; /**< to iterate over open tables */
+ struct st_state_copy {
+ uint index;
+ MARIA_STATE_INFO state;
+ };
+ struct st_state_copy *state_copies= NULL, /**< fixed-size cache of states */
+ *state_copies_end, /**< cache ends here */
+ *state_copy; /**< iterator in cache */
+ TRANSLOG_ADDRESS state_copies_horizon; /**< horizon of states' _copies_ */
+ struct st_filter_param filter_param;
+ PAGECACHE_FLUSH_FILTER filter;
+ DBUG_ENTER("collect_tables");
+
+ LINT_INIT(state_copies_horizon);
+ /* let's make a list of distinct shares */
+ pthread_mutex_lock(&THR_LOCK_maria);
+ for (nb= 0, pos= maria_open_list; pos; pos= pos->next)
+ {
+ MARIA_HA *info= (MARIA_HA*)pos->data;
+ MARIA_SHARE *share= info->s;
+ /* the first three variables below can never change */
+ if (share->base.born_transactional && !share->temporary &&
+ share->mode != O_RDONLY &&
+ !(share->in_checkpoint & MARIA_CHECKPOINT_SEEN_IN_LOOP))
+ {
+ /*
+ Why we didn't take intern_lock above: table had in_checkpoint==0 so no
+ thread could set in_checkpoint. And no thread needs to know that we
+ are setting in_checkpoint, because only maria_close() needs it and
+ cannot run now as we hold THR_LOCK_maria.
+ */
+ /*
+ This table is relevant for checkpoint and not already seen. Mark it,
+ so that it is not seen again in the loop.
+ */
+ nb++;
+ DBUG_ASSERT(share->in_checkpoint == 0);
+ /* This flag ensures that we count only _distinct_ shares. */
+ share->in_checkpoint= MARIA_CHECKPOINT_SEEN_IN_LOOP;
+ }
+ }
+ if (unlikely((distinct_shares=
+ (MARIA_SHARE **)my_malloc(nb * sizeof(MARIA_SHARE *),
+ MYF(MY_WME))) == NULL))
+ goto err;
+ for (total_names_length= 0, i= 0, pos= maria_open_list; pos; pos= pos->next)
+ {
+ MARIA_HA *info= (MARIA_HA*)pos->data;
+ MARIA_SHARE *share= info->s;
+ if (share->in_checkpoint & MARIA_CHECKPOINT_SEEN_IN_LOOP)
+ {
+ distinct_shares[i++]= share;
+ /*
+ With this we prevent the share from going away while we later flush
+ and force it without holding THR_LOCK_maria. For example if the share
+ could be my_free()d by maria_close() we would have a problem when we
+ access it to flush the table. We "pin" the share pointer.
+ And we also take down MARIA_CHECKPOINT_SEEN_IN_LOOP, so that it is
+ not seen again in the loop.
+ */
+ share->in_checkpoint= MARIA_CHECKPOINT_LOOKS_AT_ME;
+ /** @todo avoid strlen() */
+ total_names_length+= strlen(share->open_file_name);
+ }
+ }
+
+ DBUG_ASSERT(i == nb);
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ DBUG_PRINT("info",("found %u table shares", nb));
+
+ str->length=
+ 4 + /* number of tables */
+ (2 + /* short id */
+ 4 + /* kfile */
+ 4 + /* dfile */
+ LSN_STORE_SIZE + /* first_log_write_at_lsn */
+ 1 /* end-of-name 0 */
+ ) * nb + total_names_length;
+ if (unlikely((str->str= my_malloc(str->length, MYF(MY_WME))) == NULL))
+ goto err;
+
+ ptr= str->str;
+ ptr+= 4; /* real number of stored tables is not yet know */
+
+ /* only possible checkpointer, so can do the read below without mutex */
+ filter_param.up_to_lsn= last_checkpoint_lsn;
+ switch(checkpoint_in_progress)
+ {
+ case CHECKPOINT_MEDIUM:
+ filter= &filter_flush_file_medium;
+ break;
+ case CHECKPOINT_FULL:
+ filter= &filter_flush_file_full;
+ break;
+ case CHECKPOINT_INDIRECT:
+ filter= NULL;
+ break;
+ default:
+ DBUG_ASSERT(0);
+ goto err;
+ }
+
+ /*
+ The principle of reading/writing the state below is explained in
+ ma_recovery.c, look for "Recovery of the state".
+ */
+#define STATE_COPIES 1024
+ state_copies= (struct st_state_copy *)
+ my_malloc(STATE_COPIES * sizeof(struct st_state_copy), MYF(MY_WME));
+ dfiles= (PAGECACHE_FILE *)my_realloc((uchar *)dfiles,
+ /* avoid size of 0 for my_realloc */
+ max(1, nb) * sizeof(PAGECACHE_FILE),
+ MYF(MY_WME | MY_ALLOW_ZERO_PTR));
+ kfiles= (PAGECACHE_FILE *)my_realloc((uchar *)kfiles,
+ /* avoid size of 0 for my_realloc */
+ max(1, nb) * sizeof(PAGECACHE_FILE),
+ MYF(MY_WME | MY_ALLOW_ZERO_PTR));
+ if (unlikely((state_copies == NULL) ||
+ (dfiles == NULL) || (kfiles == NULL)))
+ goto err;
+ state_copy= state_copies_end= NULL;
+ dfiles_end= dfiles;
+ kfiles_end= kfiles;
+
+ for (nb_stored= 0, i= 0; i < nb; i++)
+ {
+ MARIA_SHARE *share= distinct_shares[i];
+ PAGECACHE_FILE kfile, dfile;
+ my_bool ignore_share;
+ if (!(share->in_checkpoint & MARIA_CHECKPOINT_LOOKS_AT_ME))
+ {
+ /* No need for a mutex to read the above, only us can write this flag */
+ continue;
+ }
+ /**
+ @todo We should not look at tables which didn't change since last
+ checkpoint.
+ */
+ DBUG_PRINT("info",("looking at table '%s'", share->open_file_name));
+ if (state_copy == state_copies_end) /* we have no more cached states */
+ {
+ /*
+ Collect and cache a bunch of states. We do this for many states at a
+ time, to not lock/unlock the log's lock too often.
+ */
+ uint j, bound= min(nb, i + STATE_COPIES);
+ state_copy= state_copies;
+ /* part of the state is protected by log's lock */
+ translog_lock();
+ state_copies_horizon= translog_get_horizon_no_lock();
+ for (j= i; j < bound; j++)
+ {
+ MARIA_SHARE *share2= distinct_shares[j];
+ if (!(share2->in_checkpoint & MARIA_CHECKPOINT_LOOKS_AT_ME))
+ continue;
+ state_copy->index= j;
+ state_copy->state= share2->state; /* we copy the state */
+ state_copy++;
+ /*
+ data_file_length is not updated under log's lock by the bitmap
+ code, but writing a wrong data_file_length is ok: a next
+ maria_close() will correct it; if we crash before, Recovery will
+ set it to the true physical size.
+ */
+ }
+ translog_unlock();
+ /**
+ We are going to flush these states.
+ Before, all records describing how to undo such state must be
+ in the log (WAL). Usually this means UNDOs. In the special case of
+ data|key_file_length, recovery just needs to open the table to fix the
+ length, so any LOGREC_FILE_ID/REDO/UNDO allowing recovery to
+ understand it must open a table, is enough; so as long as
+ data|key_file_length is updated after writing any log record it's ok:
+ if we copied new value above, it means the record was before
+ state_copies_horizon and we flush such record below.
+ Apart from data|key_file_length which are easily recoverable from the
+ real file's size, all other state members must be updated only when
+ writing the UNDO; otherwise, if updated before, if their new value is
+ flushed by a checkpoint and there is a crash before UNDO is written,
+ their REDO group will be missing or at least incomplete and skipped
+ by recovery, so bad state value will stay. For example, setting
+ key_root before writing the UNDO: the table would have old index
+ pages (they were pinned at time of crash) and a new, thus wrong,
+ key_root.
+ @todo RECOVERY BUG check that all code honours that.
+ */
+ if (translog_flush(state_copies_horizon))
+ goto err;
+ /* now we have cached states and they are WAL-safe*/
+ state_copies_end= state_copy;
+ state_copy= state_copies;
+ }
+
+ /* locate our state among these cached ones */
+ for ( ; state_copy->index != i; state_copy++)
+ DBUG_ASSERT(state_copy < state_copies_end);
+
+ /* OS file descriptors are ints which we stored in 4 bytes */
+ compile_time_assert(sizeof(int) <= 4);
+ pthread_mutex_lock(&share->intern_lock);
+ /*
+ Tables in a normal state have their two file descriptors open.
+ In some rare cases like REPAIR, some descriptor may be closed or even
+ -1. If that happened, the _ma_state_info_write() may fail. This is
+ prevented by enclosing all all places which close/change kfile.file with
+ intern_lock.
+ */
+ kfile= share->kfile;
+ dfile= share->bitmap.file;
+ /*
+ Ignore table which has no logged writes (all its future log records will
+ be found naturally by Recovery). Ignore obsolete shares (_before_
+ setting themselves to last_version=0 they already did all flush and
+ sync; if we flush their state now we may be flushing an obsolete state
+ onto a newer one (assuming the table has been reopened with a different
+ share but of course same physical index file).
+ */
+ ignore_share= (share->id == 0) | (share->last_version == 0);
+ DBUG_PRINT("info", ("ignore_share: %d", ignore_share));
+ if (!ignore_share)
+ {
+ /** @todo avoid strlen */
+ uint open_file_name_len= strlen(share->open_file_name) + 1;
+ /* remember the descriptors for background flush */
+ *(dfiles_end++)= dfile;
+ *(kfiles_end++)= kfile;
+ /* we will store this table in the record */
+ nb_stored++;
+ int2store(ptr, share->id);
+ ptr+= 2;
+ /*
+ We must store the OS file descriptors, because the pagecache, which
+ tells us the list of dirty pages, refers to these pages by OS file
+ descriptors. An alternative is to make the page cache aware of the
+ 2-byte id and of the location of a page ("is it a data file page or an
+ index file page?").
+ If one descriptor is -1, normally there should be no dirty pages
+ collected for this file, it's ok to store -1, it will not be used.
+ */
+ int4store(ptr, kfile.file);
+ ptr+= 4;
+ int4store(ptr, dfile.file);
+ ptr+= 4;
+ lsn_store(ptr, share->lsn_of_file_id);
+ ptr+= LSN_STORE_SIZE;
+ /*
+ first_bitmap_with_space is not updated under log's lock, and is
+ important. We would need the bitmap's lock to get it right. Recovery
+ of this is not clear, so we just play safe: write it out as
+ unknown: if crash, _ma_bitmap_init() at next open (for example in
+ Recovery) will convert it to 0 and thus the first insertion will
+ search for free space from the file's first bitmap (0) -
+ under-optimal but safe.
+ If no crash, maria_close() will write the exact value.
+ */
+ state_copy->state.first_bitmap_with_space= ~(ulonglong)0;
+ memcpy(ptr, share->open_file_name, open_file_name_len);
+ ptr+= open_file_name_len;
+ if (cmp_translog_addr(share->state.is_of_horizon,
+ checkpoint_start_log_horizon) >= 0)
+ {
+ /*
+ State was flushed recently, it does not hold down the log's
+ low-water mark and will not give avoidable work to Recovery. So we
+ needn't flush it. Also, it is possible that while we copied the
+ state above (under log's lock, without intern_lock) it was being
+ modified in memory or flushed to disk (without log's lock, under
+ intern_lock, like in maria_extra()), so our copy may be incorrect
+ and we should not flush it.
+ It may also be a share which got last_version==0 since we checked
+ last_version; in this case, it flushed its state and the LSN test
+ above will catch it.
+ */
+ }
+ else
+ {
+ /*
+ We could do the state flush only if share->changed, but it's
+ tricky.
+ Consider a maria_write() which has written REDO,UNDO, and before it
+ calls _ma_writeinfo() (setting share->changed=1), checkpoint
+ happens and sees share->changed=0, does not flush state. It is
+ possible that Recovery does not start from before the REDO and thus
+ the state is not recovered. A solution may be to set
+ share->changed=1 under log mutex when writing log records.
+ But as anyway we have another problem below, this optimization would
+ be of little use.
+ */
+ /** @todo flush state only if changed since last checkpoint */
+ DBUG_ASSERT(share->last_version != 0);
+ state_copy->state.is_of_horizon= share->state.is_of_horizon=
+ state_copies_horizon;
+ if (kfile.file >= 0)
+ sync_error|=
+ _ma_state_info_write_sub(kfile.file, &state_copy->state, 1);
+ /*
+ We don't set share->changed=0 because it may interfere with a
+ concurrent _ma_writeinfo() doing share->changed=1 (cancel its
+ effect). The sad consequence is that we will flush the same state at
+ each checkpoint if the table was once written and then not anymore.
+ */
+ }
+ if (_ma_bitmap_flush_all(share))
+ {
+ sync_error= 1;
+ /** @todo all write failures should mark table corrupted */
+ ma_message_no_user(0, "checkpoint bitmap page flush failed");
+ }
+ DBUG_ASSERT(share->pagecache == maria_pagecache);
+ }
+ if (share->in_checkpoint & MARIA_CHECKPOINT_SHOULD_FREE_ME)
+ {
+ /* maria_close() left us to free the share */
+ pthread_mutex_unlock(&share->intern_lock);
+ pthread_mutex_destroy(&share->intern_lock);
+ my_free((uchar *)share, MYF(0));
+ }
+ else
+ {
+ /* share goes back to normal state */
+ share->in_checkpoint= 0;
+ pthread_mutex_unlock(&share->intern_lock);
+ }
+
+ /*
+ We do the big disk writes out of intern_lock to not block other
+ users of this table (intern_lock is taken at the start and end of
+ every statement). This means that file descriptors may be invalid
+ (files may have been closed for example by HA_EXTRA_PREPARE_FOR_*
+ under Windows, or REPAIR). This should not be a problem as we use
+ MY_IGNORE_BADFD. Descriptors may even point to other files but then
+ the old blocks (of before the close) must have been flushed for sure,
+ so our flush will flush new blocks (of after the latest open) and that
+ should do no harm.
+ */
+ /*
+ If CHECKPOINT_MEDIUM, this big flush below may result in a
+ serious write burst. Realize that all pages dirtied between the
+ last checkpoint and the one we are doing now, will be flushed at
+ next checkpoint, except those evicted by LRU eviction (depending on
+ the size of the page cache compared to the size of the working data
+ set, eviction may be rare or frequent).
+ We avoid that burst by anticipating: those pages are flushed
+ in bunches spanned regularly over the time interval between now and
+ the next checkpoint, by a background thread. Thus the next checkpoint
+ will have only little flushing to do (CHECKPOINT_MEDIUM should thus be
+ only a little slower than CHECKPOINT_INDIRECT).
+ */
+
+ /*
+ PageCacheFlushConcurrencyBugs
+ Inside the page cache, calls to flush_pagecache_blocks_int() on the same
+ file are serialized. Examples of concurrency bugs which happened when we
+ didn't have this serialization:
+ - maria_chk_size() (via CHECK TABLE) happens concurrently with
+ Checkpoint: Checkpoint is flushing a page: it pins the page and is
+ pre-empted, maria_chk_size() wants to flush this page too so gets an
+ error because Checkpoint pinned this page. Such error makes
+ maria_chk_size() mark the table as corrupted.
+ - maria_close() happens concurrently with Checkpoint:
+ Checkpoint is flushing a page: it registers a request on the page, is
+ pre-empted ; maria_close() flushes this page too with FLUSH_RELEASE:
+ FLUSH_RELEASE will cause a free_block() which assumes the page is in the
+ LRU, but it is not (as Checkpoint registered a request). Crash.
+ - one thread is evicting a page of the file out of the LRU: it marks it
+ iPC_BLOCK_IN_SWITCH and is pre-empted. Then two other threads do flushes
+ of the same file concurrently (like above). Then one flusher sees the
+ page is in switch, removes it from changed_blocks[] and puts it in its
+ first_in_switch, so the other flusher will not see the page at all and
+ return too early. If it's maria_close() which returns too early, then
+ maria_close() may close the file descriptor, and the other flusher, and
+ the evicter will fail to write their page: corruption.
+ */
+
+ if (!ignore_share)
+ {
+ if (filter != NULL)
+ {
+ if ((flush_pagecache_blocks_with_filter(maria_pagecache,
+ &dfile, FLUSH_KEEP_LAZY,
+ filter, &filter_param) &
+ PCFLUSH_ERROR))
+ ma_message_no_user(0, "checkpoint data page flush failed");
+ if ((flush_pagecache_blocks_with_filter(maria_pagecache,
+ &kfile, FLUSH_KEEP_LAZY,
+ filter, &filter_param) &
+ PCFLUSH_ERROR))
+ ma_message_no_user(0, "checkpoint index page flush failed");
+ }
+ /*
+ fsyncs the fd, that's the loooong operation (e.g. max 150 fsync
+ per second, so if you have touched 1000 files it's 7 seconds).
+ */
+ sync_error|=
+ my_sync(dfile.file, MYF(MY_WME | MY_IGNORE_BADFD)) |
+ my_sync(kfile.file, MYF(MY_WME | MY_IGNORE_BADFD));
+ /*
+ in case of error, we continue because writing other tables to disk is
+ still useful.
+ */
+ }
+ }
+
+ if (sync_error)
+ goto err;
+ /* We maybe over-estimated (due to share->id==0 or last_version==0) */
+ DBUG_ASSERT(str->length >= (uint)(ptr - str->str));
+ str->length= (uint)(ptr - str->str);
+ /*
+ As we support max 65k tables open at a time (2-byte short id), we
+ assume uint is enough for the cumulated length of table names; and
+ LEX_STRING::length is uint.
+ */
+ int4store(str->str, nb_stored);
+ error= unmark_tables= 0;
+
+err:
+ if (unlikely(unmark_tables))
+ {
+ /* maria_close() uses THR_LOCK_maria from start to end */
+ pthread_mutex_lock(&THR_LOCK_maria);
+ for (i= 0; i < nb; i++)
+ {
+ MARIA_SHARE *share= distinct_shares[i];
+ if (share->in_checkpoint & MARIA_CHECKPOINT_SHOULD_FREE_ME)
+ {
+ /* maria_close() left us to free the share */
+ pthread_mutex_destroy(&share->intern_lock);
+ my_free((uchar *)share, MYF(0));
+ }
+ else
+ {
+ /* share goes back to normal state */
+ share->in_checkpoint= 0;
+ }
+ }
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ }
+ my_free((uchar *)distinct_shares, MYF(MY_ALLOW_ZERO_PTR));
+ my_free((uchar *)state_copies, MYF(MY_ALLOW_ZERO_PTR));
+ DBUG_RETURN(error);
+}
diff --git a/storage/maria/ma_checkpoint.h b/storage/maria/ma_checkpoint.h
new file mode 100644
index 00000000000..69645c6bcda
--- /dev/null
+++ b/storage/maria/ma_checkpoint.h
@@ -0,0 +1,92 @@
+/* Copyright (C) 2006,2007 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ WL#3071 Maria checkpoint
+ First version written by Guilhem Bichot on 2006-04-27.
+ Does not compile yet.
+*/
+
+/* This is the interface of this module. */
+
+typedef enum enum_ma_checkpoint_level {
+ CHECKPOINT_NONE= 0,
+ /* just write dirty_pages, transactions table and sync files */
+ CHECKPOINT_INDIRECT,
+ /* also flush all dirty pages which were already dirty at prev checkpoint */
+ CHECKPOINT_MEDIUM,
+ /* also flush all dirty pages */
+ CHECKPOINT_FULL
+} CHECKPOINT_LEVEL;
+
+C_MODE_START
+int ma_checkpoint_init(ulong interval);
+void ma_checkpoint_end(void);
+int ma_checkpoint_execute(CHECKPOINT_LEVEL level, my_bool no_wait);
+C_MODE_END
+
+/**
+ @brief reads some LSNs with special trickery
+
+ If a 64-bit variable transitions between both halves being zero to both
+ halves being non-zero, and back, this function can be used to do a read of
+ it (without mutex, without atomic load) which always produces a correct
+ (though maybe slightly old) value (even on 32-bit CPUs). The value is at
+ least as new as the latest mutex unlock done by the calling thread.
+ The assumption is that the system sets both 4-byte halves either at the
+ same time, or one after the other (in any order), but NOT some bytes of the
+ first half then some bytes of the second half then the rest of bytes of the
+ first half. With this assumption, the function can detect when it is
+ seeing an inconsistent value.
+
+ @param LSN pointer to the LSN variable to read
+
+ @return LSN part (most significant byte always 0)
+*/
+#if ( SIZEOF_CHARP >= 8 )
+/* 64-bit CPU, 64-bit reads are atomic */
+#define lsn_read_non_atomic LSN_WITH_FLAGS_TO_LSN
+#else
+static inline LSN lsn_read_non_atomic_32(const volatile LSN *x)
+{
+ /*
+ 32-bit CPU, 64-bit reads may give a mixed of old half and new half (old
+ low bits and new high bits, or the contrary).
+ */
+ for (;;) /* loop until no atomicity problems */
+ {
+ /*
+ Remove most significant byte in case this is a LSN_WITH_FLAGS object.
+ Those flags in TRN::first_undo_lsn break the condition on transitions so
+ they must be removed below.
+ */
+ LSN y= LSN_WITH_FLAGS_TO_LSN(*x);
+ if (likely((y == LSN_IMPOSSIBLE) || LSN_VALID(y)))
+ return y;
+ }
+}
+#define lsn_read_non_atomic(x) lsn_read_non_atomic_32(&x)
+#endif
+
+/**
+ prints a message from a task not connected to any user (checkpoint
+ and recovery for example).
+
+ @param level 0 if error, ME_JUST_WARNING if warning,
+ ME_JUST_INFO if info
+ @param sentence text to write
+*/
+#define ma_message_no_user(level, sentence) \
+ my_printf_error(HA_ERR_GENERIC, "Maria engine: %s", MYF(level), sentence)
diff --git a/storage/maria/ma_checksum.c b/storage/maria/ma_checksum.c
new file mode 100644
index 00000000000..b48eae1c27d
--- /dev/null
+++ b/storage/maria/ma_checksum.c
@@ -0,0 +1,89 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Calculate a checksum for a row */
+
+#include "maria_def.h"
+
+/**
+ Calculate a checksum for the record
+
+ _ma_checksum()
+ @param info Maria handler
+ @param record Record
+
+ @note
+ To ensure that the checksum is independent of the row format
+ we need to always calculate the checksum in the original field order.
+
+ @return checksum
+*/
+
+ha_checksum _ma_checksum(MARIA_HA *info, const uchar *record)
+{
+ ha_checksum crc=0;
+ uint i,end;
+ MARIA_COLUMNDEF *base_column= info->s->columndef;
+ uint16 *column_nr= info->s->column_nr;
+
+ if (info->s->base.null_bytes)
+ crc= my_checksum(crc, record, info->s->base.null_bytes);
+
+ for (i= 0, end= info->s->base.fields ; i < end ; i++)
+ {
+ MARIA_COLUMNDEF *column= base_column + column_nr[i];
+ const uchar *pos;
+ ulong length;
+
+ if (record[column->null_pos] & column->null_bit)
+ continue; /* Null field */
+
+ pos= record + column->offset;
+ switch (column->type) {
+ case FIELD_BLOB:
+ {
+ uint blob_size_length= column->length- portable_sizeof_char_ptr;
+ length= _ma_calc_blob_length(blob_size_length, pos);
+ if (length)
+ {
+ memcpy((char*) &pos, pos + blob_size_length, sizeof(char*));
+ crc= my_checksum(crc, pos, length);
+ }
+ continue;
+ }
+ case FIELD_VARCHAR:
+ {
+ uint pack_length= column->fill_length;
+ if (pack_length == 1)
+ length= (ulong) *(uchar*) pos;
+ else
+ length= uint2korr(pos);
+ pos+= pack_length; /* Skip length information */
+ break;
+ }
+ default:
+ length= column->length;
+ break;
+ }
+ crc= my_checksum(crc, pos, length);
+ }
+ return crc;
+}
+
+
+ha_checksum _ma_static_checksum(MARIA_HA *info, const uchar *pos)
+{
+ return my_checksum(0, pos, info->s->base.reclength);
+}
diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c
new file mode 100644
index 00000000000..f058754c0ad
--- /dev/null
+++ b/storage/maria/ma_close.c
@@ -0,0 +1,160 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* close a isam-database */
+/*
+ TODO:
+ We need to have a separate mutex on the closed file to allow other threads
+ to open other files during the time we flush the cache and close this file
+*/
+
+#include "maria_def.h"
+
+int maria_close(register MARIA_HA *info)
+{
+ int error=0,flag;
+ my_bool share_can_be_freed= FALSE;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("maria_close");
+ DBUG_PRINT("enter",("base: 0x%lx reopen: %u locks: %u",
+ (long) info, (uint) share->reopen,
+ (uint) share->tot_locks));
+
+ pthread_mutex_lock(&THR_LOCK_maria);
+ if (info->lock_type == F_EXTRA_LCK)
+ info->lock_type=F_UNLCK; /* HA_EXTRA_NO_USER_CHANGE */
+
+ if (share->reopen == 1 && share->kfile.file >= 0)
+ _ma_decrement_open_count(info);
+
+ if (info->lock_type != F_UNLCK)
+ {
+ if (maria_lock_database(info,F_UNLCK))
+ error=my_errno;
+ }
+ pthread_mutex_lock(&share->intern_lock);
+
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ {
+ share->r_locks--;
+ share->tot_locks--;
+ }
+ if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
+ {
+ if (end_io_cache(&info->rec_cache))
+ error=my_errno;
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ }
+ flag= !--share->reopen;
+ maria_open_list=list_delete(maria_open_list,&info->open_list);
+
+ my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR));
+ (*share->end)(info);
+
+ if (flag)
+ {
+ /* Last close of file; Flush everything */
+ if (share->kfile.file >= 0)
+ {
+ if ((*share->once_end)(share))
+ error= my_errno;
+ if (flush_pagecache_blocks(share->pagecache, &share->kfile,
+ (share->temporary ?
+ FLUSH_IGNORE_CHANGED :
+ FLUSH_RELEASE)))
+ error= my_errno;
+#ifdef HAVE_MMAP
+ if (share->file_map)
+ _ma_unmap_file(info);
+#endif
+ /*
+ If we are crashed, we can safely flush the current state as it will
+ not change the crashed state.
+ We can NOT write the state in other cases as other threads
+ may be using the file at this point
+ IF using --external-locking, which does not apply to Maria.
+ */
+ if (((share->changed && share->base.born_transactional) ||
+ maria_is_crashed(info)))
+ {
+ /*
+ State must be written to file as it was not done at table's
+ unlocking.
+ */
+ if (_ma_state_info_write(share, 1))
+ error= my_errno;
+ }
+ /*
+ File must be synced as it is going out of the maria_open_list and so
+ becoming unknown to future Checkpoints.
+ */
+ if (!share->temporary && my_sync(share->kfile.file, MYF(MY_WME)))
+ error= my_errno;
+ if (my_close(share->kfile.file, MYF(0)))
+ error= my_errno;
+ }
+#ifdef THREAD
+ thr_lock_delete(&share->lock);
+ {
+ int i,keys;
+ keys = share->state.header.keys;
+ VOID(rwlock_destroy(&share->mmap_lock));
+ for(i=0; i<keys; i++) {
+ VOID(rwlock_destroy(&share->key_root_lock[i]));
+ }
+ }
+#endif
+ DBUG_ASSERT(share->now_transactional == share->base.born_transactional);
+ if (share->in_checkpoint == MARIA_CHECKPOINT_LOOKS_AT_ME)
+ {
+ share->kfile.file= -1; /* because Checkpoint does not need to flush */
+ /* we cannot my_free() the share, Checkpoint would see a bad pointer */
+ share->in_checkpoint|= MARIA_CHECKPOINT_SHOULD_FREE_ME;
+ }
+ else
+ share_can_be_freed= TRUE;
+ }
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ pthread_mutex_unlock(&share->intern_lock);
+ if (share_can_be_freed)
+ {
+ VOID(pthread_mutex_destroy(&share->intern_lock));
+ my_free((uchar *)share, MYF(0));
+ }
+ if (info->ftparser_param)
+ {
+ my_free((uchar*)info->ftparser_param, MYF(0));
+ info->ftparser_param= 0;
+ }
+ if (info->dfile.file >= 0)
+ {
+ /*
+ This is outside of mutex so would confuse a concurrent
+ Checkpoint. Fortunately in BLOCK_RECORD we close earlier under mutex.
+ */
+ if (my_close(info->dfile.file, MYF(0)))
+ error= my_errno;
+ }
+
+ delete_dynamic(&info->pinned_pages);
+ my_free(info, MYF(0));
+
+ if (error)
+ {
+ DBUG_PRINT("error", ("Got error on close: %d", my_errno));
+ DBUG_RETURN(my_errno= error);
+ }
+ DBUG_RETURN(0);
+} /* maria_close */
diff --git a/storage/maria/ma_commit.c b/storage/maria/ma_commit.c
new file mode 100644
index 00000000000..763dfb88107
--- /dev/null
+++ b/storage/maria/ma_commit.c
@@ -0,0 +1,141 @@
+/* Copyright (C) 2007 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+#include "trnman.h"
+
+/**
+ @brief writes a COMMIT record to log and commits transaction in memory
+
+ @param trn transaction
+
+ @return Operation status
+ @retval 0 ok
+ @retval 1 error (disk error or out of memory)
+*/
+
+int ma_commit(TRN *trn)
+{
+ int res;
+ LSN commit_lsn;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS];
+ DBUG_ENTER("ma_commit");
+
+ if (trn->undo_lsn == 0) /* no work done, rollback (cheaper than commit) */
+ DBUG_RETURN(trnman_rollback_trn(trn));
+ /*
+ - if COMMIT record is written before trnman_commit_trn():
+ if Checkpoint comes in the middle it will see trn is not committed,
+ then if crash, Recovery might roll back trn (if min(rec_lsn) is after
+ COMMIT record) and this is not an issue as
+ * transaction's updates were not made visible to other transactions
+ * "commit ok" was not sent to client
+ Alternatively, Recovery might commit trn (if min(rec_lsn) is before COMMIT
+ record), which is ok too. All in all it means that "trn committed" is not
+ 100% equal to "COMMIT record written".
+ - if COMMIT record is written after trnman_commit_trn():
+ if crash happens between the two, trn will be rolled back which is an
+ issue (transaction's updates were made visible to other transactions).
+ So we need to go the first way.
+ */
+
+ /*
+ We do not store "thd->transaction.xid_state.xid" for now, it will be
+ needed only when we support XA.
+ */
+ res= (translog_write_record(&commit_lsn, LOGREC_COMMIT,
+ trn, NULL, 0,
+ sizeof(log_array)/sizeof(log_array[0]),
+ log_array, NULL, NULL) ||
+ translog_flush(commit_lsn) ||
+ trnman_commit_trn(trn));
+ /*
+ Note: if trnman_commit_trn() fails above, we have already
+ written the COMMIT record, so Checkpoint and Recovery will see the
+ transaction as committed.
+ */
+ DBUG_RETURN(res);
+}
+
+
+/**
+ @brief Writes a COMMIT record for a transaciton associated with a file
+
+ @param info Maria handler
+
+ @return Operation status
+ @retval 0 ok
+ @retval # error (disk error or out of memory)
+*/
+
+int maria_commit(MARIA_HA *info)
+{
+ return info->s->now_transactional ? ma_commit(info->trn) : 0;
+}
+
+
+/**
+ @brief Starts a transaction on a file handle
+
+ @param info Maria handler
+
+ @return Operation status
+ @retval 0 ok
+ @retval # Error code.
+*/
+
+
+int maria_begin(MARIA_HA *info)
+{
+ DBUG_ENTER("maria_begin");
+
+ if (info->s->now_transactional)
+ {
+ TRN *trn;
+ struct st_my_thread_var *mysys_var= my_thread_var;
+ trn= trnman_new_trn(&mysys_var->mutex,
+ &mysys_var->suspend,
+ (char*) &mysys_var + STACK_DIRECTION *1024*128);
+ if (unlikely(!trn))
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+
+ DBUG_PRINT("info", ("TRN set to 0x%lx", (ulong) trn));
+ info->trn= trn;
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ @brief Disable logging for this table
+
+ @note
+ Mainly used during repair table, where we don't want to log all
+ changes to index or rows
+*/
+
+void maria_disable_logging(MARIA_HA *info)
+{
+ info->s->now_transactional= 0;
+ info->trn= &dummy_transaction_object;
+ info->s->page_type= PAGECACHE_PLAIN_PAGE;
+}
+
+
+void maria_enable_logging(MARIA_HA *info)
+{
+ if ((info->s->now_transactional= info->s->base.born_transactional))
+ info->s->page_type= PAGECACHE_LSN_PAGE;
+}
diff --git a/storage/maria/ma_commit.h b/storage/maria/ma_commit.h
new file mode 100644
index 00000000000..2c57c73fd7a
--- /dev/null
+++ b/storage/maria/ma_commit.h
@@ -0,0 +1,18 @@
+/* Copyright (C) 2007 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+C_MODE_START
+int ma_commit(TRN *trn);
+C_MODE_END
diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c
new file mode 100644
index 00000000000..36efb52acec
--- /dev/null
+++ b/storage/maria/ma_control_file.c
@@ -0,0 +1,509 @@
+/* Copyright (C) 2007 MySQL AB & Guilhem Bichot & Michael Widenius
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ WL#3234 Maria control file
+ First version written by Guilhem Bichot on 2006-04-27.
+*/
+
+#ifndef EXTRACT_DEFINITIONS
+#include "maria_def.h"
+#endif
+
+/*
+ A control file contains the following objects:
+
+Start of create time variables (at start of file):
+ - Magic string (including version number of Maria control file)
+ - Uuid
+ - Size of create time part
+ - Size of dynamic part
+ - Maria block size
+..... Here we can add new variables without changing format
+ - Checksum of create time part (last of block)
+
+Start of changeable part:
+ - Checksum of changeable part
+ - LSN of last checkpoint
+ - Number of last log file
+..... Here we can add new variables without changing format
+
+The idea is that one can add new variables to the control file and still
+use it with old program versions. If one needs to do an incompatible change
+one should increment the control file version number.
+*/
+
+/* Total size should be < sector size for atomic write operation */
+#define CF_MAX_SIZE 512
+#define CF_MIN_SIZE (CF_BLOCKSIZE_OFFSET + CF_BLOCKSIZE_SIZE + \
+ CF_CHECKSUM_SIZE * 2 + CF_LSN_SIZE + CF_FILENO_SIZE)
+
+/* Create time variables */
+#define CF_MAGIC_STRING "\xfe\xfe\xc"
+#define CF_MAGIC_STRING_OFFSET 0
+#define CF_MAGIC_STRING_SIZE (sizeof(CF_MAGIC_STRING)-1)
+#define CF_VERSION_OFFSET (CF_MAGIC_STRING_OFFSET + CF_MAGIC_STRING_SIZE)
+#define CF_VERSION_SIZE 1
+#define CF_UUID_OFFSET (CF_VERSION_OFFSET + CF_VERSION_SIZE)
+#define CF_UUID_SIZE MY_UUID_SIZE
+#define CF_CREATE_TIME_SIZE_OFFSET (CF_UUID_OFFSET + CF_UUID_SIZE)
+#define CF_SIZE_SIZE 2
+#define CF_CHANGEABLE_SIZE_OFFSET (CF_CREATE_TIME_SIZE_OFFSET + CF_SIZE_SIZE)
+#define CF_BLOCKSIZE_OFFSET (CF_CHANGEABLE_SIZE_OFFSET + CF_SIZE_SIZE)
+#define CF_BLOCKSIZE_SIZE 2
+
+#define CF_CREATE_TIME_TOTAL_SIZE (CF_BLOCKSIZE_OFFSET + CF_BLOCKSIZE_SIZE + \
+ CF_CHECKSUM_SIZE)
+
+/*
+ Start of the part that changes during execution
+ This is stored at offset uint2korr(file[CF_CHANGEABLE_SIZE])
+*/
+#define CF_CHECKSUM_OFFSET 0
+#define CF_CHECKSUM_SIZE 4
+#define CF_LSN_OFFSET (CF_CHECKSUM_OFFSET + CF_CHECKSUM_SIZE)
+#define CF_LSN_SIZE LSN_STORE_SIZE
+#define CF_FILENO_OFFSET (CF_LSN_OFFSET + CF_LSN_SIZE)
+#define CF_FILENO_SIZE 4
+
+#define CF_CHANGEABLE_TOTAL_SIZE (CF_FILENO_OFFSET + CF_FILENO_SIZE)
+
+/*
+ The following values should not be changed, except when changing version
+ number of the maria control file. These are the minimum sizes of the
+ parts the code can handle.
+*/
+
+#define CF_MIN_CREATE_TIME_TOTAL_SIZE \
+(CF_BLOCKSIZE_OFFSET + CF_BLOCKSIZE_SIZE + CF_CHECKSUM_SIZE)
+#define CF_MIN_CHANGEABLE_TOTAL_SIZE \
+(CF_FILENO_OFFSET + CF_FILENO_SIZE)
+
+#ifndef EXTRACT_DEFINITIONS
+
+/* This module owns these two vars. */
+/**
+ This LSN serves for the two-checkpoint rule, and also to find the
+ checkpoint record when doing a recovery.
+*/
+LSN last_checkpoint_lsn= LSN_IMPOSSIBLE;
+uint32 last_logno= FILENO_IMPOSSIBLE;
+
+/**
+ @brief If log's lock should be asserted when writing to control file.
+
+ Can be re-used by any function which needs to be thread-safe except when
+ it is called at startup.
+*/
+my_bool maria_multi_threaded= FALSE;
+/** @brief if currently doing a recovery */
+my_bool maria_in_recovery= FALSE;
+
+/*
+ Control file is less then 512 bytes (a disk sector),
+ to be as atomic as possible
+*/
+static int control_file_fd= -1;
+
+static uint cf_create_time_size;
+static uint cf_changeable_size;
+
+/**
+ @brief Create Maria control file
+*/
+
+static CONTROL_FILE_ERROR create_control_file(const char *name,
+ int open_flags)
+{
+ uint32 sum;
+ uchar buffer[CF_CREATE_TIME_TOTAL_SIZE];
+ DBUG_ENTER("maria_create_control_file");
+
+ /* in a recovery, we expect to find a control file */
+ if (maria_in_recovery)
+ DBUG_RETURN(CONTROL_FILE_MISSING);
+ if ((control_file_fd= my_create(name, 0,
+ open_flags,
+ MYF(MY_SYNC_DIR | MY_WME))) < 0)
+ DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);
+
+ /* Reset variables, as we are creating the file */
+ cf_create_time_size= CF_CREATE_TIME_TOTAL_SIZE;
+ cf_changeable_size= CF_CHANGEABLE_TOTAL_SIZE;
+
+ /* Create unique uuid for the control file */
+ my_uuid_init((ulong) &buffer, (ulong) &maria_uuid);
+ my_uuid(maria_uuid);
+
+ /* Prepare and write the file header */
+ memcpy(buffer, CF_MAGIC_STRING, CF_MAGIC_STRING_SIZE);
+ buffer[CF_VERSION_OFFSET]= CONTROL_FILE_VERSION;
+ memcpy(buffer + CF_UUID_OFFSET, maria_uuid, CF_UUID_SIZE);
+ int2store(buffer + CF_CREATE_TIME_SIZE_OFFSET, cf_create_time_size);
+ int2store(buffer + CF_CHANGEABLE_SIZE_OFFSET, cf_changeable_size);
+
+ /* Write create time variables */
+ int2store(buffer + CF_BLOCKSIZE_OFFSET, maria_block_size);
+
+ /* Store checksum for create time parts */
+ sum= (uint32) my_checksum(0, buffer, cf_create_time_size -
+ CF_CHECKSUM_SIZE);
+ int4store(buffer + cf_create_time_size - CF_CHECKSUM_SIZE, sum);
+
+ if (my_pwrite(control_file_fd, buffer, cf_create_time_size,
+ 0, MYF(MY_FNABP | MY_WME)))
+ DBUG_RETURN(1);
+
+ /*
+ To be safer we should make sure that there are no logs or data/index
+ files around (indeed it could be that the control file alone was deleted
+ or not restored, and we should not go on with life at this point).
+
+ TODO: For now we trust (this is alpha version), but for beta if would
+ be great to verify.
+
+ We could have a tool which can rebuild the control file, by reading the
+ directory of logs, finding the newest log, reading it to find last
+ checkpoint... Slow but can save your db. For this to be possible, we
+ must always write to the control file right after writing the checkpoint
+ log record, and do nothing in between (i.e. the checkpoint must be
+ usable as soon as it has been written to the log).
+ */
+
+ /* init the file with these "undefined" values */
+ DBUG_RETURN(ma_control_file_write_and_force(LSN_IMPOSSIBLE,
+ FILENO_IMPOSSIBLE,
+ CONTROL_FILE_UPDATE_ALL));
+}
+
+/*
+ @brief Initialize control file subsystem
+
+ Looks for the control file. If none and creation is requested, creates file.
+ If present, reads it to find out last checkpoint's LSN and last log, updates
+ the last_checkpoint_lsn and last_logno global variables.
+ Called at engine's start.
+
+ @note
+ The format of the control file is defined in the comments and defines
+ at the start of this file.
+
+ @note If in recovery, file is not created
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error (in which case the file is left closed)
+*/
+
+CONTROL_FILE_ERROR ma_control_file_create_or_open()
+{
+ uchar buffer[CF_MAX_SIZE];
+ char name[FN_REFLEN], errmsg_buff[256];
+ const char *errmsg;
+ MY_STAT stat_buff;
+ uint new_cf_create_time_size, new_cf_changeable_size, new_block_size;
+ uint retry;
+ int open_flags= O_BINARY | /*O_DIRECT |*/ O_RDWR;
+ int error= CONTROL_FILE_UNKNOWN_ERROR;
+ DBUG_ENTER("ma_control_file_create_or_open");
+
+ /*
+ If you change sizes in the #defines, you at least have to change the
+ "*store" and "*korr" calls in this file, and can even create backward
+ compatibility problems. Beware!
+ */
+ DBUG_ASSERT(CF_LSN_SIZE == (3+4));
+ DBUG_ASSERT(CF_FILENO_SIZE == 4);
+
+ if (control_file_fd >= 0) /* already open */
+ DBUG_RETURN(0);
+
+ if (fn_format(name, CONTROL_FILE_BASE_NAME,
+ maria_data_root, "", MYF(MY_WME)) == NullS)
+ DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);
+
+ if (my_access(name,F_OK))
+ DBUG_RETURN(create_control_file(name, open_flags));
+
+ /* Otherwise, file exists */
+
+ if ((control_file_fd= my_open(name, open_flags, MYF(MY_WME))) < 0)
+ {
+ errmsg= "Can't open file";
+ goto err;
+ }
+
+ if (my_stat(name, &stat_buff, MYF(0)) == NULL)
+ {
+ errmsg= "Can't read status";
+ goto err;
+ }
+
+ if ((uint) stat_buff.st_size < CF_MIN_SIZE)
+ {
+ /*
+ Given that normally we write only a sector and it's atomic, the only
+ possibility for a file to be of too short size is if we crashed at the
+ very first startup, between file creation and file write. Quite unlikely
+ (and can be made even more unlikely by doing this: create a temp file,
+ write it, and then rename it to be the control file).
+ What's more likely is if someone forgot to restore the control file,
+ just did a "touch control" to try to get Maria to start, or if the
+ disk/filesystem has a problem.
+ So let's be rigid.
+ */
+ error= CONTROL_FILE_TOO_SMALL;
+ errmsg= "Size of control file is smaller than expected";
+ goto err;
+ }
+
+ /* Check if control file is unexpectedly big */
+ if ((uint)stat_buff.st_size > CF_MAX_SIZE)
+ {
+ error= CONTROL_FILE_TOO_BIG;
+ errmsg= "File size bigger than expected";
+ goto err;
+ }
+
+ if (my_read(control_file_fd, buffer, stat_buff.st_size, MYF(MY_FNABP)))
+ {
+ errmsg= "Can't read file";
+ goto err;
+ }
+
+ if (memcmp(buffer + CF_MAGIC_STRING_OFFSET,
+ CF_MAGIC_STRING, CF_MAGIC_STRING_SIZE))
+ {
+ error= CONTROL_FILE_BAD_MAGIC_STRING;
+ errmsg= "Missing valid id at start of file. File is not a valid maria control file";
+ goto err;
+ }
+
+ if (buffer[CF_VERSION_OFFSET] > CONTROL_FILE_VERSION)
+ {
+ error= CONTROL_FILE_BAD_VERSION;
+ sprintf(errmsg_buff, "File is from a future maria system: %d. Current version is: %d",
+ (int) buffer[CF_VERSION_OFFSET], CONTROL_FILE_VERSION);
+ errmsg= errmsg_buff;
+ goto err;
+ }
+
+ new_cf_create_time_size= uint2korr(buffer + CF_CREATE_TIME_SIZE_OFFSET);
+ new_cf_changeable_size= uint2korr(buffer + CF_CHANGEABLE_SIZE_OFFSET);
+
+ if (new_cf_create_time_size < CF_MIN_CREATE_TIME_TOTAL_SIZE ||
+ new_cf_changeable_size < CF_MIN_CHANGEABLE_TOTAL_SIZE ||
+ new_cf_create_time_size + new_cf_changeable_size !=
+ stat_buff.st_size)
+ {
+ error= CONTROL_FILE_INCONSISTENT_INFORMATION;
+ errmsg= "Sizes stored in control file are inconsistent";
+ goto err;
+ }
+
+ new_block_size= uint2korr(buffer + CF_BLOCKSIZE_OFFSET);
+ if (new_block_size != maria_block_size)
+ {
+ error= CONTROL_FILE_WRONG_BLOCKSIZE;
+ sprintf(errmsg_buff,
+ "Block size in control file (%u) is different than given maria_block_size: %u",
+ new_block_size, (uint) maria_block_size);
+ errmsg= errmsg_buff;
+ goto err;
+ }
+
+ if (my_checksum(0, buffer, new_cf_create_time_size - CF_CHECKSUM_SIZE) !=
+ uint4korr(buffer + new_cf_create_time_size - CF_CHECKSUM_SIZE))
+ {
+ error= CONTROL_FILE_BAD_HEAD_CHECKSUM;
+ errmsg= "Fixed part checksum mismatch";
+ goto err;
+ }
+
+ if (my_checksum(0, buffer + new_cf_create_time_size + CF_CHECKSUM_SIZE,
+ new_cf_changeable_size - CF_CHECKSUM_SIZE) !=
+ uint4korr(buffer + new_cf_create_time_size))
+ {
+ error= CONTROL_FILE_BAD_CHECKSUM;
+ errmsg= "Changeable part (end of control file) checksum missmatch";
+ goto err;
+ }
+
+ memcpy(maria_uuid, buffer + CF_UUID_OFFSET, CF_UUID_SIZE);
+ cf_create_time_size= new_cf_create_time_size;
+ cf_changeable_size= new_cf_changeable_size;
+ last_checkpoint_lsn= lsn_korr(buffer + new_cf_create_time_size +
+ CF_LSN_OFFSET);
+ last_logno= uint4korr(buffer + new_cf_create_time_size + CF_FILENO_OFFSET);
+
+ retry= 0;
+
+ /*
+ We can't here use the automatic wait in my_lock() as the alarm thread
+ may not yet exists.
+ */
+
+ while (my_lock(control_file_fd, F_WRLCK, 0L, F_TO_EOF,
+ MYF(MY_SEEK_NOT_DONE | MY_FORCE_LOCK | MY_NO_WAIT)))
+ {
+ if (retry == 0)
+ my_printf_error(HA_ERR_INITIALIZATION,
+ "Can't lock maria control file '%s' for exclusive use, "
+ "error: %d. Will retry for %d seconds", 0,
+ name, my_errno, MARIA_MAX_CONTROL_FILE_LOCK_RETRY);
+ if (retry++ > MARIA_MAX_CONTROL_FILE_LOCK_RETRY)
+ {
+ errmsg= "Could not get an exclusive lock; File is probably in use by another process";
+ goto err;
+ }
+ sleep(1);
+ }
+
+ DBUG_RETURN(0);
+
+err:
+ my_printf_error(HA_ERR_INITIALIZATION,
+ "Error when trying to use maria control file '%s': %s", 0,
+ name, errmsg);
+ ma_control_file_end();
+ DBUG_RETURN(error);
+}
+
+
+/*
+ Write information durably to the control file; stores this information into
+ the last_checkpoint_lsn and last_logno global variables.
+ Called when we have created a new log (after syncing this log's creation)
+ and when we have written a checkpoint (after syncing this log record).
+ Variables last_checkpoint_lsn and last_logno must be protected by caller
+ using log's lock, unless this function is called at startup.
+
+ SYNOPSIS
+ ma_control_file_write_and_force()
+ checkpoint_lsn LSN of last checkpoint
+ logno last log file number
+ objs_to_write which of the arguments should be used as new values
+ (for example, CF_UPDATE_ONLY_LSN will not
+ write the logno argument to the control file and will
+ not update the last_logno global variable); can be:
+ CF_UPDATE_ALL
+ CF_UPDATE_ONLY_LSN
+ CF_UPDATE_ONLY_LOGNO.
+
+ NOTE
+ We always want to do one single my_pwrite() here to be as atomic as
+ possible.
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno,
+ uint objs_to_write)
+{
+ char buffer[CF_MAX_SIZE];
+ my_bool update_checkpoint_lsn= FALSE, update_logno= FALSE;
+ uint32 sum;
+ DBUG_ENTER("ma_control_file_write_and_force");
+
+ DBUG_ASSERT(control_file_fd >= 0); /* must be open */
+#ifndef DBUG_OFF
+ if (maria_multi_threaded)
+ translog_lock_assert_owner();
+#endif
+
+ if (objs_to_write == CONTROL_FILE_UPDATE_ONLY_LSN)
+ update_checkpoint_lsn= TRUE;
+ else if (objs_to_write == CONTROL_FILE_UPDATE_ONLY_LOGNO)
+ update_logno= TRUE;
+ else if (objs_to_write == CONTROL_FILE_UPDATE_ALL)
+ update_checkpoint_lsn= update_logno= TRUE;
+ else /* incorrect value of objs_to_write */
+ DBUG_ASSERT(0);
+
+ if (update_checkpoint_lsn)
+ lsn_store(buffer + CF_LSN_OFFSET, checkpoint_lsn);
+ else /* store old value == change nothing */
+ lsn_store(buffer + CF_LSN_OFFSET, last_checkpoint_lsn);
+
+ if (update_logno)
+ int4store(buffer + CF_FILENO_OFFSET, logno);
+ else
+ int4store(buffer + CF_FILENO_OFFSET, last_logno);
+
+ /*
+ Clear unknown part of changeable part.
+ Other option would be to remember the original values in the file
+ and copy them here, but this should be safer.
+ */
+ bzero(buffer + CF_CHANGEABLE_TOTAL_SIZE,
+ cf_changeable_size - CF_CHANGEABLE_TOTAL_SIZE);
+
+ /* Checksum is stored first */
+ compile_time_assert(CF_CHECKSUM_OFFSET == 0);
+ sum= my_checksum(0, buffer + CF_CHECKSUM_SIZE,
+ cf_changeable_size - CF_CHECKSUM_SIZE);
+ int4store(buffer, sum);
+
+ if (my_pwrite(control_file_fd, buffer, cf_changeable_size,
+ cf_create_time_size, MYF(MY_FNABP | MY_WME)) ||
+ my_sync(control_file_fd, MYF(MY_WME)))
+ DBUG_RETURN(1);
+
+ if (update_checkpoint_lsn)
+ last_checkpoint_lsn= checkpoint_lsn;
+ if (update_logno)
+ last_logno= logno;
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Free resources taken by control file subsystem
+
+ SYNOPSIS
+ ma_control_file_end()
+*/
+
+int ma_control_file_end()
+{
+ int close_error;
+ DBUG_ENTER("ma_control_file_end");
+
+ if (control_file_fd < 0) /* already closed */
+ DBUG_RETURN(0);
+
+ (void) my_lock(control_file_fd, F_UNLCK, 0L, F_TO_EOF,
+ MYF(MY_SEEK_NOT_DONE | MY_FORCE_LOCK));
+
+ close_error= my_close(control_file_fd, MYF(MY_WME));
+ /*
+ As my_close() frees structures even if close() fails, we do the same,
+ i.e. we mark the file as closed in all cases.
+ */
+ control_file_fd= -1;
+ /*
+ As this module owns these variables, closing the module forbids access to
+ them (just a safety):
+ */
+ last_checkpoint_lsn= LSN_IMPOSSIBLE;
+ last_logno= FILENO_IMPOSSIBLE;
+
+ DBUG_RETURN(close_error);
+}
+
+#endif /* EXTRACT_DEFINITIONS */
diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h
new file mode 100644
index 00000000000..e051ac01b28
--- /dev/null
+++ b/storage/maria/ma_control_file.h
@@ -0,0 +1,90 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ WL#3234 Maria control file
+ First version written by Guilhem Bichot on 2006-04-27.
+*/
+
+#ifndef _ma_control_file_h
+#define _ma_control_file_h
+
+#define CONTROL_FILE_BASE_NAME "maria_log_control"
+/*
+ Major version for control file. Should only be changed when doing
+ big changes that made the new control file incompatible with all
+ older versions of Maria.
+*/
+#define CONTROL_FILE_VERSION 1
+
+/* Here is the interface of this module */
+
+/*
+ LSN of the last checkoint
+ (if last_checkpoint_lsn == LSN_IMPOSSIBLE then there was never a checkpoint)
+*/
+extern LSN last_checkpoint_lsn;
+/*
+ Last log number (if last_logno == FILENO_IMPOSSIBLE then there is no log
+ file yet)
+*/
+extern uint32 last_logno;
+
+extern my_bool maria_multi_threaded, maria_in_recovery;
+
+typedef enum enum_control_file_error {
+ CONTROL_FILE_OK= 0,
+ CONTROL_FILE_TOO_SMALL,
+ CONTROL_FILE_TOO_BIG,
+ CONTROL_FILE_BAD_MAGIC_STRING,
+ CONTROL_FILE_BAD_VERSION,
+ CONTROL_FILE_BAD_CHECKSUM,
+ CONTROL_FILE_BAD_HEAD_CHECKSUM,
+ CONTROL_FILE_MISSING,
+ CONTROL_FILE_INCONSISTENT_INFORMATION,
+ CONTROL_FILE_WRONG_BLOCKSIZE,
+ CONTROL_FILE_UNKNOWN_ERROR /* any other error */
+} CONTROL_FILE_ERROR;
+
+#define CONTROL_FILE_UPDATE_ALL 0
+#define CONTROL_FILE_UPDATE_ONLY_LSN 1
+#define CONTROL_FILE_UPDATE_ONLY_LOGNO 2
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ Looks for the control file. If none and creation was requested, creates file.
+ If present, reads it to find out last checkpoint's LSN and last log.
+ Called at engine's start.
+*/
+CONTROL_FILE_ERROR ma_control_file_create_or_open();
+/*
+ Write information durably to the control file.
+ Called when we have created a new log (after syncing this log's creation)
+ and when we have written a checkpoint (after syncing this log record).
+*/
+int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno,
+ uint objs_to_write);
+
+
+/* Free resources taken by control file subsystem */
+int ma_control_file_end();
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c
new file mode 100644
index 00000000000..767242ec027
--- /dev/null
+++ b/storage/maria/ma_create.c
@@ -0,0 +1,1344 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Create a MARIA table */
+
+#include "ma_ftdefs.h"
+#include "ma_sp_defs.h"
+#include <my_bit.h>
+#include "ma_blockrec.h"
+#include "trnman_public.h"
+
+#if defined(MSDOS) || defined(__WIN__)
+#ifdef __WIN__
+#include <fcntl.h>
+#else
+#include <process.h> /* Prototype for getpid */
+#endif
+#endif
+#include <m_ctype.h>
+
+static int compare_columns(MARIA_COLUMNDEF **a, MARIA_COLUMNDEF **b);
+
+/*
+ Old options is used when recreating database, from maria_chk
+*/
+
+int maria_create(const char *name, enum data_file_type datafile_type,
+ uint keys,MARIA_KEYDEF *keydefs,
+ uint columns, MARIA_COLUMNDEF *columndef,
+ uint uniques, MARIA_UNIQUEDEF *uniquedefs,
+ MARIA_CREATE_INFO *ci,uint flags)
+{
+ register uint i,j;
+ File dfile,file;
+ int errpos,save_errno, create_mode= O_RDWR | O_TRUNC, res;
+ myf create_flag;
+ uint length,max_key_length,packed,pack_bytes,pointer,real_length_diff,
+ key_length,info_length,key_segs,options,min_key_length,
+ base_pos,long_varchar_count,varchar_length,
+ unique_key_parts,fulltext_keys,offset, not_block_record_extra_length;
+ uint max_field_lengths, extra_header_size, column_nr;
+ ulong reclength, real_reclength,min_pack_length;
+ char filename[FN_REFLEN], linkname[FN_REFLEN], *linkname_ptr;
+ ulong pack_reclength;
+ ulonglong tot_length,max_rows, tmp;
+ enum en_fieldtype type;
+ enum data_file_type org_datafile_type= datafile_type;
+ MARIA_SHARE share;
+ MARIA_KEYDEF *keydef,tmp_keydef;
+ MARIA_UNIQUEDEF *uniquedef;
+ HA_KEYSEG *keyseg,tmp_keyseg;
+ MARIA_COLUMNDEF *column, *end_column;
+ double *rec_per_key_part;
+ ulong *nulls_per_key_part;
+ uint16 *column_array;
+ my_off_t key_root[HA_MAX_POSSIBLE_KEY], kfile_size_before_extension;
+ MARIA_CREATE_INFO tmp_create_info;
+ my_bool tmp_table= FALSE; /* cache for presence of HA_OPTION_TMP_TABLE */
+ my_bool forced_packed;
+ myf sync_dir= 0;
+ uchar *log_data= NULL;
+ DBUG_ENTER("maria_create");
+ DBUG_PRINT("enter", ("keys: %u columns: %u uniques: %u flags: %u",
+ keys, columns, uniques, flags));
+
+ DBUG_ASSERT(maria_block_size && maria_block_size % IO_SIZE == 0);
+ LINT_INIT(dfile);
+ LINT_INIT(file);
+
+ if (!ci)
+ {
+ bzero((char*) &tmp_create_info,sizeof(tmp_create_info));
+ ci=&tmp_create_info;
+ }
+
+ if (keys + uniques > MARIA_MAX_KEY || columns == 0)
+ {
+ DBUG_RETURN(my_errno=HA_WRONG_CREATE_OPTION);
+ }
+ errpos=0;
+ options=0;
+ bzero((uchar*) &share,sizeof(share));
+
+ if (flags & HA_DONT_TOUCH_DATA)
+ {
+ org_datafile_type= ci->org_data_file_type;
+ if (!(ci->old_options & HA_OPTION_TEMP_COMPRESS_RECORD))
+ options=ci->old_options &
+ (HA_OPTION_COMPRESS_RECORD | HA_OPTION_PACK_RECORD |
+ HA_OPTION_READ_ONLY_DATA | HA_OPTION_CHECKSUM |
+ HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE);
+ else
+ {
+ /* Uncompressing rows */
+ options=ci->old_options &
+ (HA_OPTION_CHECKSUM | HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE);
+ }
+ }
+
+ if (ci->reloc_rows > ci->max_rows)
+ ci->reloc_rows=ci->max_rows; /* Check if wrong parameter */
+
+ if (!(rec_per_key_part=
+ (double*) my_malloc((keys + uniques)*HA_MAX_KEY_SEG*sizeof(double) +
+ (keys + uniques)*HA_MAX_KEY_SEG*sizeof(ulong) +
+ sizeof(uint16) * columns,
+ MYF(MY_WME | MY_ZEROFILL))))
+ DBUG_RETURN(my_errno);
+ nulls_per_key_part= (ulong*) (rec_per_key_part +
+ (keys + uniques) * HA_MAX_KEY_SEG);
+ column_array= (uint16*) (nulls_per_key_part +
+ (keys + uniques) * HA_MAX_KEY_SEG);
+
+
+ /* Start by checking fields and field-types used */
+
+ varchar_length=long_varchar_count=packed= not_block_record_extra_length=
+ pack_reclength= max_field_lengths= 0;
+ reclength= min_pack_length= ci->null_bytes;
+ forced_packed= 0;
+ column_nr= 0;
+
+ for (column= columndef, end_column= column + columns ;
+ column != end_column ;
+ column++)
+ {
+ /* Fill in not used struct parts */
+ column->column_nr= column_nr++;
+ column->offset= reclength;
+ column->empty_pos= 0;
+ column->empty_bit= 0;
+ column->fill_length= column->length;
+ if (column->null_bit)
+ options|= HA_OPTION_NULL_FIELDS;
+
+ reclength+= column->length;
+ type= column->type;
+ if (datafile_type == BLOCK_RECORD)
+ {
+ if (type == FIELD_SKIP_PRESPACE)
+ type= FIELD_NORMAL; /* SKIP_PRESPACE not supported */
+ if (type == FIELD_NORMAL &&
+ column->length > FULL_PAGE_SIZE(maria_block_size))
+ {
+ /* FIELD_NORMAL can't be split over many blocks, convert to a CHAR */
+ type= column->type= FIELD_SKIP_ENDSPACE;
+ }
+ }
+
+ if (type != FIELD_NORMAL && type != FIELD_CHECK)
+ {
+ column->empty_pos= packed/8;
+ column->empty_bit= (1 << (packed & 7));
+ if (type == FIELD_BLOB)
+ {
+ forced_packed= 1;
+ packed++;
+ share.base.blobs++;
+ if (pack_reclength != INT_MAX32)
+ {
+ if (column->length == 4+portable_sizeof_char_ptr)
+ pack_reclength= INT_MAX32;
+ else
+ {
+ /* Add max possible blob length */
+ pack_reclength+= (1 << ((column->length-
+ portable_sizeof_char_ptr)*8));
+ }
+ }
+ max_field_lengths+= (column->length - portable_sizeof_char_ptr);
+ }
+ else if (type == FIELD_SKIP_PRESPACE ||
+ type == FIELD_SKIP_ENDSPACE)
+ {
+ forced_packed= 1;
+ max_field_lengths+= column->length > 255 ? 2 : 1;
+ not_block_record_extra_length++;
+ packed++;
+ }
+ else if (type == FIELD_VARCHAR)
+ {
+ varchar_length+= column->length-1; /* Used for min_pack_length */
+ pack_reclength++;
+ not_block_record_extra_length++;
+ max_field_lengths++;
+ packed++;
+ column->fill_length= 1;
+ options|= HA_OPTION_NULL_FIELDS; /* Use ma_checksum() */
+
+ /* We must test for 257 as length includes pack-length */
+ if (test(column->length >= 257))
+ {
+ long_varchar_count++;
+ max_field_lengths++;
+ column->fill_length= 2;
+ }
+ }
+ else if (type == FIELD_SKIP_ZERO)
+ packed++;
+ else
+ {
+ if (!column->null_bit)
+ min_pack_length+= column->length;
+ else
+ {
+ /* Only BLOCK_RECORD skips NULL fields for all field values */
+ not_block_record_extra_length+= column->length;
+ }
+ column->empty_pos= 0;
+ column->empty_bit= 0;
+ }
+ }
+ else /* FIELD_NORMAL */
+ {
+ if (!column->null_bit)
+ {
+ min_pack_length+= column->length;
+ share.base.fixed_not_null_fields++;
+ share.base.fixed_not_null_fields_length+= column->length;
+ }
+ else
+ not_block_record_extra_length+= column->length;
+ }
+ }
+
+ if (datafile_type == STATIC_RECORD && forced_packed)
+ {
+ /* Can't use fixed length records, revert to block records */
+ datafile_type= BLOCK_RECORD;
+ }
+
+ if (datafile_type == DYNAMIC_RECORD)
+ options|= HA_OPTION_PACK_RECORD; /* Must use packed records */
+
+ if (datafile_type == STATIC_RECORD)
+ {
+ /* We can't use checksum with static length rows */
+ flags&= ~HA_CREATE_CHECKSUM;
+ options&= ~HA_OPTION_CHECKSUM;
+ min_pack_length= reclength;
+ packed= 0;
+ }
+ else if (datafile_type != BLOCK_RECORD)
+ min_pack_length+= not_block_record_extra_length;
+ else
+ min_pack_length+= 5; /* Min row overhead */
+
+ if ((packed & 7) == 1)
+ {
+ /*
+ Not optimal packing, try to remove a 1 uchar length zero-field as
+ this will get same record length, but smaller pack overhead
+ */
+ while (column != columndef)
+ {
+ column--;
+ if (column->type == (int) FIELD_SKIP_ZERO && column->length == 1)
+ {
+ /*
+ NOTE1: here we change a field type FIELD_SKIP_ZERO ->
+ FIELD_NORMAL
+ */
+ column->type=(int) FIELD_NORMAL;
+ column->empty_pos= 0;
+ column->empty_bit= 0;
+ packed--;
+ min_pack_length++;
+ break;
+ }
+ }
+ }
+
+ if (flags & HA_CREATE_TMP_TABLE)
+ {
+ options|= HA_OPTION_TMP_TABLE;
+ tmp_table= TRUE;
+ create_mode|= O_NOFOLLOW;
+ /* "CREATE TEMPORARY" tables are not crash-safe (dropped at restart) */
+ ci->transactional= FALSE;
+ flags&= ~HA_CREATE_PAGE_CHECKSUM;
+ }
+ share.base.null_bytes= ci->null_bytes;
+ share.base.original_null_bytes= ci->null_bytes;
+ share.base.born_transactional= ci->transactional;
+ share.base.max_field_lengths= max_field_lengths;
+ share.base.field_offsets= 0; /* for future */
+
+ if (pack_reclength != INT_MAX32)
+ pack_reclength+= max_field_lengths + long_varchar_count;
+
+ if (flags & HA_CREATE_CHECKSUM || (options & HA_OPTION_CHECKSUM))
+ {
+ options|= HA_OPTION_CHECKSUM;
+ min_pack_length++;
+ pack_reclength++;
+ }
+ if (flags & HA_CREATE_DELAY_KEY_WRITE)
+ options|= HA_OPTION_DELAY_KEY_WRITE;
+ if (flags & HA_CREATE_RELIES_ON_SQL_LAYER)
+ options|= HA_OPTION_RELIES_ON_SQL_LAYER;
+ if (flags & HA_CREATE_PAGE_CHECKSUM)
+ options|= HA_OPTION_PAGE_CHECKSUM;
+
+ pack_bytes= (packed + 7) / 8;
+ if (pack_reclength != INT_MAX32)
+ pack_reclength+= reclength+pack_bytes +
+ test(test_all_bits(options, HA_OPTION_CHECKSUM | HA_PACK_RECORD));
+ min_pack_length+= pack_bytes;
+ /* Calculate min possible row length for rows-in-block */
+ extra_header_size= MAX_FIXED_HEADER_SIZE;
+ if (ci->transactional)
+ {
+ extra_header_size= TRANS_MAX_FIXED_HEADER_SIZE;
+ DBUG_PRINT("info",("creating a transactional table"));
+ }
+ share.base.min_block_length= (extra_header_size + share.base.null_bytes +
+ pack_bytes);
+ if (!ci->data_file_length && ci->max_rows)
+ {
+ if (pack_reclength == INT_MAX32 ||
+ (~(ulonglong) 0)/ci->max_rows < (ulonglong) pack_reclength)
+ ci->data_file_length= ~(ulonglong) 0;
+ else
+ ci->data_file_length=(ulonglong) ci->max_rows*pack_reclength;
+ }
+ else if (!ci->max_rows)
+ {
+ if (datafile_type == BLOCK_RECORD)
+ {
+ uint rows_per_page= ((maria_block_size - PAGE_OVERHEAD_SIZE) /
+ (min_pack_length + extra_header_size +
+ DIR_ENTRY_SIZE));
+ ulonglong data_file_length= ci->data_file_length;
+ if (!data_file_length)
+ data_file_length= ((((ulonglong) 1 << ((BLOCK_RECORD_POINTER_SIZE-1) *
+ 8)) -1) * maria_block_size);
+ if (rows_per_page > 0)
+ {
+ set_if_smaller(rows_per_page, MAX_ROWS_PER_PAGE);
+ ci->max_rows= data_file_length / maria_block_size * rows_per_page;
+ }
+ else
+ ci->max_rows= data_file_length / (min_pack_length +
+ extra_header_size +
+ DIR_ENTRY_SIZE);
+ }
+ else
+ ci->max_rows=(ha_rows) (ci->data_file_length/(min_pack_length +
+ ((options &
+ HA_OPTION_PACK_RECORD) ?
+ 3 : 0)));
+ }
+ max_rows= (ulonglong) ci->max_rows;
+ if (datafile_type == BLOCK_RECORD)
+ {
+ /*
+ The + 1 is for record position withing page
+ The / 2 is because we need one bit for knowing if there is transid's
+ after the row pointer
+ */
+ pointer= maria_get_pointer_length((ci->data_file_length /
+ (maria_block_size * 2)), 3) + 1;
+ set_if_smaller(pointer, BLOCK_RECORD_POINTER_SIZE);
+
+ if (!max_rows)
+ max_rows= (((((ulonglong) 1 << ((pointer-1)*8)) -1) * maria_block_size) /
+ min_pack_length / 2);
+ }
+ else
+ {
+ if (datafile_type != STATIC_RECORD)
+ pointer= maria_get_pointer_length(ci->data_file_length,
+ maria_data_pointer_size);
+ else
+ pointer= maria_get_pointer_length(ci->max_rows, maria_data_pointer_size);
+ if (!max_rows)
+ max_rows= ((((ulonglong) 1 << (pointer*8)) -1) / min_pack_length);
+ }
+
+ real_reclength=reclength;
+ if (datafile_type == STATIC_RECORD)
+ {
+ if (reclength <= pointer)
+ reclength=pointer+1; /* reserve place for delete link */
+ }
+ else
+ reclength+= long_varchar_count; /* We need space for varchar! */
+
+ max_key_length=0; tot_length=0 ; key_segs=0;
+ fulltext_keys=0;
+ share.state.rec_per_key_part= rec_per_key_part;
+ share.state.nulls_per_key_part= nulls_per_key_part;
+ share.state.key_root=key_root;
+ share.state.key_del= HA_OFFSET_ERROR;
+ if (uniques)
+ max_key_length= MARIA_UNIQUE_HASH_LENGTH + pointer;
+
+ for (i=0, keydef=keydefs ; i < keys ; i++ , keydef++)
+ {
+ share.state.key_root[i]= HA_OFFSET_ERROR;
+ length= real_length_diff= 0;
+ min_key_length= key_length= pointer;
+
+ if ((keydef->flag & (HA_SPATIAL | HA_FULLTEXT) &&
+ ci->transactional))
+ {
+ my_errno= HA_ERR_UNSUPPORTED;
+ my_message(HA_ERR_UNSUPPORTED,
+ "Maria can't yet handle SPATIAL or FULLTEXT keys in "
+ "transactional mode. For now use TRANSACTIONAL=0", MYF(0));
+ goto err_no_lock;
+ }
+
+ if (keydef->flag & HA_SPATIAL)
+ {
+#ifdef HAVE_SPATIAL
+ /* BAR TODO to support 3D and more dimensions in the future */
+ uint sp_segs=SPDIMS*2;
+ keydef->flag=HA_SPATIAL;
+
+ if (flags & HA_DONT_TOUCH_DATA)
+ {
+ /*
+ Called by maria_chk - i.e. table structure was taken from
+ MYI file and SPATIAL key *does have* additional sp_segs keysegs.
+ keydef->seg here points right at the GEOMETRY segment,
+ so we only need to decrease keydef->keysegs.
+ (see maria_recreate_table() in _ma_check.c)
+ */
+ keydef->keysegs-=sp_segs-1;
+ }
+
+ for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ;
+ j++, keyseg++)
+ {
+ if (keyseg->type != HA_KEYTYPE_BINARY &&
+ keyseg->type != HA_KEYTYPE_VARBINARY1 &&
+ keyseg->type != HA_KEYTYPE_VARBINARY2)
+ {
+ my_errno=HA_WRONG_CREATE_OPTION;
+ goto err_no_lock;
+ }
+ }
+ keydef->keysegs+=sp_segs;
+ key_length+=SPLEN*sp_segs;
+ length++; /* At least one length uchar */
+ min_key_length++;
+#else
+ my_errno= HA_ERR_UNSUPPORTED;
+ goto err_no_lock;
+#endif /*HAVE_SPATIAL*/
+ }
+ else if (keydef->flag & HA_FULLTEXT)
+ {
+ keydef->flag=HA_FULLTEXT | HA_PACK_KEY | HA_VAR_LENGTH_KEY;
+ options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
+
+ for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ;
+ j++, keyseg++)
+ {
+ if (keyseg->type != HA_KEYTYPE_TEXT &&
+ keyseg->type != HA_KEYTYPE_VARTEXT1 &&
+ keyseg->type != HA_KEYTYPE_VARTEXT2)
+ {
+ my_errno=HA_WRONG_CREATE_OPTION;
+ goto err_no_lock;
+ }
+ if (!(keyseg->flag & HA_BLOB_PART) &&
+ (keyseg->type == HA_KEYTYPE_VARTEXT1 ||
+ keyseg->type == HA_KEYTYPE_VARTEXT2))
+ {
+ /* Make a flag that this is a VARCHAR */
+ keyseg->flag|= HA_VAR_LENGTH_PART;
+ /* Store in bit_start number of bytes used to pack the length */
+ keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1)?
+ 1 : 2);
+ }
+ }
+
+ fulltext_keys++;
+ key_length+= HA_FT_MAXBYTELEN+HA_FT_WLEN;
+ length++; /* At least one length uchar */
+ min_key_length+= 1 + HA_FT_WLEN;
+ real_length_diff=HA_FT_MAXBYTELEN-FT_MAX_WORD_LEN_FOR_SORT;
+ }
+ else
+ {
+ /* Test if prefix compression */
+ if (keydef->flag & HA_PACK_KEY)
+ {
+ /* Can't use space_compression on number keys */
+ if ((keydef->seg[0].flag & HA_SPACE_PACK) &&
+ keydef->seg[0].type == (int) HA_KEYTYPE_NUM)
+ keydef->seg[0].flag&= ~HA_SPACE_PACK;
+
+ /* Only use HA_PACK_KEY when first segment is a variable length key */
+ if (!(keydef->seg[0].flag & (HA_SPACE_PACK | HA_BLOB_PART |
+ HA_VAR_LENGTH_PART)))
+ {
+ /* pack relative to previous key */
+ keydef->flag&= ~HA_PACK_KEY;
+ keydef->flag|= HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY;
+ }
+ else
+ {
+ keydef->seg[0].flag|=HA_PACK_KEY; /* for easyer intern test */
+ keydef->flag|=HA_VAR_LENGTH_KEY;
+ options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
+ }
+ }
+ if (keydef->flag & HA_BINARY_PACK_KEY)
+ options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
+
+ if (keydef->flag & HA_AUTO_KEY && ci->with_auto_increment)
+ share.base.auto_key=i+1;
+ for (j=0, keyseg=keydef->seg ; j < keydef->keysegs ; j++, keyseg++)
+ {
+ /* numbers are stored with high by first to make compression easier */
+ switch (keyseg->type) {
+ case HA_KEYTYPE_SHORT_INT:
+ case HA_KEYTYPE_LONG_INT:
+ case HA_KEYTYPE_FLOAT:
+ case HA_KEYTYPE_DOUBLE:
+ case HA_KEYTYPE_USHORT_INT:
+ case HA_KEYTYPE_ULONG_INT:
+ case HA_KEYTYPE_LONGLONG:
+ case HA_KEYTYPE_ULONGLONG:
+ case HA_KEYTYPE_INT24:
+ case HA_KEYTYPE_UINT24:
+ case HA_KEYTYPE_INT8:
+ keyseg->flag|= HA_SWAP_KEY;
+ break;
+ case HA_KEYTYPE_VARTEXT1:
+ case HA_KEYTYPE_VARTEXT2:
+ case HA_KEYTYPE_VARBINARY1:
+ case HA_KEYTYPE_VARBINARY2:
+ if (!(keyseg->flag & HA_BLOB_PART))
+ {
+ /* Make a flag that this is a VARCHAR */
+ keyseg->flag|= HA_VAR_LENGTH_PART;
+ /* Store in bit_start number of bytes used to pack the length */
+ keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 ||
+ keyseg->type == HA_KEYTYPE_VARBINARY1) ?
+ 1 : 2);
+ }
+ break;
+ default:
+ break;
+ }
+ if (keyseg->flag & HA_SPACE_PACK)
+ {
+ DBUG_ASSERT(!(keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)));
+ keydef->flag |= HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY;
+ options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
+ length++; /* At least one length uchar */
+ min_key_length++;
+ key_length+= keyseg->length;
+ if (keyseg->length >= 255)
+ {
+ /* prefix may be 3 bytes */
+ length+= 2;
+ }
+ }
+ else if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART))
+ {
+ DBUG_ASSERT(!test_all_bits(keyseg->flag,
+ (HA_VAR_LENGTH_PART | HA_BLOB_PART)));
+ keydef->flag|=HA_VAR_LENGTH_KEY;
+ length++; /* At least one length uchar */
+ min_key_length++;
+ options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
+ key_length+= keyseg->length;
+ if (keyseg->length >= 255)
+ {
+ /* prefix may be 3 bytes */
+ length+= 2;
+ }
+ }
+ else
+ {
+ key_length+= keyseg->length;
+ if (!keyseg->null_bit)
+ min_key_length+= keyseg->length;
+ }
+ if (keyseg->null_bit)
+ {
+ key_length++;
+ /* min key part is 1 byte */
+ min_key_length++;
+ options|=HA_OPTION_PACK_KEYS;
+ keyseg->flag|=HA_NULL_PART;
+ keydef->flag|=HA_VAR_LENGTH_KEY | HA_NULL_PART_KEY;
+ }
+ }
+ } /* if HA_FULLTEXT */
+ key_segs+=keydef->keysegs;
+ if (keydef->keysegs > HA_MAX_KEY_SEG)
+ {
+ my_errno=HA_WRONG_CREATE_OPTION;
+ goto err_no_lock;
+ }
+ /*
+ key_segs may be 0 in the case when we only want to be able to
+ add on row into the table. This can happen with some DISTINCT queries
+ in MySQL
+ */
+ if ((keydef->flag & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME &&
+ key_segs)
+ share.state.rec_per_key_part[key_segs-1]=1L;
+ length+=key_length;
+ /*
+ A key can't be longer than than half a index block (as we have
+ to be able to put at least 2 keys on an index block for the key
+ algorithms to work).
+ */
+ if (length > maria_max_key_length())
+ {
+ my_errno=HA_WRONG_CREATE_OPTION;
+ goto err_no_lock;
+ }
+ keydef->block_length= maria_block_size;
+ keydef->keylength= (uint16) key_length;
+ keydef->minlength= (uint16) min_key_length;
+ keydef->maxlength= (uint16) length;
+
+ if (length > max_key_length)
+ max_key_length= length;
+ tot_length+= ((max_rows/(ulong) (((uint) maria_block_size-5)/
+ (length*2))) *
+ maria_block_size);
+ }
+
+ unique_key_parts=0;
+ for (i=0, uniquedef=uniquedefs ; i < uniques ; i++ , uniquedef++)
+ {
+ uniquedef->key=keys+i;
+ unique_key_parts+=uniquedef->keysegs;
+ share.state.key_root[keys+i]= HA_OFFSET_ERROR;
+ tot_length+= (max_rows/(ulong) (((uint) maria_block_size-5)/
+ ((MARIA_UNIQUE_HASH_LENGTH + pointer)*2)))*
+ (ulong) maria_block_size;
+ }
+ keys+=uniques; /* Each unique has 1 key */
+ key_segs+=uniques; /* Each unique has 1 key seg */
+
+ base_pos=(MARIA_STATE_INFO_SIZE + keys * MARIA_STATE_KEY_SIZE +
+ key_segs * MARIA_STATE_KEYSEG_SIZE);
+ info_length= base_pos+(uint) (MARIA_BASE_INFO_SIZE+
+ keys * MARIA_KEYDEF_SIZE+
+ uniques * MARIA_UNIQUEDEF_SIZE +
+ (key_segs + unique_key_parts)*HA_KEYSEG_SIZE+
+ columns*(MARIA_COLUMNDEF_SIZE + 2));
+
+ DBUG_PRINT("info", ("info_length: %u", info_length));
+ /* There are only 16 bits for the total header length. */
+ if (info_length > 65535)
+ {
+ my_printf_error(0, "Maria table '%s' has too many columns and/or "
+ "indexes and/or unique constraints.",
+ MYF(0), name + dirname_length(name));
+ my_errno= HA_WRONG_CREATE_OPTION;
+ goto err_no_lock;
+ }
+
+ bmove(share.state.header.file_version,(uchar*) maria_file_magic,4);
+ ci->old_options=options | (ci->old_options & HA_OPTION_TEMP_COMPRESS_RECORD ?
+ HA_OPTION_COMPRESS_RECORD |
+ HA_OPTION_TEMP_COMPRESS_RECORD: 0);
+ mi_int2store(share.state.header.options,ci->old_options);
+ mi_int2store(share.state.header.header_length,info_length);
+ mi_int2store(share.state.header.state_info_length,MARIA_STATE_INFO_SIZE);
+ mi_int2store(share.state.header.base_info_length,MARIA_BASE_INFO_SIZE);
+ mi_int2store(share.state.header.base_pos,base_pos);
+ share.state.header.data_file_type= share.data_file_type= datafile_type;
+ share.state.header.org_data_file_type= org_datafile_type;
+ share.state.header.language= (ci->language ?
+ ci->language : default_charset_info->number);
+
+ share.state.dellink = HA_OFFSET_ERROR;
+ share.state.first_bitmap_with_space= 0;
+#ifdef EXTERNAL_LOCKING
+ share.state.process= (ulong) getpid();
+#endif
+ share.state.version= (ulong) time((time_t*) 0);
+ share.state.sortkey= (ushort) ~0;
+ share.state.auto_increment=ci->auto_increment;
+ share.options=options;
+ share.base.rec_reflength=pointer;
+ share.base.block_size= maria_block_size;
+
+ /* Get estimate for index file length (this may be wrong for FT keys) */
+ tmp= (tot_length + maria_block_size * keys *
+ MARIA_INDEX_BLOCK_MARGIN) / maria_block_size;
+ /*
+ use maximum of key_file_length we calculated and key_file_length value we
+ got from MYI file header (see also mariapack.c:save_state)
+ */
+ share.base.key_reflength=
+ maria_get_pointer_length(max(ci->key_file_length,tmp),3);
+ share.base.keys= share.state.header.keys= keys;
+ share.state.header.uniques= uniques;
+ share.state.header.fulltext_keys= fulltext_keys;
+ mi_int2store(share.state.header.key_parts,key_segs);
+ mi_int2store(share.state.header.unique_key_parts,unique_key_parts);
+
+ maria_set_all_keys_active(share.state.key_map, keys);
+
+ share.base.keystart = share.state.state.key_file_length=
+ MY_ALIGN(info_length, maria_block_size);
+ share.base.max_key_block_length= maria_block_size;
+ share.base.max_key_length=ALIGN_SIZE(max_key_length+4);
+ share.base.records=ci->max_rows;
+ share.base.reloc= ci->reloc_rows;
+ share.base.reclength=real_reclength;
+ share.base.pack_reclength=reclength+ test(options & HA_OPTION_CHECKSUM);
+ share.base.max_pack_length=pack_reclength;
+ share.base.min_pack_length=min_pack_length;
+ share.base.pack_bytes= pack_bytes;
+ share.base.fields= columns;
+ share.base.pack_fields= packed;
+
+ if (share.data_file_type == BLOCK_RECORD)
+ {
+ /*
+ we are going to create a first bitmap page, set data_file_length
+ to reflect this, before the state goes to disk
+ */
+ share.state.state.data_file_length= maria_block_size;
+ /* Add length of packed fields + length */
+ share.base.pack_reclength+= share.base.max_field_lengths+3;
+
+ }
+
+ /* max_data_file_length and max_key_file_length are recalculated on open */
+ if (tmp_table)
+ share.base.max_data_file_length= (my_off_t) ci->data_file_length;
+ else if (ci->transactional && translog_status == TRANSLOG_OK &&
+ !maria_in_recovery)
+ {
+ /*
+ we have checked translog_inited above, because maria_chk may call us
+ (via maria_recreate_table()) and it does not have a log.
+ */
+ sync_dir= MY_SYNC_DIR;
+ }
+
+ if (datafile_type == DYNAMIC_RECORD)
+ {
+ share.base.min_block_length=
+ (share.base.pack_reclength+3 < MARIA_EXTEND_BLOCK_LENGTH &&
+ ! share.base.blobs) ?
+ max(share.base.pack_reclength,MARIA_MIN_BLOCK_LENGTH) :
+ MARIA_EXTEND_BLOCK_LENGTH;
+ }
+ else if (datafile_type == STATIC_RECORD)
+ share.base.min_block_length= share.base.pack_reclength;
+
+ if (! (flags & HA_DONT_TOUCH_DATA))
+ share.state.create_time= (long) time((time_t*) 0);
+
+ pthread_mutex_lock(&THR_LOCK_maria);
+
+ /*
+ NOTE: For test_if_reopen() we need a real path name. Hence we need
+ MY_RETURN_REAL_PATH for every fn_format(filename, ...).
+ */
+ if (ci->index_file_name)
+ {
+ char *iext= strrchr(ci->index_file_name, '.');
+ int have_iext= iext && !strcmp(iext, MARIA_NAME_IEXT);
+ if (tmp_table)
+ {
+ char *path;
+ /* chop off the table name, tempory tables use generated name */
+ if ((path= strrchr(ci->index_file_name, FN_LIBCHAR)))
+ *path= '\0';
+ fn_format(filename, name, ci->index_file_name, MARIA_NAME_IEXT,
+ MY_REPLACE_DIR | MY_UNPACK_FILENAME |
+ MY_RETURN_REAL_PATH | MY_APPEND_EXT);
+ }
+ else
+ {
+ fn_format(filename, ci->index_file_name, "", MARIA_NAME_IEXT,
+ MY_UNPACK_FILENAME | MY_RETURN_REAL_PATH |
+ (have_iext ? MY_REPLACE_EXT : MY_APPEND_EXT));
+ }
+ fn_format(linkname, name, "", MARIA_NAME_IEXT,
+ MY_UNPACK_FILENAME|MY_APPEND_EXT);
+ linkname_ptr= linkname;
+ /*
+ Don't create the table if the link or file exists to ensure that one
+ doesn't accidently destroy another table.
+ Don't sync dir now if the data file has the same path.
+ */
+ create_flag=
+ (ci->data_file_name &&
+ !strcmp(ci->index_file_name, ci->data_file_name)) ? 0 : sync_dir;
+ }
+ else
+ {
+ char *iext= strrchr(name, '.');
+ int have_iext= iext && !strcmp(iext, MARIA_NAME_IEXT);
+ fn_format(filename, name, "", MARIA_NAME_IEXT,
+ MY_UNPACK_FILENAME | MY_RETURN_REAL_PATH |
+ (have_iext ? MY_REPLACE_EXT : MY_APPEND_EXT));
+ linkname_ptr= NullS;
+ /*
+ Replace the current file.
+ Don't sync dir now if the data file has the same path.
+ */
+ create_flag= (flags & HA_CREATE_KEEP_FILES) ? 0 : MY_DELETE_OLD;
+ create_flag|= (!ci->data_file_name ? 0 : sync_dir);
+ }
+
+ /*
+ If a MRG_MARIA table is in use, the mapped MARIA tables are open,
+ but no entry is made in the table cache for them.
+ A TRUNCATE command checks for the table in the cache only and could
+ be fooled to believe, the table is not open.
+ Pull the emergency brake in this situation. (Bug #8306)
+
+
+ NOTE: The filename is compared against unique_file_name of every
+ open table. Hence we need a real path here.
+ */
+ if (_ma_test_if_reopen(filename))
+ {
+ my_printf_error(0, "MARIA table '%s' is in use "
+ "(most likely by a MERGE table). Try FLUSH TABLES.",
+ MYF(0), name + dirname_length(name));
+ goto err;
+ }
+
+ if ((file= my_create_with_symlink(linkname_ptr, filename, 0, create_mode,
+ MYF(MY_WME|create_flag))) < 0)
+ goto err;
+ errpos=1;
+
+ DBUG_PRINT("info", ("write state info and base info"));
+ if (_ma_state_info_write_sub(file, &share.state, 2) ||
+ _ma_base_info_write(file, &share.base))
+ goto err;
+ DBUG_PRINT("info", ("base_pos: %d base_info_size: %d",
+ base_pos, MARIA_BASE_INFO_SIZE));
+ DBUG_ASSERT(my_tell(file,MYF(0)) == base_pos+ MARIA_BASE_INFO_SIZE);
+
+ /* Write key and keyseg definitions */
+ DBUG_PRINT("info", ("write key and keyseg definitions"));
+ for (i=0 ; i < share.base.keys - uniques; i++)
+ {
+ uint sp_segs=(keydefs[i].flag & HA_SPATIAL) ? 2*SPDIMS : 0;
+
+ if (_ma_keydef_write(file, &keydefs[i]))
+ goto err;
+ for (j=0 ; j < keydefs[i].keysegs-sp_segs ; j++)
+ if (_ma_keyseg_write(file, &keydefs[i].seg[j]))
+ goto err;
+#ifdef HAVE_SPATIAL
+ for (j=0 ; j < sp_segs ; j++)
+ {
+ HA_KEYSEG sseg;
+ sseg.type=SPTYPE;
+ sseg.language= 7; /* Binary */
+ sseg.null_bit=0;
+ sseg.bit_start=0;
+ sseg.bit_end=0;
+ sseg.bit_length= 0;
+ sseg.bit_pos= 0;
+ sseg.length=SPLEN;
+ sseg.null_pos=0;
+ sseg.start=j*SPLEN;
+ sseg.flag= HA_SWAP_KEY;
+ if (_ma_keyseg_write(file, &sseg))
+ goto err;
+ }
+#endif
+ }
+ /* Create extra keys for unique definitions */
+ offset= real_reclength - uniques*MARIA_UNIQUE_HASH_LENGTH;
+ bzero((char*) &tmp_keydef,sizeof(tmp_keydef));
+ bzero((char*) &tmp_keyseg,sizeof(tmp_keyseg));
+ for (i=0; i < uniques ; i++)
+ {
+ tmp_keydef.keysegs=1;
+ tmp_keydef.flag= HA_UNIQUE_CHECK;
+ tmp_keydef.block_length= (uint16) maria_block_size;
+ tmp_keydef.keylength= MARIA_UNIQUE_HASH_LENGTH + pointer;
+ tmp_keydef.minlength=tmp_keydef.maxlength=tmp_keydef.keylength;
+ tmp_keyseg.type= MARIA_UNIQUE_HASH_TYPE;
+ tmp_keyseg.length= MARIA_UNIQUE_HASH_LENGTH;
+ tmp_keyseg.start= offset;
+ offset+= MARIA_UNIQUE_HASH_LENGTH;
+ if (_ma_keydef_write(file,&tmp_keydef) ||
+ _ma_keyseg_write(file,(&tmp_keyseg)))
+ goto err;
+ }
+
+ /* Save unique definition */
+ DBUG_PRINT("info", ("write unique definitions"));
+ for (i=0 ; i < share.state.header.uniques ; i++)
+ {
+ HA_KEYSEG *keyseg_end;
+ keyseg= uniquedefs[i].seg;
+ if (_ma_uniquedef_write(file, &uniquedefs[i]))
+ goto err;
+ for (keyseg= uniquedefs[i].seg, keyseg_end= keyseg+ uniquedefs[i].keysegs;
+ keyseg < keyseg_end;
+ keyseg++)
+ {
+ switch (keyseg->type) {
+ case HA_KEYTYPE_VARTEXT1:
+ case HA_KEYTYPE_VARTEXT2:
+ case HA_KEYTYPE_VARBINARY1:
+ case HA_KEYTYPE_VARBINARY2:
+ if (!(keyseg->flag & HA_BLOB_PART))
+ {
+ keyseg->flag|= HA_VAR_LENGTH_PART;
+ keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 ||
+ keyseg->type == HA_KEYTYPE_VARBINARY1) ?
+ 1 : 2);
+ }
+ break;
+ default:
+ DBUG_ASSERT((keyseg->flag & HA_VAR_LENGTH_PART) == 0);
+ break;
+ }
+ if (_ma_keyseg_write(file, keyseg))
+ goto err;
+ }
+ }
+ DBUG_PRINT("info", ("write field definitions"));
+ if (datafile_type == BLOCK_RECORD)
+ {
+ /* Store columns in a more efficent order */
+ MARIA_COLUMNDEF **col_order, **pos;
+ if (!(col_order= (MARIA_COLUMNDEF**) my_malloc(share.base.fields *
+ sizeof(MARIA_COLUMNDEF*),
+ MYF(MY_WME))))
+ goto err;
+ for (column= columndef, pos= col_order ;
+ column != end_column ;
+ column++, pos++)
+ *pos= column;
+ qsort(col_order, share.base.fields, sizeof(*col_order),
+ (qsort_cmp) compare_columns);
+ for (i=0 ; i < share.base.fields ; i++)
+ {
+ column_array[col_order[i]->column_nr]= i;
+ if (_ma_columndef_write(file, col_order[i]))
+ {
+ my_free((uchar*) col_order, MYF(0));
+ goto err;
+ }
+ }
+ my_free((uchar*) col_order, MYF(0));
+ }
+ else
+ {
+ for (i=0 ; i < share.base.fields ; i++)
+ {
+ column_array[i]= (uint16) i;
+ if (_ma_columndef_write(file, &columndef[i]))
+ goto err;
+ }
+ }
+ if (_ma_column_nr_write(file, column_array, columns))
+ goto err;
+
+ if ((kfile_size_before_extension= my_tell(file,MYF(0))) == MY_FILEPOS_ERROR)
+ goto err;
+#ifndef DBUG_OFF
+ if (kfile_size_before_extension != info_length)
+ DBUG_PRINT("warning",("info_length: %u != used_length: %u",
+ info_length, (uint)kfile_size_before_extension));
+#endif
+
+ if (sync_dir)
+ {
+ /*
+ we log the first bytes and then the size to which we extend; this is
+ not log 1 KB of mostly zeroes if this is a small table.
+ */
+ char empty_string[]= "";
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 4];
+ uint total_rec_length= 0;
+ uint k;
+ LSN lsn;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= 1 + 2 + 2 +
+ kfile_size_before_extension;
+ /* we are needing maybe 64 kB, so don't use the stack */
+ log_data= my_malloc(log_array[TRANSLOG_INTERNAL_PARTS + 1].length, MYF(0));
+ if ((log_data == NULL) ||
+ my_pread(file, 1 + 2 + 2 + log_data, kfile_size_before_extension,
+ 0, MYF(MY_NABP)))
+ goto err;
+ /*
+ remember if the data file was created or not, to know if Recovery can
+ do it or not, in the future
+ */
+ log_data[0]= test(flags & HA_DONT_TOUCH_DATA);
+ int2store(log_data + 1, kfile_size_before_extension);
+ int2store(log_data + 1 + 2, share.base.keystart);
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char *)name;
+ /* we store the end-zero, for Recovery to just pass it to my_create() */
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length=
+ strlen(log_array[TRANSLOG_INTERNAL_PARTS + 0].str) + 1;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= log_data;
+ /* symlink description is also needed for re-creation by Recovery: */
+ log_array[TRANSLOG_INTERNAL_PARTS + 2].str= (char *)
+ (ci->data_file_name ? ci->data_file_name : empty_string);
+ log_array[TRANSLOG_INTERNAL_PARTS + 2].length=
+ strlen(log_array[TRANSLOG_INTERNAL_PARTS + 2].str) + 1;
+ log_array[TRANSLOG_INTERNAL_PARTS + 3].str= (char *)
+ (ci->index_file_name ? ci->index_file_name : empty_string);
+ log_array[TRANSLOG_INTERNAL_PARTS + 3].length=
+ strlen(log_array[TRANSLOG_INTERNAL_PARTS + 3].str) + 1;
+ for (k= TRANSLOG_INTERNAL_PARTS;
+ k < (sizeof(log_array)/sizeof(log_array[0])); k++)
+ total_rec_length+= log_array[k].length;
+ /**
+ For this record to be of any use for Recovery, we need the upper
+ MySQL layer to be crash-safe, which it is not now (that would require
+ work using the ddl_log of sql/sql_table.cc); when it is, we should
+ reconsider the moment of writing this log record (before or after op,
+ under THR_LOCK_maria or not...), how to use it in Recovery.
+ For now this record can serve when we apply logs to a backup,
+ so we sync it. This happens before the data file is created. If the
+ data file was created before, and we crashed before writing the log
+ record, at restart the table may be used, so we would not have a
+ trustable history in the log (impossible to apply this log to a
+ backup). The way we do it, if we crash before writing the log record
+ then there is no data file and the table cannot be used.
+ @todo Note that in case of TRUNCATE TABLE we also come here; for
+ Recovery to be able to finish TRUNCATE TABLE, instead of leaving a
+ half-truncated table, we should log the record at start of
+ maria_create(); for that we shouldn't write to the index file but to a
+ buffer (DYNAMIC_STRING), put the buffer into the record, then put the
+ buffer into the index file (so, change _ma_keydef_write() etc). That
+ would also enable Recovery to finish a CREATE TABLE. The final result
+ would be that we would be able to finish what the SQL layer has asked
+ for: it would be atomic.
+ When in CREATE/TRUNCATE (or DROP or RENAME or REPAIR) we have not
+ called external_lock(), so have no TRN. It does not matter, as all
+ these operations are non-transactional and sync their files.
+ */
+ if (unlikely(translog_write_record(&lsn,
+ LOGREC_REDO_CREATE_TABLE,
+ &dummy_transaction_object, NULL,
+ total_rec_length,
+ sizeof(log_array)/sizeof(log_array[0]),
+ log_array, NULL, NULL) ||
+ translog_flush(lsn)))
+ goto err;
+ /*
+ store LSN into file, needed for Recovery to not be confused if a
+ DROP+CREATE happened (applying REDOs to the wrong table).
+ */
+ share.kfile.file= file;
+ if (_ma_update_create_rename_lsn_sub(&share, lsn, FALSE))
+ goto err;
+ my_free(log_data, MYF(0));
+ }
+
+ if (!(flags & HA_DONT_TOUCH_DATA))
+ {
+ if (ci->data_file_name)
+ {
+ char *dext= strrchr(ci->data_file_name, '.');
+ int have_dext= dext && !strcmp(dext, MARIA_NAME_DEXT);
+
+ if (tmp_table)
+ {
+ char *path;
+ /* chop off the table name, tempory tables use generated name */
+ if ((path= strrchr(ci->data_file_name, FN_LIBCHAR)))
+ *path= '\0';
+ fn_format(filename, name, ci->data_file_name, MARIA_NAME_DEXT,
+ MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT);
+ }
+ else
+ {
+ fn_format(filename, ci->data_file_name, "", MARIA_NAME_DEXT,
+ MY_UNPACK_FILENAME |
+ (have_dext ? MY_REPLACE_EXT : MY_APPEND_EXT));
+ }
+ fn_format(linkname, name, "",MARIA_NAME_DEXT,
+ MY_UNPACK_FILENAME | MY_APPEND_EXT);
+ linkname_ptr= linkname;
+ create_flag=0;
+ }
+ else
+ {
+ fn_format(filename,name,"", MARIA_NAME_DEXT,
+ MY_UNPACK_FILENAME | MY_APPEND_EXT);
+ linkname_ptr= NullS;
+ create_flag= (flags & HA_CREATE_KEEP_FILES) ? 0 : MY_DELETE_OLD;
+ }
+ if ((dfile=
+ my_create_with_symlink(linkname_ptr, filename, 0, create_mode,
+ MYF(MY_WME | create_flag | sync_dir))) < 0)
+ goto err;
+ errpos=3;
+
+ if (_ma_initialize_data_file(&share, dfile))
+ goto err;
+ }
+
+ /* Enlarge files */
+ DBUG_PRINT("info", ("enlarge to keystart: %lu",
+ (ulong) share.base.keystart));
+ if (my_chsize(file,(ulong) share.base.keystart,0,MYF(0)))
+ goto err;
+
+ if (sync_dir && my_sync(file, MYF(0)))
+ goto err;
+
+ if (! (flags & HA_DONT_TOUCH_DATA))
+ {
+#ifdef USE_RELOC
+ if (my_chsize(dfile,share.base.min_pack_length*ci->reloc_rows,0,MYF(0)))
+ goto err;
+#endif
+ if (sync_dir && my_sync(dfile, MYF(0)))
+ goto err;
+ if (my_close(dfile,MYF(0)))
+ goto err;
+ }
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ res= 0;
+ my_free((char*) rec_per_key_part,MYF(0));
+ errpos=0;
+ if (my_close(file,MYF(0)))
+ res= my_errno;
+ DBUG_RETURN(res);
+
+err:
+ pthread_mutex_unlock(&THR_LOCK_maria);
+
+err_no_lock:
+ save_errno=my_errno;
+ switch (errpos) {
+ case 3:
+ VOID(my_close(dfile,MYF(0)));
+ /* fall through */
+ case 2:
+ if (! (flags & HA_DONT_TOUCH_DATA))
+ my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_DEXT,
+ MY_UNPACK_FILENAME | MY_APPEND_EXT),
+ sync_dir);
+ /* fall through */
+ case 1:
+ VOID(my_close(file,MYF(0)));
+ if (! (flags & HA_DONT_TOUCH_DATA))
+ my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_IEXT,
+ MY_UNPACK_FILENAME | MY_APPEND_EXT),
+ sync_dir);
+ }
+ my_free(log_data, MYF(MY_ALLOW_ZERO_PTR));
+ my_free((char*) rec_per_key_part, MYF(0));
+ DBUG_RETURN(my_errno=save_errno); /* return the fatal errno */
+}
+
+
+uint maria_get_pointer_length(ulonglong file_length, uint def)
+{
+ DBUG_ASSERT(def >= 2 && def <= 7);
+ if (file_length) /* If not default */
+ {
+#ifdef NOT_YET_READY_FOR_8_BYTE_POINTERS
+ if (file_length >= (ULL(1) << 56))
+ def=8;
+ else
+#endif
+ if (file_length >= (ULL(1) << 48))
+ def=7;
+ else if (file_length >= (ULL(1) << 40))
+ def=6;
+ else if (file_length >= (ULL(1) << 32))
+ def=5;
+ else if (file_length >= (ULL(1) << 24))
+ def=4;
+ else if (file_length >= (ULL(1) << 16))
+ def=3;
+ else
+ def=2;
+ }
+ return def;
+}
+
+
+/*
+ Sort columns for records-in-block
+
+ IMPLEMENTATION
+ Sort columns in following order:
+
+ Fixed size, not null columns
+ Fixed length, null fields
+ Variable length fields (CHAR, VARCHAR)
+ Blobs
+
+ For same kind of fields, keep fields in original order
+*/
+
+static inline int sign(long a)
+{
+ return a < 0 ? -1 : (a > 0 ? 1 : 0);
+}
+
+
+static int compare_columns(MARIA_COLUMNDEF **a_ptr, MARIA_COLUMNDEF **b_ptr)
+{
+ MARIA_COLUMNDEF *a= *a_ptr, *b= *b_ptr;
+ enum en_fieldtype a_type, b_type;
+
+ a_type= ((a->type == FIELD_NORMAL || a->type == FIELD_CHECK) ?
+ FIELD_NORMAL : a->type);
+ b_type= ((b->type == FIELD_NORMAL || b->type == FIELD_CHECK) ?
+ FIELD_NORMAL : b->type);
+
+ if (a_type == FIELD_NORMAL && !a->null_bit)
+ {
+ if (b_type != FIELD_NORMAL || b->null_bit)
+ return -1;
+ return sign((long) a->offset - (long) b->offset);
+ }
+ if (b_type == FIELD_NORMAL && !b->null_bit)
+ return 1;
+ if (a_type == b_type)
+ return sign((long) a->offset - (long) b->offset);
+ if (a_type == FIELD_NORMAL)
+ return -1;
+ if (b_type == FIELD_NORMAL)
+ return 1;
+ if (a_type == FIELD_BLOB)
+ return 1;
+ if (b_type == FIELD_BLOB)
+ return -1;
+ return sign((long) a->offset - (long) b->offset);
+}
+
+
+/**
+ @brief Initialize data file
+
+ @note
+ In BLOCK_RECORD, a freshly created datafile is one page long; while in
+ other formats it is 0-byte long.
+ */
+
+int _ma_initialize_data_file(MARIA_SHARE *share, File dfile)
+{
+ if (share->data_file_type == BLOCK_RECORD)
+ {
+ share->bitmap.block_size= share->base.block_size;
+ share->bitmap.file.file = dfile;
+ return _ma_bitmap_create_first(share);
+ }
+ return 0;
+}
+
+
+/**
+ @brief Writes create_rename_lsn and is_of_horizon to disk, can force.
+
+ This is for special cases where:
+ - we don't want to write the full state to disk (so, not call
+ _ma_state_info_write()) because some parts of the state may be
+ currently inconsistent, or because it would be overkill
+ - we must sync these LSNs immediately for correctness.
+ It acquires intern_lock to protect the two LSNs and state write.
+
+ @param share table's share
+ @param do_sync if the write should be forced to disk
+
+ @return Operation status
+ @retval 0 ok
+ @retval 1 error (disk problem)
+*/
+
+int _ma_update_create_rename_lsn(MARIA_SHARE *share,
+ LSN lsn, my_bool do_sync)
+{
+ int res;
+ pthread_mutex_lock(&share->intern_lock);
+ res= _ma_update_create_rename_lsn_sub(share, lsn, do_sync);
+ pthread_mutex_unlock(&share->intern_lock);
+ return res;
+}
+
+
+/**
+ @brief Writes create_rename_lsn and is_of_horizon to disk, can force.
+
+ Shortcut of _ma_update_create_rename_lsn() when we know that
+ intern_lock is not needed (when creating a table or opening it for the
+ first time).
+
+ @param share table's share
+ @param do_sync if the write should be forced to disk
+
+ @return Operation status
+ @retval 0 ok
+ @retval 1 error (disk problem)
+*/
+
+int _ma_update_create_rename_lsn_sub(MARIA_SHARE *share,
+ LSN lsn, my_bool do_sync)
+{
+ char buf[LSN_STORE_SIZE*2], *ptr;
+ File file= share->kfile.file;
+ DBUG_ASSERT(file >= 0);
+ for (ptr= buf; ptr < (buf + sizeof(buf)); ptr+= LSN_STORE_SIZE)
+ lsn_store(ptr, lsn);
+ share->state.is_of_horizon= share->state.create_rename_lsn= lsn;
+ if (share->id != 0)
+ {
+ /*
+ If OP is the operation which is calling us, if table is later written,
+ we could see in the log:
+ FILE_ID ... REDO_OP ... REDO_INSERT.
+ (that can happen in real life at least with OP=REPAIR).
+ As FILE_ID will be ignored by Recovery because it is <
+ create_rename_lsn, REDO_INSERT would be ignored too, wrongly.
+ To avoid that, we force a LOGREC_FILE_ID to be logged at next write:
+ */
+ translog_deassign_id_from_share(share);
+ }
+ return my_pwrite(file, buf, sizeof(buf),
+ sizeof(share->state.header) + 2, MYF(MY_NABP)) ||
+ (do_sync && my_sync(file, MYF(0)));
+}
diff --git a/storage/maria/ma_dbug.c b/storage/maria/ma_dbug.c
new file mode 100644
index 00000000000..c706fd253cd
--- /dev/null
+++ b/storage/maria/ma_dbug.c
@@ -0,0 +1,193 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Support rutiner with are using with dbug */
+
+#include "maria_def.h"
+
+ /* Print a key in user understandable format */
+
+void _ma_print_key(FILE *stream, register HA_KEYSEG *keyseg,
+ const uchar *key, uint length)
+{
+ int flag;
+ short int s_1;
+ long int l_1;
+ float f_1;
+ double d_1;
+ const uchar *end;
+ const uchar *key_end= key + length;
+
+ VOID(fputs("Key: \"",stream));
+ flag=0;
+ for (; keyseg->type && key < key_end ;keyseg++)
+ {
+ if (flag++)
+ VOID(putc('-',stream));
+ end= key+ keyseg->length;
+ if (keyseg->flag & HA_NULL_PART)
+ {
+ /* A NULL value is encoded by a 1-byte flag. Zero means NULL. */
+ if (! *(key++))
+ {
+ fprintf(stream,"NULL");
+ continue;
+ }
+ end++;
+ }
+
+ switch (keyseg->type) {
+ case HA_KEYTYPE_BINARY:
+ if (!(keyseg->flag & HA_SPACE_PACK) && keyseg->length == 1)
+ { /* packed binary digit */
+ VOID(fprintf(stream,"%d",(uint) *key++));
+ break;
+ }
+ /* fall through */
+ case HA_KEYTYPE_TEXT:
+ case HA_KEYTYPE_NUM:
+ if (keyseg->flag & HA_SPACE_PACK)
+ {
+ VOID(fprintf(stream,"%.*s",(int) *key,key+1));
+ key+= (int) *key+1;
+ }
+ else
+ {
+ VOID(fprintf(stream,"%.*s",(int) keyseg->length,key));
+ key=end;
+ }
+ break;
+ case HA_KEYTYPE_INT8:
+ VOID(fprintf(stream,"%d",(int) *((signed char*) key)));
+ key=end;
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ s_1= mi_sint2korr(key);
+ VOID(fprintf(stream,"%d",(int) s_1));
+ key=end;
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ {
+ ushort u_1;
+ u_1= mi_uint2korr(key);
+ VOID(fprintf(stream,"%u",(uint) u_1));
+ key=end;
+ break;
+ }
+ case HA_KEYTYPE_LONG_INT:
+ l_1=mi_sint4korr(key);
+ VOID(fprintf(stream,"%ld",l_1));
+ key=end;
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ l_1=mi_uint4korr(key);
+ VOID(fprintf(stream,"%lu",(ulong) l_1));
+ key=end;
+ break;
+ case HA_KEYTYPE_INT24:
+ VOID(fprintf(stream,"%ld",(long) mi_sint3korr(key)));
+ key=end;
+ break;
+ case HA_KEYTYPE_UINT24:
+ VOID(fprintf(stream,"%lu",(ulong) mi_uint3korr(key)));
+ key=end;
+ break;
+ case HA_KEYTYPE_FLOAT:
+ mi_float4get(f_1,key);
+ VOID(fprintf(stream,"%g",(double) f_1));
+ key=end;
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ mi_float8get(d_1,key);
+ VOID(fprintf(stream,"%g",d_1));
+ key=end;
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ {
+ char buff[21];
+ longlong2str(mi_sint8korr(key),buff,-10);
+ VOID(fprintf(stream,"%s",buff));
+ key=end;
+ break;
+ }
+ case HA_KEYTYPE_ULONGLONG:
+ {
+ char buff[21];
+ longlong2str(mi_sint8korr(key),buff,10);
+ VOID(fprintf(stream,"%s",buff));
+ key=end;
+ break;
+ }
+ case HA_KEYTYPE_BIT:
+ {
+ uint i;
+ fputs("0x",stream);
+ for (i=0 ; i < keyseg->length ; i++)
+ fprintf(stream, "%02x", (uint) *key++);
+ key= end;
+ break;
+ }
+
+#endif
+ case HA_KEYTYPE_VARTEXT1: /* VARCHAR and TEXT */
+ case HA_KEYTYPE_VARTEXT2: /* VARCHAR and TEXT */
+ case HA_KEYTYPE_VARBINARY1: /* VARBINARY and BLOB */
+ case HA_KEYTYPE_VARBINARY2: /* VARBINARY and BLOB */
+ {
+ uint tmp_length;
+ get_key_length(tmp_length,key);
+ /*
+ The following command sometimes gives a warning from valgrind.
+ Not yet sure if the bug is in valgrind, glibc or mysqld
+ */
+ VOID(fprintf(stream,"%.*s",(int) tmp_length,key));
+ key+=tmp_length;
+ break;
+ }
+ default: break; /* This never happens */
+ }
+ }
+ VOID(fputs("\"\n",stream));
+ return;
+} /* print_key */
+
+
+#ifdef EXTRA_DEBUG
+
+my_bool _ma_check_table_is_closed(const char *name, const char *where)
+{
+ char filename[FN_REFLEN];
+ LIST *pos;
+ DBUG_ENTER("_ma_check_table_is_closed");
+
+ (void) fn_format(filename,name,"",MARIA_NAME_IEXT,4+16+32);
+ for (pos=maria_open_list ; pos ; pos=pos->next)
+ {
+ MARIA_HA *info=(MARIA_HA*) pos->data;
+ MARIA_SHARE *share= info->s;
+ if (!strcmp(share->unique_file_name,filename))
+ {
+ if (share->last_version)
+ {
+ fprintf(stderr,"Warning: Table: %s is open on %s\n", name,where);
+ DBUG_PRINT("warning",("Table: %s is open on %s", name,where));
+ DBUG_RETURN(1);
+ }
+ }
+ }
+ DBUG_RETURN(0);
+}
+#endif /* EXTRA_DEBUG */
diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c
new file mode 100644
index 00000000000..67d3d8d7092
--- /dev/null
+++ b/storage/maria/ma_delete.c
@@ -0,0 +1,1421 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "ma_fulltext.h"
+#include "ma_rt_index.h"
+#include "trnman.h"
+#include "ma_key_recover.h"
+
+static int d_search(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uint comp_flag,
+ uchar *key, uint key_length,
+ my_off_t page, uchar *anc_buff,
+ MARIA_PINNED_PAGE *anc_page_link);
+static int del(MARIA_HA *info,MARIA_KEYDEF *keyinfo, uchar *key,
+ my_off_t anc_page, uchar *anc_buff, my_off_t leaf_page,
+ uchar *leaf_buff, MARIA_PINNED_PAGE *leaf_page_link,
+ uchar *keypos, my_off_t next_block, uchar *ret_key);
+static int underflow(MARIA_HA *info,MARIA_KEYDEF *keyinfo,
+ my_off_t anc_page, uchar *anc_buff,
+ my_off_t leaf_page, uchar *leaf_buff,
+ MARIA_PINNED_PAGE *leaf_page_link, uchar *keypos);
+static uint remove_key(MARIA_KEYDEF *keyinfo,uint nod_flag,uchar *keypos,
+ uchar *lastkey,uchar *page_end,
+ my_off_t *next_block, MARIA_KEY_PARAM *s_temp);
+static my_bool _ma_log_delete(MARIA_HA *info, my_off_t page, uchar *buff,
+ uchar *key_pos, uint changed_length,
+ uint move_length);
+
+/* @breif Remove a row from a MARIA table */
+
+int maria_delete(MARIA_HA *info,const uchar *record)
+{
+ uint i;
+ uchar *old_key;
+ int save_errno;
+ char lastpos[8];
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("maria_delete");
+
+ /* Test if record is in datafile */
+ DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage",
+ maria_print_error(share, HA_ERR_CRASHED);
+ DBUG_RETURN(my_errno= HA_ERR_CRASHED););
+ DBUG_EXECUTE_IF("my_error_test_undefined_error",
+ maria_print_error(share, INT_MAX);
+ DBUG_RETURN(my_errno= INT_MAX););
+ if (!(info->update & HA_STATE_AKTIV))
+ {
+ DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); /* No database read */
+ }
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ {
+ DBUG_RETURN(my_errno=EACCES);
+ }
+ if (_ma_readinfo(info,F_WRLCK,1))
+ DBUG_RETURN(my_errno);
+ if ((*share->compare_record)(info,record))
+ goto err; /* Error on read-check */
+
+ if (_ma_mark_file_changed(info))
+ goto err;
+
+ /* Remove all keys from the index file */
+
+ old_key= info->lastkey2;
+ for (i=0 ; i < share->base.keys ; i++ )
+ {
+ if (maria_is_key_active(share->state.key_map, i))
+ {
+ share->keyinfo[i].version++;
+ if (share->keyinfo[i].flag & HA_FULLTEXT )
+ {
+ if (_ma_ft_del(info, i, old_key, record, info->cur_row.lastpos))
+ goto err;
+ }
+ else
+ {
+ if (share->keyinfo[i].ck_delete(info,i,old_key,
+ _ma_make_key(info, i, old_key,
+ record,
+ info->cur_row.lastpos)))
+ goto err;
+ }
+ /* The above changed info->lastkey2. Inform maria_rnext_same(). */
+ info->update&= ~HA_STATE_RNEXT_SAME;
+ }
+ }
+
+ if (share->calc_checksum)
+ {
+ /*
+ We can't use the row based checksum as this doesn't have enough
+ precision.
+ */
+ info->cur_row.checksum= (*share->calc_checksum)(info, record);
+ }
+
+ if ((*share->delete_record)(info, record))
+ goto err; /* Remove record from database */
+
+ info->state->checksum+= - !share->now_transactional *
+ info->cur_row.checksum;
+ info->update= HA_STATE_CHANGED+HA_STATE_DELETED+HA_STATE_ROW_CHANGED;
+ info->state->records-= !share->now_transactional;
+ share->state.changed|= STATE_NOT_OPTIMIZED_ROWS;
+
+ mi_sizestore(lastpos, info->cur_row.lastpos);
+ VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ if (info->invalidator != 0)
+ {
+ DBUG_PRINT("info", ("invalidator... '%s' (delete)", share->open_file_name));
+ (*info->invalidator)(share->open_file_name);
+ info->invalidator=0;
+ }
+ DBUG_RETURN(0);
+
+err:
+ save_errno=my_errno;
+ mi_sizestore(lastpos, info->cur_row.lastpos);
+ if (save_errno != HA_ERR_RECORD_CHANGED)
+ {
+ maria_print_error(share, HA_ERR_CRASHED);
+ maria_mark_crashed(info); /* mark table crashed */
+ }
+ VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
+ info->update|=HA_STATE_WRITTEN; /* Buffer changed */
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ my_errno=save_errno;
+ if (save_errno == HA_ERR_KEY_NOT_FOUND)
+ {
+ maria_print_error(share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ }
+
+ DBUG_RETURN(my_errno);
+} /* maria_delete */
+
+
+/* Remove a key from the btree index */
+
+int _ma_ck_delete(register MARIA_HA *info, uint keynr, uchar *key,
+ uint key_length)
+{
+ MARIA_SHARE *share= info->s;
+ int res;
+ LSN lsn= LSN_IMPOSSIBLE;
+ my_off_t new_root= share->state.key_root[keynr];
+ uchar key_buff[HA_MAX_KEY_BUFF];
+ DBUG_ENTER("_ma_ck_delete");
+
+ if (share->now_transactional)
+ {
+ /* Save original value as the key may change */
+ memcpy(key_buff, key, key_length + share->rec_reflength);
+ }
+
+ res= _ma_ck_real_delete(info, share->keyinfo+keynr, key, key_length,
+ &new_root);
+
+ if (!res && share->now_transactional)
+ {
+ uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ KEY_NR_STORE_SIZE + PAGE_STORE_SIZE], *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ struct st_msg_to_write_hook_for_undo_key msg;
+ enum translog_record_type log_type= LOGREC_UNDO_KEY_DELETE;
+
+ info->key_delete_undo_lsn[keynr]= info->trn->undo_lsn;
+ lsn_store(log_data, info->trn->undo_lsn);
+ key_nr_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE, keynr);
+ log_pos= log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE;
+
+ if (new_root != share->state.key_root[keynr])
+ {
+ my_off_t page;
+ page= ((new_root == HA_OFFSET_ERROR) ? IMPOSSIBLE_PAGE_NO :
+ new_root / share->block_size);
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+ log_type= LOGREC_UNDO_KEY_DELETE_WITH_ROOT;
+ }
+
+ /*
+ Note that for delete key, we don't log the reference to the record.
+ This is because the row may be inserted at a different place when
+ we exceute the undo
+ */
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) key_buff;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= key_length;
+
+ msg.root= &share->state.key_root[keynr];
+ msg.value= new_root;
+
+ if (translog_write_record(&lsn, log_type,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length +
+ key_length,
+ TRANSLOG_INTERNAL_PARTS + 2, log_array,
+ log_data + LSN_STORE_SIZE, &msg))
+ res= -1;
+ }
+ else
+ {
+ share->state.key_root[keynr]= new_root;
+ _ma_fast_unlock_key_del(info);
+ }
+ _ma_unpin_all_pages_and_finalize_row(info, lsn);
+ DBUG_RETURN(res);
+} /* _ma_ck_delete */
+
+
+int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key, uint key_length, my_off_t *root)
+{
+ int error;
+ uint nod_flag;
+ my_off_t old_root;
+ uchar *root_buff;
+ MARIA_PINNED_PAGE *page_link;
+ DBUG_ENTER("_ma_ck_real_delete");
+
+ if ((old_root=*root) == HA_OFFSET_ERROR)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ DBUG_RETURN(my_errno=HA_ERR_CRASHED);
+ }
+ if (!(root_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
+ HA_MAX_KEY_BUFF*2)))
+ {
+ DBUG_PRINT("error",("Couldn't allocate memory"));
+ DBUG_RETURN(my_errno=ENOMEM);
+ }
+ DBUG_PRINT("info",("root_page: %ld", (long) old_root));
+ if (!_ma_fetch_keypage(info, keyinfo, old_root,
+ PAGECACHE_LOCK_WRITE, DFLT_INIT_HITS, root_buff, 0,
+ &page_link))
+ {
+ error= -1;
+ goto err;
+ }
+ if ((error=d_search(info,keyinfo,
+ (keyinfo->flag & HA_FULLTEXT ?
+ SEARCH_FIND | SEARCH_UPDATE : SEARCH_SAME),
+ key, key_length, old_root, root_buff, page_link)) >0)
+ {
+ if (error == 2)
+ {
+ DBUG_PRINT("test",("Enlarging of root when deleting"));
+ error= _ma_enlarge_root(info,keyinfo,key,root);
+ }
+ else /* error == 1 */
+ {
+ uint used_length;
+ MARIA_SHARE *share= info->s;
+ _ma_get_used_and_nod(share, root_buff, used_length, nod_flag);
+ page_link->changed= 1;
+ if (used_length <= nod_flag + share->keypage_header + 1)
+ {
+ error=0;
+ if (nod_flag)
+ *root= _ma_kpos(nod_flag, root_buff +share->keypage_header +
+ nod_flag);
+ else
+ *root=HA_OFFSET_ERROR;
+ if (_ma_dispose(info, old_root, 0))
+ error= -1;
+ }
+ else
+ error= _ma_write_keypage(info,keyinfo, old_root,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, root_buff);
+ }
+ }
+err:
+ my_afree((uchar*) root_buff);
+ DBUG_PRINT("exit",("Return: %d",error));
+ DBUG_RETURN(error);
+} /* _ma_ck_real_delete */
+
+
+/**
+ @brief Remove key below key root
+
+ @param key Key to delete. Will contain new key if block was enlarged
+
+ @return
+ @retval 0 ok (anc_page is not changed)
+ @retval 1 If data on page is too small; In this case anc_buff is not saved
+ @retval 2 If data on page is too big
+ @retval -1 On errors
+*/
+
+static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uint comp_flag, uchar *key, uint key_length,
+ my_off_t anc_page, uchar *anc_buff,
+ MARIA_PINNED_PAGE *anc_page_link)
+{
+ int flag,ret_value,save_flag;
+ uint length,nod_flag,search_key_length;
+ my_bool last_key;
+ uchar *leaf_buff,*keypos;
+ my_off_t leaf_page,next_block;
+ uchar lastkey[HA_MAX_KEY_BUFF];
+ MARIA_PINNED_PAGE *leaf_page_link;
+ MARIA_KEY_PARAM s_temp;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("d_search");
+ DBUG_DUMP("page",anc_buff,_ma_get_page_used(share, anc_buff));
+
+ search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY;
+ flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key, search_key_length,
+ comp_flag, &keypos, lastkey, &last_key);
+ if (flag == MARIA_FOUND_WRONG_KEY)
+ {
+ DBUG_PRINT("error",("Found wrong key"));
+ DBUG_RETURN(-1);
+ }
+ nod_flag= _ma_test_if_nod(share, anc_buff);
+
+ if (!flag && (keyinfo->flag & HA_FULLTEXT))
+ {
+ uint off;
+ int subkeys;
+
+ get_key_full_length_rdonly(off, lastkey);
+ subkeys=ft_sintXkorr(lastkey+off);
+ DBUG_ASSERT(info->ft1_to_ft2==0 || subkeys >=0);
+ comp_flag=SEARCH_SAME;
+ if (subkeys >= 0)
+ {
+ /* normal word, one-level tree structure */
+ if (info->ft1_to_ft2)
+ {
+ /* we're in ft1->ft2 conversion mode. Saving key data */
+ insert_dynamic(info->ft1_to_ft2, (lastkey+off));
+ }
+ else
+ {
+ /* we need exact match only if not in ft1->ft2 conversion mode */
+ flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key,USE_WHOLE_KEY,
+ comp_flag, &keypos, lastkey, &last_key);
+ }
+ /* fall through to normal delete */
+ }
+ else
+ {
+ /* popular word. two-level tree. going down */
+ uint tmp_key_length;
+ my_off_t root;
+ uchar *kpos=keypos;
+
+ if (!(tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&kpos,
+ lastkey)))
+ {
+ maria_print_error(share, HA_ERR_CRASHED);
+ my_errno= HA_ERR_CRASHED;
+ DBUG_RETURN(-1);
+ }
+ root= _ma_dpos(info,nod_flag,kpos);
+ if (subkeys == -1)
+ {
+ /* the last entry in sub-tree */
+ if (_ma_dispose(info, root, 1))
+ DBUG_RETURN(-1);
+ /* fall through to normal delete */
+ }
+ else
+ {
+ keyinfo=&share->ft2_keyinfo;
+ /* we'll modify key entry 'in vivo' */
+ kpos-=keyinfo->keylength+nod_flag;
+ get_key_full_length_rdonly(off, key);
+ key+=off;
+ ret_value= _ma_ck_real_delete(info, &share->ft2_keyinfo,
+ key, HA_FT_WLEN, &root);
+ _ma_dpointer(info, kpos+HA_FT_WLEN, root);
+ subkeys++;
+ ft_intXstore(kpos, subkeys);
+ if (!ret_value)
+ {
+ anc_page_link->changed= 1;
+ ret_value= _ma_write_keypage(info, keyinfo, anc_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, anc_buff);
+ }
+ DBUG_PRINT("exit",("Return: %d",ret_value));
+ DBUG_RETURN(ret_value);
+ }
+ }
+ }
+ leaf_buff=0;
+ LINT_INIT(leaf_page);
+ if (nod_flag)
+ {
+ /* Read left child page */
+ leaf_page= _ma_kpos(nod_flag,keypos);
+ if (!(leaf_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
+ HA_MAX_KEY_BUFF*2)))
+ {
+ DBUG_PRINT("error", ("Couldn't allocate memory"));
+ my_errno=ENOMEM;
+ DBUG_RETURN(-1);
+ }
+ if (!_ma_fetch_keypage(info,keyinfo,leaf_page,
+ PAGECACHE_LOCK_WRITE, DFLT_INIT_HITS, leaf_buff,
+ 0, &leaf_page_link))
+ goto err;
+ }
+
+ if (flag != 0)
+ {
+ if (!nod_flag)
+ {
+ DBUG_PRINT("error",("Didn't find key"));
+ maria_print_error(share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED; /* This should newer happend */
+ goto err;
+ }
+ save_flag=0;
+ ret_value=d_search(info, keyinfo, comp_flag, key, key_length,
+ leaf_page, leaf_buff, leaf_page_link);
+ }
+ else
+ { /* Found key */
+ uint tmp;
+ uint anc_buff_length= _ma_get_page_used(share, anc_buff);
+ if (!(tmp= remove_key(keyinfo,nod_flag,keypos,lastkey,
+ anc_buff + anc_buff_length,
+ &next_block, &s_temp)))
+ goto err;
+
+ anc_page_link->changed= 1;
+ anc_buff_length-= tmp;
+ _ma_store_page_used(share, anc_buff, anc_buff_length);
+
+ /*
+ Log initial changes on pages
+ If there is an underflow, there will be more changes logged to the
+ page
+ */
+ if (share->now_transactional &&
+ _ma_log_delete(info, anc_page, anc_buff, s_temp.key_pos,
+ s_temp.changed_length, s_temp.move_length))
+ DBUG_RETURN(-1);
+
+ if (!nod_flag)
+ { /* On leaf page */
+ if (anc_buff_length <= (info->quick_mode ?
+ MARIA_MIN_KEYBLOCK_LENGTH :
+ (uint) keyinfo->underflow_block_length))
+ {
+ /* Page will be written by caller if we return 1 */
+ DBUG_RETURN(1);
+ }
+ if (_ma_write_keypage(info, keyinfo, anc_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
+ anc_buff))
+ DBUG_RETURN(-1);
+ DBUG_RETURN(0);
+ }
+ save_flag=1; /* Mark that anc_buff is changed */
+ ret_value= del(info, keyinfo, key, anc_page, anc_buff,
+ leaf_page, leaf_buff, leaf_page_link,
+ keypos, next_block, lastkey);
+ }
+ if (ret_value >0)
+ {
+ save_flag=1;
+ if (ret_value == 1)
+ ret_value= underflow(info, keyinfo, anc_page, anc_buff,
+ leaf_page, leaf_buff, leaf_page_link, keypos);
+ else
+ { /* This happens only with packed keys */
+ DBUG_PRINT("test",("Enlarging of key when deleting"));
+ if (!_ma_get_last_key(info,keyinfo,anc_buff,lastkey,keypos,&length))
+ goto err;
+ ret_value= _ma_insert(info, keyinfo, key, anc_buff, keypos, anc_page,
+ lastkey, (my_off_t) 0, (uchar*) 0,
+ (MARIA_PINNED_PAGE*) 0, (uchar*) 0, (my_bool) 0);
+ }
+ }
+ if (ret_value == 0 && _ma_get_page_used(share, anc_buff) >
+ (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE))
+ {
+ /* parent buffer got too big ; We have to split the page */
+ save_flag=1;
+ ret_value= _ma_split_page(info, keyinfo, key, anc_page, anc_buff,
+ (uint) (keyinfo->block_length -
+ KEYPAGE_CHECKSUM_SIZE),
+ (uchar*) 0, 0, 0, lastkey, 0) | 2;
+ }
+ if (save_flag && ret_value != 1)
+ {
+ anc_page_link->changed= 1;
+ ret_value|= _ma_write_keypage(info, keyinfo, anc_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, anc_buff);
+ }
+ else
+ {
+ DBUG_DUMP("page", anc_buff, _ma_get_page_used(share, anc_buff));
+ }
+ my_afree(leaf_buff);
+ DBUG_PRINT("exit",("Return: %d",ret_value));
+ DBUG_RETURN(ret_value);
+
+err:
+ my_afree(leaf_buff);
+ DBUG_PRINT("exit",("Error: %d",my_errno));
+ DBUG_RETURN (-1);
+} /* d_search */
+
+
+/**
+ @brief Remove a key that has a page-reference
+
+ @param info Maria handler
+ @param key Buffer for key to be inserted at upper level
+ @param anc_page Page address for page where deleted key was
+ @param anc_buff Page buffer (nod) where deleted key was
+ @param leaf_page Page address for nod before the deleted key
+ @param leaf_buff Buffer for leaf_page
+ @param leaf_buff_link Pinned page link for leaf_buff
+ @param keypos Pos to where deleted key was on anc_buff
+ @param next_block Page adress for nod after deleted key
+ @param ret_key Key before keypos in anc_buff
+
+ @notes
+ leaf_buff must be written to disk if retval > 0
+ anc_buff is not updated on disk. Caller should do this
+
+ @return
+ @retval < 0 Error
+ @retval 0 OK. leaf_buff is written to disk
+
+ @retval 1 key contains key to upper level (from balance page)
+ leaf_buff has underflow
+ @retval 2 key contains key to upper level (from split space)
+*/
+
+static int del(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key, my_off_t anc_page, uchar *anc_buff,
+ my_off_t leaf_page, uchar *leaf_buff,
+ MARIA_PINNED_PAGE *leaf_page_link,
+ uchar *keypos, my_off_t next_block, uchar *ret_key)
+{
+ int ret_value,length;
+ uint a_length, nod_flag, leaf_length, new_leaf_length, tmp;
+ my_off_t next_page;
+ uchar keybuff[HA_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key;
+ MARIA_SHARE *share= info->s;
+ MARIA_KEY_PARAM s_temp;
+ MARIA_PINNED_PAGE *next_page_link;
+ DBUG_ENTER("del");
+ DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx", (long) leaf_page,
+ (ulong) keypos));
+
+ _ma_get_used_and_nod(share, leaf_buff, leaf_length, nod_flag);
+ DBUG_DUMP("leaf_buff", leaf_buff, leaf_length);
+
+ endpos= leaf_buff + leaf_length;
+ if (!(key_start= _ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos,
+ &tmp)))
+ DBUG_RETURN(-1);
+
+ if (nod_flag)
+ {
+ next_page= _ma_kpos(nod_flag,endpos);
+ if (!(next_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
+ HA_MAX_KEY_BUFF*2)))
+ DBUG_RETURN(-1);
+ if (!_ma_fetch_keypage(info, keyinfo, next_page, PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, next_buff, 0, &next_page_link))
+ ret_value= -1;
+ else
+ {
+ DBUG_DUMP("next_page", next_buff, _ma_get_page_used(share, next_buff));
+ if ((ret_value= del(info, keyinfo, key, anc_page, anc_buff, next_page,
+ next_buff, next_page_link, keypos, next_block,
+ ret_key)) >0)
+ {
+ /* Get new length after key was deleted */
+ endpos=leaf_buff+_ma_get_page_used(share, leaf_buff);
+ if (ret_value == 1)
+ {
+ ret_value= underflow(info, keyinfo, leaf_page, leaf_buff, next_page,
+ next_buff, next_page_link, endpos);
+ if (ret_value == 0 &&
+ _ma_get_page_used(share, leaf_buff) >
+ (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE))
+ {
+ ret_value= (_ma_split_page(info, keyinfo, key,
+ leaf_page, leaf_buff,
+ (uint) (keyinfo->block_length -
+ KEYPAGE_CHECKSUM_SIZE),
+ (uchar*) 0, 0, 0,
+ ret_key, 0) | 2);
+ }
+ }
+ else
+ {
+ DBUG_PRINT("test",("Inserting of key when deleting"));
+ if (!_ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos,
+ &tmp))
+ goto err;
+ ret_value= _ma_insert(info, keyinfo, key, leaf_buff, endpos,
+ leaf_page, keybuff, (my_off_t) 0, (uchar*) 0,
+ (MARIA_PINNED_PAGE *) 0, (uchar*) 0, 0);
+ }
+ }
+ leaf_page_link->changed= 1;
+ /*
+ If ret_value <> 0, then leaf_page underflowed and caller will have
+ to handle underflow and write leaf_page to disk.
+ We can't write it here, as if leaf_page is empty we get an assert
+ in _ma_write_keypage.
+ */
+ if (ret_value == 0 && _ma_write_keypage(info, keyinfo, leaf_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, leaf_buff))
+ goto err;
+ }
+ my_afree(next_buff);
+ DBUG_RETURN(ret_value);
+ }
+
+ /*
+ Remove last key from leaf page
+ Note that leaf_page page may only have had one key (can normally only
+ happen in quick mode), in which ase it will now temporary have 0 keys
+ on it. This will be corrected by the caller as we will return 0.
+ */
+ new_leaf_length= (uint) (key_start - leaf_buff);
+ _ma_store_page_used(share, leaf_buff, new_leaf_length);
+
+ if (share->now_transactional &&
+ _ma_log_suffix(info, leaf_page, leaf_buff, leaf_length,
+ new_leaf_length))
+ goto err;
+
+ if (new_leaf_length <= (info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH :
+ (uint) keyinfo->underflow_block_length))
+ {
+ /* Underflow, leaf_page will be written by caller */
+ ret_value= 1;
+ leaf_page_link->changed= 1; /* Safety */
+ }
+ else
+ {
+ ret_value= 0;
+ if (_ma_write_keypage(info, keyinfo, leaf_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
+ leaf_buff))
+ goto err;
+ }
+
+ /* Place last key in ancestor page on deleted key position */
+
+ a_length= _ma_get_page_used(share, anc_buff);
+ endpos=anc_buff+a_length;
+ if (keypos != anc_buff+share->keypage_header + share->base.key_reflength &&
+ !_ma_get_last_key(info,keyinfo,anc_buff,ret_key,keypos,&tmp))
+ goto err;
+ prev_key= (keypos == anc_buff + share->keypage_header +
+ share->base.key_reflength ? 0 : ret_key);
+ length=(*keyinfo->pack_key)(keyinfo,share->base.key_reflength,
+ keypos == endpos ? (uchar*) 0 : keypos,
+ prev_key, prev_key,
+ keybuff,&s_temp);
+ if (length > 0)
+ bmove_upp(endpos+length,endpos,(uint) (endpos-keypos));
+ else
+ bmove(keypos,keypos-length, (int) (endpos-keypos)+length);
+ (*keyinfo->store_key)(keyinfo,keypos,&s_temp);
+ key_start= keypos;
+
+ /* Save pointer to next leaf */
+ if (!(*keyinfo->get_key)(keyinfo,share->base.key_reflength,&keypos,ret_key))
+ goto err;
+ _ma_kpointer(info,keypos - share->base.key_reflength,next_block);
+ _ma_store_page_used(share, anc_buff, a_length + length);
+
+ if (share->now_transactional &&
+ _ma_log_add(info, anc_page, anc_buff, a_length,
+ key_start, s_temp.changed_length, s_temp.move_length, 1))
+ goto err;
+
+ DBUG_RETURN(new_leaf_length <=
+ (info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH :
+ (uint) keyinfo->underflow_block_length));
+err:
+ DBUG_RETURN(-1);
+} /* del */
+
+
+/**
+ @brief Balances adjacent pages if underflow occours
+
+ @fn underflow()
+ @param anc_buff Anchestor page data
+ @param leaf_page Page number of leaf page
+ @param leaf_buff Leaf page (page that underflowed)
+ @param leaf_page_link Pointer to pin information about leaf page
+ @param keypos Position after current key in anc_buff
+
+ @note
+ This function writes redo entries for all changes
+ leaf_page is saved to disk
+ Caller must save anc_buff
+
+ @return
+ @retval 0 ok
+ @retval 1 ok, but anc_buff did underflow
+ @retval -1 error
+ */
+
+static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ my_off_t anc_page, uchar *anc_buff,
+ my_off_t leaf_page, uchar *leaf_buff,
+ MARIA_PINNED_PAGE *leaf_page_link,
+ uchar *keypos)
+{
+ int t_length;
+ uint length,anc_length,buff_length,leaf_length,p_length,s_length,nod_flag;
+ uint next_buff_length, new_buff_length, key_reflength, key_length;
+ uint unchanged_leaf_length, new_leaf_length, new_anc_length;
+ my_off_t next_page;
+ uchar anc_key[HA_MAX_KEY_BUFF],leaf_key[HA_MAX_KEY_BUFF];
+ uchar *buff,*endpos,*next_keypos,*anc_pos,*half_pos,*prev_key;
+ uchar *after_key, *anc_end_pos;
+ MARIA_KEY_PARAM key_deleted, key_inserted;
+ MARIA_SHARE *share= info->s;
+ MARIA_PINNED_PAGE *next_page_link;
+ my_bool first_key;
+ DBUG_ENTER("underflow");
+ DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx",(long) leaf_page,
+ (ulong) keypos));
+ DBUG_DUMP("anc_buff", anc_buff, _ma_get_page_used(share, anc_buff));
+ DBUG_DUMP("leaf_buff", leaf_buff, _ma_get_page_used(share, leaf_buff));
+
+ buff=info->buff;
+ info->keyread_buff_used=1;
+ next_keypos=keypos;
+ nod_flag= _ma_test_if_nod(share, leaf_buff);
+ p_length= nod_flag+share->keypage_header;
+ anc_length= _ma_get_page_used(share, anc_buff);
+ leaf_length= _ma_get_page_used(share, leaf_buff);
+ key_reflength=share->base.key_reflength;
+ if (share->keyinfo+info->lastinx == keyinfo)
+ info->page_changed=1;
+ first_key= keypos == anc_buff + share->keypage_header + key_reflength;
+
+ if ((keypos < anc_buff + anc_length && (info->state->records & 1)) ||
+ first_key)
+ {
+ /* Use page right of anc-page */
+ DBUG_PRINT("test",("use right page"));
+
+ if (keyinfo->flag & HA_BINARY_PACK_KEY)
+ {
+ if (!(next_keypos= _ma_get_key(info, keyinfo,
+ anc_buff, buff, keypos, &length)))
+ goto err;
+ }
+ else
+ {
+ /* Got to end of found key */
+ buff[0]=buff[1]=0; /* Avoid length error check if packed key */
+ if (!(*keyinfo->get_key)(keyinfo,key_reflength,&next_keypos,
+ buff))
+ goto err;
+ }
+ next_page= _ma_kpos(key_reflength,next_keypos);
+ if (!_ma_fetch_keypage(info,keyinfo, next_page, PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, buff, 0, &next_page_link))
+ goto err;
+ next_buff_length= _ma_get_page_used(share, buff);
+ DBUG_DUMP("next", buff, next_buff_length);
+
+ /* find keys to make a big key-page */
+ bmove(next_keypos-key_reflength, buff + share->keypage_header,
+ key_reflength);
+ if (!_ma_get_last_key(info,keyinfo,anc_buff,anc_key,next_keypos,&length) ||
+ !_ma_get_last_key(info,keyinfo,leaf_buff,leaf_key,
+ leaf_buff+leaf_length,&length))
+ goto err;
+
+ /* merge pages and put parting key from anc_buff between */
+ prev_key=(leaf_length == p_length ? (uchar*) 0 : leaf_key);
+ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,buff+p_length,
+ prev_key, prev_key,
+ anc_key, &key_inserted);
+ length= next_buff_length - p_length;
+ endpos=buff+length+leaf_length+t_length;
+ /* buff will always be larger than before !*/
+ bmove_upp(endpos, buff + next_buff_length, length);
+ memcpy(buff, leaf_buff,(size_t) leaf_length);
+ (*keyinfo->store_key)(keyinfo, buff+leaf_length, &key_inserted);
+ buff_length= (uint) (endpos-buff);
+ _ma_store_page_used(share, buff, buff_length);
+
+ /* remove key from anc_buff */
+ if (!(s_length=remove_key(keyinfo,key_reflength,keypos,anc_key,
+ anc_buff+anc_length,(my_off_t *) 0,
+ &key_deleted)))
+ goto err;
+
+ new_anc_length= anc_length - s_length;
+ _ma_store_page_used(share, anc_buff, new_anc_length);
+
+ if (buff_length <= (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE))
+ {
+ /* All keys fitted into one page */
+ next_page_link->changed= 1;
+ if (_ma_dispose(info, next_page, 0))
+ goto err;
+
+ memcpy(leaf_buff, buff, (size_t) buff_length);
+
+ if (share->now_transactional)
+ {
+ /* Log changes to parent page */
+ if (_ma_log_delete(info, anc_page, anc_buff, key_deleted.key_pos,
+ key_deleted.changed_length,
+ key_deleted.move_length))
+ goto err;
+ /*
+ Log changes to leaf page. Data for leaf page is in buff
+ which contains original leaf_buff, parting key and next_buff
+ */
+ if (_ma_log_suffix(info, leaf_page, leaf_buff,
+ leaf_length, buff_length))
+ goto err;
+ }
+ }
+ else
+ {
+ /*
+ Balancing didn't free a page, so we have to split 'buff' into two
+ pages:
+ - Find key in middle of buffer
+ - Store everything before key in 'leaf_buff'
+ - Pack key into anc_buff at position of deleted key
+ Note that anc_buff may overflow! (is handled by caller)
+ - Store remaining keys in next_page (buff)
+ */
+ MARIA_KEY_PARAM anc_key_inserted;
+
+ anc_end_pos= anc_buff + new_anc_length;
+ DBUG_PRINT("test",("anc_buff: 0x%lx anc_end_pos: 0x%lx",
+ (long) anc_buff, (long) anc_end_pos));
+ if (!first_key &&
+ !_ma_get_last_key(info,keyinfo,anc_buff,anc_key,keypos,&length))
+ goto err;
+ if (!(half_pos= _ma_find_half_pos(info, nod_flag, keyinfo, buff,
+ leaf_key, &key_length, &after_key)))
+ goto err;
+ new_leaf_length= (uint) (half_pos-buff);
+ memcpy(leaf_buff, buff, (size_t) new_leaf_length);
+ _ma_store_page_used(share, leaf_buff, new_leaf_length);
+
+ /* Correct new keypointer to leaf_page */
+ half_pos=after_key;
+ _ma_kpointer(info,leaf_key+key_length,next_page);
+
+ /* Save key in anc_buff */
+ prev_key= (first_key ? (uchar*) 0 : anc_key);
+ t_length=(*keyinfo->pack_key)(keyinfo,key_reflength,
+ (keypos == anc_end_pos ? (uchar*) 0 :
+ keypos),
+ prev_key, prev_key,
+ leaf_key, &anc_key_inserted);
+ if (t_length >= 0)
+ bmove_upp(anc_end_pos+t_length, anc_end_pos,
+ (uint) (anc_end_pos - keypos));
+ else
+ bmove(keypos,keypos-t_length,(uint) (anc_end_pos-keypos)+t_length);
+ (*keyinfo->store_key)(keyinfo,keypos, &anc_key_inserted);
+ new_anc_length+= t_length;
+ _ma_store_page_used(share, anc_buff, new_anc_length);
+
+ /* Store key first in new page */
+ if (nod_flag)
+ bmove(buff+share->keypage_header, half_pos-nod_flag,
+ (size_t) nod_flag);
+ if (!(*keyinfo->get_key)(keyinfo,nod_flag,&half_pos,leaf_key))
+ goto err;
+ t_length=(int) (*keyinfo->pack_key)(keyinfo, nod_flag, (uchar*) 0,
+ (uchar*) 0, (uchar*) 0,
+ leaf_key, &key_inserted);
+ /* t_length will always be > 0 for a new page !*/
+ length= (uint) ((buff + buff_length) - half_pos);
+ bmove(buff+p_length+t_length, half_pos, (size_t) length);
+ (*keyinfo->store_key)(keyinfo,buff+p_length, &key_inserted);
+ new_buff_length= length + t_length + p_length;
+ _ma_store_page_used(share, buff, new_buff_length);
+
+ if (share->now_transactional)
+ {
+ /*
+ Log changes to parent page
+ This has one key deleted from it and one key inserted to it at
+ keypos
+
+ ma_log_add ensures that we don't log changes that is outside of
+ key block size, as the REDO code can't handle that
+ */
+ if (_ma_log_add(info, anc_page, anc_buff, anc_length,
+ keypos,
+ anc_key_inserted.move_length +
+ max(anc_key_inserted.changed_length -
+ anc_key_inserted.move_length,
+ key_deleted.changed_length),
+ anc_key_inserted.move_length -
+ key_deleted.move_length, 1))
+ goto err;
+
+ /*
+ Log changes to leaf page.
+ This contains original data with new data added at end
+ */
+ DBUG_ASSERT(leaf_length <= new_leaf_length);
+ if (_ma_log_suffix(info, leaf_page, leaf_buff, leaf_length,
+ new_leaf_length))
+ goto err;
+ /*
+ Log changes to next page
+
+ This contains original data with some prefix data deleted and
+ some compressed data at start possible extended
+
+ Data in buff was originally:
+ org_leaf_buff [leaf_length]
+ separator_key [buff_key_inserted.move_length]
+ next_key_changes [buff_key_inserted.changed_length -move_length]
+ next_page_data [next_buff_length - p_length -
+ (buff_key_inserted.changed_length -move_length)]
+
+ After changes it's now:
+ unpacked_key [key_inserted.changed_length]
+ next_suffix [next_buff_length - key_inserted.changed_length]
+
+ */
+ DBUG_ASSERT(new_buff_length <= next_buff_length);
+ if (_ma_log_prefix(info, next_page, buff,
+ key_inserted.changed_length,
+ (int) (new_buff_length - next_buff_length)))
+ goto err;
+ }
+ next_page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, next_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
+ buff))
+ goto err;
+ }
+
+ leaf_page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, leaf_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
+ leaf_buff))
+ goto err;
+ DBUG_RETURN(new_anc_length <=
+ ((info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH :
+ (uint) keyinfo->underflow_block_length)));
+ }
+
+ DBUG_PRINT("test",("use left page"));
+
+ keypos= _ma_get_last_key(info,keyinfo,anc_buff,anc_key,keypos,&length);
+ if (!keypos)
+ goto err;
+ next_page= _ma_kpos(key_reflength,keypos);
+ if (!_ma_fetch_keypage(info, keyinfo, next_page, PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, buff, 0, &next_page_link))
+ goto err;
+ buff_length= _ma_get_page_used(share, buff);
+ endpos= buff + buff_length;
+ DBUG_DUMP("prev",buff,buff_length);
+
+ /* find keys to make a big key-page */
+ bmove(next_keypos - key_reflength, leaf_buff + share->keypage_header,
+ key_reflength);
+ next_keypos=keypos;
+ if (!(*keyinfo->get_key)(keyinfo,key_reflength,&next_keypos,
+ anc_key))
+ goto err;
+ if (!_ma_get_last_key(info,keyinfo,buff,leaf_key,endpos,&length))
+ goto err;
+
+ /* merge pages and put parting key from anc_buff between */
+ prev_key=(leaf_length == p_length ? (uchar*) 0 : leaf_key);
+ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,
+ (leaf_length == p_length ?
+ (uchar*) 0 : leaf_buff+p_length),
+ prev_key, prev_key,
+ anc_key, &key_inserted);
+ if (t_length >= 0)
+ bmove(endpos+t_length, leaf_buff+p_length,
+ (size_t) (leaf_length-p_length));
+ else /* We gained space */
+ bmove(endpos,leaf_buff+((int) p_length-t_length),
+ (size_t) (leaf_length-p_length+t_length));
+ (*keyinfo->store_key)(keyinfo,endpos, &key_inserted);
+
+ /* Remember for logging how many bytes of leaf_buff that are not changed */
+ DBUG_ASSERT((int) key_inserted.changed_length >= key_inserted.move_length);
+ unchanged_leaf_length= leaf_length - (key_inserted.changed_length -
+ key_inserted.move_length);
+
+ new_buff_length= buff_length + leaf_length - p_length + t_length;
+ _ma_store_page_used(share, buff, new_buff_length);
+
+ /* remove key from anc_buff */
+ if (!(s_length= remove_key(keyinfo,key_reflength,keypos,anc_key,
+ anc_buff+anc_length,(my_off_t *) 0,
+ &key_deleted)))
+ goto err;
+
+ new_anc_length= anc_length - s_length;
+ _ma_store_page_used(share, anc_buff, new_anc_length);
+
+ if (new_buff_length <= (uint) (keyinfo->block_length -
+ KEYPAGE_CHECKSUM_SIZE))
+ {
+ /* All keys fitted into one page */
+ leaf_page_link->changed= 1;
+ if (_ma_dispose(info, leaf_page, 0))
+ goto err;
+
+ if (share->now_transactional)
+ {
+ /* Log changes to parent page */
+ if (_ma_log_delete(info, anc_page, anc_buff, key_deleted.key_pos,
+ key_deleted.changed_length, key_deleted.move_length))
+
+ goto err;
+ /*
+ Log changes to next page. Data for leaf page is in buff
+ that contains original leaf_buff, parting key and next_buff
+ */
+ if (_ma_log_suffix(info, next_page, buff,
+ buff_length, new_buff_length))
+ goto err;
+ }
+ }
+ else
+ {
+ /*
+ Balancing didn't free a page, so we have to split 'buff' into two
+ pages
+ - Find key in middle of buffer (buff)
+ - Pack key at half_buff into anc_buff at position of deleted key
+ Note that anc_buff may overflow! (is handled by caller)
+ - Move everything after middlekey to 'leaf_buff'
+ - Shorten buff at 'endpos'
+ */
+ MARIA_KEY_PARAM anc_key_inserted;
+
+ if (first_key)
+ anc_pos= 0; /* First key */
+ else if (!_ma_get_last_key(info,keyinfo,anc_buff,anc_pos=anc_key,keypos,
+ &length))
+ goto err;
+ if (!(endpos= _ma_find_half_pos(info, nod_flag, keyinfo, buff, leaf_key,
+ &key_length, &half_pos)))
+ goto err;
+
+ /* Correct new keypointer to leaf_page */
+ _ma_kpointer(info,leaf_key+key_length,leaf_page);
+
+ /* Save key in anc_buff */
+ DBUG_DUMP("anc_buff", anc_buff, new_anc_length);
+ DBUG_DUMP("key_to_anc",leaf_key,key_length);
+ anc_end_pos= anc_buff + new_anc_length;
+ t_length=(*keyinfo->pack_key)(keyinfo,key_reflength,
+ keypos == anc_end_pos ? (uchar*) 0
+ : keypos,
+ anc_pos, anc_pos,
+ leaf_key, &anc_key_inserted);
+ if (t_length >= 0)
+ bmove_upp(anc_end_pos+t_length, anc_end_pos,
+ (uint) (anc_end_pos-keypos));
+ else
+ bmove(keypos,keypos-t_length,(uint) (anc_end_pos-keypos)+t_length);
+ (*keyinfo->store_key)(keyinfo,keypos, &anc_key_inserted);
+ new_anc_length+= t_length;
+ _ma_store_page_used(share, anc_buff, new_anc_length);
+
+ /* Store first key on new page */
+ if (nod_flag)
+ bmove(leaf_buff + share->keypage_header, half_pos-nod_flag,
+ (size_t) nod_flag);
+ if (!(length=(*keyinfo->get_key)(keyinfo,nod_flag,&half_pos,leaf_key)))
+ goto err;
+ DBUG_DUMP("key_to_leaf",leaf_key,length);
+ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, (uchar*) 0,
+ (uchar*) 0, (uchar*) 0, leaf_key,
+ &key_inserted);
+ /* t_length will always be > 0 for a new page !*/
+ length= (uint) ((buff + new_buff_length) - half_pos);
+ DBUG_PRINT("info",("t_length: %d length: %d",t_length,(int) length));
+ bmove(leaf_buff+p_length+t_length, half_pos, (size_t) length);
+ (*keyinfo->store_key)(keyinfo,leaf_buff+p_length, &key_inserted);
+ new_leaf_length= length + t_length + p_length;
+ _ma_store_page_used(share, leaf_buff, new_leaf_length);
+ new_buff_length= (uint) (endpos - buff);
+ _ma_store_page_used(share, buff, new_buff_length);
+
+ if (share->now_transactional)
+ {
+ /*
+ Log changes to parent page
+ This has one key deleted from it and one key inserted to it at
+ keypos
+
+ ma_log_add() ensures that we don't log changes that is outside of
+ key block size, as the REDO code can't handle that
+ */
+ if (_ma_log_add(info, anc_page, anc_buff, anc_length,
+ keypos,
+ anc_key_inserted.move_length +
+ max(anc_key_inserted.changed_length -
+ anc_key_inserted.move_length,
+ key_deleted.changed_length),
+ anc_key_inserted.move_length -
+ key_deleted.move_length, 1))
+ goto err;
+
+ /*
+ Log changes to leaf page.
+ This contains original data with new data added first
+ */
+ DBUG_ASSERT(leaf_length <= new_leaf_length);
+ if (_ma_log_prefix(info, leaf_page, leaf_buff,
+ new_leaf_length - unchanged_leaf_length,
+ (int) (new_leaf_length - leaf_length)))
+ goto err;
+ /*
+ Log changes to next page
+ This contains original data with some suffix data deleted
+
+ */
+ DBUG_ASSERT(new_buff_length <= buff_length);
+ if (_ma_log_suffix(info, next_page, buff,
+ buff_length, new_buff_length))
+ goto err;
+ }
+
+ leaf_page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, leaf_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
+ leaf_buff))
+ goto err;
+ }
+ next_page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, next_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS, buff))
+ goto err;
+
+ DBUG_RETURN(new_anc_length <=
+ ((info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH :
+ (uint) keyinfo->underflow_block_length)));
+
+err:
+ DBUG_RETURN(-1);
+} /* underflow */
+
+
+/**
+ @brief Remove a key from page
+
+ @fn remove_key()
+ keyinfo Key handle
+ keypos Where on page key starts
+ lastkey Unpacked version of key to be removed
+ page_end Pointer to end of page
+ next_block If <> 0 and node-page, this is set to address of
+ next page
+ s_temp Information about what changes was done one the page:
+ s_temp.key_pos Start of key
+ s_temp.move_length Number of bytes removed at keypos
+ s_temp.changed_length Number of bytes changed at keypos
+
+ @todo
+ The current code doesn't handle the case that the next key may be
+ packed better against the previous key if there is a case difference
+
+ @return
+ @retval 0 error
+ @retval # How many chars was removed
+*/
+
+static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ uchar *keypos, uchar *lastkey,
+ uchar *page_end, my_off_t *next_block,
+ MARIA_KEY_PARAM *s_temp)
+{
+ int s_length;
+ uchar *start;
+ DBUG_ENTER("remove_key");
+ DBUG_PRINT("enter", ("keypos: 0x%lx page_end: 0x%lx",
+ (long) keypos, (long) page_end));
+
+ start= s_temp->key_pos= keypos;
+ s_temp->changed_length= 0;
+ if (!(keyinfo->flag &
+ (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY |
+ HA_BINARY_PACK_KEY)))
+ {
+ s_length=(int) (keyinfo->keylength+nod_flag);
+ if (next_block && nod_flag)
+ *next_block= _ma_kpos(nod_flag,keypos+s_length);
+ }
+ else
+ { /* Let keypos point at next key */
+ /* Calculate length of key */
+ if (!(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey))
+ DBUG_RETURN(0); /* Error */
+
+ if (next_block && nod_flag)
+ *next_block= _ma_kpos(nod_flag,keypos);
+ s_length=(int) (keypos-start);
+ if (keypos != page_end)
+ {
+ if (keyinfo->flag & HA_BINARY_PACK_KEY)
+ {
+ uchar *old_key= start;
+ uint next_length,prev_length,prev_pack_length;
+
+ /* keypos points here on start of next key */
+ get_key_length(next_length,keypos);
+ get_key_pack_length(prev_length,prev_pack_length,old_key);
+ if (next_length > prev_length)
+ {
+ uint diff= (next_length-prev_length);
+ /* We have to copy data from the current key to the next key */
+ keypos-= diff + prev_pack_length;
+ store_key_length(keypos, prev_length);
+ bmove(keypos + prev_pack_length, lastkey + prev_length, diff);
+ s_length=(int) (keypos-start);
+ s_temp->changed_length= diff + prev_pack_length;
+ }
+ }
+ else
+ {
+ /* Check if a variable length first key part */
+ if ((keyinfo->seg->flag & HA_PACK_KEY) && *keypos & 128)
+ {
+ /* Next key is packed against the current one */
+ uint next_length,prev_length,prev_pack_length,lastkey_length,
+ rest_length;
+ if (keyinfo->seg[0].length >= 127)
+ {
+ if (!(prev_length=mi_uint2korr(start) & 32767))
+ goto end;
+ next_length=mi_uint2korr(keypos) & 32767;
+ keypos+=2;
+ prev_pack_length=2;
+ }
+ else
+ {
+ if (!(prev_length= *start & 127))
+ goto end; /* Same key as previous*/
+ next_length= *keypos & 127;
+ keypos++;
+ prev_pack_length=1;
+ }
+ if (!(*start & 128))
+ prev_length=0; /* prev key not packed */
+ if (keyinfo->seg[0].flag & HA_NULL_PART)
+ lastkey++; /* Skip null marker */
+ get_key_length(lastkey_length,lastkey);
+ if (!next_length) /* Same key after */
+ {
+ next_length=lastkey_length;
+ rest_length=0;
+ }
+ else
+ get_key_length(rest_length,keypos);
+
+ if (next_length >= prev_length)
+ {
+ /* Next key is based on deleted key */
+ uint pack_length;
+ uint diff= (next_length-prev_length);
+
+ /* keypos points to data of next key (after key length) */
+ bmove(keypos - diff, lastkey + prev_length, diff);
+ rest_length+= diff;
+ pack_length= prev_length ? get_pack_length(rest_length): 0;
+ keypos-= diff + pack_length + prev_pack_length;
+ s_length=(int) (keypos-start);
+ if (prev_length) /* Pack against prev key */
+ {
+ *keypos++= start[0];
+ if (prev_pack_length == 2)
+ *keypos++= start[1];
+ store_key_length(keypos,rest_length);
+ }
+ else
+ {
+ /* Next key is not packed anymore */
+ if (keyinfo->seg[0].flag & HA_NULL_PART)
+ {
+ rest_length++; /* Mark not null */
+ }
+ if (prev_pack_length == 2)
+ {
+ mi_int2store(keypos,rest_length);
+ }
+ else
+ *keypos= rest_length;
+ }
+ s_temp->changed_length= diff + pack_length + prev_pack_length;
+ }
+ }
+ }
+ }
+ }
+ end:
+ bmove(start, start+s_length, (uint) (page_end-start-s_length));
+ s_temp->move_length= s_length;
+ DBUG_RETURN((uint) s_length);
+} /* remove_key */
+
+
+/****************************************************************************
+ Logging of redos
+****************************************************************************/
+
+/**
+ @brief log entry where some parts are deleted and some things are changed
+
+ @fn _ma_log_delete()
+ @param info Maria handler
+ @param page Pageaddress for changed page
+ @param buff Page buffer
+ @param key_pos Start of change area
+ @param changed_length How many bytes where changed at key_pos
+ @param move_length How many bytes where deleted at key_pos
+
+*/
+
+static my_bool _ma_log_delete(MARIA_HA *info, my_off_t page, uchar *buff,
+ uchar *key_pos, uint changed_length,
+ uint move_length)
+{
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 9 + 7], *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 3];
+ MARIA_SHARE *share= info->s;
+ uint translog_parts;
+ uint offset= (uint) (key_pos - buff);
+ DBUG_ENTER("_ma_log_delete");
+ DBUG_PRINT("enter", ("page: %lu changed_length: %u move_length: %d",
+ (ulong) page, changed_length, move_length));
+ DBUG_ASSERT(share->now_transactional && move_length);
+ DBUG_ASSERT(offset + changed_length <= _ma_get_page_used(share, buff));
+
+ /* Store address of new root page */
+ page/= share->block_size;
+ page_store(log_data + FILEID_STORE_SIZE, page);
+ log_pos= log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE;
+ log_pos[0]= KEY_OP_OFFSET;
+ int2store(log_pos+1, offset);
+ log_pos[3]= KEY_OP_SHIFT;
+ int2store(log_pos+4, -(int) move_length);
+ log_pos+= 6;
+ translog_parts= 1;
+ if (changed_length)
+ {
+ log_pos[0]= KEY_OP_CHANGE;
+ int2store(log_pos+1, changed_length);
+ log_pos+= 3;
+ translog_parts= 2;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= buff + offset;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= changed_length;
+ }
+
+#ifdef EXTRA_DEBUG_KEY_CHANGES
+ {
+ int page_length= _ma_get_page_used(share, buff);
+ ha_checksum crc;
+ crc= my_checksum(0, buff + LSN_STORE_SIZE, page_length - LSN_STORE_SIZE);
+ log_pos[0]= KEY_OP_CHECK;
+ int2store(log_pos+1, page_length);
+ int4store(log_pos+3, crc);
+
+ log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7;
+ changed_length+= 7;
+ translog_parts++;
+ }
+#endif
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data);
+
+ if (translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length +
+ changed_length,
+ TRANSLOG_INTERNAL_PARTS + translog_parts,
+ log_array, log_data, NULL))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c
new file mode 100644
index 00000000000..43bdd42588a
--- /dev/null
+++ b/storage/maria/ma_delete_all.c
@@ -0,0 +1,158 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Remove all rows from a MARIA table */
+/* This clears the status information and truncates files */
+
+#include "maria_def.h"
+#include "trnman.h"
+
+/**
+ @brief deletes all rows from a table
+
+ @param info Maria handler
+
+ @return Operation status
+ @retval 0 ok
+ @retval 1 error
+*/
+
+int maria_delete_all_rows(MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+ my_bool log_record;
+ DBUG_ENTER("maria_delete_all_rows");
+
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ {
+ DBUG_RETURN(my_errno=EACCES);
+ }
+ /**
+ @todo LOCK take X-lock on table here.
+ When we have versioning, if some other thread is looking at this table,
+ we cannot shrink the file like this.
+ */
+ if (_ma_readinfo(info,F_WRLCK,1))
+ DBUG_RETURN(my_errno);
+ log_record= share->now_transactional && !share->temporary;
+ if (_ma_mark_file_changed(info))
+ goto err;
+
+ if (log_record)
+ {
+ /*
+ This record will be used by Recovery to finish the deletion if it
+ crashed. We force it because it's a non-undoable operation.
+ */
+ LSN lsn;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
+ uchar log_data[FILEID_STORE_SIZE];
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+ if (unlikely(translog_write_record(&lsn, LOGREC_REDO_DELETE_ALL,
+ info->trn, info, 0,
+ sizeof(log_array)/sizeof(log_array[0]),
+ log_array, log_data, NULL) ||
+ translog_flush(lsn)))
+ goto err;
+ /*
+ If we fail in this function after this point, log and table will be
+ inconsistent.
+ */
+ }
+
+ /*
+ For recovery it matters that this is called after writing the log record,
+ so that resetting state.records and state.checksum actually happens under
+ log's mutex.
+ */
+ _ma_reset_status(info);
+
+ /*
+ If we are using delayed keys or if the user has done changes to the tables
+ since it was locked then there may be key blocks in the page cache. Or
+ there may be data blocks there. We need to throw them away or they may
+ re-enter the emptied table later.
+ */
+ if (_ma_flush_table_files(info, MARIA_FLUSH_DATA|MARIA_FLUSH_INDEX,
+ FLUSH_IGNORE_CHANGED, FLUSH_IGNORE_CHANGED) ||
+ my_chsize(info->dfile.file, 0, 0, MYF(MY_WME)) ||
+ my_chsize(share->kfile.file, share->base.keystart, 0, MYF(MY_WME)) )
+ goto err;
+
+ if (_ma_initialize_data_file(share, info->dfile.file))
+ goto err;
+
+ /*
+ The operations above on the index/data file will be forced to disk at
+ Checkpoint or maria_close() time. So we can reset:
+ */
+ if (log_record)
+ info->trn->rec_lsn= LSN_IMPOSSIBLE;
+
+ VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
+#ifdef HAVE_MMAP
+ /* Resize mmaped area */
+ rw_wrlock(&info->s->mmap_lock);
+ _ma_remap_file(info, (my_off_t)0);
+ rw_unlock(&info->s->mmap_lock);
+#endif
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ DBUG_RETURN(0);
+
+err:
+ {
+ int save_errno=my_errno;
+ VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
+ info->update|=HA_STATE_WRITTEN; /* Buffer changed */
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ DBUG_RETURN(my_errno=save_errno);
+ }
+} /* maria_delete_all_rows */
+
+
+/*
+ Reset status information
+
+ SYNOPSIS
+ _ma_reset_status()
+ maria Maria handler
+
+ DESCRIPTION
+ Resets data and index file information as if the file would be empty
+ Files are not touched.
+*/
+
+void _ma_reset_status(MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+ MARIA_STATE_INFO *state= &share->state;
+ uint i;
+
+ info->state->records= info->state->del= state->split= 0;
+ state->changed= 0; /* File is optimized */
+ state->dellink= HA_OFFSET_ERROR;
+ state->sortkey= (ushort) ~0;
+ info->state->key_file_length= share->base.keystart;
+ info->state->data_file_length= 0;
+ info->state->empty= info->state->key_empty= 0;
+ info->state->checksum= 0;
+
+ /* Drop the delete key chain. */
+ state->key_del= HA_OFFSET_ERROR;
+ /* Clear all keys */
+ for (i=0 ; i < share->base.keys ; i++)
+ state->key_root[i]= HA_OFFSET_ERROR;
+}
diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c
new file mode 100644
index 00000000000..e6cbd961b7a
--- /dev/null
+++ b/storage/maria/ma_delete_table.c
@@ -0,0 +1,111 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "ma_fulltext.h"
+#include "trnman_public.h"
+
+/**
+ @brief drops (deletes) a table
+
+ @param name table's name
+
+ @return Operation status
+ @retval 0 ok
+ @retval 1 error
+*/
+
+int maria_delete_table(const char *name)
+{
+ char from[FN_REFLEN];
+#ifdef USE_RAID
+ uint raid_type=0,raid_chunks=0;
+#endif
+ MARIA_HA *info;
+ myf sync_dir;
+ DBUG_ENTER("maria_delete_table");
+
+#ifdef EXTRA_DEBUG
+ _ma_check_table_is_closed(name,"delete");
+#endif
+ /** @todo LOCK take X-lock on table */
+ /*
+ We need to know if this table is transactional.
+ When built with RAID support, we also need to determine if this table
+ makes use of the raid feature. If yes, we need to remove all raid
+ chunks. This is done with my_raid_delete(). Unfortunately it is
+ necessary to open the table just to check this. We use
+ 'open_for_repair' to be able to open even a crashed table. If even
+ this open fails, we assume no raid configuration for this table
+ and try to remove the normal data file only. This may however
+ leave the raid chunks behind.
+ */
+ if (!(info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR)))
+ {
+#ifdef USE_RAID
+ raid_type= 0;
+#endif
+ sync_dir= 0;
+ }
+ else
+ {
+#ifdef USE_RAID
+ raid_type= info->s->base.raid_type;
+ raid_chunks= info->s->base.raid_chunks;
+#endif
+ sync_dir= (info->s->now_transactional && !info->s->temporary &&
+ !maria_in_recovery) ?
+ MY_SYNC_DIR : 0;
+ maria_close(info);
+ }
+#ifdef USE_RAID
+#ifdef EXTRA_DEBUG
+ _ma_check_table_is_closed(name,"delete");
+#endif
+#endif /* USE_RAID */
+
+ if (sync_dir)
+ {
+ /*
+ For this log record to be of any use for Recovery, we need the upper
+ MySQL layer to be crash-safe in DDLs.
+ For now this record can serve when we apply logs to a backup, so we sync
+ it.
+ */
+ LSN lsn;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char *)name;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= strlen(name) + 1;
+ if (unlikely(translog_write_record(&lsn, LOGREC_REDO_DROP_TABLE,
+ &dummy_transaction_object, NULL,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length,
+ sizeof(log_array)/sizeof(log_array[0]),
+ log_array, NULL, NULL) ||
+ translog_flush(lsn)))
+ DBUG_RETURN(1);
+ }
+
+ fn_format(from,name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
+ if (my_delete_with_symlink(from, MYF(MY_WME | sync_dir)))
+ DBUG_RETURN(my_errno);
+ fn_format(from,name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
+#ifdef USE_RAID
+ if (raid_type)
+ DBUG_RETURN(my_raid_delete(from, raid_chunks, MYF(MY_WME | sync_dir)) ?
+ my_errno : 0);
+#endif
+ DBUG_RETURN(my_delete_with_symlink(from, MYF(MY_WME | sync_dir)) ?
+ my_errno : 0);
+}
diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c
new file mode 100644
index 00000000000..cb33160bdf6
--- /dev/null
+++ b/storage/maria/ma_dynrec.c
@@ -0,0 +1,1976 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Functions to handle space-packed-records and blobs
+
+ A row may be stored in one or more linked blocks.
+ The block size is between MARIA_MIN_BLOCK_LENGTH and MARIA_MAX_BLOCK_LENGTH.
+ Each block is aligned on MARIA_DYN_ALIGN_SIZE.
+ The reson for the max block size is to not have too many different types
+ of blocks. For the differnet block types, look at _ma_get_block_info()
+*/
+
+#include "maria_def.h"
+
+static my_bool write_dynamic_record(MARIA_HA *info,const uchar *record,
+ ulong reclength);
+static int _ma_find_writepos(MARIA_HA *info,ulong reclength,my_off_t *filepos,
+ ulong *length);
+static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos,
+ uchar *record, ulong reclength);
+static my_bool delete_dynamic_record(MARIA_HA *info,MARIA_RECORD_POS filepos,
+ uint second_read);
+static my_bool _ma_cmp_buffer(File file, const uchar *buff, my_off_t filepos,
+ uint length);
+
+#ifdef THREAD
+/* Play it safe; We have a small stack when using threads */
+#undef my_alloca
+#undef my_afree
+#define my_alloca(A) my_malloc((A),MYF(0))
+#define my_afree(A) my_free((A),MYF(0))
+#endif
+
+ /* Interface function from MARIA_HA */
+
+#ifdef HAVE_MMAP
+
+/*
+ Create mmaped area for MARIA handler
+
+ SYNOPSIS
+ _ma_dynmap_file()
+ info MARIA handler
+
+ RETURN
+ 0 ok
+ 1 error.
+*/
+
+my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size)
+{
+ DBUG_ENTER("_ma_dynmap_file");
+ if (size > (my_off_t) (~((size_t) 0)) - MEMMAP_EXTRA_MARGIN)
+ {
+ DBUG_PRINT("warning", ("File is too large for mmap"));
+ DBUG_RETURN(1);
+ }
+ /*
+ Ingo wonders if it is good to use MAP_NORESERVE. From the Linux man page:
+ MAP_NORESERVE
+ Do not reserve swap space for this mapping. When swap space is
+ reserved, one has the guarantee that it is possible to modify the
+ mapping. When swap space is not reserved one might get SIGSEGV
+ upon a write if no physical memory is available.
+ */
+ info->s->file_map= (uchar*)
+ my_mmap(0, (size_t)(size + MEMMAP_EXTRA_MARGIN),
+ info->s->mode==O_RDONLY ? PROT_READ :
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_NORESERVE,
+ info->dfile.file, 0L);
+ if (info->s->file_map == (uchar*) MAP_FAILED)
+ {
+ info->s->file_map= NULL;
+ DBUG_RETURN(1);
+ }
+#if defined(HAVE_MADVISE)
+ madvise((char*) info->s->file_map, size, MADV_RANDOM);
+#endif
+ info->s->mmaped_length= size;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Resize mmaped area for MARIA handler
+
+ SYNOPSIS
+ _ma_remap_file()
+ info MARIA handler
+
+ RETURN
+*/
+
+void _ma_remap_file(MARIA_HA *info, my_off_t size)
+{
+ if (info->s->file_map)
+ {
+ VOID(my_munmap((char*) info->s->file_map,
+ (size_t) info->s->mmaped_length + MEMMAP_EXTRA_MARGIN));
+ _ma_dynmap_file(info, size);
+ }
+}
+#endif
+
+
+/*
+ Read bytes from MySAM handler, using mmap or pread
+
+ SYNOPSIS
+ _ma_mmap_pread()
+ info MARIA handler
+ Buffer Input buffer
+ Count Count of bytes for read
+ offset Start position
+ MyFlags
+
+ RETURN
+ 0 ok
+*/
+
+size_t _ma_mmap_pread(MARIA_HA *info, uchar *Buffer,
+ size_t Count, my_off_t offset, myf MyFlags)
+{
+ DBUG_PRINT("info", ("maria_read with mmap %d\n", info->dfile.file));
+ if (info->s->concurrent_insert)
+ rw_rdlock(&info->s->mmap_lock);
+
+ /*
+ The following test may fail in the following cases:
+ - We failed to remap a memory area (fragmented memory?)
+ - This thread has done some writes, but not yet extended the
+ memory mapped area.
+ */
+
+ if (info->s->mmaped_length >= offset + Count)
+ {
+ memcpy(Buffer, info->s->file_map + offset, Count);
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->mmap_lock);
+ return 0;
+ }
+ else
+ {
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->mmap_lock);
+ return my_pread(info->dfile.file, Buffer, Count, offset, MyFlags);
+ }
+}
+
+
+ /* wrapper for my_pread in case if mmap isn't used */
+
+size_t _ma_nommap_pread(MARIA_HA *info, uchar *Buffer,
+ size_t Count, my_off_t offset, myf MyFlags)
+{
+ return my_pread(info->dfile.file, Buffer, Count, offset, MyFlags);
+}
+
+
+/*
+ Write bytes to MySAM handler, using mmap or pwrite
+
+ SYNOPSIS
+ _ma_mmap_pwrite()
+ info MARIA handler
+ Buffer Output buffer
+ Count Count of bytes for write
+ offset Start position
+ MyFlags
+
+ RETURN
+ 0 ok
+ !=0 error. In this case return error from pwrite
+*/
+
+size_t _ma_mmap_pwrite(MARIA_HA *info, const uchar *Buffer,
+ size_t Count, my_off_t offset, myf MyFlags)
+{
+ DBUG_PRINT("info", ("maria_write with mmap %d\n", info->dfile.file));
+ if (info->s->concurrent_insert)
+ rw_rdlock(&info->s->mmap_lock);
+
+ /*
+ The following test may fail in the following cases:
+ - We failed to remap a memory area (fragmented memory?)
+ - This thread has done some writes, but not yet extended the
+ memory mapped area.
+ */
+
+ if (info->s->mmaped_length >= offset + Count)
+ {
+ memcpy(info->s->file_map + offset, Buffer, Count);
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->mmap_lock);
+ return 0;
+ }
+ else
+ {
+ info->s->nonmmaped_inserts++;
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->mmap_lock);
+ return my_pwrite(info->dfile.file, Buffer, Count, offset, MyFlags);
+ }
+
+}
+
+
+ /* wrapper for my_pwrite in case if mmap isn't used */
+
+size_t _ma_nommap_pwrite(MARIA_HA *info, const uchar *Buffer,
+ size_t Count, my_off_t offset, myf MyFlags)
+{
+ return my_pwrite(info->dfile.file, Buffer, Count, offset, MyFlags);
+}
+
+
+my_bool _ma_write_dynamic_record(MARIA_HA *info, const uchar *record)
+{
+ ulong reclength= _ma_rec_pack(info,info->rec_buff + MARIA_REC_BUFF_OFFSET,
+ record);
+ return (write_dynamic_record(info,info->rec_buff + MARIA_REC_BUFF_OFFSET,
+ reclength));
+}
+
+my_bool _ma_update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS pos,
+ const uchar *oldrec __attribute__ ((unused)),
+ const uchar *record)
+{
+ uint length= _ma_rec_pack(info, info->rec_buff + MARIA_REC_BUFF_OFFSET,
+ record);
+ return (update_dynamic_record(info, pos,
+ info->rec_buff + MARIA_REC_BUFF_OFFSET,
+ length));
+}
+
+
+my_bool _ma_write_blob_record(MARIA_HA *info, const uchar *record)
+{
+ uchar *rec_buff;
+ int error;
+ ulong reclength,reclength2,extra;
+
+ extra= (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
+ MARIA_DYN_DELETE_BLOCK_HEADER+1);
+ reclength= (info->s->base.pack_reclength +
+ _ma_calc_total_blob_length(info,record)+ extra);
+ if (!(rec_buff=(uchar*) my_alloca(reclength)))
+ {
+ my_errno= HA_ERR_OUT_OF_MEM; /* purecov: inspected */
+ return(1);
+ }
+ reclength2= _ma_rec_pack(info,
+ rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER),
+ record);
+ DBUG_PRINT("info",("reclength: %lu reclength2: %lu",
+ reclength, reclength2));
+ DBUG_ASSERT(reclength2 <= reclength);
+ error= write_dynamic_record(info,
+ rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER),
+ reclength2);
+ my_afree(rec_buff);
+ return(error != 0);
+}
+
+
+my_bool _ma_update_blob_record(MARIA_HA *info, MARIA_RECORD_POS pos,
+ const uchar *oldrec __attribute__ ((unused)),
+ const uchar *record)
+{
+ uchar *rec_buff;
+ int error;
+ ulong reclength,extra;
+
+ extra= (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
+ MARIA_DYN_DELETE_BLOCK_HEADER);
+ reclength= (info->s->base.pack_reclength+
+ _ma_calc_total_blob_length(info,record)+ extra);
+#ifdef NOT_USED /* We now support big rows */
+ if (reclength > MARIA_DYN_MAX_ROW_LENGTH)
+ {
+ my_errno=HA_ERR_TO_BIG_ROW;
+ return 1;
+ }
+#endif
+ if (!(rec_buff=(uchar*) my_alloca(reclength)))
+ {
+ my_errno= HA_ERR_OUT_OF_MEM; /* purecov: inspected */
+ return(1);
+ }
+ reclength= _ma_rec_pack(info,rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER),
+ record);
+ error=update_dynamic_record(info,pos,
+ rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER),
+ reclength);
+ my_afree(rec_buff);
+ return(error != 0);
+}
+
+
+my_bool _ma_delete_dynamic_record(MARIA_HA *info,
+ const uchar *record __attribute__ ((unused)))
+{
+ return delete_dynamic_record(info, info->cur_row.lastpos, 0);
+}
+
+
+ /* Write record to data-file */
+
+static my_bool write_dynamic_record(MARIA_HA *info, const uchar *record,
+ ulong reclength)
+{
+ int flag;
+ ulong length;
+ my_off_t filepos;
+ DBUG_ENTER("write_dynamic_record");
+
+ flag=0;
+ do
+ {
+ if (_ma_find_writepos(info,reclength,&filepos,&length))
+ goto err;
+ if (_ma_write_part_record(info,filepos,length,
+ (info->append_insert_at_end ?
+ HA_OFFSET_ERROR : info->s->state.dellink),
+ (uchar**) &record,&reclength,&flag))
+ goto err;
+ } while (reclength);
+
+ DBUG_RETURN(0);
+err:
+ DBUG_RETURN(1);
+}
+
+
+ /* Get a block for data ; The given data-area must be used !! */
+
+static int _ma_find_writepos(MARIA_HA *info,
+ ulong reclength, /* record length */
+ my_off_t *filepos, /* Return file pos */
+ ulong *length) /* length of block at filepos */
+{
+ MARIA_BLOCK_INFO block_info;
+ ulong tmp;
+ DBUG_ENTER("_ma_find_writepos");
+
+ if (info->s->state.dellink != HA_OFFSET_ERROR &&
+ !info->append_insert_at_end)
+ {
+ /* Deleted blocks exists; Get last used block */
+ *filepos=info->s->state.dellink;
+ block_info.second_read=0;
+ info->rec_cache.seek_not_done=1;
+ if (!(_ma_get_block_info(&block_info, info->dfile.file,
+ info->s->state.dellink) &
+ BLOCK_DELETED))
+ {
+ DBUG_PRINT("error",("Delete link crashed"));
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ DBUG_RETURN(-1);
+ }
+ info->s->state.dellink=block_info.next_filepos;
+ info->state->del--;
+ info->state->empty-= block_info.block_len;
+ *length= block_info.block_len;
+ }
+ else
+ {
+ /* No deleted blocks; Allocate a new block */
+ *filepos=info->state->data_file_length;
+ if ((tmp=reclength+3 + test(reclength >= (65520-3))) <
+ info->s->base.min_block_length)
+ tmp= info->s->base.min_block_length;
+ else
+ tmp= ((tmp+MARIA_DYN_ALIGN_SIZE-1) &
+ (~ (ulong) (MARIA_DYN_ALIGN_SIZE-1)));
+ if (info->state->data_file_length >
+ (info->s->base.max_data_file_length - tmp))
+ {
+ my_errno=HA_ERR_RECORD_FILE_FULL;
+ DBUG_RETURN(-1);
+ }
+ if (tmp > MARIA_MAX_BLOCK_LENGTH)
+ tmp=MARIA_MAX_BLOCK_LENGTH;
+ *length= tmp;
+ info->state->data_file_length+= tmp;
+ info->s->state.split++;
+ info->update|=HA_STATE_WRITE_AT_END;
+ }
+ DBUG_RETURN(0);
+} /* _ma_find_writepos */
+
+
+
+/*
+ Unlink a deleted block from the deleted list.
+ This block will be combined with the preceding or next block to form
+ a big block.
+*/
+
+static bool unlink_deleted_block(MARIA_HA *info, MARIA_BLOCK_INFO *block_info)
+{
+ DBUG_ENTER("unlink_deleted_block");
+ if (block_info->filepos == info->s->state.dellink)
+ {
+ /* First deleted block; We can just use this ! */
+ info->s->state.dellink=block_info->next_filepos;
+ }
+ else
+ {
+ MARIA_BLOCK_INFO tmp;
+ tmp.second_read=0;
+ /* Unlink block from the previous block */
+ if (!(_ma_get_block_info(&tmp, info->dfile.file, block_info->prev_filepos)
+ & BLOCK_DELETED))
+ DBUG_RETURN(1); /* Something is wrong */
+ mi_sizestore(tmp.header+4,block_info->next_filepos);
+ if (info->s->file_write(info, tmp.header+4,8,
+ block_info->prev_filepos+4, MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ /* Unlink block from next block */
+ if (block_info->next_filepos != HA_OFFSET_ERROR)
+ {
+ if (!(_ma_get_block_info(&tmp, info->dfile.file,
+ block_info->next_filepos)
+ & BLOCK_DELETED))
+ DBUG_RETURN(1); /* Something is wrong */
+ mi_sizestore(tmp.header+12,block_info->prev_filepos);
+ if (info->s->file_write(info, tmp.header+12,8,
+ block_info->next_filepos+12,
+ MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ }
+ }
+ /* We now have one less deleted block */
+ info->state->del--;
+ info->state->empty-= block_info->block_len;
+ info->s->state.split--;
+
+ /*
+ If this was a block that we where accessing through table scan
+ (maria_rrnd() or maria_scan(), then ensure that we skip over this block
+ when doing next maria_rrnd() or maria_scan().
+ */
+ if (info->cur_row.nextpos == block_info->filepos)
+ info->cur_row.nextpos+= block_info->block_len;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Add a backward link to delete block
+
+ SYNOPSIS
+ update_backward_delete_link()
+ info MARIA handler
+ delete_block Position to delete block to update.
+ If this is 'HA_OFFSET_ERROR', nothing will be done
+ filepos Position to block that 'delete_block' should point to
+
+ RETURN
+ 0 ok
+ 1 error. In this case my_error is set.
+*/
+
+static my_bool update_backward_delete_link(MARIA_HA *info,
+ my_off_t delete_block,
+ MARIA_RECORD_POS filepos)
+{
+ MARIA_BLOCK_INFO block_info;
+ DBUG_ENTER("update_backward_delete_link");
+
+ if (delete_block != HA_OFFSET_ERROR)
+ {
+ block_info.second_read=0;
+ if (_ma_get_block_info(&block_info, info->dfile.file, delete_block)
+ & BLOCK_DELETED)
+ {
+ uchar buff[8];
+ mi_sizestore(buff,filepos);
+ if (info->s->file_write(info,buff, 8, delete_block+12, MYF(MY_NABP)))
+ DBUG_RETURN(1); /* Error on write */
+ }
+ else
+ {
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ DBUG_RETURN(1); /* Wrong delete link */
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+/* Delete datarecord from database */
+/* info->rec_cache.seek_not_done is updated in cmp_record */
+
+static my_bool delete_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos,
+ uint second_read)
+{
+ uint length,b_type;
+ MARIA_BLOCK_INFO block_info,del_block;
+ int error;
+ my_bool remove_next_block;
+ DBUG_ENTER("delete_dynamic_record");
+
+ /* First add a link from the last block to the new one */
+ error= update_backward_delete_link(info, info->s->state.dellink, filepos);
+
+ block_info.second_read=second_read;
+ do
+ {
+ /* Remove block at 'filepos' */
+ if ((b_type= _ma_get_block_info(&block_info, info->dfile.file, filepos))
+ & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR) ||
+ (length=(uint) (block_info.filepos-filepos) +block_info.block_len) <
+ MARIA_MIN_BLOCK_LENGTH)
+ {
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ DBUG_RETURN(1);
+ }
+ /* Check if next block is a delete block */
+ del_block.second_read=0;
+ remove_next_block=0;
+ if (_ma_get_block_info(&del_block, info->dfile.file, filepos + length) &
+ BLOCK_DELETED && del_block.block_len+length <
+ MARIA_DYN_MAX_BLOCK_LENGTH)
+ {
+ /* We can't remove this yet as this block may be the head block */
+ remove_next_block=1;
+ length+=del_block.block_len;
+ }
+
+ block_info.header[0]=0;
+ mi_int3store(block_info.header+1,length);
+ mi_sizestore(block_info.header+4,info->s->state.dellink);
+ if (b_type & BLOCK_LAST)
+ bfill(block_info.header+12,8,255);
+ else
+ mi_sizestore(block_info.header+12,block_info.next_filepos);
+ if (info->s->file_write(info,(uchar*) block_info.header,20,filepos,
+ MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ info->s->state.dellink = filepos;
+ info->state->del++;
+ info->state->empty+=length;
+ filepos=block_info.next_filepos;
+
+ /* Now it's safe to unlink the deleted block directly after this one */
+ if (remove_next_block && unlink_deleted_block(info,&del_block))
+ error=1;
+ } while (!(b_type & BLOCK_LAST));
+
+ DBUG_RETURN(error);
+}
+
+
+ /* Write a block to datafile */
+
+int _ma_write_part_record(MARIA_HA *info,
+ my_off_t filepos, /* points at empty block */
+ ulong length, /* length of block */
+ my_off_t next_filepos,/* Next empty block */
+ uchar **record, /* pointer to record ptr */
+ ulong *reclength, /* length of *record */
+ int *flag) /* *flag == 0 if header */
+{
+ ulong head_length,res_length,extra_length,long_block,del_length;
+ uchar *pos,*record_end;
+ my_off_t next_delete_block;
+ uchar temp[MARIA_SPLIT_LENGTH+MARIA_DYN_DELETE_BLOCK_HEADER];
+ DBUG_ENTER("_ma_write_part_record");
+
+ next_delete_block=HA_OFFSET_ERROR;
+
+ res_length=extra_length=0;
+ if (length > *reclength + MARIA_SPLIT_LENGTH)
+ { /* Splitt big block */
+ res_length=MY_ALIGN(length- *reclength - MARIA_EXTEND_BLOCK_LENGTH,
+ MARIA_DYN_ALIGN_SIZE);
+ length-= res_length; /* Use this for first part */
+ }
+ long_block= (length < 65520L && *reclength < 65520L) ? 0 : 1;
+ if (length == *reclength+ 3 + long_block)
+ {
+ /* Block is exactly of the right length */
+ temp[0]=(uchar) (1+ *flag)+(uchar) long_block; /* Flag is 0 or 6 */
+ if (long_block)
+ {
+ mi_int3store(temp+1,*reclength);
+ head_length=4;
+ }
+ else
+ {
+ mi_int2store(temp+1,*reclength);
+ head_length=3;
+ }
+ }
+ else if (length-long_block < *reclength+4)
+ { /* To short block */
+ if (next_filepos == HA_OFFSET_ERROR)
+ next_filepos= (info->s->state.dellink != HA_OFFSET_ERROR &&
+ !info->append_insert_at_end ?
+ info->s->state.dellink : info->state->data_file_length);
+ if (*flag == 0) /* First block */
+ {
+ if (*reclength > MARIA_MAX_BLOCK_LENGTH)
+ {
+ head_length= 16;
+ temp[0]=13;
+ mi_int4store(temp+1,*reclength);
+ mi_int3store(temp+5,length-head_length);
+ mi_sizestore((uchar*) temp+8,next_filepos);
+ }
+ else
+ {
+ head_length=5+8+long_block*2;
+ temp[0]=5+(uchar) long_block;
+ if (long_block)
+ {
+ mi_int3store(temp+1,*reclength);
+ mi_int3store(temp+4,length-head_length);
+ mi_sizestore((uchar*) temp+7,next_filepos);
+ }
+ else
+ {
+ mi_int2store(temp+1,*reclength);
+ mi_int2store(temp+3,length-head_length);
+ mi_sizestore((uchar*) temp+5,next_filepos);
+ }
+ }
+ }
+ else
+ {
+ head_length=3+8+long_block;
+ temp[0]=11+(uchar) long_block;
+ if (long_block)
+ {
+ mi_int3store(temp+1,length-head_length);
+ mi_sizestore((uchar*) temp+4,next_filepos);
+ }
+ else
+ {
+ mi_int2store(temp+1,length-head_length);
+ mi_sizestore((uchar*) temp+3,next_filepos);
+ }
+ }
+ }
+ else
+ { /* Block with empty info last */
+ head_length=4+long_block;
+ extra_length= length- *reclength-head_length;
+ temp[0]= (uchar) (3+ *flag)+(uchar) long_block; /* 3,4 or 9,10 */
+ if (long_block)
+ {
+ mi_int3store(temp+1,*reclength);
+ temp[4]= (uchar) (extra_length);
+ }
+ else
+ {
+ mi_int2store(temp+1,*reclength);
+ temp[3]= (uchar) (extra_length);
+ }
+ length= *reclength+head_length; /* Write only what is needed */
+ }
+ DBUG_DUMP("header",(uchar*) temp,head_length);
+
+ /* Make a long block for one write */
+ record_end= *record+length-head_length;
+ del_length=(res_length ? MARIA_DYN_DELETE_BLOCK_HEADER : 0);
+ bmove((uchar*) (*record-head_length),(uchar*) temp,head_length);
+ memcpy(temp,record_end,(size_t) (extra_length+del_length));
+ bzero((uchar*) record_end,extra_length);
+
+ if (res_length)
+ {
+ /* Check first if we can join this block with the next one */
+ MARIA_BLOCK_INFO del_block;
+ my_off_t next_block=filepos+length+extra_length+res_length;
+
+ del_block.second_read=0;
+ if (next_block < info->state->data_file_length &&
+ info->s->state.dellink != HA_OFFSET_ERROR)
+ {
+ if ((_ma_get_block_info(&del_block, info->dfile.file, next_block)
+ & BLOCK_DELETED) &&
+ res_length + del_block.block_len < MARIA_DYN_MAX_BLOCK_LENGTH)
+ {
+ if (unlink_deleted_block(info,&del_block))
+ goto err;
+ res_length+=del_block.block_len;
+ }
+ }
+
+ /* Create a delete link of the last part of the block */
+ pos=record_end+extra_length;
+ pos[0]= '\0';
+ mi_int3store(pos+1,res_length);
+ mi_sizestore(pos+4,info->s->state.dellink);
+ bfill(pos+12,8,255); /* End link */
+ next_delete_block=info->s->state.dellink;
+ info->s->state.dellink= filepos+length+extra_length;
+ info->state->del++;
+ info->state->empty+=res_length;
+ info->s->state.split++;
+ }
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->update & HA_STATE_WRITE_AT_END)
+ {
+ if (info->update & HA_STATE_EXTEND_BLOCK)
+ {
+ info->update&= ~HA_STATE_EXTEND_BLOCK;
+ if (my_block_write(&info->rec_cache,(uchar*) *record-head_length,
+ length+extra_length+del_length,filepos))
+ goto err;
+ }
+ else if (my_b_write(&info->rec_cache,(uchar*) *record-head_length,
+ length+extra_length+del_length))
+ goto err;
+ }
+ else
+ {
+ info->rec_cache.seek_not_done=1;
+ if (info->s->file_write(info,(uchar*) *record-head_length,
+ length+extra_length+
+ del_length,filepos,info->s->write_flag))
+ goto err;
+ }
+ memcpy(record_end,temp,(size_t) (extra_length+del_length));
+ *record=record_end;
+ *reclength-=(length-head_length);
+ *flag=6;
+
+ if (del_length)
+ {
+ /* link the next delete block to this */
+ if (update_backward_delete_link(info, next_delete_block,
+ info->s->state.dellink))
+ goto err;
+ }
+
+ DBUG_RETURN(0);
+err:
+ DBUG_PRINT("exit",("errno: %d",my_errno));
+ DBUG_RETURN(1);
+} /* _ma_write_part_record */
+
+
+ /* update record from datafile */
+
+static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos,
+ uchar *record, ulong reclength)
+{
+ int flag;
+ uint error;
+ ulong length;
+ MARIA_BLOCK_INFO block_info;
+ DBUG_ENTER("update_dynamic_record");
+
+ flag=block_info.second_read=0;
+ while (reclength > 0)
+ {
+ if (filepos != info->s->state.dellink)
+ {
+ block_info.next_filepos= HA_OFFSET_ERROR;
+ if ((error= _ma_get_block_info(&block_info, info->dfile.file, filepos))
+ & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR))
+ {
+ DBUG_PRINT("error",("Got wrong block info"));
+ if (!(error & BLOCK_FATAL_ERROR))
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ goto err;
+ }
+ length=(ulong) (block_info.filepos-filepos) + block_info.block_len;
+ if (length < reclength)
+ {
+ uint tmp=MY_ALIGN(reclength - length + 3 +
+ test(reclength >= 65520L),MARIA_DYN_ALIGN_SIZE);
+ /* Don't create a block bigger than MARIA_MAX_BLOCK_LENGTH */
+ tmp= min(length+tmp, MARIA_MAX_BLOCK_LENGTH)-length;
+ /* Check if we can extend this block */
+ if (block_info.filepos + block_info.block_len ==
+ info->state->data_file_length &&
+ info->state->data_file_length <
+ info->s->base.max_data_file_length-tmp)
+ {
+ /* extend file */
+ DBUG_PRINT("info",("Extending file with %d bytes",tmp));
+ if (info->cur_row.nextpos == info->state->data_file_length)
+ info->cur_row.nextpos+= tmp;
+ info->state->data_file_length+= tmp;
+ info->update|= HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK;
+ length+=tmp;
+ }
+ else if (length < MARIA_MAX_BLOCK_LENGTH - MARIA_MIN_BLOCK_LENGTH)
+ {
+ /*
+ Check if next block is a deleted block
+ Above we have MARIA_MIN_BLOCK_LENGTH to avoid the problem where
+ the next block is so small it can't be splited which could
+ casue problems
+ */
+
+ MARIA_BLOCK_INFO del_block;
+ del_block.second_read=0;
+ if (_ma_get_block_info(&del_block, info->dfile.file,
+ block_info.filepos + block_info.block_len) &
+ BLOCK_DELETED)
+ {
+ /* Use; Unlink it and extend the current block */
+ DBUG_PRINT("info",("Extending current block"));
+ if (unlink_deleted_block(info,&del_block))
+ goto err;
+ if ((length+=del_block.block_len) > MARIA_MAX_BLOCK_LENGTH)
+ {
+ /*
+ New block was too big, link overflow part back to
+ delete list
+ */
+ my_off_t next_pos;
+ ulong rest_length= length-MARIA_MAX_BLOCK_LENGTH;
+ set_if_bigger(rest_length, MARIA_MIN_BLOCK_LENGTH);
+ next_pos= del_block.filepos+ del_block.block_len - rest_length;
+
+ if (update_backward_delete_link(info, info->s->state.dellink,
+ next_pos))
+ DBUG_RETURN(1);
+
+ /* create delete link for data that didn't fit into the page */
+ del_block.header[0]=0;
+ mi_int3store(del_block.header+1, rest_length);
+ mi_sizestore(del_block.header+4,info->s->state.dellink);
+ bfill(del_block.header+12,8,255);
+ if (info->s->file_write(info,(uchar*) del_block.header, 20,
+ next_pos, MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ info->s->state.dellink= next_pos;
+ info->s->state.split++;
+ info->state->del++;
+ info->state->empty+= rest_length;
+ length-= rest_length;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ if (_ma_find_writepos(info,reclength,&filepos,&length))
+ goto err;
+ }
+ if (_ma_write_part_record(info,filepos,length,block_info.next_filepos,
+ &record,&reclength,&flag))
+ goto err;
+ if ((filepos=block_info.next_filepos) == HA_OFFSET_ERROR)
+ {
+ /* Start writing data on deleted blocks */
+ filepos=info->s->state.dellink;
+ }
+ }
+
+ if (block_info.next_filepos != HA_OFFSET_ERROR)
+ if (delete_dynamic_record(info,block_info.next_filepos,1))
+ goto err;
+ DBUG_RETURN(0);
+err:
+ DBUG_RETURN(1);
+}
+
+
+ /* Pack a record. Return new reclength */
+
+uint _ma_rec_pack(MARIA_HA *info, register uchar *to,
+ register const uchar *from)
+{
+ uint length,new_length,flag,bit,i;
+ uchar *pos,*end,*startpos,*packpos;
+ enum en_fieldtype type;
+ reg3 MARIA_COLUMNDEF *column;
+ MARIA_BLOB *blob;
+ DBUG_ENTER("_ma_rec_pack");
+
+ flag= 0;
+ bit= 1;
+ startpos= packpos=to;
+ to+= info->s->base.pack_bytes;
+ blob= info->blobs;
+ column= info->s->columndef;
+ if (info->s->base.null_bytes)
+ {
+ memcpy(to, from, info->s->base.null_bytes);
+ from+= info->s->base.null_bytes;
+ to+= info->s->base.null_bytes;
+ }
+
+ for (i=info->s->base.fields ; i-- > 0; from+= length, column++)
+ {
+ length=(uint) column->length;
+ if ((type = (enum en_fieldtype) column->type) != FIELD_NORMAL)
+ {
+ if (type == FIELD_BLOB)
+ {
+ if (!blob->length)
+ flag|=bit;
+ else
+ {
+ char *temp_pos;
+ size_t tmp_length=length-portable_sizeof_char_ptr;
+ memcpy((uchar*) to,from,tmp_length);
+ memcpy_fixed(&temp_pos,from+tmp_length,sizeof(char*));
+ memcpy(to+tmp_length,temp_pos,(size_t) blob->length);
+ to+=tmp_length+blob->length;
+ }
+ blob++;
+ }
+ else if (type == FIELD_SKIP_ZERO)
+ {
+ if (memcmp((uchar*) from, maria_zero_string, length) == 0)
+ flag|=bit;
+ else
+ {
+ memcpy((uchar*) to,from,(size_t) length);
+ to+=length;
+ }
+ }
+ else if (type == FIELD_SKIP_ENDSPACE ||
+ type == FIELD_SKIP_PRESPACE)
+ {
+ pos= (uchar*) from; end= (uchar*) from + length;
+ if (type == FIELD_SKIP_ENDSPACE)
+ { /* Pack trailing spaces */
+ while (end > from && *(end-1) == ' ')
+ end--;
+ }
+ else
+ { /* Pack pref-spaces */
+ while (pos < end && *pos == ' ')
+ pos++;
+ }
+ new_length=(uint) (end-pos);
+ if (new_length +1 + test(column->length > 255 && new_length > 127)
+ < length)
+ {
+ if (column->length > 255 && new_length > 127)
+ {
+ to[0]=(char) ((new_length & 127)+128);
+ to[1]=(char) (new_length >> 7);
+ to+=2;
+ }
+ else
+ *to++= (char) new_length;
+ memcpy((uchar*) to,pos,(size_t) new_length); to+=new_length;
+ flag|=bit;
+ }
+ else
+ {
+ memcpy(to,from,(size_t) length); to+=length;
+ }
+ }
+ else if (type == FIELD_VARCHAR)
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(column->length -1);
+ uint tmp_length;
+ if (pack_length == 1)
+ {
+ tmp_length= (uint) *(uchar*) from;
+ *to++= *from;
+ }
+ else
+ {
+ tmp_length= uint2korr(from);
+ store_key_length_inc(to,tmp_length);
+ }
+ memcpy(to, from+pack_length,tmp_length);
+ to+= tmp_length;
+ continue;
+ }
+ else
+ {
+ memcpy(to,from,(size_t) length); to+=length;
+ continue; /* Normal field */
+ }
+ if ((bit= bit << 1) >= 256)
+ {
+ *packpos++ = (char) (uchar) flag;
+ bit=1; flag=0;
+ }
+ }
+ else
+ {
+ memcpy(to,from,(size_t) length); to+=length;
+ }
+ }
+ if (bit != 1)
+ *packpos= (char) (uchar) flag;
+ if (info->s->calc_checksum)
+ *to++= (uchar) info->cur_row.checksum;
+ DBUG_PRINT("exit",("packed length: %d",(int) (to-startpos)));
+ DBUG_RETURN((uint) (to-startpos));
+} /* _ma_rec_pack */
+
+
+
+/*
+ Check if a record was correctly packed. Used only by maria_chk
+ Returns 0 if record is ok.
+*/
+
+my_bool _ma_rec_check(MARIA_HA *info,const uchar *record, uchar *rec_buff,
+ ulong packed_length, my_bool with_checksum,
+ ha_checksum checksum)
+{
+ uint length,new_length,flag,bit,i;
+ uchar *pos,*end,*packpos,*to;
+ enum en_fieldtype type;
+ reg3 MARIA_COLUMNDEF *column;
+ DBUG_ENTER("_ma_rec_check");
+
+ packpos=rec_buff; to= rec_buff+info->s->base.pack_bytes;
+ column= info->s->columndef;
+ flag= *packpos; bit=1;
+ record+= info->s->base.null_bytes;
+ to+= info->s->base.null_bytes;
+
+ for (i=info->s->base.fields ; i-- > 0; record+= length, column++)
+ {
+ length=(uint) column->length;
+ if ((type = (enum en_fieldtype) column->type) != FIELD_NORMAL)
+ {
+ if (type == FIELD_BLOB)
+ {
+ uint blob_length=
+ _ma_calc_blob_length(length-portable_sizeof_char_ptr,record);
+ if (!blob_length && !(flag & bit))
+ goto err;
+ if (blob_length)
+ to+=length - portable_sizeof_char_ptr+ blob_length;
+ }
+ else if (type == FIELD_SKIP_ZERO)
+ {
+ if (memcmp((uchar*) record, maria_zero_string, length) == 0)
+ {
+ if (!(flag & bit))
+ goto err;
+ }
+ else
+ to+=length;
+ }
+ else if (type == FIELD_SKIP_ENDSPACE ||
+ type == FIELD_SKIP_PRESPACE)
+ {
+ pos= (uchar*) record; end= (uchar*) record + length;
+ if (type == FIELD_SKIP_ENDSPACE)
+ { /* Pack trailing spaces */
+ while (end > record && *(end-1) == ' ')
+ end--;
+ }
+ else
+ { /* Pack pre-spaces */
+ while (pos < end && *pos == ' ')
+ pos++;
+ }
+ new_length=(uint) (end-pos);
+ if (new_length +1 + test(column->length > 255 && new_length > 127)
+ < length)
+ {
+ if (!(flag & bit))
+ goto err;
+ if (column->length > 255 && new_length > 127)
+ {
+ if (to[0] != (char) ((new_length & 127)+128) ||
+ to[1] != (char) (new_length >> 7))
+ goto err;
+ to+=2;
+ }
+ else if (*to++ != (char) new_length)
+ goto err;
+ to+=new_length;
+ }
+ else
+ to+=length;
+ }
+ else if (type == FIELD_VARCHAR)
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(column->length -1);
+ uint tmp_length;
+ if (pack_length == 1)
+ {
+ tmp_length= (uint) *(uchar*) record;
+ to+= 1+ tmp_length;
+ continue;
+ }
+ else
+ {
+ tmp_length= uint2korr(record);
+ to+= get_pack_length(tmp_length)+tmp_length;
+ }
+ continue;
+ }
+ else
+ {
+ to+=length;
+ continue; /* Normal field */
+ }
+ if ((bit= bit << 1) >= 256)
+ {
+ flag= *++packpos;
+ bit=1;
+ }
+ }
+ else
+ to+= length;
+ }
+ if (packed_length != (uint) (to - rec_buff) +
+ test(info->s->calc_checksum) || (bit != 1 && (flag & ~(bit - 1))))
+ goto err;
+ if (with_checksum && ((uchar) checksum != (uchar) *to))
+ {
+ DBUG_PRINT("error",("wrong checksum for row"));
+ goto err;
+ }
+ DBUG_RETURN(0);
+
+err:
+ DBUG_RETURN(1);
+}
+
+
+/*
+ @brief Unpacks a record
+
+ @return Recordlength
+ @retval >0 ok
+ @retval MY_FILE_ERROR (== -1) Error.
+ my_errno is set to HA_ERR_WRONG_IN_RECORD
+*/
+
+ulong _ma_rec_unpack(register MARIA_HA *info, register uchar *to, uchar *from,
+ ulong found_length)
+{
+ uint flag,bit,length,min_pack_length, column_length;
+ enum en_fieldtype type;
+ uchar *from_end,*to_end,*packpos;
+ reg3 MARIA_COLUMNDEF *column, *end_column;
+ DBUG_ENTER("_ma_rec_unpack");
+
+ to_end=to + info->s->base.reclength;
+ from_end=from+found_length;
+ flag= (uchar) *from; bit=1; packpos=from;
+ if (found_length < info->s->base.min_pack_length)
+ goto err;
+ from+= info->s->base.pack_bytes;
+ min_pack_length= info->s->base.min_pack_length - info->s->base.pack_bytes;
+
+ if ((length= info->s->base.null_bytes))
+ {
+ memcpy(to, from, length);
+ from+= length;
+ to+= length;
+ min_pack_length-= length;
+ }
+
+ for (column= info->s->columndef, end_column= column + info->s->base.fields;
+ column < end_column ; to+= column_length, column++)
+ {
+ column_length= column->length;
+ if ((type = (enum en_fieldtype) column->type) != FIELD_NORMAL &&
+ (type != FIELD_CHECK))
+ {
+ if (type == FIELD_VARCHAR)
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(column_length-1);
+ if (pack_length == 1)
+ {
+ length= (uint) *(uchar*) from;
+ if (length > column_length-1)
+ goto err;
+ *to= *from++;
+ }
+ else
+ {
+ get_key_length(length, from);
+ if (length > column_length-2)
+ goto err;
+ int2store(to,length);
+ }
+ if (from+length > from_end)
+ goto err;
+ memcpy(to+pack_length, from, length);
+ from+= length;
+ min_pack_length--;
+ continue;
+ }
+ if (flag & bit)
+ {
+ if (type == FIELD_BLOB || type == FIELD_SKIP_ZERO)
+ bzero((uchar*) to,column_length);
+ else if (type == FIELD_SKIP_ENDSPACE ||
+ type == FIELD_SKIP_PRESPACE)
+ {
+ if (column->length > 255 && *from & 128)
+ {
+ if (from + 1 >= from_end)
+ goto err;
+ length= (*from & 127)+ ((uint) (uchar) *(from+1) << 7); from+=2;
+ }
+ else
+ {
+ if (from == from_end)
+ goto err;
+ length= (uchar) *from++;
+ }
+ min_pack_length--;
+ if (length >= column_length ||
+ min_pack_length + length > (uint) (from_end - from))
+ goto err;
+ if (type == FIELD_SKIP_ENDSPACE)
+ {
+ memcpy(to,(uchar*) from,(size_t) length);
+ bfill((uchar*) to+length,column_length-length,' ');
+ }
+ else
+ {
+ bfill((uchar*) to,column_length-length,' ');
+ memcpy(to+column_length-length,(uchar*) from,(size_t) length);
+ }
+ from+=length;
+ }
+ }
+ else if (type == FIELD_BLOB)
+ {
+ uint size_length=column_length- portable_sizeof_char_ptr;
+ ulong blob_length= _ma_calc_blob_length(size_length,from);
+ ulong from_left= (ulong) (from_end - from);
+ if (from_left < size_length ||
+ from_left - size_length < blob_length ||
+ from_left - size_length - blob_length < min_pack_length)
+ goto err;
+ memcpy((uchar*) to,(uchar*) from,(size_t) size_length);
+ from+=size_length;
+ memcpy_fixed((uchar*) to+size_length,(uchar*) &from,sizeof(char*));
+ from+=blob_length;
+ }
+ else
+ {
+ if (type == FIELD_SKIP_ENDSPACE || type == FIELD_SKIP_PRESPACE)
+ min_pack_length--;
+ if (min_pack_length + column_length > (uint) (from_end - from))
+ goto err;
+ memcpy(to,(uchar*) from,(size_t) column_length); from+=column_length;
+ }
+ if ((bit= bit << 1) >= 256)
+ {
+ flag= (uchar) *++packpos; bit=1;
+ }
+ }
+ else
+ {
+ if (min_pack_length > (uint) (from_end - from))
+ goto err;
+ min_pack_length-=column_length;
+ memcpy(to, (uchar*) from, (size_t) column_length);
+ from+=column_length;
+ }
+ }
+ if (info->s->calc_checksum)
+ info->cur_row.checksum= (uint) (uchar) *from++;
+ if (to == to_end && from == from_end && (bit == 1 || !(flag & ~(bit-1))))
+ DBUG_RETURN(found_length);
+
+err:
+ my_errno= HA_ERR_WRONG_IN_RECORD;
+ DBUG_PRINT("error",("to_end: 0x%lx -> 0x%lx from_end: 0x%lx -> 0x%lx",
+ (long) to, (long) to_end, (long) from, (long) from_end));
+ DBUG_DUMP("from",(uchar*) info->rec_buff,info->s->base.min_pack_length);
+ DBUG_RETURN(MY_FILE_ERROR);
+} /* _ma_rec_unpack */
+
+
+ /* Calc length of blob. Update info in blobs->length */
+
+ulong _ma_calc_total_blob_length(MARIA_HA *info, const uchar *record)
+{
+ ulong length;
+ MARIA_BLOB *blob,*end;
+
+ for (length=0, blob= info->blobs, end=blob+info->s->base.blobs ;
+ blob != end;
+ blob++)
+ {
+ blob->length= _ma_calc_blob_length(blob->pack_length,record + blob->offset);
+ length+=blob->length;
+ }
+ return length;
+}
+
+
+ulong _ma_calc_blob_length(uint length, const uchar *pos)
+{
+ switch (length) {
+ case 1:
+ return (uint) (uchar) *pos;
+ case 2:
+ return (uint) uint2korr(pos);
+ case 3:
+ return uint3korr(pos);
+ case 4:
+ return uint4korr(pos);
+ default:
+ break;
+ }
+ return 0; /* Impossible */
+}
+
+
+void _ma_store_blob_length(uchar *pos,uint pack_length,uint length)
+{
+ switch (pack_length) {
+ case 1:
+ *pos= (uchar) length;
+ break;
+ case 2:
+ int2store(pos,length);
+ break;
+ case 3:
+ int3store(pos,length);
+ break;
+ case 4:
+ int4store(pos,length);
+ default:
+ break;
+ }
+ return;
+}
+
+
+/*
+ Read record from datafile.
+
+ SYNOPSIS
+ _ma_read_dynamic_record()
+ info MARIA_HA pointer to table.
+ filepos From where to read the record.
+ buf Destination for record.
+
+ NOTE
+ If a write buffer is active, it needs to be flushed if its contents
+ intersects with the record to read. We always check if the position
+ of the first uchar of the write buffer is lower than the position
+ past the last uchar to read. In theory this is also true if the write
+ buffer is completely below the read segment. That is, if there is no
+ intersection. But this case is unusual. We flush anyway. Only if the
+ first uchar in the write buffer is above the last uchar to read, we do
+ not flush.
+
+ A dynamic record may need several reads. So this check must be done
+ before every read. Reading a dynamic record starts with reading the
+ block header. If the record does not fit into the free space of the
+ header, the block may be longer than the header. In this case a
+ second read is necessary. These one or two reads repeat for every
+ part of the record.
+
+ RETURN
+ 0 OK
+ # Error number
+*/
+
+int _ma_read_dynamic_record(MARIA_HA *info, uchar *buf,
+ MARIA_RECORD_POS filepos)
+{
+ int block_of_record;
+ uint b_type;
+ MARIA_BLOCK_INFO block_info;
+ File file;
+ uchar *to;
+ uint left_length;
+ DBUG_ENTER("_ma_read_dynamic_record");
+
+ if (filepos == HA_OFFSET_ERROR)
+ goto err;
+
+ LINT_INIT(to);
+ LINT_INIT(left_length);
+ file= info->dfile.file;
+ block_of_record= 0; /* First block of record is numbered as zero. */
+ block_info.second_read= 0;
+ do
+ {
+ /* A corrupted table can have wrong pointers. (Bug# 19835) */
+ if (filepos == HA_OFFSET_ERROR)
+ goto panic;
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ (info->rec_cache.pos_in_file < filepos +
+ MARIA_BLOCK_INFO_HEADER_LENGTH) &&
+ flush_io_cache(&info->rec_cache))
+ goto err;
+ info->rec_cache.seek_not_done=1;
+ if ((b_type= _ma_get_block_info(&block_info, file, filepos)) &
+ (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR))
+ {
+ if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED))
+ my_errno=HA_ERR_RECORD_DELETED;
+ goto err;
+ }
+ if (block_of_record++ == 0) /* First block */
+ {
+ if (block_info.rec_len > (uint) info->s->base.max_pack_length)
+ goto panic;
+ if (info->s->base.blobs)
+ {
+ if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ block_info.rec_len +
+ info->s->base.extra_rec_buff_size))
+ goto err;
+ }
+ to= info->rec_buff;
+ left_length=block_info.rec_len;
+ }
+ if (left_length < block_info.data_len || ! block_info.data_len)
+ goto panic; /* Wrong linked record */
+ /* copy information that is already read */
+ {
+ uint offset= (uint) (block_info.filepos - filepos);
+ uint prefetch_len= (sizeof(block_info.header) - offset);
+ filepos+= sizeof(block_info.header);
+
+ if (prefetch_len > block_info.data_len)
+ prefetch_len= block_info.data_len;
+ if (prefetch_len)
+ {
+ memcpy((uchar*) to, block_info.header + offset, prefetch_len);
+ block_info.data_len-= prefetch_len;
+ left_length-= prefetch_len;
+ to+= prefetch_len;
+ }
+ }
+ /* read rest of record from file */
+ if (block_info.data_len)
+ {
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->rec_cache.pos_in_file < filepos + block_info.data_len &&
+ flush_io_cache(&info->rec_cache))
+ goto err;
+ /*
+ What a pity that this method is not called 'file_pread' and that
+ there is no equivalent without seeking. We are at the right
+ position already. :(
+ */
+ if (info->s->file_read(info, (uchar*) to, block_info.data_len,
+ filepos, MYF(MY_NABP)))
+ goto panic;
+ left_length-=block_info.data_len;
+ to+=block_info.data_len;
+ }
+ filepos= block_info.next_filepos;
+ } while (left_length);
+
+ info->update|= HA_STATE_AKTIV; /* We have a aktive record */
+ fast_ma_writeinfo(info);
+ DBUG_RETURN(_ma_rec_unpack(info,buf,info->rec_buff,block_info.rec_len) !=
+ MY_FILE_ERROR ? 0 : my_errno);
+
+err:
+ fast_ma_writeinfo(info);
+ DBUG_RETURN(my_errno);
+
+panic:
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ goto err;
+}
+
+ /* compare unique constraint between stored rows */
+
+my_bool _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ const uchar *record, MARIA_RECORD_POS pos)
+{
+ uchar *old_rec_buff,*old_record;
+ my_off_t old_rec_buff_size;
+ my_bool error;
+ DBUG_ENTER("_ma_cmp_dynamic_unique");
+
+ if (!(old_record=my_alloca(info->s->base.reclength)))
+ DBUG_RETURN(1);
+
+ /* Don't let the compare destroy blobs that may be in use */
+ old_rec_buff= info->rec_buff;
+ old_rec_buff_size= info->rec_buff_size;
+
+ if (info->s->base.blobs)
+ {
+ info->rec_buff= 0;
+ info->rec_buff_size= 0;
+ }
+ error= _ma_read_dynamic_record(info, old_record, pos) != 0;
+ if (!error)
+ error=_ma_unique_comp(def, record, old_record, def->null_are_equal) != 0;
+ if (info->s->base.blobs)
+ {
+ my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR));
+ info->rec_buff= old_rec_buff;
+ info->rec_buff_size= old_rec_buff_size;
+ }
+ my_afree(old_record);
+ DBUG_RETURN(error);
+}
+
+
+ /* Compare of record on disk with packed record in memory */
+
+my_bool _ma_cmp_dynamic_record(register MARIA_HA *info,
+ register const uchar *record)
+{
+ uint flag, reclength, b_type,cmp_length;
+ my_off_t filepos;
+ uchar *buffer;
+ MARIA_BLOCK_INFO block_info;
+ my_bool error= 1;
+ DBUG_ENTER("_ma_cmp_dynamic_record");
+
+ /* We are going to do changes; dont let anybody disturb */
+ dont_break(); /* Dont allow SIGHUP or SIGINT */
+
+ if (info->opt_flag & WRITE_CACHE_USED)
+ {
+ info->update&= ~(HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK);
+ if (flush_io_cache(&info->rec_cache))
+ DBUG_RETURN(1);
+ }
+ info->rec_cache.seek_not_done=1;
+
+ /* If nobody have touched the database we don't have to test rec */
+
+ buffer=info->rec_buff;
+ if ((info->opt_flag & READ_CHECK_USED))
+ { /* If check isn't disabled */
+ if (info->s->base.blobs)
+ {
+ if (!(buffer=(uchar*) my_alloca(info->s->base.pack_reclength+
+ _ma_calc_total_blob_length(info,record))))
+ DBUG_RETURN(1);
+ }
+ reclength= _ma_rec_pack(info,buffer,record);
+ record= buffer;
+
+ filepos= info->cur_row.lastpos;
+ flag=block_info.second_read=0;
+ block_info.next_filepos=filepos;
+ while (reclength > 0)
+ {
+ if ((b_type= _ma_get_block_info(&block_info, info->dfile.file,
+ block_info.next_filepos))
+ & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR))
+ {
+ if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED))
+ my_errno=HA_ERR_RECORD_CHANGED;
+ goto err;
+ }
+ if (flag == 0) /* First block */
+ {
+ flag=1;
+ if (reclength != block_info.rec_len)
+ {
+ my_errno=HA_ERR_RECORD_CHANGED;
+ goto err;
+ }
+ } else if (reclength < block_info.data_len)
+ {
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ goto err;
+ }
+ reclength-= block_info.data_len;
+ cmp_length= block_info.data_len;
+ if (!reclength && info->s->calc_checksum)
+ cmp_length--; /* 'record' may not contain checksum */
+
+ if (_ma_cmp_buffer(info->dfile.file, record, block_info.filepos,
+ cmp_length))
+ {
+ my_errno=HA_ERR_RECORD_CHANGED;
+ goto err;
+ }
+ flag=1;
+ record+=block_info.data_len;
+ }
+ }
+ my_errno=0;
+ error= 0;
+err:
+ if (buffer != info->rec_buff)
+ my_afree((uchar*) buffer);
+ DBUG_PRINT("exit", ("result: %d", error));
+ DBUG_RETURN(error);
+}
+
+
+ /* Compare file to buffert */
+
+static my_bool _ma_cmp_buffer(File file, const uchar *buff, my_off_t filepos,
+ uint length)
+{
+ uint next_length;
+ uchar temp_buff[IO_SIZE*2];
+ DBUG_ENTER("_ma_cmp_buffer");
+
+ next_length= IO_SIZE*2 - (uint) (filepos & (IO_SIZE-1));
+
+ while (length > IO_SIZE*2)
+ {
+ if (my_pread(file,temp_buff,next_length,filepos, MYF(MY_NABP)) ||
+ memcmp(buff, temp_buff, next_length))
+ goto err;
+ filepos+=next_length;
+ buff+=next_length;
+ length-= next_length;
+ next_length=IO_SIZE*2;
+ }
+ if (my_pread(file,temp_buff,length,filepos,MYF(MY_NABP)))
+ goto err;
+ DBUG_RETURN(memcmp(buff, temp_buff, length) != 0);
+err:
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Read next record from datafile during table scan.
+
+ SYNOPSIS
+ _ma_read_rnd_dynamic_record()
+ info MARIA_HA pointer to table.
+ buf Destination for record.
+ filepos From where to read the record.
+ skip_deleted_blocks If to repeat reading until a non-deleted
+ record is found.
+
+ NOTE
+ This is identical to _ma_read_dynamic_record(), except the following
+ cases:
+
+ - If there is no active row at 'filepos', continue scanning for
+ an active row. (This is becasue the previous
+ _ma_read_rnd_dynamic_record() call stored the next block position
+ in filepos, but this position may not be a start block for a row
+ - We may have READ_CACHING enabled, in which case we use the cache
+ to read rows.
+
+ For other comments, check _ma_read_dynamic_record()
+
+ RETURN
+ 0 OK
+ != 0 Error number
+*/
+
+int _ma_read_rnd_dynamic_record(MARIA_HA *info,
+ uchar *buf,
+ MARIA_RECORD_POS filepos,
+ my_bool skip_deleted_blocks)
+{
+ int block_of_record, info_read;
+ uint left_len,b_type;
+ uchar *to;
+ MARIA_BLOCK_INFO block_info;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_read_rnd_dynamic_record");
+
+ info_read=0;
+ LINT_INIT(to);
+
+ if (info->lock_type == F_UNLCK)
+ {
+#ifndef UNSAFE_LOCKING
+#else
+ info->tmp_lock_type=F_RDLCK;
+#endif
+ }
+ else
+ info_read=1; /* memory-keyinfoblock is ok */
+
+ block_of_record= 0; /* First block of record is numbered as zero. */
+ block_info.second_read= 0;
+ left_len=1;
+ do
+ {
+ if (filepos >= info->state->data_file_length)
+ {
+ if (!info_read)
+ { /* Check if changed */
+ info_read=1;
+ info->rec_cache.seek_not_done=1;
+ if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
+ goto panic;
+ }
+ if (filepos >= info->state->data_file_length)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ goto err;
+ }
+ }
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ if (_ma_read_cache(&info->rec_cache,(uchar*) block_info.header,filepos,
+ sizeof(block_info.header),
+ (!block_of_record && skip_deleted_blocks ?
+ READING_NEXT : 0) | READING_HEADER))
+ goto panic;
+ b_type= _ma_get_block_info(&block_info,-1,filepos);
+ }
+ else
+ {
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->rec_cache.pos_in_file < filepos + MARIA_BLOCK_INFO_HEADER_LENGTH &&
+ flush_io_cache(&info->rec_cache))
+ DBUG_RETURN(my_errno);
+ info->rec_cache.seek_not_done=1;
+ b_type= _ma_get_block_info(&block_info, info->dfile.file, filepos);
+ }
+
+ if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR))
+ {
+ if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
+ && skip_deleted_blocks)
+ {
+ filepos=block_info.filepos+block_info.block_len;
+ block_info.second_read=0;
+ continue; /* Search after next_record */
+ }
+ if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
+ {
+ my_errno= HA_ERR_RECORD_DELETED;
+ info->cur_row.lastpos= block_info.filepos;
+ info->cur_row.nextpos= block_info.filepos+block_info.block_len;
+ }
+ goto err;
+ }
+ if (block_of_record == 0) /* First block */
+ {
+ if (block_info.rec_len > (uint) share->base.max_pack_length)
+ goto panic;
+ info->cur_row.lastpos= filepos;
+ if (share->base.blobs)
+ {
+ if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ block_info.rec_len +
+ info->s->base.extra_rec_buff_size))
+ goto err;
+ }
+ to= info->rec_buff;
+ left_len=block_info.rec_len;
+ }
+ if (left_len < block_info.data_len)
+ goto panic; /* Wrong linked record */
+
+ /* copy information that is already read */
+ {
+ uint offset=(uint) (block_info.filepos - filepos);
+ uint tmp_length= (sizeof(block_info.header) - offset);
+ filepos=block_info.filepos;
+
+ if (tmp_length > block_info.data_len)
+ tmp_length= block_info.data_len;
+ if (tmp_length)
+ {
+ memcpy((uchar*) to, block_info.header+offset,tmp_length);
+ block_info.data_len-=tmp_length;
+ left_len-=tmp_length;
+ to+=tmp_length;
+ filepos+=tmp_length;
+ }
+ }
+ /* read rest of record from file */
+ if (block_info.data_len)
+ {
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ if (_ma_read_cache(&info->rec_cache,(uchar*) to,filepos,
+ block_info.data_len,
+ (!block_of_record && skip_deleted_blocks) ?
+ READING_NEXT : 0))
+ goto panic;
+ }
+ else
+ {
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->rec_cache.pos_in_file <
+ block_info.filepos + block_info.data_len &&
+ flush_io_cache(&info->rec_cache))
+ goto err;
+ /* VOID(my_seek(info->dfile.file, filepos, MY_SEEK_SET, MYF(0))); */
+ if (my_read(info->dfile.file, (uchar*)to, block_info.data_len,
+ MYF(MY_NABP)))
+ {
+ if (my_errno == HA_ERR_FILE_TOO_SHORT)
+ my_errno= HA_ERR_WRONG_IN_RECORD; /* Unexpected end of file */
+ goto err;
+ }
+ }
+ }
+ /*
+ Increment block-of-record counter. If it was the first block,
+ remember the position behind the block for the next call.
+ */
+ if (block_of_record++ == 0)
+ {
+ info->cur_row.nextpos= block_info.filepos+block_info.block_len;
+ skip_deleted_blocks=0;
+ }
+ left_len-=block_info.data_len;
+ to+=block_info.data_len;
+ filepos=block_info.next_filepos;
+ } while (left_len);
+
+ info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
+ fast_ma_writeinfo(info);
+ if (_ma_rec_unpack(info,buf,info->rec_buff,block_info.rec_len) !=
+ MY_FILE_ERROR)
+ DBUG_RETURN(0);
+ DBUG_RETURN(my_errno); /* Wrong record */
+
+panic:
+ my_errno=HA_ERR_WRONG_IN_RECORD; /* Something is fatal wrong */
+err:
+ fast_ma_writeinfo(info);
+ DBUG_RETURN(my_errno);
+}
+
+
+ /* Read and process header from a dynamic-record-file */
+
+uint _ma_get_block_info(MARIA_BLOCK_INFO *info, File file, my_off_t filepos)
+{
+ uint return_val=0;
+ uchar *header=info->header;
+
+ if (file >= 0)
+ {
+ /*
+ We do not use my_pread() here because we want to have the file
+ pointer set to the end of the header after this function.
+ my_pread() may leave the file pointer untouched.
+ */
+ VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0)));
+ if (my_read(file, header, sizeof(info->header),MYF(0)) !=
+ sizeof(info->header))
+ goto err;
+ }
+ DBUG_DUMP("header",header,MARIA_BLOCK_INFO_HEADER_LENGTH);
+ if (info->second_read)
+ {
+ if (info->header[0] <= 6 || info->header[0] == 13)
+ return_val=BLOCK_SYNC_ERROR;
+ }
+ else
+ {
+ if (info->header[0] > 6 && info->header[0] != 13)
+ return_val=BLOCK_SYNC_ERROR;
+ }
+ info->next_filepos= HA_OFFSET_ERROR; /* Dummy if no next block */
+
+ switch (info->header[0]) {
+ case 0:
+ if ((info->block_len=(uint) mi_uint3korr(header+1)) <
+ MARIA_MIN_BLOCK_LENGTH ||
+ (info->block_len & (MARIA_DYN_ALIGN_SIZE -1)))
+ goto err;
+ info->filepos=filepos;
+ info->next_filepos=mi_sizekorr(header+4);
+ info->prev_filepos=mi_sizekorr(header+12);
+#if SIZEOF_OFF_T == 4
+ if ((mi_uint4korr(header+4) != 0 &&
+ (mi_uint4korr(header+4) != (ulong) ~0 ||
+ info->next_filepos != (ulong) ~0)) ||
+ (mi_uint4korr(header+12) != 0 &&
+ (mi_uint4korr(header+12) != (ulong) ~0 ||
+ info->prev_filepos != (ulong) ~0)))
+ goto err;
+#endif
+ return return_val | BLOCK_DELETED; /* Deleted block */
+
+ case 1:
+ info->rec_len=info->data_len=info->block_len=mi_uint2korr(header+1);
+ info->filepos=filepos+3;
+ return return_val | BLOCK_FIRST | BLOCK_LAST;
+ case 2:
+ info->rec_len=info->data_len=info->block_len=mi_uint3korr(header+1);
+ info->filepos=filepos+4;
+ return return_val | BLOCK_FIRST | BLOCK_LAST;
+
+ case 13:
+ info->rec_len=mi_uint4korr(header+1);
+ info->block_len=info->data_len=mi_uint3korr(header+5);
+ info->next_filepos=mi_sizekorr(header+8);
+ info->second_read=1;
+ info->filepos=filepos+16;
+ return return_val | BLOCK_FIRST;
+
+ case 3:
+ info->rec_len=info->data_len=mi_uint2korr(header+1);
+ info->block_len=info->rec_len+ (uint) header[3];
+ info->filepos=filepos+4;
+ return return_val | BLOCK_FIRST | BLOCK_LAST;
+ case 4:
+ info->rec_len=info->data_len=mi_uint3korr(header+1);
+ info->block_len=info->rec_len+ (uint) header[4];
+ info->filepos=filepos+5;
+ return return_val | BLOCK_FIRST | BLOCK_LAST;
+
+ case 5:
+ info->rec_len=mi_uint2korr(header+1);
+ info->block_len=info->data_len=mi_uint2korr(header+3);
+ info->next_filepos=mi_sizekorr(header+5);
+ info->second_read=1;
+ info->filepos=filepos+13;
+ return return_val | BLOCK_FIRST;
+ case 6:
+ info->rec_len=mi_uint3korr(header+1);
+ info->block_len=info->data_len=mi_uint3korr(header+4);
+ info->next_filepos=mi_sizekorr(header+7);
+ info->second_read=1;
+ info->filepos=filepos+15;
+ return return_val | BLOCK_FIRST;
+
+ /* The following blocks are identical to 1-6 without rec_len */
+ case 7:
+ info->data_len=info->block_len=mi_uint2korr(header+1);
+ info->filepos=filepos+3;
+ return return_val | BLOCK_LAST;
+ case 8:
+ info->data_len=info->block_len=mi_uint3korr(header+1);
+ info->filepos=filepos+4;
+ return return_val | BLOCK_LAST;
+
+ case 9:
+ info->data_len=mi_uint2korr(header+1);
+ info->block_len=info->data_len+ (uint) header[3];
+ info->filepos=filepos+4;
+ return return_val | BLOCK_LAST;
+ case 10:
+ info->data_len=mi_uint3korr(header+1);
+ info->block_len=info->data_len+ (uint) header[4];
+ info->filepos=filepos+5;
+ return return_val | BLOCK_LAST;
+
+ case 11:
+ info->data_len=info->block_len=mi_uint2korr(header+1);
+ info->next_filepos=mi_sizekorr(header+3);
+ info->second_read=1;
+ info->filepos=filepos+11;
+ return return_val;
+ case 12:
+ info->data_len=info->block_len=mi_uint3korr(header+1);
+ info->next_filepos=mi_sizekorr(header+4);
+ info->second_read=1;
+ info->filepos=filepos+12;
+ return return_val;
+ }
+
+err:
+ my_errno=HA_ERR_WRONG_IN_RECORD; /* Garbage */
+ return BLOCK_ERROR;
+}
diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c
new file mode 100644
index 00000000000..26e129245d6
--- /dev/null
+++ b/storage/maria/ma_extra.c
@@ -0,0 +1,578 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+#include "ma_blockrec.h"
+
+static void maria_extra_keyflag(MARIA_HA *info,
+ enum ha_extra_function function);
+
+/**
+ @brief Set options and buffers to optimize table handling
+
+ @param name table's name
+ @param info open table
+ @param function operation
+ @param extra_arg Pointer to extra argument (normally pointer to
+ ulong); used when function is one of:
+ HA_EXTRA_WRITE_CACHE
+ HA_EXTRA_CACHE
+
+ @return Operation status
+ @retval 0 ok
+ @retval !=0 error
+*/
+
+int maria_extra(MARIA_HA *info, enum ha_extra_function function,
+ void *extra_arg)
+{
+ int error= 0;
+ ulong cache_size;
+ MARIA_SHARE *share= info->s;
+ my_bool block_records= share->data_file_type == BLOCK_RECORD;
+ DBUG_ENTER("maria_extra");
+ DBUG_PRINT("enter",("function: %d",(int) function));
+
+ switch (function) {
+ case HA_EXTRA_RESET_STATE: /* Reset state (don't free buffers) */
+ info->lastinx= 0; /* Use first index as def */
+ info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR;
+ info->page_changed= 1;
+ /* Next/prev gives first/last */
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ reinit_io_cache(&info->rec_cache,READ_CACHE,0,
+ (pbool) (info->lock_type != F_UNLCK),
+ (pbool) test(info->update & HA_STATE_ROW_CHANGED)
+ );
+ }
+ info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND |
+ HA_STATE_PREV_FOUND);
+ break;
+ case HA_EXTRA_CACHE:
+ if (block_records)
+ break; /* Not supported */
+
+ if (info->lock_type == F_UNLCK &&
+ (share->options & HA_OPTION_PACK_RECORD))
+ {
+ error= 1; /* Not possibly if not locked */
+ my_errno= EACCES;
+ break;
+ }
+ if (info->s->file_map) /* Don't use cache if mmap */
+ break;
+#if defined(HAVE_MMAP) && defined(HAVE_MADVISE)
+ if ((share->options & HA_OPTION_COMPRESS_RECORD))
+ {
+ pthread_mutex_lock(&share->intern_lock);
+ if (_ma_memmap_file(info))
+ {
+ /* We don't nead MADV_SEQUENTIAL if small file */
+ madvise((char*) share->file_map, share->state.state.data_file_length,
+ share->state.state.data_file_length <= RECORD_CACHE_SIZE*16 ?
+ MADV_RANDOM : MADV_SEQUENTIAL);
+ pthread_mutex_unlock(&share->intern_lock);
+ break;
+ }
+ pthread_mutex_unlock(&share->intern_lock);
+ }
+#endif
+ if (info->opt_flag & WRITE_CACHE_USED)
+ {
+ info->opt_flag&= ~WRITE_CACHE_USED;
+ if ((error= end_io_cache(&info->rec_cache)))
+ break;
+ }
+ if (!(info->opt_flag &
+ (READ_CACHE_USED | WRITE_CACHE_USED | MEMMAP_USED)))
+ {
+ cache_size= (extra_arg ? *(ulong*) extra_arg :
+ my_default_record_cache_size);
+ if (!(init_io_cache(&info->rec_cache, info->dfile.file,
+ (uint) min(info->state->data_file_length+1,
+ cache_size),
+ READ_CACHE,0L,(pbool) (info->lock_type != F_UNLCK),
+ MYF(share->write_flag & MY_WAIT_IF_FULL))))
+ {
+ info->opt_flag|= READ_CACHE_USED;
+ info->update&= ~HA_STATE_ROW_CHANGED;
+ }
+ if (share->concurrent_insert)
+ info->rec_cache.end_of_file= info->state->data_file_length;
+ }
+ break;
+ case HA_EXTRA_REINIT_CACHE:
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ reinit_io_cache(&info->rec_cache, READ_CACHE, info->cur_row.nextpos,
+ (pbool) (info->lock_type != F_UNLCK),
+ (pbool) test(info->update & HA_STATE_ROW_CHANGED));
+ info->update&= ~HA_STATE_ROW_CHANGED;
+ if (share->concurrent_insert)
+ info->rec_cache.end_of_file= info->state->data_file_length;
+ }
+ break;
+ case HA_EXTRA_WRITE_CACHE:
+ if (info->lock_type == F_UNLCK)
+ {
+ error= 1; /* Not possibly if not locked */
+ break;
+ }
+ if (block_records)
+ break; /* Not supported */
+
+ cache_size= (extra_arg ? *(ulong*) extra_arg :
+ my_default_record_cache_size);
+ if (!(info->opt_flag &
+ (READ_CACHE_USED | WRITE_CACHE_USED | OPT_NO_ROWS)) &&
+ !share->state.header.uniques)
+ if (!(init_io_cache(&info->rec_cache, info->dfile.file, cache_size,
+ WRITE_CACHE,info->state->data_file_length,
+ (pbool) (info->lock_type != F_UNLCK),
+ MYF(share->write_flag & MY_WAIT_IF_FULL))))
+ {
+ info->opt_flag|= WRITE_CACHE_USED;
+ info->update&= ~(HA_STATE_ROW_CHANGED |
+ HA_STATE_WRITE_AT_END |
+ HA_STATE_EXTEND_BLOCK);
+ }
+ break;
+ case HA_EXTRA_PREPARE_FOR_UPDATE:
+ if (info->s->data_file_type != DYNAMIC_RECORD)
+ break;
+ /* Remove read/write cache if dynamic rows */
+ case HA_EXTRA_NO_CACHE:
+ if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
+ {
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ error= end_io_cache(&info->rec_cache);
+ /* Sergei will insert full text index caching here */
+ }
+#if defined(HAVE_MMAP) && defined(HAVE_MADVISE)
+ if (info->opt_flag & MEMMAP_USED)
+ madvise((char*) share->file_map, share->state.state.data_file_length,
+ MADV_RANDOM);
+#endif
+ break;
+ case HA_EXTRA_FLUSH_CACHE:
+ if (info->opt_flag & WRITE_CACHE_USED)
+ {
+ if ((error= flush_io_cache(&info->rec_cache)))
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info); /* Fatal error found */
+ }
+ }
+ break;
+ case HA_EXTRA_NO_READCHECK:
+ info->opt_flag&= ~READ_CHECK_USED; /* No readcheck */
+ break;
+ case HA_EXTRA_READCHECK:
+ info->opt_flag|= READ_CHECK_USED;
+ break;
+ case HA_EXTRA_KEYREAD: /* Read only keys to record */
+ case HA_EXTRA_REMEMBER_POS:
+ info->opt_flag|= REMEMBER_OLD_POS;
+ bmove((uchar*) info->lastkey+share->base.max_key_length*2,
+ (uchar*) info->lastkey,info->lastkey_length);
+ info->save_update= info->update;
+ info->save_lastinx= info->lastinx;
+ info->save_lastpos= info->cur_row.lastpos;
+ info->save_lastkey_length= info->lastkey_length;
+ if (function == HA_EXTRA_REMEMBER_POS)
+ break;
+ /* fall through */
+ case HA_EXTRA_KEYREAD_CHANGE_POS:
+ info->opt_flag|= KEY_READ_USED;
+ info->read_record= _ma_read_key_record;
+ break;
+ case HA_EXTRA_NO_KEYREAD:
+ case HA_EXTRA_RESTORE_POS:
+ if (info->opt_flag & REMEMBER_OLD_POS)
+ {
+ bmove((uchar*) info->lastkey,
+ (uchar*) info->lastkey+share->base.max_key_length*2,
+ info->save_lastkey_length);
+ info->update= info->save_update | HA_STATE_WRITTEN;
+ info->lastinx= info->save_lastinx;
+ info->cur_row.lastpos= info->save_lastpos;
+ info->lastkey_length= info->save_lastkey_length;
+ }
+ info->read_record= share->read_record;
+ info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS);
+ break;
+ case HA_EXTRA_NO_USER_CHANGE: /* Database is somehow locked agains changes */
+ info->lock_type= F_EXTRA_LCK; /* Simulate as locked */
+ break;
+ case HA_EXTRA_WAIT_LOCK:
+ info->lock_wait= 0;
+ break;
+ case HA_EXTRA_NO_WAIT_LOCK:
+ info->lock_wait= MY_SHORT_WAIT;
+ break;
+ case HA_EXTRA_NO_KEYS:
+ /* we're going to modify pieces of the state, stall Checkpoint */
+ pthread_mutex_lock(&share->intern_lock);
+ if (info->lock_type == F_UNLCK)
+ {
+ pthread_mutex_unlock(&share->intern_lock);
+ error= 1; /* Not possibly if not lock */
+ break;
+ }
+ if (maria_is_any_key_active(share->state.key_map))
+ {
+ MARIA_KEYDEF *key= share->keyinfo;
+ uint i;
+ for (i =0 ; i < share->base.keys ; i++,key++)
+ {
+ if (!(key->flag & HA_NOSAME) && info->s->base.auto_key != i+1)
+ {
+ maria_clear_key_active(share->state.key_map, i);
+ info->update|= HA_STATE_CHANGED;
+ }
+ }
+
+ if (!share->changed)
+ {
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
+ share->changed= 1; /* Update on close */
+ if (!share->global_changed)
+ {
+ share->global_changed= 1;
+ share->state.open_count++;
+ }
+ }
+ share->state.state= *info->state;
+ /*
+ That state write to disk must be done, even for transactional tables;
+ indeed the table's share is going to be lost (there was a
+ HA_EXTRA_FORCE_REOPEN before, which set share->last_version to
+ 0), and so the only way it leaves information (share->state.key_map)
+ for the posterity is by writing it to disk.
+ */
+ DBUG_ASSERT(!maria_in_recovery);
+ error= _ma_state_info_write(share, 1|2);
+ }
+ pthread_mutex_unlock(&share->intern_lock);
+ break;
+ case HA_EXTRA_FORCE_REOPEN:
+ /*
+ MySQL uses this case after it has closed all other instances
+ of this table.
+ We however do a flush here for additional safety.
+ */
+ /** @todo consider porting these flush-es to MyISAM */
+ DBUG_ASSERT(share->reopen == 1);
+ error= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
+ FLUSH_FORCE_WRITE, FLUSH_FORCE_WRITE);
+ if (!error && share->changed)
+ {
+ pthread_mutex_lock(&share->intern_lock);
+ if (!(error= _ma_state_info_write(share, 1|2)))
+ share->changed= 0;
+ pthread_mutex_unlock(&share->intern_lock);
+ }
+ pthread_mutex_lock(&THR_LOCK_maria);
+ pthread_mutex_lock(&share->intern_lock); /* protect against Checkpoint */
+ /* this makes the share not be re-used next time the table is opened */
+ share->last_version= 0L; /* Impossible version */
+ pthread_mutex_unlock(&share->intern_lock);
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ break;
+ case HA_EXTRA_PREPARE_FOR_DROP:
+ case HA_EXTRA_PREPARE_FOR_RENAME:
+ {
+ my_bool do_flush= test(function != HA_EXTRA_PREPARE_FOR_DROP);
+ enum flush_type type;
+ pthread_mutex_lock(&THR_LOCK_maria);
+ /*
+ This share, to have last_version=0, needs to save all its data/index
+ blocks to disk if this is not for a DROP TABLE. Otherwise they would be
+ invisible to future openers; and they could even go to disk late and
+ cancel the work of future openers.
+ */
+ if (info->lock_type != F_UNLCK && !info->was_locked)
+ {
+ info->was_locked= info->lock_type;
+ if (maria_lock_database(info, F_UNLCK))
+ error= my_errno;
+ info->lock_type= F_UNLCK;
+ }
+ if (share->kfile.file >= 0)
+ _ma_decrement_open_count(info);
+ pthread_mutex_lock(&share->intern_lock);
+ type= do_flush ? FLUSH_RELEASE : FLUSH_IGNORE_CHANGED;
+ if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
+ type, type))
+ {
+ error=my_errno;
+ share->changed= 1;
+ }
+ if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
+ {
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ if (end_io_cache(&info->rec_cache))
+ error= 1;
+ }
+ if (share->kfile.file >= 0)
+ {
+ if (do_flush)
+ {
+ /* Save the state so that others can find it from disk. */
+ if (_ma_state_info_write(share, 1 | 2) ||
+ my_sync(share->kfile.file, MYF(0)))
+ error= my_errno;
+#ifdef ASK_MONTY /* see same tag in HA_EXTRA_FORCE_REOPEN */
+ else
+ share->changed= 0;
+#endif
+ }
+ else
+ {
+ /* be sure that state is not tried for write as file may be closed */
+ share->changed= 0;
+ }
+ }
+ if (share->data_file_type == BLOCK_RECORD &&
+ share->bitmap.file.file >= 0)
+ {
+ if (do_flush && my_sync(share->bitmap.file.file, MYF(0)))
+ error= my_errno;
+ }
+ /* For protection against Checkpoint, we set under intern_lock: */
+ share->last_version= 0L; /* Impossible version */
+ pthread_mutex_unlock(&share->intern_lock);
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ break;
+ }
+ case HA_EXTRA_FLUSH:
+ if (!share->temporary)
+ error= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
+ FLUSH_KEEP, FLUSH_KEEP);
+#ifdef HAVE_PWRITE
+ _ma_decrement_open_count(info);
+#endif
+ if (share->not_flushed)
+ {
+ share->not_flushed= 0;
+ if (_ma_sync_table_files(info))
+ error= my_errno;
+ if (error)
+ {
+ share->changed= 1;
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info); /* Fatal error found */
+ }
+ }
+ if (share->base.blobs && info->rec_buff_size >
+ share->base.default_rec_buff_size)
+ {
+ info->rec_buff_size= 1; /* Force realloc */
+ _ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ share->base.default_rec_buff_size);
+ }
+ break;
+ case HA_EXTRA_NORMAL: /* Theese isn't in use */
+ info->quick_mode= 0;
+ break;
+ case HA_EXTRA_QUICK:
+ info->quick_mode= 1;
+ break;
+ case HA_EXTRA_NO_ROWS:
+ if (!share->state.header.uniques)
+ info->opt_flag|= OPT_NO_ROWS;
+ break;
+ case HA_EXTRA_PRELOAD_BUFFER_SIZE:
+ info->preload_buff_size= *((ulong *) extra_arg);
+ break;
+ case HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
+ case HA_EXTRA_CHANGE_KEY_TO_DUP:
+ maria_extra_keyflag(info, function);
+ break;
+ case HA_EXTRA_MMAP:
+#ifdef HAVE_MMAP
+ if (block_records)
+ break; /* Not supported */
+ pthread_mutex_lock(&share->intern_lock);
+ /*
+ Memory map the data file if it is not already mapped. It is safe
+ to memory map a file while other threads are using file I/O on it.
+ Assigning a new address to a function pointer is an atomic
+ operation. intern_lock prevents that two or more mappings are done
+ at the same time.
+ */
+ if (!share->file_map)
+ {
+ if (_ma_dynmap_file(info, share->state.state.data_file_length))
+ {
+ DBUG_PRINT("warning",("mmap failed: errno: %d",errno));
+ error= my_errno= errno;
+ }
+ else
+ {
+ share->file_read= _ma_mmap_pread;
+ share->file_write= _ma_mmap_pwrite;
+ }
+ }
+ pthread_mutex_unlock(&share->intern_lock);
+#endif
+ break;
+ case HA_EXTRA_MARK_AS_LOG_TABLE:
+ pthread_mutex_lock(&share->intern_lock);
+ share->is_log_table= TRUE;
+ pthread_mutex_unlock(&share->intern_lock);
+ break;
+ case HA_EXTRA_KEY_CACHE:
+ case HA_EXTRA_NO_KEY_CACHE:
+ default:
+ break;
+ }
+ DBUG_RETURN(error);
+} /* maria_extra */
+
+
+/*
+ Start/Stop Inserting Duplicates Into a Table, WL#1648.
+*/
+
+static void maria_extra_keyflag(MARIA_HA *info,
+ enum ha_extra_function function)
+{
+ uint idx;
+
+ for (idx= 0; idx< info->s->base.keys; idx++)
+ {
+ switch (function) {
+ case HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
+ info->s->keyinfo[idx].flag|= HA_NOSAME;
+ break;
+ case HA_EXTRA_CHANGE_KEY_TO_DUP:
+ info->s->keyinfo[idx].flag&= ~(HA_NOSAME);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+
+int maria_reset(MARIA_HA *info)
+{
+ int error= 0;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("maria_reset");
+ /*
+ Free buffers and reset the following flags:
+ EXTRA_CACHE, EXTRA_WRITE_CACHE, EXTRA_KEYREAD, EXTRA_QUICK
+
+ If the row buffer cache is large (for dynamic tables), reduce it
+ to save memory.
+ */
+ if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
+ {
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ error= end_io_cache(&info->rec_cache);
+ }
+ if (share->base.blobs && info->rec_buff_size >
+ share->base.default_rec_buff_size)
+ {
+ info->rec_buff_size= 1; /* Force realloc */
+ _ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ share->base.default_rec_buff_size);
+ }
+#if defined(HAVE_MMAP) && defined(HAVE_MADVISE)
+ if (info->opt_flag & MEMMAP_USED)
+ madvise((char*) share->file_map, share->state.state.data_file_length,
+ MADV_RANDOM);
+#endif
+ info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS);
+ info->quick_mode= 0;
+ info->lastinx= 0; /* Use first index as def */
+ info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR;
+ info->page_changed= 1;
+ info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND |
+ HA_STATE_PREV_FOUND);
+ DBUG_RETURN(error);
+}
+
+
+int _ma_sync_table_files(const MARIA_HA *info)
+{
+ return (my_sync(info->dfile.file, MYF(MY_WME)) ||
+ my_sync(info->s->kfile.file, MYF(MY_WME)));
+}
+
+
+/**
+ @brief flushes the data and/or index file of a table
+
+ This is useful when one wants to read a table using OS syscalls (like
+ my_copy()) and first wants to be sure that MySQL-level caches go down to
+ the OS so that OS syscalls can see all data. It can flush rec_cache,
+ bitmap, pagecache of data file, pagecache of index file.
+
+ @param info table
+ @param flush_data_or_index one or two of these flags:
+ MARIA_FLUSH_DATA, MARIA_FLUSH_INDEX
+ @param flush_type_for_data
+ @param flush_type_for_index
+
+ @note does not sync files (@see _ma_sync_table_files()).
+ @note Progressively this function will be used in all places where we flush
+ the index but not the data file (probable bugs).
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index,
+ enum flush_type flush_type_for_data,
+ enum flush_type flush_type_for_index)
+{
+ MARIA_SHARE *share= info->s;
+ /* flush data file first because it's more critical */
+ if (flush_data_or_index & MARIA_FLUSH_DATA)
+ {
+ if (info->opt_flag & WRITE_CACHE_USED)
+ {
+ /* normally any code which creates a WRITE_CACHE destroys it later */
+ DBUG_ASSERT(0);
+ if (end_io_cache(&info->rec_cache))
+ goto err;
+ info->opt_flag&= ~WRITE_CACHE_USED;
+ }
+ if (share->data_file_type == BLOCK_RECORD)
+ {
+ if(_ma_bitmap_flush(share) ||
+ flush_pagecache_blocks(share->pagecache, &info->dfile,
+ flush_type_for_data))
+ goto err;
+ }
+ }
+ if ((flush_data_or_index & MARIA_FLUSH_INDEX) &&
+ flush_pagecache_blocks(share->pagecache, &share->kfile,
+ flush_type_for_index))
+ goto err;
+ return 0;
+err:
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info);
+ return 1;
+}
diff --git a/storage/maria/ma_ft_boolean_search.c b/storage/maria/ma_ft_boolean_search.c
new file mode 100644
index 00000000000..28305d9be47
--- /dev/null
+++ b/storage/maria/ma_ft_boolean_search.c
@@ -0,0 +1,975 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+/* TODO: add caching - pre-read several index entries at once */
+
+/*
+ Added optimization for full-text queries with plus-words. It was
+ implemented by sharing maximal document id (max_docid) variable
+ inside plus subtree. max_docid could be used by any word in plus
+ subtree, but it could be updated by plus-word only.
+
+ The idea is: there is no need to search for docid smaller than
+ biggest docid inside current plus subtree.
+
+ Examples:
+ +word1 word2
+ share same max_docid
+ max_docid updated by word1
+ +word1 +(word2 word3)
+ share same max_docid
+ max_docid updated by word1
+ +(word1 -word2) +(+word3 word4)
+ share same max_docid
+ max_docid updated by word3
+*/
+
+#define FT_CORE
+#include "ma_ftdefs.h"
+
+/* search with boolean queries */
+
+static double _wghts[11]=
+{
+ 0.131687242798354,
+ 0.197530864197531,
+ 0.296296296296296,
+ 0.444444444444444,
+ 0.666666666666667,
+ 1.000000000000000,
+ 1.500000000000000,
+ 2.250000000000000,
+ 3.375000000000000,
+ 5.062500000000000,
+ 7.593750000000000};
+static double *wghts=_wghts+5; /* wghts[i] = 1.5**i */
+
+static double _nwghts[11]=
+{
+ -0.065843621399177,
+ -0.098765432098766,
+ -0.148148148148148,
+ -0.222222222222222,
+ -0.333333333333334,
+ -0.500000000000000,
+ -0.750000000000000,
+ -1.125000000000000,
+ -1.687500000000000,
+ -2.531250000000000,
+ -3.796875000000000};
+static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */
+
+#define FTB_FLAG_TRUNC 1
+/* At most one of the following flags can be set */
+#define FTB_FLAG_YES 2
+#define FTB_FLAG_NO 4
+#define FTB_FLAG_WONLY 8
+
+typedef struct st_ftb_expr FTB_EXPR;
+struct st_ftb_expr
+{
+ FTB_EXPR *up;
+ uint flags;
+/* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */
+ my_off_t docid[2];
+ my_off_t max_docid;
+ float weight;
+ float cur_weight;
+ LIST *phrase; /* phrase words */
+ LIST *document; /* for phrase search */
+ uint yesses; /* number of "yes" words matched */
+ uint nos; /* number of "no" words matched */
+ uint ythresh; /* number of "yes" words in expr */
+ uint yweaks; /* number of "yes" words for scan only */
+};
+
+typedef struct st_ftb_word
+{
+ FTB_EXPR *up;
+ uint flags;
+/* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */
+ my_off_t docid[2]; /* for index search and for scan */
+ my_off_t key_root;
+ my_off_t *max_docid;
+ MARIA_KEYDEF *keyinfo;
+ struct st_ftb_word *prev;
+ float weight;
+ uint ndepth;
+ uint len;
+ uchar off;
+ uchar word[1];
+} FTB_WORD;
+
+typedef struct st_ft_info
+{
+ struct _ft_vft *please;
+ MARIA_HA *info;
+ CHARSET_INFO *charset;
+ FTB_EXPR *root;
+ FTB_WORD **list;
+ FTB_WORD *last_word;
+ MEM_ROOT mem_root;
+ QUEUE queue;
+ TREE no_dupes;
+ my_off_t lastpos;
+ uint keynr;
+ uchar with_scan;
+ enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE } state;
+} FTB;
+
+static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
+{
+ int i;
+
+ /* if a==curdoc, take it as a < b */
+ if (v && a->docid[0] == *v)
+ return -1;
+
+ /* ORDER BY docid, ndepth DESC */
+ i=CMP_NUM(a->docid[0], b->docid[0]);
+ if (!i)
+ i=CMP_NUM(b->ndepth,a->ndepth);
+ return i;
+}
+
+static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
+{
+ /* ORDER BY word DESC, ndepth DESC */
+ int i= ha_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1,
+ (uchar*) (*a)->word+1,(*a)->len-1,0,0);
+ if (!i)
+ i=CMP_NUM((*b)->ndepth,(*a)->ndepth);
+ return i;
+}
+
+
+typedef struct st_my_ftb_param
+{
+ FTB *ftb;
+ FTB_EXPR *ftbe;
+ uchar *up_quot;
+ uint depth;
+} MY_FTB_PARAM;
+
+
+static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param,
+ char *word, int word_len,
+ MYSQL_FTPARSER_BOOLEAN_INFO *info)
+{
+ MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
+ FTB_WORD *ftbw;
+ FTB_EXPR *ftbe, *tmp_expr;
+ FT_WORD *phrase_word;
+ LIST *tmp_element;
+ int r= info->weight_adjust;
+ float weight= (float)
+ (info->wasign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)];
+
+ switch (info->type) {
+ case FT_TOKEN_WORD:
+ ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root,
+ sizeof(FTB_WORD) +
+ (info->trunc ? HA_MAX_KEY_BUFF :
+ word_len * ftb_param->ftb->charset->mbmaxlen +
+ HA_FT_WLEN +
+ ftb_param->ftb->info->s->rec_reflength));
+ ftbw->len= word_len + 1;
+ ftbw->flags= 0;
+ ftbw->off= 0;
+ if (info->yesno > 0) ftbw->flags|= FTB_FLAG_YES;
+ if (info->yesno < 0) ftbw->flags|= FTB_FLAG_NO;
+ if (info->trunc) ftbw->flags|= FTB_FLAG_TRUNC;
+ ftbw->weight= weight;
+ ftbw->up= ftb_param->ftbe;
+ ftbw->docid[0]= ftbw->docid[1]= HA_OFFSET_ERROR;
+ ftbw->ndepth= (info->yesno < 0) + ftb_param->depth;
+ ftbw->key_root= HA_OFFSET_ERROR;
+ memcpy(ftbw->word + 1, word, word_len);
+ ftbw->word[0]= word_len;
+ if (info->yesno > 0) ftbw->up->ythresh++;
+ ftb_param->ftb->queue.max_elements++;
+ ftbw->prev= ftb_param->ftb->last_word;
+ ftb_param->ftb->last_word= ftbw;
+ ftb_param->ftb->with_scan|= (info->trunc & FTB_FLAG_TRUNC);
+ for (tmp_expr= ftb_param->ftbe; tmp_expr->up; tmp_expr= tmp_expr->up)
+ if (! (tmp_expr->flags & FTB_FLAG_YES))
+ break;
+ ftbw->max_docid= &tmp_expr->max_docid;
+ /* fall through */
+ case FT_TOKEN_STOPWORD:
+ if (! ftb_param->up_quot) break;
+ phrase_word= (FT_WORD *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
+ tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
+ phrase_word->pos= word;
+ phrase_word->len= word_len;
+ tmp_element->data= (void *)phrase_word;
+ ftb_param->ftbe->phrase= list_add(ftb_param->ftbe->phrase, tmp_element);
+ /* Allocate document list at this point.
+ It allows to avoid huge amount of allocs/frees for each row.*/
+ tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
+ tmp_element->data= alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
+ ftb_param->ftbe->document=
+ list_add(ftb_param->ftbe->document, tmp_element);
+ break;
+ case FT_TOKEN_LEFT_PAREN:
+ ftbe=(FTB_EXPR *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FTB_EXPR));
+ ftbe->flags= 0;
+ if (info->yesno > 0) ftbe->flags|= FTB_FLAG_YES;
+ if (info->yesno < 0) ftbe->flags|= FTB_FLAG_NO;
+ ftbe->weight= weight;
+ ftbe->up= ftb_param->ftbe;
+ ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
+ ftbe->docid[0]= ftbe->docid[1]= HA_OFFSET_ERROR;
+ ftbe->phrase= NULL;
+ ftbe->document= 0;
+ if (info->quot) ftb_param->ftb->with_scan|= 2;
+ if (info->yesno > 0) ftbe->up->ythresh++;
+ ftb_param->ftbe= ftbe;
+ ftb_param->depth++;
+ ftb_param->up_quot= info->quot;
+ break;
+ case FT_TOKEN_RIGHT_PAREN:
+ if (ftb_param->ftbe->document)
+ {
+ /* Circuit document list */
+ for (tmp_element= ftb_param->ftbe->document;
+ tmp_element->next; tmp_element= tmp_element->next) /* no-op */;
+ tmp_element->next= ftb_param->ftbe->document;
+ ftb_param->ftbe->document->prev= tmp_element;
+ }
+ info->quot= 0;
+ if (ftb_param->ftbe->up)
+ {
+ DBUG_ASSERT(ftb_param->depth);
+ ftb_param->ftbe= ftb_param->ftbe->up;
+ ftb_param->depth--;
+ ftb_param->up_quot= 0;
+ }
+ break;
+ case FT_TOKEN_EOF:
+ default:
+ break;
+ }
+ return(0);
+}
+
+
+static int ftb_parse_query_internal(MYSQL_FTPARSER_PARAM *param,
+ char *query, int len)
+{
+ MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
+ MYSQL_FTPARSER_BOOLEAN_INFO info;
+ CHARSET_INFO *cs= ftb_param->ftb->charset;
+ uchar **start= (uchar**) &query;
+ char *end= query + len;
+ FT_WORD w;
+
+ info.prev= ' ';
+ info.quot= 0;
+ while (maria_ft_get_word(cs, start, end, &w, &info))
+ param->mysql_add_word(param, w.pos, w.len, &info);
+ return(0);
+}
+
+
+static int _ftb_parse_query(FTB *ftb, uchar *query, uint len,
+ struct st_mysql_ftparser *parser)
+{
+ MYSQL_FTPARSER_PARAM *param;
+ MY_FTB_PARAM ftb_param;
+ DBUG_ENTER("_ftb_parse_query");
+ DBUG_ASSERT(parser);
+
+ if (ftb->state != UNINITIALIZED)
+ DBUG_RETURN(0);
+ if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
+ DBUG_RETURN(1);
+
+ ftb_param.ftb= ftb;
+ ftb_param.depth= 0;
+ ftb_param.ftbe= ftb->root;
+ ftb_param.up_quot= 0;
+
+ param->mysql_parse= ftb_parse_query_internal;
+ param->mysql_add_word= ftb_query_add_word;
+ param->mysql_ftparam= (void *)&ftb_param;
+ param->cs= ftb->charset;
+ param->doc= query;
+ param->length= len;
+ param->flags= 0;
+ param->mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO;
+ DBUG_RETURN(parser->parse(param));
+}
+
+
+static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)),
+ const void *a,const void *b)
+{
+ return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b)));
+}
+
+/* returns 1 if the search was finished (must-word wasn't found) */
+static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
+{
+ int r;
+ int subkeys=1;
+ my_bool can_go_down;
+ MARIA_HA *info=ftb->info;
+ uint off= 0, extra=HA_FT_WLEN+info->s->base.rec_reflength;
+ uchar *lastkey_buf= ftbw->word+ftbw->off;
+
+ if (ftbw->flags & FTB_FLAG_TRUNC)
+ lastkey_buf+=ftbw->len;
+
+ if (init_search)
+ {
+ ftbw->key_root=info->s->state.key_root[ftb->keynr];
+ ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
+
+ r= _ma_search(info, ftbw->keyinfo, ftbw->word, ftbw->len,
+ SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root);
+ }
+ else
+ {
+ uint sflag= SEARCH_BIGGER;
+ if (ftbw->docid[0] < *ftbw->max_docid)
+ {
+ sflag|= SEARCH_SAME;
+ _ma_dpointer(info, (ftbw->word + ftbw->len + HA_FT_WLEN),
+ *ftbw->max_docid);
+ }
+ r= _ma_search(info, ftbw->keyinfo, lastkey_buf,
+ USE_WHOLE_KEY, sflag, ftbw->key_root);
+ }
+
+ can_go_down=(!ftbw->off && (init_search || (ftbw->flags & FTB_FLAG_TRUNC)));
+ /* Skip rows inserted by concurrent insert */
+ while (!r)
+ {
+ if (can_go_down)
+ {
+ /* going down ? */
+ off=info->lastkey_length-extra;
+ subkeys=ft_sintXkorr(info->lastkey+off);
+ }
+ if (subkeys<0 || info->cur_row.lastpos < info->state->data_file_length)
+ break;
+ r= _ma_search_next(info, ftbw->keyinfo, info->lastkey,
+ info->lastkey_length,
+ SEARCH_BIGGER, ftbw->key_root);
+ }
+
+ if (!r && !ftbw->off)
+ {
+ r= ha_compare_text(ftb->charset,
+ (uchar*) info->lastkey+1,
+ info->lastkey_length-extra-1,
+ (uchar*) ftbw->word+1,
+ ftbw->len-1,
+ (my_bool) (ftbw->flags & FTB_FLAG_TRUNC), 0);
+ }
+
+ if (r) /* not found */
+ {
+ if (!ftbw->off || !(ftbw->flags & FTB_FLAG_TRUNC))
+ {
+ ftbw->docid[0]=HA_OFFSET_ERROR;
+ if ((ftbw->flags & FTB_FLAG_YES) && ftbw->up->up==0)
+ {
+ /*
+ This word MUST BE present in every document returned,
+ so we can stop the search right now
+ */
+ ftb->state=INDEX_DONE;
+ return 1; /* search is done */
+ }
+ else
+ return 0;
+ }
+
+ /* going up to the first-level tree to continue search there */
+ _ma_dpointer(info, (lastkey_buf+HA_FT_WLEN), ftbw->key_root);
+ ftbw->key_root=info->s->state.key_root[ftb->keynr];
+ ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
+ ftbw->off=0;
+ return _ft2_search(ftb, ftbw, 0);
+ }
+
+ /* matching key found */
+ memcpy(lastkey_buf, info->lastkey, info->lastkey_length);
+ if (lastkey_buf == ftbw->word)
+ ftbw->len=info->lastkey_length-extra;
+
+ /* going down ? */
+ if (subkeys<0)
+ {
+ /*
+ yep, going down, to the second-level tree
+ TODO here: subkey-based optimization
+ */
+ ftbw->off=off;
+ ftbw->key_root= info->cur_row.lastpos;
+ ftbw->keyinfo=& info->s->ft2_keyinfo;
+ r= _ma_search_first(info, ftbw->keyinfo, ftbw->key_root);
+ DBUG_ASSERT(r==0); /* found something */
+ memcpy(lastkey_buf+off, info->lastkey, info->lastkey_length);
+ }
+ ftbw->docid[0]= info->cur_row.lastpos;
+ if (ftbw->flags & FTB_FLAG_YES)
+ *ftbw->max_docid= info->cur_row.lastpos;
+ return 0;
+}
+
+static void _ftb_init_index_search(FT_INFO *ftb)
+{
+ int i;
+ FTB_WORD *ftbw;
+
+ if ((ftb->state != READY && ftb->state !=INDEX_DONE) ||
+ ftb->keynr == NO_SUCH_KEY)
+ return;
+ ftb->state=INDEX_SEARCH;
+
+ for (i=ftb->queue.elements; i; i--)
+ {
+ ftbw=(FTB_WORD *)(ftb->queue.root[i]);
+
+ if (ftbw->flags & FTB_FLAG_TRUNC)
+ {
+ /*
+ special treatment for truncation operator
+ 1. there are some (besides this) +words
+ | no need to search in the index, it can never ADD new rows
+ | to the result, and to remove half-matched rows we do scan anyway
+ 2. -trunc*
+ | same as 1.
+ 3. in 1 and 2, +/- need not be on the same expr. level,
+ but can be on any upper level, as in +word +(trunc1* trunc2*)
+ 4. otherwise
+ | We have to index-search for this prefix.
+ | It may cause duplicates, as in the index (sorted by <word,docid>)
+ | <aaaa,row1>
+ | <aabb,row2>
+ | <aacc,row1>
+ | Searching for "aa*" will find row1 twice...
+ */
+ FTB_EXPR *ftbe;
+ for (ftbe=(FTB_EXPR*)ftbw;
+ ftbe->up && !(ftbe->up->flags & FTB_FLAG_TRUNC);
+ ftbe->up->flags|= FTB_FLAG_TRUNC, ftbe=ftbe->up)
+ {
+ if (ftbe->flags & FTB_FLAG_NO || /* 2 */
+ ftbe->up->ythresh - ftbe->up->yweaks >1) /* 1 */
+ {
+ FTB_EXPR *top_ftbe=ftbe->up;
+ ftbw->docid[0]=HA_OFFSET_ERROR;
+ for (ftbe=(FTB_EXPR *)ftbw;
+ ftbe != top_ftbe && !(ftbe->flags & FTB_FLAG_NO);
+ ftbe=ftbe->up)
+ ftbe->up->yweaks++;
+ ftbe=0;
+ break;
+ }
+ }
+ if (!ftbe)
+ continue;
+ /* 4 */
+ if (!is_tree_inited(& ftb->no_dupes))
+ init_tree(& ftb->no_dupes,0,0,sizeof(my_off_t),
+ _ftb_no_dupes_cmp,0,0,0);
+ else
+ reset_tree(& ftb->no_dupes);
+ }
+
+ ftbw->off=0; /* in case of reinit */
+ if (_ft2_search(ftb, ftbw, 1))
+ return;
+ }
+ queue_fix(& ftb->queue);
+}
+
+
+FT_INFO * maria_ft_init_boolean_search(MARIA_HA *info, uint keynr, uchar *query,
+ uint query_len, CHARSET_INFO *cs)
+{
+ FTB *ftb;
+ FTB_EXPR *ftbe;
+ FTB_WORD *ftbw;
+
+ if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME))))
+ return 0;
+ ftb->please= (struct _ft_vft *) & _ma_ft_vft_boolean;
+ ftb->state=UNINITIALIZED;
+ ftb->info=info;
+ ftb->keynr=keynr;
+ ftb->charset=cs;
+ DBUG_ASSERT(keynr==NO_SUCH_KEY || cs == info->s->keyinfo[keynr].seg->charset);
+ ftb->with_scan=0;
+ ftb->lastpos=HA_OFFSET_ERROR;
+ bzero(& ftb->no_dupes, sizeof(TREE));
+ ftb->last_word= 0;
+
+ init_alloc_root(&ftb->mem_root, 1024, 1024);
+ ftb->queue.max_elements= 0;
+ if (!(ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR))))
+ goto err;
+ ftbe->weight=1;
+ ftbe->flags=FTB_FLAG_YES;
+ ftbe->nos=1;
+ ftbe->up=0;
+ ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
+ ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR;
+ ftbe->phrase= NULL;
+ ftbe->document= 0;
+ ftb->root=ftbe;
+ if (unlikely(_ftb_parse_query(ftb, query, query_len,
+ keynr == NO_SUCH_KEY ? &ft_default_parser :
+ info->s->keyinfo[keynr].parser)))
+ goto err;
+ /*
+ Hack: instead of init_queue, we'll use reinit queue to be able
+ to alloc queue with alloc_root()
+ */
+ if (! (ftb->queue.root= (uchar **)alloc_root(&ftb->mem_root,
+ (ftb->queue.max_elements + 1) *
+ sizeof(void *))))
+ goto err;
+ reinit_queue(&ftb->queue, ftb->queue.max_elements, 0, 0,
+ (int (*)(void*, uchar*, uchar*))FTB_WORD_cmp, 0);
+ for (ftbw= ftb->last_word; ftbw; ftbw= ftbw->prev)
+ queue_insert(&ftb->queue, (uchar *)ftbw);
+ ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root,
+ sizeof(FTB_WORD *)*ftb->queue.elements);
+ memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements);
+ my_qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *),
+ (qsort2_cmp)FTB_WORD_cmp_list, ftb->charset);
+ if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC;
+ ftb->state=READY;
+ return ftb;
+err:
+ free_root(& ftb->mem_root, MYF(0));
+ my_free((uchar*)ftb,MYF(0));
+ return 0;
+}
+
+
+typedef struct st_my_ftb_phrase_param
+{
+ LIST *phrase;
+ LIST *document;
+ CHARSET_INFO *cs;
+ uint phrase_length;
+ uint document_length;
+ uint match;
+} MY_FTB_PHRASE_PARAM;
+
+
+static int ftb_phrase_add_word(MYSQL_FTPARSER_PARAM *param,
+ char *word, int word_len,
+ MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
+{
+ MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
+ FT_WORD *w= (FT_WORD *)phrase_param->document->data;
+ LIST *phrase, *document;
+ w->pos= word;
+ w->len= word_len;
+ phrase_param->document= phrase_param->document->prev;
+ if (phrase_param->phrase_length > phrase_param->document_length)
+ {
+ phrase_param->document_length++;
+ return 0;
+ }
+ /* TODO: rewrite phrase search to avoid
+ comparing the same word twice. */
+ for (phrase= phrase_param->phrase, document= phrase_param->document->next;
+ phrase; phrase= phrase->next, document= document->next)
+ {
+ FT_WORD *phrase_word= (FT_WORD *)phrase->data;
+ FT_WORD *document_word= (FT_WORD *)document->data;
+ if (my_strnncoll(phrase_param->cs,
+ (uchar*) phrase_word->pos, phrase_word->len,
+ (uchar*) document_word->pos, document_word->len))
+ return 0;
+ }
+ phrase_param->match++;
+ return 0;
+}
+
+
+static int ftb_check_phrase_internal(MYSQL_FTPARSER_PARAM *param,
+ char *document, int len)
+{
+ FT_WORD word;
+ MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
+ const char *docend= document + len;
+ while (maria_ft_simple_get_word(phrase_param->cs, (uchar**) &document,
+ docend, &word, FALSE))
+ {
+ param->mysql_add_word(param, word.pos, word.len, 0);
+ if (phrase_param->match)
+ break;
+ }
+ return 0;
+}
+
+
+/*
+ Checks if given buffer matches phrase list.
+
+ SYNOPSIS
+ _ftb_check_phrase()
+ s0 start of buffer
+ e0 end of buffer
+ phrase broken into list phrase
+ cs charset info
+
+ RETURN VALUE
+ 1 is returned if phrase found, 0 else.
+ -1 is returned if error occurs.
+*/
+
+static int _ftb_check_phrase(FTB *ftb, const uchar *document, uint len,
+ FTB_EXPR *ftbe, struct st_mysql_ftparser *parser)
+{
+ MY_FTB_PHRASE_PARAM ftb_param;
+ MYSQL_FTPARSER_PARAM *param;
+ DBUG_ENTER("_ftb_check_phrase");
+ DBUG_ASSERT(parser);
+
+ if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 1)))
+ DBUG_RETURN(0);
+ ftb_param.phrase= ftbe->phrase;
+ ftb_param.document= ftbe->document;
+ ftb_param.cs= ftb->charset;
+ ftb_param.phrase_length= list_length(ftbe->phrase);
+ ftb_param.document_length= 1;
+ ftb_param.match= 0;
+
+ param->mysql_parse= ftb_check_phrase_internal;
+ param->mysql_add_word= ftb_phrase_add_word;
+ param->mysql_ftparam= (void *)&ftb_param;
+ param->cs= ftb->charset;
+ param->doc= (uchar *)document;
+ param->length= len;
+ param->flags= 0;
+ param->mode= MYSQL_FTPARSER_WITH_STOPWORDS;
+ if (unlikely(parser->parse(param)))
+ return -1;
+ DBUG_RETURN(ftb_param.match ? 1 : 0);
+}
+
+
+static int _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig)
+{
+ FT_SEG_ITERATOR ftsi;
+ FTB_EXPR *ftbe;
+ float weight=ftbw->weight;
+ int yn_flag= ftbw->flags, ythresh, mode=(ftsi_orig != 0);
+ my_off_t curdoc=ftbw->docid[mode];
+ struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
+ &ft_default_parser :
+ ftb->info->s->keyinfo[ftb->keynr].parser;
+
+ for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
+ {
+ ythresh = ftbe->ythresh - (mode ? 0 : ftbe->yweaks);
+ if (ftbe->docid[mode] != curdoc)
+ {
+ ftbe->cur_weight=0;
+ ftbe->yesses=ftbe->nos=0;
+ ftbe->docid[mode]=curdoc;
+ }
+ if (ftbe->nos)
+ break;
+ if (yn_flag & FTB_FLAG_YES)
+ {
+ weight /= ftbe->ythresh;
+ ftbe->cur_weight += weight;
+ if ((int) ++ftbe->yesses == ythresh)
+ {
+ yn_flag=ftbe->flags;
+ weight=ftbe->cur_weight*ftbe->weight;
+ if (mode && ftbe->phrase)
+ {
+ int found= 0;
+
+ memcpy(&ftsi, ftsi_orig, sizeof(ftsi));
+ while (_ma_ft_segiterator(&ftsi) && !found)
+ {
+ if (!ftsi.pos)
+ continue;
+ found= _ftb_check_phrase(ftb, ftsi.pos, ftsi.len, ftbe, parser);
+ if (unlikely(found < 0))
+ return 1;
+ }
+ if (!found)
+ break;
+ } /* ftbe->quot */
+ }
+ else
+ break;
+ }
+ else
+ if (yn_flag & FTB_FLAG_NO)
+ {
+ /*
+ NOTE: special sort function of queue assures that all
+ (yn_flag & FTB_FLAG_NO) != 0
+ events for every particular subexpression will
+ "auto-magically" happen BEFORE all the
+ (yn_flag & FTB_FLAG_YES) != 0 events. So no
+ already matched expression can become not-matched again.
+ */
+ ++ftbe->nos;
+ break;
+ }
+ else
+ {
+ if (ftbe->ythresh)
+ weight/=3;
+ ftbe->cur_weight += weight;
+ if ((int) ftbe->yesses < ythresh)
+ break;
+ if (!(yn_flag & FTB_FLAG_WONLY))
+ yn_flag= ((int) ftbe->yesses++ == ythresh) ? ftbe->flags : FTB_FLAG_WONLY ;
+ weight*= ftbe->weight;
+ }
+ }
+ return 0;
+}
+
+
+int maria_ft_boolean_read_next(FT_INFO *ftb, char *record)
+{
+ FTB_EXPR *ftbe;
+ FTB_WORD *ftbw;
+ MARIA_HA *info=ftb->info;
+ my_off_t curdoc;
+
+ if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE)
+ return -1;
+
+ /* black magic ON */
+ if ((int) _ma_check_index(info, ftb->keynr) < 0)
+ return my_errno;
+ if (_ma_readinfo(info, F_RDLCK, 1))
+ return my_errno;
+ /* black magic OFF */
+
+ if (!ftb->queue.elements)
+ return my_errno=HA_ERR_END_OF_FILE;
+
+ /* Attention!!! Address of a local variable is used here! See err: label */
+ ftb->queue.first_cmp_arg=(void *)&curdoc;
+
+ while (ftb->state == INDEX_SEARCH &&
+ (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid[0]) !=
+ HA_OFFSET_ERROR)
+ {
+ while (curdoc == (ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0])
+ {
+ if (unlikely(_ftb_climb_the_tree(ftb, ftbw, 0)))
+ {
+ my_errno= HA_ERR_OUT_OF_MEM;
+ goto err;
+ }
+
+ /* update queue */
+ _ft2_search(ftb, ftbw, 0);
+ queue_replaced(& ftb->queue);
+ }
+
+ ftbe=ftb->root;
+ if (ftbe->docid[0]==curdoc && ftbe->cur_weight>0 &&
+ ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos)
+ {
+ /* curdoc matched ! */
+ if (is_tree_inited(&ftb->no_dupes) &&
+ tree_insert(&ftb->no_dupes, &curdoc, 0,
+ ftb->no_dupes.custom_arg)->count >1)
+ /* but it managed already to get past this line once */
+ continue;
+
+ info->cur_row.lastpos= curdoc;
+ /* Clear all states, except that the table was updated */
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+
+ if (!(*info->read_record)(info, record, curdoc))
+ {
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ if (ftb->with_scan && maria_ft_boolean_find_relevance(ftb,record,0)==0)
+ continue; /* no match */
+ my_errno=0;
+ goto err;
+ }
+ goto err;
+ }
+ }
+ ftb->state=INDEX_DONE;
+ my_errno=HA_ERR_END_OF_FILE;
+err:
+ ftb->queue.first_cmp_arg=(void *)0;
+ return my_errno;
+}
+
+
+typedef struct st_my_ftb_find_param
+{
+ FT_INFO *ftb;
+ FT_SEG_ITERATOR *ftsi;
+} MY_FTB_FIND_PARAM;
+
+
+static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
+ char *word, int len,
+ MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
+{
+ MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
+ FT_INFO *ftb= ftb_param->ftb;
+ FTB_WORD *ftbw;
+ int a, b, c;
+ for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2)
+ {
+ ftbw= ftb->list[c];
+ if (ha_compare_text(ftb->charset, (uchar*)word, len,
+ (uchar*)ftbw->word+1, ftbw->len-1,
+ (my_bool)(ftbw->flags&FTB_FLAG_TRUNC), 0) > 0)
+ b= c;
+ else
+ a= c;
+ }
+ for (; c >= 0; c--)
+ {
+ ftbw= ftb->list[c];
+ if (ha_compare_text(ftb->charset, (uchar*)word, len,
+ (uchar*)ftbw->word + 1,ftbw->len - 1,
+ (my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0))
+ break;
+ if (ftbw->docid[1] == ftb->info->cur_row.lastpos)
+ continue;
+ ftbw->docid[1]= ftb->info->cur_row.lastpos;
+ if (unlikely(_ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi)))
+ return 1;
+ }
+ return(0);
+}
+
+
+static int ftb_find_relevance_parse(MYSQL_FTPARSER_PARAM *param,
+ char *doc, int len)
+{
+ MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
+ FT_INFO *ftb= ftb_param->ftb;
+ char *end= doc + len;
+ FT_WORD w;
+ while (maria_ft_simple_get_word(ftb->charset, (uchar**) &doc, end, &w, TRUE))
+ param->mysql_add_word(param, w.pos, w.len, 0);
+ return(0);
+}
+
+
+float maria_ft_boolean_find_relevance(FT_INFO *ftb, uchar *record, uint length)
+{
+ FTB_EXPR *ftbe;
+ FT_SEG_ITERATOR ftsi, ftsi2;
+ MARIA_RECORD_POS docid= ftb->info->cur_row.lastpos;
+ MY_FTB_FIND_PARAM ftb_param;
+ MYSQL_FTPARSER_PARAM *param;
+ struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
+ &ft_default_parser :
+ ftb->info->s->keyinfo[ftb->keynr].parser;
+
+ if (docid == HA_OFFSET_ERROR)
+ return -2.0;
+ if (!ftb->queue.elements)
+ return 0;
+ if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
+ return 0;
+
+ if (ftb->state != INDEX_SEARCH && docid <= ftb->lastpos)
+ {
+ FTB_EXPR *x;
+ uint i;
+
+ for (i=0; i < ftb->queue.elements; i++)
+ {
+ ftb->list[i]->docid[1]=HA_OFFSET_ERROR;
+ for (x=ftb->list[i]->up; x; x=x->up)
+ x->docid[1]=HA_OFFSET_ERROR;
+ }
+ }
+
+ ftb->lastpos=docid;
+
+ if (ftb->keynr==NO_SUCH_KEY)
+ _ma_ft_segiterator_dummy_init(record, length, &ftsi);
+ else
+ _ma_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi);
+ memcpy(&ftsi2, &ftsi, sizeof(ftsi));
+
+ ftb_param.ftb= ftb;
+ ftb_param.ftsi= &ftsi2;
+ param->mysql_parse= ftb_find_relevance_parse;
+ param->mysql_add_word= ftb_find_relevance_add_word;
+ param->mysql_ftparam= (void *)&ftb_param;
+ param->flags= 0;
+ param->cs= ftb->charset;
+ param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
+
+ while (_ma_ft_segiterator(&ftsi))
+ {
+ if (!ftsi.pos)
+ continue;
+ param->doc= (uchar *)ftsi.pos;
+ param->length= ftsi.len;
+ if (unlikely(parser->parse(param)))
+ return 0;
+ }
+ ftbe=ftb->root;
+ if (ftbe->docid[1]==docid && ftbe->cur_weight>0 &&
+ ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
+ { /* row matched ! */
+ return ftbe->cur_weight;
+ }
+ else
+ { /* match failed ! */
+ return 0.0;
+ }
+}
+
+
+void maria_ft_boolean_close_search(FT_INFO *ftb)
+{
+ if (is_tree_inited(& ftb->no_dupes))
+ {
+ delete_tree(& ftb->no_dupes);
+ }
+ free_root(& ftb->mem_root, MYF(0));
+ my_free((uchar*)ftb,MYF(0));
+}
+
+
+float maria_ft_boolean_get_relevance(FT_INFO *ftb)
+{
+ return ftb->root->cur_weight;
+}
+
+
+void maria_ft_boolean_reinit_search(FT_INFO *ftb)
+{
+ _ftb_init_index_search(ftb);
+}
diff --git a/storage/maria/ma_ft_eval.c b/storage/maria/ma_ft_eval.c
new file mode 100644
index 00000000000..5fc67c6c664
--- /dev/null
+++ b/storage/maria/ma_ft_eval.c
@@ -0,0 +1,254 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code
+ added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
+
+#include "ma_ftdefs.h"
+#include "maria_ft_eval.h"
+#include <stdarg.h>
+#include <my_getopt.h>
+
+static void print_error(int exit_code, const char *fmt,...);
+static void get_options(int argc, char *argv[]);
+static int create_record(char *pos, FILE *file);
+static void usage();
+
+static struct my_option my_long_options[] =
+{
+ {"", 's', "", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'q', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'S', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", '#', "", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'V', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", '?', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'h', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+int main(int argc, char *argv[])
+{
+ MARIA_HA *file;
+ int i,j;
+
+ MY_INIT(argv[0]);
+ get_options(argc,argv);
+ bzero((char*)recinfo,sizeof(recinfo));
+
+ maria_init();
+ /* First define 2 columns */
+ recinfo[0].type=FIELD_SKIP_ENDSPACE;
+ recinfo[0].length=docid_length;
+ recinfo[1].type=FIELD_BLOB;
+ recinfo[1].length= 4+portable_sizeof_char_ptr;
+
+ /* Define a key over the first column */
+ keyinfo[0].seg=keyseg;
+ keyinfo[0].keysegs=1;
+ keyinfo[0].block_length= 0; /* Default block length */
+ keyinfo[0].seg[0].type= HA_KEYTYPE_TEXT;
+ keyinfo[0].seg[0].flag= HA_BLOB_PART;
+ keyinfo[0].seg[0].start=recinfo[0].length;
+ keyinfo[0].seg[0].length=key_length;
+ keyinfo[0].seg[0].null_bit=0;
+ keyinfo[0].seg[0].null_pos=0;
+ keyinfo[0].seg[0].bit_start=4;
+ keyinfo[0].seg[0].language=MY_CHARSET_CURRENT;
+ keyinfo[0].flag = HA_FULLTEXT;
+
+ if (!silent)
+ printf("- Creating isam-file\n");
+ if (maria_create(filename,1,keyinfo,2,recinfo,0,NULL,(MARIA_CREATE_INFO*) 0,0))
+ goto err;
+ if (!(file=maria_open(filename,2,0)))
+ goto err;
+ if (!silent)
+ printf("Initializing stopwords\n");
+ maria_ft_init_stopwords(stopwordlist);
+
+ if (!silent)
+ printf("- Writing key:s\n");
+
+ my_errno=0;
+ i=0;
+ while (create_record(record,df))
+ {
+ error=maria_write(file,record);
+ if (error)
+ printf("I= %2d maria_write: %d errno: %d\n",i,error,my_errno);
+ i++;
+ }
+ fclose(df);
+
+ if (maria_close(file)) goto err;
+ if (!silent)
+ printf("- Reopening file\n");
+ if (!(file=maria_open(filename,2,0))) goto err;
+ if (!silent)
+ printf("- Reading rows with key\n");
+ for (i=1;create_record(record,qf);i++)
+ {
+ FT_DOCLIST *result;
+ double w;
+ int t, err;
+
+ result=maria_ft_nlq_init_search(file,0,blob_record,(uint) strlen(blob_record),1);
+ if (!result)
+ {
+ printf("Query %d failed with errno %3d\n",i,my_errno);
+ goto err;
+ }
+ if (!silent)
+ printf("Query %d. Found: %d.\n",i,result->ndocs);
+ for (j=0;(err=maria_ft_nlq_read_next(result, read_record))==0;j++)
+ {
+ t=uint2korr(read_record);
+ w=maria_ft_nlq_get_relevance(result);
+ printf("%d %.*s %f\n",i,t,read_record+2,w);
+ }
+ if (err != HA_ERR_END_OF_FILE)
+ {
+ printf("maria_ft_read_next %d failed with errno %3d\n",j,my_errno);
+ goto err;
+ }
+ maria_ft_nlq_close_search(result);
+ }
+
+ if (maria_close(file)) goto err;
+ maria_end();
+ my_end(MY_CHECK_ERROR);
+
+ return (0);
+
+ err:
+ printf("got error: %3d when using maria-database\n",my_errno);
+ return 1; /* skip warning */
+
+}
+
+
+static my_bool
+get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
+ char *argument)
+{
+ switch (optid) {
+ case 's':
+ if (stopwordlist && stopwordlist != maria_ft_precompiled_stopwords)
+ break;
+ {
+ FILE *f; char s[HA_FT_MAXLEN]; int i=0,n=SWL_INIT;
+
+ if (!(stopwordlist=(const char**) malloc(n*sizeof(char *))))
+ print_error(1,"malloc(%d)",n*sizeof(char *));
+ if (!(f=fopen(argument,"r")))
+ print_error(1,"fopen(%s)",argument);
+ while (!feof(f))
+ {
+ if (!(fgets(s,HA_FT_MAXLEN,f)))
+ print_error(1,"fgets(s,%d,%s)",HA_FT_MAXLEN,argument);
+ if (!(stopwordlist[i++]=strdup(s)))
+ print_error(1,"strdup(%s)",s);
+ if (i >= n)
+ {
+ n+=SWL_PLUS;
+ if (!(stopwordlist=(const char**) realloc((char*) stopwordlist,
+ n*sizeof(char *))))
+ print_error(1,"realloc(%d)",n*sizeof(char *));
+ }
+ }
+ fclose(f);
+ stopwordlist[i]=NULL;
+ break;
+ }
+ case 'q': silent=1; break;
+ case 'S': if (stopwordlist==maria_ft_precompiled_stopwords) stopwordlist=NULL; break;
+ case '#':
+ DBUG_PUSH (argument);
+ break;
+ case 'V':
+ case '?':
+ case 'h':
+ usage();
+ exit(1);
+ }
+ return 0;
+}
+
+
+static void get_options(int argc, char *argv[])
+{
+ int ho_error;
+
+ if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option)))
+ exit(ho_error);
+
+ if (!(d_file=argv[optind])) print_error(1,"No d_file");
+ if (!(df=fopen(d_file,"r")))
+ print_error(1,"fopen(%s)",d_file);
+ if (!(q_file=argv[optind+1])) print_error(1,"No q_file");
+ if (!(qf=fopen(q_file,"r")))
+ print_error(1,"fopen(%s)",q_file);
+ return;
+} /* get options */
+
+
+static int create_record(char *pos, FILE *file)
+{
+ uint tmp; char *ptr;
+
+ bzero((char *)pos,MAX_REC_LENGTH);
+
+ /* column 1 - VARCHAR */
+ if (!(fgets(pos+2,MAX_REC_LENGTH-32,file)))
+ {
+ if (feof(file))
+ return 0;
+ else
+ print_error(1,"fgets(docid) - 1");
+ }
+ tmp=(uint) strlen(pos+2)-1;
+ int2store(pos,tmp);
+ pos+=recinfo[0].length;
+
+ /* column 2 - BLOB */
+
+ if (!(fgets(blob_record,MAX_BLOB_LENGTH,file)))
+ print_error(1,"fgets(docid) - 2");
+ tmp=(uint) strlen(blob_record);
+ int4store(pos,tmp);
+ ptr=blob_record;
+ memcpy_fixed(pos+4,&ptr,sizeof(char*));
+ return 1;
+}
+
+/* VARARGS */
+
+static void print_error(int exit_code, const char *fmt,...)
+{
+ va_list args;
+
+ va_start(args,fmt);
+ fprintf(stderr,"%s: error: ",my_progname);
+ VOID(vfprintf(stderr, fmt, args));
+ VOID(fputc('\n',stderr));
+ fflush(stderr);
+ va_end(args);
+ exit(exit_code);
+}
+
+
+static void usage()
+{
+ printf("%s [options]\n", my_progname);
+ my_print_help(my_long_options);
+ my_print_variables(my_long_options);
+}
diff --git a/storage/maria/ma_ft_eval.h b/storage/maria/ma_ft_eval.h
new file mode 100644
index 00000000000..481943dfb0b
--- /dev/null
+++ b/storage/maria/ma_ft_eval.h
@@ -0,0 +1,41 @@
+/* Copyright (C) 2006 MySQL AB & Sergei A. Golubchik
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+const char **stopwordlist=maria_ft_precompiled_stopwords;
+
+#define MAX_REC_LENGTH 128
+#define MAX_BLOB_LENGTH 60000
+char record[MAX_REC_LENGTH], read_record[MAX_REC_LENGTH+MAX_BLOB_LENGTH];
+char blob_record[MAX_BLOB_LENGTH+20*20];
+
+char *filename= (char*) "EVAL";
+
+int silent=0, error=0;
+
+uint key_length=MAX_BLOB_LENGTH,docid_length=32;
+char *d_file, *q_file;
+FILE *df,*qf;
+
+MARIA_COLUMNDEF recinfo[3];
+MARIA_KEYDEF keyinfo[2];
+HA_KEYSEG keyseg[10];
+
+#define SWL_INIT 500
+#define SWL_PLUS 50
+
+#define MAX_LINE_LENGTH 128
+char line[MAX_LINE_LENGTH];
diff --git a/storage/maria/ma_ft_nlq_search.c b/storage/maria/ma_ft_nlq_search.c
new file mode 100644
index 00000000000..afee14c22b5
--- /dev/null
+++ b/storage/maria/ma_ft_nlq_search.c
@@ -0,0 +1,375 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+#define FT_CORE
+#include "ma_ftdefs.h"
+
+/* search with natural language queries */
+
+typedef struct ft_doc_rec
+{
+ my_off_t dpos;
+ double weight;
+} FT_DOC;
+
+struct st_ft_info
+{
+ struct _ft_vft *please;
+ MARIA_HA *info;
+ int ndocs;
+ int curdoc;
+ FT_DOC doc[1];
+};
+
+typedef struct st_all_in_one
+{
+ MARIA_HA *info;
+ uint keynr;
+ CHARSET_INFO *charset;
+ uchar *keybuff;
+ TREE dtree;
+} ALL_IN_ONE;
+
+typedef struct st_ft_superdoc
+{
+ FT_DOC doc;
+ FT_WORD *word_ptr;
+ double tmp_weight;
+} FT_SUPERDOC;
+
+static int FT_SUPERDOC_cmp(void* cmp_arg __attribute__((unused)),
+ FT_SUPERDOC *p1, FT_SUPERDOC *p2)
+{
+ if (p1->doc.dpos < p2->doc.dpos)
+ return -1;
+ if (p1->doc.dpos == p2->doc.dpos)
+ return 0;
+ return 1;
+}
+
+static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
+{
+ int subkeys, r;
+ uint keylen, doc_cnt;
+ FT_SUPERDOC sdoc, *sptr;
+ TREE_ELEMENT *selem;
+ double gweight=1;
+ MARIA_HA *info= aio->info;
+ uchar *keybuff= (uchar*) aio->keybuff;
+ MARIA_KEYDEF *keyinfo=info->s->keyinfo+aio->keynr;
+ my_off_t key_root=info->s->state.key_root[aio->keynr];
+ uint extra=HA_FT_WLEN+info->s->base.rec_reflength;
+#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
+ float tmp_weight;
+#else
+#error
+#endif
+
+ DBUG_ENTER("walk_and_match");
+
+ word->weight=LWS_FOR_QUERY;
+
+ keylen= _ma_ft_make_key(info,aio->keynr,(char*) keybuff,word,0);
+ keylen-=HA_FT_WLEN;
+ doc_cnt=0;
+
+ /* Skip rows inserted by current inserted */
+ for (r= _ma_search(info, keyinfo, keybuff, keylen, SEARCH_FIND, key_root) ;
+ !r &&
+ (subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 &&
+ info->cur_row.lastpos >= info->state->data_file_length ;
+ r= _ma_search_next(info, keyinfo, info->lastkey,
+ info->lastkey_length, SEARCH_BIGGER, key_root))
+ ;
+
+ info->update|= HA_STATE_AKTIV; /* for _ma_test_if_changed() */
+
+ /* The following should be safe, even if we compare doubles */
+ while (!r && gweight)
+ {
+
+ if (keylen &&
+ ha_compare_text(aio->charset,
+ (uchar*) info->lastkey+1, info->lastkey_length-extra-1,
+ (uchar*) keybuff+1, keylen-1, 0, 0))
+ break;
+
+ if (subkeys<0)
+ {
+ if (doc_cnt)
+ DBUG_RETURN(1); /* index is corrupted */
+ /*
+ TODO here: unsafe optimization, should this word
+ be skipped (based on subkeys) ?
+ */
+ keybuff+=keylen;
+ keyinfo=& info->s->ft2_keyinfo;
+ key_root= info->cur_row.lastpos;
+ keylen=0;
+ r= _ma_search_first(info, keyinfo, key_root);
+ goto do_skip;
+ }
+#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
+ tmp_weight=*(float*)&subkeys;
+#else
+#error
+#endif
+ /* The following should be safe, even if we compare doubles */
+ if (tmp_weight==0)
+ DBUG_RETURN(doc_cnt); /* stopword, doc_cnt should be 0 */
+
+ sdoc.doc.dpos= info->cur_row.lastpos;
+
+ /* saving document matched into dtree */
+ if (!(selem=tree_insert(&aio->dtree, &sdoc, 0, aio->dtree.custom_arg)))
+ DBUG_RETURN(1);
+
+ sptr=(FT_SUPERDOC *)ELEMENT_KEY((&aio->dtree), selem);
+
+ if (selem->count==1) /* document's first match */
+ sptr->doc.weight=0;
+ else
+ sptr->doc.weight+=sptr->tmp_weight*sptr->word_ptr->weight;
+
+ sptr->word_ptr=word;
+ sptr->tmp_weight=tmp_weight;
+
+ doc_cnt++;
+
+ gweight=word->weight*GWS_IN_USE;
+ if (gweight < 0 || doc_cnt > 2000000)
+ gweight=0;
+
+ if (_ma_test_if_changed(info) == 0)
+ r= _ma_search_next(info, keyinfo, info->lastkey, info->lastkey_length,
+ SEARCH_BIGGER, key_root);
+ else
+ r= _ma_search(info, keyinfo, info->lastkey, info->lastkey_length,
+ SEARCH_BIGGER, key_root);
+do_skip:
+ while ((subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 &&
+ !r && info->cur_row.lastpos >= info->state->data_file_length)
+ r= _ma_search_next(info, keyinfo, info->lastkey, info->lastkey_length,
+ SEARCH_BIGGER, key_root);
+
+ }
+ word->weight=gweight;
+
+ DBUG_RETURN(0);
+}
+
+
+static int walk_and_copy(FT_SUPERDOC *from,
+ uint32 count __attribute__((unused)), FT_DOC **to)
+{
+ DBUG_ENTER("walk_and_copy");
+ from->doc.weight+=from->tmp_weight*from->word_ptr->weight;
+ (*to)->dpos=from->doc.dpos;
+ (*to)->weight=from->doc.weight;
+ (*to)++;
+ DBUG_RETURN(0);
+}
+
+static int walk_and_push(FT_SUPERDOC *from,
+ uint32 count __attribute__((unused)), QUEUE *best)
+{
+ DBUG_ENTER("walk_and_copy");
+ from->doc.weight+=from->tmp_weight*from->word_ptr->weight;
+ set_if_smaller(best->elements, ft_query_expansion_limit-1);
+ queue_insert(best, (uchar *)& from->doc);
+ DBUG_RETURN(0);
+}
+
+
+static int FT_DOC_cmp(void *unused __attribute__((unused)),
+ FT_DOC *a, FT_DOC *b)
+{
+ return sgn(b->weight - a->weight);
+}
+
+
+FT_INFO *maria_ft_init_nlq_search(MARIA_HA *info, uint keynr, uchar *query,
+ uint query_len, uint flags, uchar *record)
+{
+ TREE wtree;
+ ALL_IN_ONE aio;
+ FT_DOC *dptr;
+ FT_INFO *dlist=NULL;
+ MARIA_RECORD_POS saved_lastpos= info->cur_row.lastpos;
+ struct st_mysql_ftparser *parser;
+ MYSQL_FTPARSER_PARAM *ftparser_param;
+ DBUG_ENTER("maria_ft_init_nlq_search");
+
+ /* black magic ON */
+ if ((int) (keynr = _ma_check_index(info,keynr)) < 0)
+ DBUG_RETURN(NULL);
+ if (_ma_readinfo(info,F_RDLCK,1))
+ DBUG_RETURN(NULL);
+ /* black magic OFF */
+
+ aio.info=info;
+ aio.keynr=keynr;
+ aio.charset=info->s->keyinfo[keynr].seg->charset;
+ aio.keybuff= (uchar*) info->lastkey+info->s->base.max_key_length;
+ parser= info->s->keyinfo[keynr].parser;
+ if (! (ftparser_param= maria_ftparser_call_initializer(info, keynr, 0)))
+ goto err;
+
+ bzero(&wtree,sizeof(wtree));
+
+ init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0,
+ NULL, NULL);
+
+ maria_ft_parse_init(&wtree, aio.charset);
+ ftparser_param->flags= 0;
+ if (maria_ft_parse(&wtree, query, query_len, parser, ftparser_param,
+ &wtree.mem_root))
+ goto err;
+
+ if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
+ left_root_right))
+ goto err;
+
+ if (flags & FT_EXPAND && ft_query_expansion_limit)
+ {
+ QUEUE best;
+ init_queue(&best,ft_query_expansion_limit,0,0, (queue_compare) &FT_DOC_cmp,
+ 0);
+ tree_walk(&aio.dtree, (tree_walk_action) &walk_and_push,
+ &best, left_root_right);
+ while (best.elements)
+ {
+ my_off_t docid=((FT_DOC *)queue_remove(& best, 0))->dpos;
+ if (!(*info->read_record)(info, record, docid))
+ {
+ info->update|= HA_STATE_AKTIV;
+ ftparser_param->flags= MYSQL_FTFLAGS_NEED_COPY;
+ if (unlikely(_ma_ft_parse(&wtree, info, keynr, record, ftparser_param,
+ &wtree.mem_root)))
+ {
+ delete_queue(&best);
+ goto err;
+ }
+ }
+ }
+ delete_queue(&best);
+ reset_tree(&aio.dtree);
+ if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
+ left_root_right))
+ goto err;
+
+ }
+
+ /*
+ If ndocs == 0, this will not allocate RAM for FT_INFO.doc[],
+ so if ndocs == 0, FT_INFO.doc[] must not be accessed.
+ */
+ dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+
+ sizeof(FT_DOC)*
+ (int)(aio.dtree.elements_in_tree-1),
+ MYF(0));
+ if (!dlist)
+ goto err;
+
+ dlist->please= (struct _ft_vft *) & _ma_ft_vft_nlq;
+ dlist->ndocs=aio.dtree.elements_in_tree;
+ dlist->curdoc=-1;
+ dlist->info=aio.info;
+ dptr=dlist->doc;
+
+ tree_walk(&aio.dtree, (tree_walk_action) &walk_and_copy,
+ &dptr, left_root_right);
+
+ if (flags & FT_SORTED)
+ my_qsort2(dlist->doc, dlist->ndocs, sizeof(FT_DOC),
+ (qsort2_cmp)&FT_DOC_cmp, 0);
+
+err:
+ delete_tree(&aio.dtree);
+ delete_tree(&wtree);
+ info->cur_row.lastpos= saved_lastpos;
+ DBUG_RETURN(dlist);
+}
+
+
+int maria_ft_nlq_read_next(FT_INFO *handler, char *record)
+{
+ MARIA_HA *info= (MARIA_HA *) handler->info;
+
+ if (++handler->curdoc >= handler->ndocs)
+ {
+ --handler->curdoc;
+ return HA_ERR_END_OF_FILE;
+ }
+
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+
+ info->cur_row.lastpos= handler->doc[handler->curdoc].dpos;
+ if (!(*info->read_record)(info, record, info->cur_row.lastpos))
+ {
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ return 0;
+ }
+ return my_errno;
+}
+
+
+float maria_ft_nlq_find_relevance(FT_INFO *handler,
+ uchar *record __attribute__((unused)),
+ uint length __attribute__((unused)))
+{
+ int a,b,c;
+ FT_DOC *docs=handler->doc;
+ MARIA_RECORD_POS docid= handler->info->cur_row.lastpos;
+
+ if (docid == HA_POS_ERROR)
+ return -5.0;
+
+ /* Assuming docs[] is sorted by dpos... */
+
+ for (a=0, b=handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2)
+ {
+ if (docs[c].dpos > docid)
+ b=c;
+ else
+ a=c;
+ }
+ /* bounds check to avoid accessing unallocated handler->doc */
+ if (a < handler->ndocs && docs[a].dpos == docid)
+ return (float) docs[a].weight;
+ else
+ return 0.0;
+}
+
+
+void maria_ft_nlq_close_search(FT_INFO *handler)
+{
+ my_free((uchar*)handler,MYF(0));
+}
+
+
+float maria_ft_nlq_get_relevance(FT_INFO *handler)
+{
+ return (float) handler->doc[handler->curdoc].weight;
+}
+
+
+void maria_ft_nlq_reinit_search(FT_INFO *handler)
+{
+ handler->curdoc=-1;
+}
+
diff --git a/storage/maria/ma_ft_parser.c b/storage/maria/ma_ft_parser.c
new file mode 100644
index 00000000000..2cbbb2dc5f7
--- /dev/null
+++ b/storage/maria/ma_ft_parser.c
@@ -0,0 +1,426 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+#include "ma_ftdefs.h"
+
+typedef struct st_maria_ft_docstat {
+ FT_WORD *list;
+ uint uniq;
+ double sum;
+} FT_DOCSTAT;
+
+
+typedef struct st_my_maria_ft_parser_param
+{
+ TREE *wtree;
+ MEM_ROOT *mem_root;
+} MY_FT_PARSER_PARAM;
+
+
+static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2)
+{
+ return ha_compare_text(cs, (uchar*) w1->pos, w1->len,
+ (uchar*) w2->pos, w2->len, 0, 0);
+}
+
+static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat)
+{
+ word->weight=LWS_IN_USE;
+ docstat->sum+=word->weight;
+ memcpy_fixed((docstat->list)++,word,sizeof(FT_WORD));
+ return 0;
+}
+
+/* transforms tree of words into the array, applying normalization */
+
+FT_WORD * maria_ft_linearize(TREE *wtree, MEM_ROOT *mem_root)
+{
+ FT_WORD *wlist,*p;
+ FT_DOCSTAT docstat;
+ DBUG_ENTER("maria_ft_linearize");
+
+ if ((wlist=(FT_WORD *) alloc_root(mem_root, sizeof(FT_WORD)*
+ (1+wtree->elements_in_tree))))
+ {
+ docstat.list=wlist;
+ docstat.uniq=wtree->elements_in_tree;
+ docstat.sum=0;
+ tree_walk(wtree,(tree_walk_action)&walk_and_copy,&docstat,left_root_right);
+ }
+ delete_tree(wtree);
+ if (!wlist)
+ DBUG_RETURN(NULL);
+
+ docstat.list->pos=NULL;
+
+ for (p=wlist;p->pos;p++)
+ {
+ p->weight=PRENORM_IN_USE;
+ }
+
+ for (p=wlist;p->pos;p++)
+ {
+ p->weight/=NORM_IN_USE;
+ }
+
+ DBUG_RETURN(wlist);
+}
+
+my_bool maria_ft_boolean_check_syntax_string(const uchar *str)
+{
+ uint i, j;
+
+ if (!str ||
+ (strlen(str)+1 != sizeof(ft_boolean_syntax)) ||
+ (str[0] != ' ' && str[1] != ' '))
+ return 1;
+ for (i=0; i<sizeof(ft_boolean_syntax); i++)
+ {
+ /* limiting to 7-bit ascii only */
+ if ((unsigned char)(str[i]) > 127 ||
+ my_isalnum(default_charset_info, str[i]))
+ return 1;
+ for (j=0; j<i; j++)
+ if (str[i] == str[j] && (i != 11 || j != 10))
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ RETURN VALUE
+ 0 - eof
+ 1 - word found
+ 2 - left bracket
+ 3 - right bracket
+ 4 - stopword found
+*/
+uchar maria_ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end,
+ FT_WORD *word, MYSQL_FTPARSER_BOOLEAN_INFO *param)
+{
+ uchar *doc=*start;
+ int ctype;
+ uint mwc, length, mbl;
+
+ param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0);
+ param->weight_adjust= param->wasign= 0;
+ param->type= FT_TOKEN_EOF;
+
+ while (doc<end)
+ {
+ for (; doc < end; doc+= (mbl > 0 ? mbl : 1))
+ {
+ mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
+ if (true_word_char(ctype, *doc))
+ break;
+ if (*doc == FTB_RQUOT && param->quot)
+ {
+ param->quot=doc;
+ *start=doc+1;
+ param->type= FT_TOKEN_RIGHT_PAREN;
+ goto ret;
+ }
+ if (!param->quot)
+ {
+ if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT)
+ {
+ /* param->prev=' '; */
+ *start=doc+1;
+ if (*doc == FTB_LQUOT) param->quot=*start;
+ param->type= (*doc == FTB_RBR ? FT_TOKEN_RIGHT_PAREN : FT_TOKEN_LEFT_PAREN);
+ goto ret;
+ }
+ if (param->prev == ' ')
+ {
+ if (*doc == FTB_YES ) { param->yesno=+1; continue; } else
+ if (*doc == FTB_EGAL) { param->yesno= 0; continue; } else
+ if (*doc == FTB_NO ) { param->yesno=-1; continue; } else
+ if (*doc == FTB_INC ) { param->weight_adjust++; continue; } else
+ if (*doc == FTB_DEC ) { param->weight_adjust--; continue; } else
+ if (*doc == FTB_NEG ) { param->wasign= !param->wasign; continue; }
+ }
+ }
+ param->prev=*doc;
+ param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0);
+ param->weight_adjust= param->wasign= 0;
+ }
+
+ mwc=length=0;
+ for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1))
+ {
+ mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
+ if (true_word_char(ctype, *doc))
+ mwc=0;
+ else if (!misc_word_char(*doc) || mwc)
+ break;
+ else
+ mwc++;
+ }
+ param->prev='A'; /* be sure *prev is true_word_char */
+ word->len= (uint)(doc-word->pos) - mwc;
+ if ((param->trunc=(doc<end && *doc == FTB_TRUNC)))
+ doc++;
+
+ if (((length >= ft_min_word_len && !is_stopword(word->pos, word->len))
+ || param->trunc) && length < ft_max_word_len)
+ {
+ *start=doc;
+ param->type= FT_TOKEN_WORD;
+ goto ret;
+ }
+ else if (length) /* make sure length > 0 (if start contains spaces only) */
+ {
+ *start= doc;
+ param->type= FT_TOKEN_STOPWORD;
+ goto ret;
+ }
+ }
+ if (param->quot)
+ {
+ param->quot=*start=doc;
+ param->type= 3; /* FT_RBR */
+ goto ret;
+ }
+ret:
+ return param->type;
+}
+
+uchar maria_ft_simple_get_word(CHARSET_INFO *cs, uchar **start,
+ const uchar *end, FT_WORD *word,
+ my_bool skip_stopwords)
+{
+ uchar *doc= *start;
+ uint mwc, length, mbl;
+ int ctype;
+ DBUG_ENTER("maria_ft_simple_get_word");
+
+ do
+ {
+ for (;; doc+= (mbl > 0 ? mbl : 1))
+ {
+ if (doc >= end)
+ DBUG_RETURN(0);
+ mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
+ if (true_word_char(ctype, *doc))
+ break;
+ }
+
+ mwc= length= 0;
+ for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1))
+ {
+ mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
+ if (true_word_char(ctype, *doc))
+ mwc= 0;
+ else if (!misc_word_char(*doc) || mwc)
+ break;
+ else
+ mwc++;
+ }
+
+ word->len= (uint)(doc-word->pos) - mwc;
+
+ if (skip_stopwords == FALSE ||
+ (length >= ft_min_word_len && length < ft_max_word_len &&
+ !is_stopword(word->pos, word->len)))
+ {
+ *start= doc;
+ DBUG_RETURN(1);
+ }
+ } while (doc < end);
+ DBUG_RETURN(0);
+}
+
+void maria_ft_parse_init(TREE *wtree, CHARSET_INFO *cs)
+{
+ DBUG_ENTER("maria_ft_parse_init");
+ if (!is_tree_inited(wtree))
+ init_tree(wtree,0,0,sizeof(FT_WORD),(qsort_cmp2)&FT_WORD_cmp,0,NULL, cs);
+ DBUG_VOID_RETURN;
+}
+
+
+static int maria_ft_add_word(MYSQL_FTPARSER_PARAM *param,
+ char *word, int word_len,
+ MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
+{
+ TREE *wtree;
+ FT_WORD w;
+ MY_FT_PARSER_PARAM *ft_param=param->mysql_ftparam;
+ DBUG_ENTER("maria_ft_add_word");
+ wtree= ft_param->wtree;
+ if (param->flags & MYSQL_FTFLAGS_NEED_COPY)
+ {
+ uchar *ptr;
+ DBUG_ASSERT(wtree->with_delete == 0);
+ ptr= (uchar *)alloc_root(ft_param->mem_root, word_len);
+ memcpy(ptr, word, word_len);
+ w.pos= ptr;
+ }
+ else
+ w.pos= word;
+ w.len= word_len;
+ if (!tree_insert(wtree, &w, 0, wtree->custom_arg))
+ {
+ delete_tree(wtree);
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
+
+
+static int maria_ft_parse_internal(MYSQL_FTPARSER_PARAM *param,
+ char *doc_arg, int doc_len)
+{
+ uchar *doc= (uchar*) doc_arg;
+ uchar *end= doc + doc_len;
+ MY_FT_PARSER_PARAM *ft_param=param->mysql_ftparam;
+ TREE *wtree= ft_param->wtree;
+ FT_WORD w;
+ DBUG_ENTER("maria_ft_parse_internal");
+
+ while (maria_ft_simple_get_word(wtree->custom_arg, &doc, end, &w, TRUE))
+ if (param->mysql_add_word(param, w.pos, w.len, 0))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
+
+
+int maria_ft_parse(TREE *wtree, uchar *doc, int doclen,
+ struct st_mysql_ftparser *parser,
+ MYSQL_FTPARSER_PARAM *param, MEM_ROOT *mem_root)
+{
+ MY_FT_PARSER_PARAM my_param;
+ DBUG_ENTER("maria_ft_parse");
+ DBUG_ASSERT(parser);
+ my_param.wtree= wtree;
+ my_param.mem_root= mem_root;
+
+ param->mysql_parse= maria_ft_parse_internal;
+ param->mysql_add_word= maria_ft_add_word;
+ param->mysql_ftparam= &my_param;
+ param->cs= wtree->custom_arg;
+ param->doc= doc;
+ param->length= doclen;
+ param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
+ DBUG_RETURN(parser->parse(param));
+}
+
+
+#define MAX_PARAM_NR 2
+MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info,
+ uint keynr, uint paramnr)
+{
+ uint32 ftparser_nr;
+ struct st_mysql_ftparser *parser;
+ if (! info->ftparser_param)
+ {
+ /* info->ftparser_param can not be zero after the initialization,
+ because it always includes built-in fulltext parser. And built-in
+ parser can be called even if the table has no fulltext indexes and
+ no varchar/text fields. */
+ if (! info->s->ftparsers)
+ {
+ /* It's ok that modification to shared structure is done w/o mutex
+ locks, because all threads would set the same variables to the
+ same values. */
+ uint i, j, keys= info->s->state.header.keys, ftparsers= 1;
+ for (i= 0; i < keys; i++)
+ {
+ MARIA_KEYDEF *keyinfo= &info->s->keyinfo[i];
+ if (keyinfo->flag & HA_FULLTEXT)
+ {
+ for (j= 0;; j++)
+ {
+ if (j == i)
+ {
+ keyinfo->ftparser_nr= ftparsers++;
+ break;
+ }
+ if (info->s->keyinfo[j].flag & HA_FULLTEXT &&
+ keyinfo->parser == info->s->keyinfo[j].parser)
+ {
+ keyinfo->ftparser_nr= info->s->keyinfo[j].ftparser_nr;
+ break;
+ }
+ }
+ }
+ }
+ info->s->ftparsers= ftparsers;
+ }
+ /*
+ We have to allocate two MYSQL_FTPARSER_PARAM structures per plugin
+ because in a boolean search a parser is called recursively
+ ftb_find_relevance* calls ftb_check_phrase*
+ (MAX_PARAM_NR=2)
+ */
+ info->ftparser_param= (MYSQL_FTPARSER_PARAM *)
+ my_malloc(MAX_PARAM_NR * sizeof(MYSQL_FTPARSER_PARAM) *
+ info->s->ftparsers, MYF(MY_WME|MY_ZEROFILL));
+ init_alloc_root(&info->ft_memroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0);
+ if (! info->ftparser_param)
+ return 0;
+ }
+ if (keynr == NO_SUCH_KEY)
+ {
+ ftparser_nr= 0;
+ parser= &ft_default_parser;
+ }
+ else
+ {
+ ftparser_nr= info->s->keyinfo[keynr].ftparser_nr;
+ parser= info->s->keyinfo[keynr].parser;
+ }
+ DBUG_ASSERT(paramnr < MAX_PARAM_NR);
+ ftparser_nr= ftparser_nr*MAX_PARAM_NR + paramnr;
+ if (! info->ftparser_param[ftparser_nr].mysql_add_word)
+ {
+ /* Note, that mysql_add_word is used here as a flag:
+ mysql_add_word == 0 - parser is not initialized
+ mysql_add_word != 0 - parser is initialized, or no
+ initialization needed. */
+ info->ftparser_param[ftparser_nr].mysql_add_word= (void *)1;
+ if (parser->init && parser->init(&info->ftparser_param[ftparser_nr]))
+ return 0;
+ }
+ return &info->ftparser_param[ftparser_nr];
+}
+
+
+void maria_ftparser_call_deinitializer(MARIA_HA *info)
+{
+ uint i, j, keys= info->s->state.header.keys;
+ free_root(&info->ft_memroot, MYF(0));
+ if (! info->ftparser_param)
+ return;
+ for (i= 0; i < keys; i++)
+ {
+ MARIA_KEYDEF *keyinfo= &info->s->keyinfo[i];
+ for (j=0; j < MAX_PARAM_NR; j++)
+ {
+ MYSQL_FTPARSER_PARAM *ftparser_param=
+ &info->ftparser_param[keyinfo->ftparser_nr*MAX_PARAM_NR + j];
+ if (keyinfo->flag & HA_FULLTEXT && ftparser_param->mysql_add_word)
+ {
+ if (keyinfo->parser->deinit)
+ keyinfo->parser->deinit(ftparser_param);
+ ftparser_param->mysql_add_word= 0;
+ }
+ else
+ break;
+ }
+ }
+}
diff --git a/storage/maria/ma_ft_stem.c b/storage/maria/ma_ft_stem.c
new file mode 100644
index 00000000000..06fc0b2df6c
--- /dev/null
+++ b/storage/maria/ma_ft_stem.c
@@ -0,0 +1,18 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+/* mulitingual stem */
diff --git a/storage/maria/ma_ft_test1.c b/storage/maria/ma_ft_test1.c
new file mode 100644
index 00000000000..4c98e766234
--- /dev/null
+++ b/storage/maria/ma_ft_test1.c
@@ -0,0 +1,317 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code
+ added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
+
+#include "ma_ftdefs.h"
+#include "maria_ft_test1.h"
+#include <my_getopt.h>
+
+static int key_field=FIELD_VARCHAR,extra_field=FIELD_SKIP_ENDSPACE;
+static uint key_length=200,extra_length=50;
+static int key_type=HA_KEYTYPE_TEXT;
+static int verbose=0,silent=0,skip_update=0,
+ no_keys=0,no_stopwords=0,no_search=0,no_fulltext=0;
+static int create_flag=0,error=0;
+
+#define MAX_REC_LENGTH 300
+static char record[MAX_REC_LENGTH],read_record[MAX_REC_LENGTH];
+
+static int run_test(const char *filename);
+static void get_options(int argc, char *argv[]);
+static void create_record(char *, int);
+static void usage();
+
+static struct my_option my_long_options[] =
+{
+ {"", 'v', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", '?', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'h', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'V', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'v', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 's', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'N', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'S', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'K', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'F', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'U', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", '#', "", 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+int main(int argc, char *argv[])
+{
+ MY_INIT(argv[0]);
+
+ get_options(argc,argv);
+ maria_init();
+
+ exit(run_test("FT1"));
+}
+
+static MARIA_COLUMNDEF recinfo[3];
+static MARIA_KEYDEF keyinfo[2];
+static HA_KEYSEG keyseg[10];
+
+static int run_test(const char *filename)
+{
+ MARIA_HA *file;
+ int i,j;
+ my_off_t pos;
+
+ bzero((char*) recinfo,sizeof(recinfo));
+
+ /* First define 2 columns */
+ recinfo[0].type=extra_field;
+ recinfo[0].length= (extra_field == FIELD_BLOB ? 4 + portable_sizeof_char_ptr :
+ extra_length);
+ if (extra_field == FIELD_VARCHAR)
+ recinfo[0].length+= HA_VARCHAR_PACKLENGTH(extra_length);
+ recinfo[1].type=key_field;
+ recinfo[1].length= (key_field == FIELD_BLOB ? 4+portable_sizeof_char_ptr :
+ key_length);
+ if (key_field == FIELD_VARCHAR)
+ recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length);
+
+ /* Define a key over the first column */
+ keyinfo[0].seg=keyseg;
+ keyinfo[0].keysegs=1;
+ keyinfo[0].block_length= 0; /* Default block length */
+ keyinfo[0].seg[0].type= key_type;
+ keyinfo[0].seg[0].flag= (key_field == FIELD_BLOB) ? HA_BLOB_PART:
+ (key_field == FIELD_VARCHAR) ? HA_VAR_LENGTH_PART:0;
+ keyinfo[0].seg[0].start=recinfo[0].length;
+ keyinfo[0].seg[0].length=key_length;
+ keyinfo[0].seg[0].null_bit= 0;
+ keyinfo[0].seg[0].null_pos=0;
+ keyinfo[0].seg[0].language= default_charset_info->number;
+ keyinfo[0].flag = (no_fulltext?HA_PACK_KEY:HA_FULLTEXT);
+
+ if (!silent)
+ printf("- Creating isam-file\n");
+ if (maria_create(filename,(no_keys?0:1),keyinfo,2,recinfo,0,NULL,
+ (MARIA_CREATE_INFO*) 0, create_flag))
+ goto err;
+ if (!(file=maria_open(filename,2,0)))
+ goto err;
+
+ if (!silent)
+ printf("- %s stopwords\n",no_stopwords?"Skipping":"Initializing");
+ maria_ft_init_stopwords(no_stopwords?NULL:maria_ft_precompiled_stopwords);
+
+ if (!silent)
+ printf("- Writing key:s\n");
+
+ my_errno=0;
+ for (i=NUPD ; i<NDATAS; i++ )
+ {
+ create_record(record,i);
+ error=maria_write(file,record);
+ if (verbose || error)
+ printf("I= %2d maria_write: %d errno: %d, record: %s\n",
+ i,error,my_errno,data[i].f0);
+ }
+
+ if (!skip_update)
+ {
+ if (!silent)
+ printf("- Updating rows\n");
+
+ /* Read through all rows and update them */
+ pos=(ha_rows) 0;
+ i=0;
+ while ((error=maria_rrnd(file,read_record,pos)) == 0)
+ {
+ create_record(record,NUPD-i-1);
+ if (maria_update(file,read_record,record))
+ {
+ printf("Can't update row: %.*s, error: %d\n",
+ keyinfo[0].seg[0].length,record,my_errno);
+ }
+ if(++i == NUPD) break;
+ pos=HA_OFFSET_ERROR;
+ }
+ if (i != NUPD)
+ printf("Found %d of %d rows\n", i,NUPD);
+ }
+
+ if (maria_close(file)) goto err;
+ if(no_search) return 0;
+ if (!silent)
+ printf("- Reopening file\n");
+ if (!(file=maria_open(filename,2,0))) goto err;
+ if (!silent)
+ printf("- Reading rows with key\n");
+ for (i=0 ; i < NQUERIES ; i++)
+ {
+ FT_DOCLIST *result;
+ result=maria_ft_nlq_init_search(file,0,(char*) query[i],strlen(query[i]),1);
+ if(!result)
+ {
+ printf("Query %d: `%s' failed with errno %3d\n",i,query[i],my_errno);
+ continue;
+ }
+ printf("Query %d: `%s'. Found: %d. Top five documents:\n",
+ i,query[i],result->ndocs);
+ for (j=0;j<5;j++)
+ {
+ double w; int err;
+ err= maria_ft_nlq_read_next(result, read_record);
+ if (err==HA_ERR_END_OF_FILE)
+ {
+ printf("No more matches!\n");
+ break;
+ }
+ else if (err)
+ {
+ printf("maria_ft_read_next %d failed with errno %3d\n",j,my_errno);
+ break;
+ }
+ w=maria_ft_nlq_get_relevance(result);
+ if (key_field == FIELD_VARCHAR)
+ {
+ uint l;
+ char *p;
+ p=recinfo[0].length+read_record;
+ l=uint2korr(p);
+ printf("%10.7f: %.*s\n",w,(int) l,p+2);
+ }
+ else
+ printf("%10.7f: %.*s\n",w,recinfo[1].length,
+ recinfo[0].length+read_record);
+ }
+ maria_ft_nlq_close_search(result);
+ }
+
+ if (maria_close(file)) goto err;
+ maria_end();
+ my_end(MY_CHECK_ERROR);
+
+ return (0);
+err:
+ printf("got error: %3d when using maria-database\n",my_errno);
+ return 1; /* skip warning */
+}
+
+static char blob_key[MAX_REC_LENGTH];
+/* static char blob_record[MAX_REC_LENGTH+20*20]; */
+
+void create_record(char *pos, int n)
+{
+ bzero((char*) pos,MAX_REC_LENGTH);
+ if (recinfo[0].type == FIELD_BLOB)
+ {
+ uint tmp;
+ char *ptr;
+ strnmov(blob_key,data[n].f0,keyinfo[0].seg[0].length);
+ tmp=strlen(blob_key);
+ int4store(pos,tmp);
+ ptr=blob_key;
+ memcpy_fixed(pos+4,&ptr,sizeof(char*));
+ pos+=recinfo[0].length;
+ }
+ else if (recinfo[0].type == FIELD_VARCHAR)
+ {
+ uint tmp;
+ /* -1 is here because pack_length is stored in seg->length */
+ uint pack_length= HA_VARCHAR_PACKLENGTH(keyinfo[0].seg[0].length-1);
+ strnmov(pos+pack_length,data[n].f0,keyinfo[0].seg[0].length);
+ tmp=strlen(pos+pack_length);
+ if (pack_length == 1)
+ *pos= (char) tmp;
+ else
+ int2store(pos,tmp);
+ pos+=recinfo[0].length;
+ }
+ else
+ {
+ strnmov(pos,data[n].f0,keyinfo[0].seg[0].length);
+ pos+=recinfo[0].length;
+ }
+ if (recinfo[1].type == FIELD_BLOB)
+ {
+ uint tmp;
+ char *ptr;
+ strnmov(blob_key,data[n].f2,keyinfo[0].seg[0].length);
+ tmp=strlen(blob_key);
+ int4store(pos,tmp);
+ ptr=blob_key;
+ memcpy_fixed(pos+4,&ptr,sizeof(char*));
+ pos+=recinfo[1].length;
+ }
+ else if (recinfo[1].type == FIELD_VARCHAR)
+ {
+ uint tmp;
+ /* -1 is here because pack_length is stored in seg->length */
+ uint pack_length= HA_VARCHAR_PACKLENGTH(keyinfo[0].seg[0].length-1);
+ strnmov(pos+pack_length,data[n].f2,keyinfo[0].seg[0].length);
+ tmp=strlen(pos+1);
+ if (pack_length == 1)
+ *pos= (char) tmp;
+ else
+ int2store(pos,tmp);
+ pos+=recinfo[1].length;
+ }
+ else
+ {
+ strnmov(pos,data[n].f2,keyinfo[0].seg[0].length);
+ pos+=recinfo[1].length;
+ }
+}
+
+
+static my_bool
+get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
+ char *argument)
+{
+ switch(optid) {
+ case 'v': verbose=1; break;
+ case 's': silent=1; break;
+ case 'F': no_fulltext=1; no_search=1;
+ case 'U': skip_update=1; break;
+ case 'K': no_keys=no_search=1; break;
+ case 'N': no_search=1; break;
+ case 'S': no_stopwords=1; break;
+ case '#':
+ DBUG_PUSH (argument);
+ break;
+ case 'V':
+ case '?':
+ case 'h':
+ usage();
+ exit(1);
+ }
+ return 0;
+}
+
+/* Read options */
+
+static void get_options(int argc,char *argv[])
+{
+ int ho_error;
+
+ if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option)))
+ exit(ho_error);
+ return;
+} /* get options */
+
+
+static void usage()
+{
+ printf("%s [options]\n", my_progname);
+ my_print_help(my_long_options);
+ my_print_variables(my_long_options);
+}
diff --git a/storage/maria/ma_ft_test1.h b/storage/maria/ma_ft_test1.h
new file mode 100644
index 00000000000..5883c42f5c5
--- /dev/null
+++ b/storage/maria/ma_ft_test1.h
@@ -0,0 +1,420 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+#define NUPD 20
+#define NDATAS 389
+struct { const char *f0, *f2; } data[NDATAS] = {
+ {"1", "General Information about MySQL"},
+ {"1.1", "What is MySQL?"},
+ {"1.2", "About this manual"},
+ {"1.3", "History of MySQL"},
+ {"1.4", "The main features of MySQL"},
+ {"1.5", "General SQL information and tutorials"},
+ {"1.6", "Useful MySQL-related links"},
+ {"1.7", "What are stored procedures and triggers and so on?"},
+ {"2", "MySQL mailing lists and how to ask questions/give error (bug) reports"},
+ {"2.1", "Subscribing to/un-subscribing from the MySQL mailing list"},
+ {"2.2", "Asking questions or reporting bugs"},
+ {"2.3", "I think I have found a bug. What information do you need to help me?"},
+ {"2.3.1", "MySQL keeps crashing"},
+ {"2.4", "Guidelines for answering questions on the mailing list"},
+ {"3", "Licensing or When do I have/want to pay for MySQL?"},
+ {"3.1", "How much does MySQL cost?"},
+ {"3.2", "How do I get commercial support?"},
+ {"3.2.1", "Types of commercial support"},
+ {"3.2.1.1", "Basic email support"},
+ {"3.2.1.2", "Extended email support"},
+/*------------------------------- NUPD=20 -------------------------------*/
+ {"3.2.1.3", "Asking: Login support"},
+ {"3.2.1.4", "Extended login support"},
+ {"3.3", "How do I pay for licenses/support?"},
+ {"3.4", "Who do I contact when I want more information about licensing/support?"},
+ {"3.5", "What Copyright does MySQL use?"},
+ {"3.6", "When may I distribute MySQL commercially without a fee?"},
+ {"3.7", "I want to sell a product that can be configured to use MySQL"},
+ {"3.8", "I am running a commercial web server using MySQL"},
+ {"3.9", "Do I need a license to sell commercial Perl/tcl/PHP/Web+ etc applications?"},
+ {"3.10", "Possible future changes in the licensing"},
+ {"4", "Compiling and installing MySQL"},
+ {"4.1", "How do I get MySQL?"},
+ {"4.2", "Which MySQL version should I use?"},
+ {"4.3", "How/when will you release updates?"},
+ {"4.4", "What operating systems does MySQL support?"},
+ {"4.5", "Compiling MySQL from source code"},
+ {"4.5.1", "Quick installation overview"},
+ {"4.5.2", "Usual configure switches"},
+ {"4.5.3", "Applying a patch"},
+ {"4.6", "Problems compiling?"},
+ {"4.7", "General compilation notes"},
+ {"4.8", "MIT-pthreads notes (FreeBSD)"},
+ {"4.9", "Perl installation comments"},
+ {"4.10", "Special things to consider for some machine/OS combinations"},
+ {"4.10.1", "Solaris notes"},
+ {"4.10.2", "SunOS 4 notes"},
+ {"4.10.3", "Linux notes for all versions"},
+ {"4.10.3.1", "Linux-x86 notes"},
+ {"4.10.3.2", "RedHat 5.0"},
+ {"4.10.3.3", "RedHat 5.1"},
+ {"4.10.3.4", "Linux-Sparc notes"},
+ {"4.10.3.5", "Linux-Alpha notes"},
+ {"4.10.3.6", "MkLinux notes"},
+ {"4.10.4", "Alpha-DEC-Unix notes"},
+ {"4.10.5", "Alpha-DEC-OSF1 notes"},
+ {"4.10.6", "SGI-IRIX notes"},
+ {"4.10.7", "FreeBSD notes"},
+ {"4.10.7.1", "FreeBSD-3.0 notes"},
+ {"4.10.8", "BSD/OS 2.# notes"},
+ {"4.10.8.1", "BSD/OS 3.# notes"},
+ {"4.10.9", "SCO notes"},
+ {"4.10.10", "SCO Unixware 7.0 notes"},
+ {"4.10.11", "IBM-AIX notes"},
+ {"4.10.12", "HP-UX notes"},
+ {"4.11", "TcX binaries"},
+ {"4.12", "Win32 notes"},
+ {"4.13", "Installation instructions for MySQL binary releases"},
+ {"4.13.1", "How to get MySQL Perl support working"},
+ {"4.13.2", "Linux notes"},
+ {"4.13.3", "HP-UX notes"},
+ {"4.13.4", "Linking client libraries"},
+ {"4.14", "Problems running mysql_install_db"},
+ {"4.15", "Problems starting MySQL"},
+ {"4.16", "Automatic start/stop of MySQL"},
+ {"4.17", "Option files"},
+ {"5", "How standards-compatible is MySQL?"},
+ {"5.1", "What extensions has MySQL to ANSI SQL92?"},
+ {"5.2", "What functionality is missing in MySQL?"},
+ {"5.2.1", "Sub-selects"},
+ {"5.2.2", "SELECT INTO TABLE"},
+ {"5.2.3", "Transactions"},
+ {"5.2.4", "Triggers"},
+ {"5.2.5", "Foreign Keys"},
+ {"5.2.5.1", "Some reasons NOT to use FOREIGN KEYS"},
+ {"5.2.6", "Views"},
+ {"5.2.7", "-- as start of a comment"},
+ {"5.3", "What standards does MySQL follow?"},
+ {"5.4", "What functions exist only for compatibility?"},
+ {"5.5", "Limitations of BLOB and TEXT types"},
+ {"5.6", "How to cope without COMMIT-ROLLBACK"},
+ {"6", "The MySQL access privilege system"},
+ {"6.1", "What the privilege system does"},
+ {"6.2", "Connecting to the MySQL server"},
+ {"6.2.1", "Keeping your password secure"},
+ {"6.3", "Privileges provided by MySQL"},
+ {"6.4", "How the privilege system works"},
+ {"6.5", "The privilege tables"},
+ {"6.6", "Setting up the initial MySQL privileges"},
+ {"6.7", "Adding new user privileges to MySQL"},
+ {"6.8", "An example permission setup"},
+ {"6.9", "Causes of Access denied errors"},
+ {"6.10", "How to make MySQL secure against crackers"},
+ {"7", "MySQL language reference"},
+ {"7.1", "Literals: how to write strings and numbers"},
+ {"7.1.1", "Strings"},
+ {"7.1.2", "Numbers"},
+ {"7.1.3", "NULL values"},
+ {"7.1.4", "Database, table, index, column and alias names"},
+ {"7.1.4.1", "Case sensitivity in names"},
+ {"7.2", "Column types"},
+ {"7.2.1", "Column type storage requirements"},
+ {"7.2.5", "Numeric types"},
+ {"7.2.6", "Date and time types"},
+ {"7.2.6.1", "The DATE type"},
+ {"7.2.6.2", "The TIME type"},
+ {"7.2.6.3", "The DATETIME type"},
+ {"7.2.6.4", "The TIMESTAMP type"},
+ {"7.2.6.5", "The YEAR type"},
+ {"7.2.6.6", "Miscellaneous date and time properties"},
+ {"7.2.7", "String types"},
+ {"7.2.7.1", "The CHAR and VARCHAR types"},
+ {"7.2.7.2", "The BLOB and TEXT types"},
+ {"7.2.7.3", "The ENUM type"},
+ {"7.2.7.4", "The SET type"},
+ {"7.2.8", "Choosing the right type for a column"},
+ {"7.2.9", "Column indexes"},
+ {"7.2.10", "Multiple-column indexes"},
+ {"7.2.11", "Using column types from other database engines"},
+ {"7.3", "Functions for use in SELECT and WHERE clauses"},
+ {"7.3.1", "Grouping functions"},
+ {"7.3.2", "Normal arithmetic operations"},
+ {"7.3.3", "Bit functions"},
+ {"7.3.4", "Logical operations"},
+ {"7.3.5", "Comparison operators"},
+ {"7.3.6", "String comparison functions"},
+ {"7.3.7", "Control flow functions"},
+ {"7.3.8", "Mathematical functions"},
+ {"7.3.9", "String functions"},
+ {"7.3.10", "Date and time functions"},
+ {"7.3.11", "Miscellaneous functions"},
+ {"7.3.12", "Functions for use with GROUP BY clauses"},
+ {"7.4", "CREATE DATABASE syntax"},
+ {"7.5", "DROP DATABASE syntax"},
+ {"7.6", "CREATE TABLE syntax"},
+ {"7.7", "ALTER TABLE syntax"},
+ {"7.8", "OPTIMIZE TABLE syntax"},
+ {"7.9", "DROP TABLE syntax"},
+ {"7.10", "DELETE syntax"},
+ {"7.11", "SELECT syntax"},
+ {"7.12", "JOIN syntax"},
+ {"7.13", "INSERT syntax"},
+ {"7.14", "REPLACE syntax"},
+ {"7.15", "LOAD DATA INFILE syntax"},
+ {"7.16", "UPDATE syntax"},
+ {"7.17", "USE syntax"},
+ {"7.18", "SHOW syntax (Get information about tables, columns...)"},
+ {"7.19", "EXPLAIN syntax (Get information about a SELECT)"},
+ {"7.20", "DESCRIBE syntax (Get information about columns)"},
+ {"7.21", "LOCK TABLES/UNLOCK TABLES syntax"},
+ {"7.22", "SET OPTION syntax"},
+ {"7.23", "GRANT syntax (Compatibility function)"},
+ {"7.24", "CREATE INDEX syntax (Compatibility function)"},
+ {"7.25", "DROP INDEX syntax (Compatibility function)"},
+ {"7.26", "Comment syntax"},
+ {"7.27", "CREATE FUNCTION/DROP FUNCTION syntax"},
+ {"7.28", "Is MySQL picky about reserved words?"},
+ {"8", "Example SQL queries"},
+ {"8.1", "Queries from twin project"},
+ {"8.1.1", "Find all non-distributed twins"},
+ {"8.1.2", "Show a table on twin pair status"},
+ {"9", "How safe/stable is MySQL?"},
+ {"9.1", "How stable is MySQL?"},
+ {"9.2", "Why are there is so many releases of MySQL?"},
+ {"9.3", "Checking a table for errors"},
+ {"9.4", "How to repair tables"},
+ {"9.5", "Is there anything special to do when upgrading/downgrading MySQL?"},
+ {"9.5.1", "Upgrading from a 3.21 version to 3.22"},
+ {"9.5.2", "Upgrading from a 3.20 version to 3.21"},
+ {"9.5.3", "Upgrading to another architecture"},
+ {"9.6", "Year 2000 compliance"},
+ {"10", "MySQL Server functions"},
+ {"10.1", "What languages are supported by MySQL?"},
+ {"10.1.1", "Character set used for data &#38; sorting"},
+ {"10.2", "The update log"},
+ {"10.3", "How big can MySQL tables be?"},
+ {"11", "Getting maximum performance from MySQL"},
+ {"11.1", "How does one change the size of MySQL buffers?"},
+ {"11.2", "How compiling and linking affects the speed of MySQL"},
+ {"11.3", "How does MySQL use memory?"},
+ {"11.4", "How does MySQL use indexes?"},
+ {"11.5", "What optimizations are done on WHERE clauses?"},
+ {"11.6", "How does MySQL open &#38; close tables?"},
+ {"11.6.0.1", "What are the drawbacks of creating possibly thousands of tables in a database?"},
+ {"11.7", "How does MySQL lock tables?"},
+ {"11.8", "How should I arrange my table to be as fast/small as possible?"},
+ {"11.9", "What affects the speed of INSERT statements?"},
+ {"11.10", "What affects the speed DELETE statements?"},
+ {"11.11", "How do I get MySQL to run at full speed?"},
+ {"11.12", "What are the different row formats? Or, when should VARCHAR/CHAR be used?"},
+ {"11.13", "Why so many open tables?"},
+ {"12", "MySQL benchmark suite"},
+ {"13", "MySQL Utilites"},
+ {"13.1", "Overview of the different MySQL programs"},
+ {"13.2", "The MySQL table check, optimize and repair program"},
+ {"13.2.1", "isamchk memory use"},
+ {"13.2.2", "Getting low-level table information"},
+ {"13.3", "The MySQL compressed read-only table generator"},
+ {"14", "Adding new functions to MySQL"},
+ {"15", "MySQL ODBC Support"},
+ {"15.1", "Operating systems supported by MyODBC"},
+ {"15.2", "How to report problems with MyODBC"},
+ {"15.3", "Programs known to work with MyODBC"},
+ {"15.4", "How to fill in the various fields in the ODBC administrator program"},
+ {"15.5", "How to get the value of an AUTO_INCREMENT column in ODBC"},
+ {"16", "Problems and common errors"},
+ {"16.1", "Some common errors when using MySQL"},
+ {"16.1.1", "MySQL server has gone away error"},
+ {"16.1.2", "Can't connect to local MySQL server error"},
+ {"16.1.3", "Out of memory error"},
+ {"16.1.4", "Packet too large error"},
+ {"16.1.5", "The table is full error"},
+ {"16.1.6", "Commands out of sync error in client"},
+ {"16.1.7", "Removing user error"},
+ {"16.2", "How MySQL handles a full disk"},
+ {"16.3", "How to run SQL commands from a text file"},
+ {"16.4", "Where MySQL stores temporary files"},
+ {"16.5", "Access denied error"},
+ {"16.6", "How to run MySQL as a normal user"},
+ {"16.7", "Problems with file permissions"},
+ {"16.8", "File not found"},
+ {"16.9", "Problems using DATE columns"},
+ {"16.10", "Case sensitivity in searches"},
+ {"16.11", "Problems with NULL values"},
+ {"17", "Solving some common problems with MySQL"},
+ {"17.1", "Database replication"},
+ {"17.2", "Database backups"},
+ {"18", "MySQL client tools and API's"},
+ {"18.1", "MySQL C API"},
+ {"18.2", "C API datatypes"},
+ {"18.3", "C API function overview"},
+ {"18.4", "C API function descriptions"},
+ {"18.4.1", "mysql_affected_rows()"},
+ {"18.4.2", "mysql_close()"},
+ {"18.4.3", "mysql_connect()"},
+ {"18.4.4", "mysql_create_db()"},
+ {"18.4.5", "mysql_data_seek()"},
+ {"18.4.6", "mysql_debug()"},
+ {"18.4.7", "mysql_drop_db()"},
+ {"18.4.8", "mysql_dump_debug_info()"},
+ {"18.4.9", "mysql_eof()"},
+ {"18.4.10", "mysql_errno()"},
+ {"18.4.11", "mysql_error()"},
+ {"18.4.12", "mysql_escape_string()"},
+ {"18.4.13", "mysql_fetch_field()"},
+ {"18.4.14", "mysql_fetch_fields()"},
+ {"18.4.15", "mysql_fetch_field_direct()"},
+ {"18.4.16", "mysql_fetch_lengths()"},
+ {"18.4.17", "mysql_fetch_row()"},
+ {"18.4.18", "mysql_field_seek()"},
+ {"18.4.19", "mysql_field_tell()"},
+ {"18.4.20", "mysql_free_result()"},
+ {"18.4.21", "mysql_get_client_info()"},
+ {"18.4.22", "mysql_get_host_info()"},
+ {"18.4.23", "mysql_get_proto_info()"},
+ {"18.4.24", "mysql_get_server_info()"},
+ {"18.4.25", "mysql_info()"},
+ {"18.4.26", "mysql_init()"},
+ {"18.4.27", "mysql_insert_id()"},
+ {"18.4.28", "mysql_kill()"},
+ {"18.4.29", "mysql_list_dbs()"},
+ {"18.4.30", "mysql_list_fields()"},
+ {"18.4.31", "mysql_list_processes()"},
+ {"18.4.32", "mysql_list_tables()"},
+ {"18.4.33", "mysql_num_fields()"},
+ {"18.4.34", "mysql_num_rows()"},
+ {"18.4.35", "mysql_query()"},
+ {"18.4.36", "mysql_real_connect()"},
+ {"18.4.37", "mysql_real_query()"},
+ {"18.4.38", "mysql_reload()"},
+ {"18.4.39", "mysql_row_tell()"},
+ {"18.4.40", "mysql_select_db()"},
+ {"18.4.41", "mysql_shutdown()"},
+ {"18.4.42", "mysql_stat()"},
+ {"18.4.43", "mysql_store_result()"},
+ {"18.4.44", "mysql_thread_id()"},
+ {"18.4.45", "mysql_use_result()"},
+ {"18.4.46", "Why is it that after mysql_query() returns success, mysql_store_result() sometimes returns NULL?"},
+ {"18.4.47", "What results can I get from a query?"},
+ {"18.4.48", "How can I get the unique ID for the last inserted row?"},
+ {"18.4.49", "Problems linking with the C API"},
+ {"18.4.50", "How to make a thread-safe client"},
+ {"18.5", "MySQL Perl API's"},
+ {"18.5.1", "DBI with DBD::mysql"},
+ {"18.5.1.1", "The DBI interface"},
+ {"18.5.1.2", "More DBI/DBD information"},
+ {"18.6", "MySQL Java connectivity (JDBC)"},
+ {"18.7", "MySQL PHP API's"},
+ {"18.8", "MySQL C++ API's"},
+ {"18.9", "MySQL Python API's"},
+ {"18.10", "MySQL TCL API's"},
+ {"19", "How MySQL compares to other databases"},
+ {"19.1", "How MySQL compares to mSQL"},
+ {"19.1.1", "How to convert mSQL tools for MySQL"},
+ {"19.1.2", "How mSQL and MySQL client/server communications protocols differ"},
+ {"19.1.3", "How mSQL 2.0 SQL syntax differs from MySQL"},
+ {"19.2", "How MySQL compares to PostgreSQL"},
+ {"A", "Some users of MySQL"},
+ {"B", "Contributed programs"},
+ {"C", "Contributors to MySQL"},
+ {"D", "MySQL change history"},
+ {"19.3", "Changes in release 3.22.x (Alpha version)"},
+ {"19.3.1", "Changes in release 3.22.7"},
+ {"19.3.2", "Changes in release 3.22.6"},
+ {"19.3.3", "Changes in release 3.22.5"},
+ {"19.3.4", "Changes in release 3.22.4"},
+ {"19.3.5", "Changes in release 3.22.3"},
+ {"19.3.6", "Changes in release 3.22.2"},
+ {"19.3.7", "Changes in release 3.22.1"},
+ {"19.3.8", "Changes in release 3.22.0"},
+ {"19.4", "Changes in release 3.21.x"},
+ {"19.4.1", "Changes in release 3.21.33"},
+ {"19.4.2", "Changes in release 3.21.32"},
+ {"19.4.3", "Changes in release 3.21.31"},
+ {"19.4.4", "Changes in release 3.21.30"},
+ {"19.4.5", "Changes in release 3.21.29"},
+ {"19.4.6", "Changes in release 3.21.28"},
+ {"19.4.7", "Changes in release 3.21.27"},
+ {"19.4.8", "Changes in release 3.21.26"},
+ {"19.4.9", "Changes in release 3.21.25"},
+ {"19.4.10", "Changes in release 3.21.24"},
+ {"19.4.11", "Changes in release 3.21.23"},
+ {"19.4.12", "Changes in release 3.21.22"},
+ {"19.4.13", "Changes in release 3.21.21a"},
+ {"19.4.14", "Changes in release 3.21.21"},
+ {"19.4.15", "Changes in release 3.21.20"},
+ {"19.4.16", "Changes in release 3.21.19"},
+ {"19.4.17", "Changes in release 3.21.18"},
+ {"19.4.18", "Changes in release 3.21.17"},
+ {"19.4.19", "Changes in release 3.21.16"},
+ {"19.4.20", "Changes in release 3.21.15"},
+ {"19.4.21", "Changes in release 3.21.14b"},
+ {"19.4.22", "Changes in release 3.21.14a"},
+ {"19.4.23", "Changes in release 3.21.13"},
+ {"19.4.24", "Changes in release 3.21.12"},
+ {"19.4.25", "Changes in release 3.21.11"},
+ {"19.4.26", "Changes in release 3.21.10"},
+ {"19.4.27", "Changes in release 3.21.9"},
+ {"19.4.28", "Changes in release 3.21.8"},
+ {"19.4.29", "Changes in release 3.21.7"},
+ {"19.4.30", "Changes in release 3.21.6"},
+ {"19.4.31", "Changes in release 3.21.5"},
+ {"19.4.32", "Changes in release 3.21.4"},
+ {"19.4.33", "Changes in release 3.21.3"},
+ {"19.4.34", "Changes in release 3.21.2"},
+ {"19.4.35", "Changes in release 3.21.0"},
+ {"19.5", "Changes in release 3.20.x"},
+ {"19.5.1", "Changes in release 3.20.18"},
+ {"19.5.2", "Changes in release 3.20.17"},
+ {"19.5.3", "Changes in release 3.20.16"},
+ {"19.5.4", "Changes in release 3.20.15"},
+ {"19.5.5", "Changes in release 3.20.14"},
+ {"19.5.6", "Changes in release 3.20.13"},
+ {"19.5.7", "Changes in release 3.20.11"},
+ {"19.5.8", "Changes in release 3.20.10"},
+ {"19.5.9", "Changes in release 3.20.9"},
+ {"19.5.10", "Changes in release 3.20.8"},
+ {"19.5.11", "Changes in release 3.20.7"},
+ {"19.5.12", "Changes in release 3.20.6"},
+ {"19.5.13", "Changes in release 3.20.3"},
+ {"19.5.14", "Changes in release 3.20.0"},
+ {"19.6", "Changes in release 3.19.x"},
+ {"19.6.1", "Changes in release 3.19.5"},
+ {"19.6.2", "Changes in release 3.19.4"},
+ {"19.6.3", "Changes in release 3.19.3"},
+ {"E", "Known errors and design deficiencies in MySQL"},
+ {"F", "List of things we want to add to MySQL in the future (The TODO)"},
+ {"19.7", "Things that must done in the real near future"},
+ {"19.8", "Things that have to be done sometime"},
+ {"19.9", "Some things we don't have any plans to do"},
+ {"G", "Comments on porting to other systems"},
+ {"19.10", "Debugging MySQL"},
+ {"19.11", "Comments about RTS threads"},
+ {"19.12", "What is the difference between different thread packages?"},
+ {"H", "Description of MySQL regular expression syntax"},
+ {"I", "What is Unireg?"},
+ {"J", "The MySQL server license"},
+ {"K", "The MySQL license for Microsoft operating systems"},
+ {"*", "SQL command, type and function index"},
+ {"*", "Concept Index"}
+};
+
+#define NQUERIES 5
+const char *query[NQUERIES]={
+ "mysql information and manual",
+ "upgrading from previous version",
+ "column indexes",
+ "against about after more right the with/without", /* stopwords test */
+ "mysql license and copyright"
+};
diff --git a/storage/maria/ma_ft_update.c b/storage/maria/ma_ft_update.c
new file mode 100644
index 00000000000..c6bed0e8e51
--- /dev/null
+++ b/storage/maria/ma_ft_update.c
@@ -0,0 +1,357 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+/* functions to work with full-text indices */
+
+#include "ma_ftdefs.h"
+#include <math.h>
+
+void _ma_ft_segiterator_init(MARIA_HA *info, uint keynr, const uchar *record,
+ FT_SEG_ITERATOR *ftsi)
+{
+ DBUG_ENTER("_ma_ft_segiterator_init");
+
+ ftsi->num=info->s->keyinfo[keynr].keysegs;
+ ftsi->seg=info->s->keyinfo[keynr].seg;
+ ftsi->rec=record;
+ DBUG_VOID_RETURN;
+}
+
+void _ma_ft_segiterator_dummy_init(const uchar *record, uint len,
+ FT_SEG_ITERATOR *ftsi)
+{
+ DBUG_ENTER("_ma_ft_segiterator_dummy_init");
+
+ ftsi->num=1;
+ ftsi->seg=0;
+ ftsi->pos=record;
+ ftsi->len=len;
+ DBUG_VOID_RETURN;
+}
+
+/*
+ This function breaks convention "return 0 in success"
+ but it's easier to use like this
+
+ while(_ma_ft_segiterator())
+
+ so "1" means "OK", "0" means "EOF"
+*/
+
+uint _ma_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
+{
+ DBUG_ENTER("_ma_ft_segiterator");
+
+ if (!ftsi->num)
+ DBUG_RETURN(0);
+
+ ftsi->num--;
+ if (!ftsi->seg)
+ DBUG_RETURN(1);
+
+ ftsi->seg--;
+
+ if (ftsi->seg->null_bit &&
+ (ftsi->rec[ftsi->seg->null_pos] & ftsi->seg->null_bit))
+ {
+ ftsi->pos=0;
+ DBUG_RETURN(1);
+ }
+ ftsi->pos= ftsi->rec+ftsi->seg->start;
+ if (ftsi->seg->flag & HA_VAR_LENGTH_PART)
+ {
+ uint pack_length= (ftsi->seg->bit_start);
+ ftsi->len= (pack_length == 1 ? (uint) *(uchar*) ftsi->pos :
+ uint2korr(ftsi->pos));
+ ftsi->pos+= pack_length; /* Skip VARCHAR length */
+ DBUG_RETURN(1);
+ }
+ if (ftsi->seg->flag & HA_BLOB_PART)
+ {
+ ftsi->len= _ma_calc_blob_length(ftsi->seg->bit_start,ftsi->pos);
+ memcpy_fixed((char*) &ftsi->pos, ftsi->pos+ftsi->seg->bit_start,
+ sizeof(char*));
+ DBUG_RETURN(1);
+ }
+ ftsi->len=ftsi->seg->length;
+ DBUG_RETURN(1);
+}
+
+
+/* parses a document i.e. calls maria_ft_parse for every keyseg */
+
+uint _ma_ft_parse(TREE *parsed, MARIA_HA *info, uint keynr, const uchar *record,
+ MYSQL_FTPARSER_PARAM *param, MEM_ROOT *mem_root)
+{
+ FT_SEG_ITERATOR ftsi;
+ struct st_mysql_ftparser *parser;
+ DBUG_ENTER("_ma_ft_parse");
+
+ _ma_ft_segiterator_init(info, keynr, record, &ftsi);
+
+ maria_ft_parse_init(parsed, info->s->keyinfo[keynr].seg->charset);
+ parser= info->s->keyinfo[keynr].parser;
+ while (_ma_ft_segiterator(&ftsi))
+ {
+ if (ftsi.pos)
+ if (maria_ft_parse(parsed, (uchar *)ftsi.pos, ftsi.len, parser, param,
+ mem_root))
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
+
+FT_WORD * _ma_ft_parserecord(MARIA_HA *info, uint keynr, const uchar *record,
+ MEM_ROOT *mem_root)
+{
+ TREE ptree;
+ MYSQL_FTPARSER_PARAM *param;
+ DBUG_ENTER("_ma_ft_parserecord");
+ if (! (param= maria_ftparser_call_initializer(info, keynr, 0)))
+ DBUG_RETURN(NULL);
+ bzero((char*) &ptree, sizeof(ptree));
+ param->flags= 0;
+ if (_ma_ft_parse(&ptree, info, keynr, record, param, mem_root))
+ DBUG_RETURN(NULL);
+
+ DBUG_RETURN(maria_ft_linearize(&ptree, mem_root));
+}
+
+static int _ma_ft_store(MARIA_HA *info, uint keynr, uchar *keybuf,
+ FT_WORD *wlist, my_off_t filepos)
+{
+ uint key_length;
+ DBUG_ENTER("_ma_ft_store");
+
+ for (; wlist->pos; wlist++)
+ {
+ key_length= _ma_ft_make_key(info,keynr,keybuf,wlist,filepos);
+ if (_ma_ck_write(info, keynr, keybuf, key_length))
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
+
+static int _ma_ft_erase(MARIA_HA *info, uint keynr, uchar *keybuf,
+ FT_WORD *wlist, my_off_t filepos)
+{
+ uint key_length, err=0;
+ DBUG_ENTER("_ma_ft_erase");
+
+ for (; wlist->pos; wlist++)
+ {
+ key_length= _ma_ft_make_key(info,keynr,keybuf,wlist,filepos);
+ if (_ma_ck_delete(info, keynr, keybuf, key_length))
+ err=1;
+ }
+ DBUG_RETURN(err);
+}
+
+/*
+ Compares an appropriate parts of two WORD_KEY keys directly out of records
+ returns 1 if they are different
+*/
+
+#define THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT 1
+#define GEE_THEY_ARE_ABSOLUTELY_IDENTICAL 0
+
+int _ma_ft_cmp(MARIA_HA *info, uint keynr, const uchar *rec1, const uchar *rec2)
+{
+ FT_SEG_ITERATOR ftsi1, ftsi2;
+ CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset;
+ DBUG_ENTER("_ma_ft_cmp");
+
+ _ma_ft_segiterator_init(info, keynr, rec1, &ftsi1);
+ _ma_ft_segiterator_init(info, keynr, rec2, &ftsi2);
+
+ while (_ma_ft_segiterator(&ftsi1) && _ma_ft_segiterator(&ftsi2))
+ {
+ if ((ftsi1.pos != ftsi2.pos) &&
+ (!ftsi1.pos || !ftsi2.pos ||
+ ha_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len,
+ (uchar*) ftsi2.pos,ftsi2.len,0,0)))
+ DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT);
+ }
+ DBUG_RETURN(GEE_THEY_ARE_ABSOLUTELY_IDENTICAL);
+}
+
+
+/* update a document entry */
+
+int _ma_ft_update(MARIA_HA *info, uint keynr, uchar *keybuf,
+ const uchar *oldrec, const uchar *newrec, my_off_t pos)
+{
+ int error= -1;
+ FT_WORD *oldlist,*newlist, *old_word, *new_word;
+ CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset;
+ uint key_length;
+ int cmp, cmp2;
+ DBUG_ENTER("_ma_ft_update");
+
+ if (!(old_word=oldlist=_ma_ft_parserecord(info, keynr, oldrec,
+ &info->ft_memroot)) ||
+ !(new_word=newlist=_ma_ft_parserecord(info, keynr, newrec,
+ &info->ft_memroot)))
+ goto err;
+
+ error=0;
+ while(old_word->pos && new_word->pos)
+ {
+ cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len,
+ (uchar*) new_word->pos,new_word->len,0,0);
+ cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5);
+
+ if (cmp < 0 || cmp2)
+ {
+ key_length= _ma_ft_make_key(info,keynr,keybuf,old_word,pos);
+ if ((error= _ma_ck_delete(info,keynr, keybuf,key_length)))
+ goto err;
+ }
+ if (cmp > 0 || cmp2)
+ {
+ key_length= _ma_ft_make_key(info, keynr, keybuf, new_word,pos);
+ if ((error= _ma_ck_write(info, keynr, keybuf,key_length)))
+ goto err;
+ }
+ if (cmp<=0) old_word++;
+ if (cmp>=0) new_word++;
+ }
+ if (old_word->pos)
+ error= _ma_ft_erase(info,keynr,keybuf,old_word,pos);
+ else if (new_word->pos)
+ error= _ma_ft_store(info,keynr,keybuf,new_word,pos);
+
+err:
+ free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE));
+ DBUG_RETURN(error);
+}
+
+
+/* adds a document to the collection */
+
+int _ma_ft_add(MARIA_HA *info, uint keynr, uchar *keybuf, const uchar *record,
+ my_off_t pos)
+{
+ int error= -1;
+ FT_WORD *wlist;
+ DBUG_ENTER("_ma_ft_add");
+ DBUG_PRINT("enter",("keynr: %d",keynr));
+
+ if ((wlist= _ma_ft_parserecord(info, keynr, record, &info->ft_memroot)))
+ error= _ma_ft_store(info,keynr,keybuf,wlist,pos);
+ free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE));
+ DBUG_PRINT("exit",("Return: %d",error));
+ DBUG_RETURN(error);
+}
+
+
+/* removes a document from the collection */
+
+int _ma_ft_del(MARIA_HA *info, uint keynr, uchar *keybuf, const uchar *record,
+ my_off_t pos)
+{
+ int error= -1;
+ FT_WORD *wlist;
+ DBUG_ENTER("_ma_ft_del");
+ DBUG_PRINT("enter",("keynr: %d",keynr));
+
+ if ((wlist= _ma_ft_parserecord(info, keynr, record, &info->ft_memroot)))
+ error= _ma_ft_erase(info,keynr,keybuf,wlist,pos);
+ free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE));
+ DBUG_PRINT("exit",("Return: %d",error));
+ DBUG_RETURN(error);
+}
+
+
+uint _ma_ft_make_key(MARIA_HA *info, uint keynr, uchar *keybuf, FT_WORD *wptr,
+ my_off_t filepos)
+{
+ uchar buf[HA_FT_MAXBYTELEN+16];
+ DBUG_ENTER("_ma_ft_make_key");
+
+#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
+ {
+ float weight=(float) ((filepos==HA_OFFSET_ERROR) ? 0 : wptr->weight);
+ mi_float4store(buf,weight);
+ }
+#else
+#error
+#endif
+
+ int2store(buf+HA_FT_WLEN,wptr->len);
+ memcpy(buf+HA_FT_WLEN+2,wptr->pos,wptr->len);
+ DBUG_RETURN(_ma_make_key(info, keynr, keybuf, buf, filepos));
+}
+
+
+/*
+ convert key value to ft2
+*/
+
+uint _ma_ft_convert_to_ft2(MARIA_HA *info, uint keynr, uchar *key)
+{
+ MARIA_SHARE *share= info->s;
+ my_off_t root;
+ DYNAMIC_ARRAY *da=info->ft1_to_ft2;
+ MARIA_KEYDEF *keyinfo=&share->ft2_keyinfo;
+ uchar *key_ptr= (uchar*) dynamic_array_ptr(da, 0), *end;
+ uint length, key_length;
+ MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
+ DBUG_ENTER("_ma_ft_convert_to_ft2");
+
+ /* we'll generate one pageful at once, and insert the rest one-by-one */
+ /* calculating the length of this page ...*/
+ length=(keyinfo->block_length-2) / keyinfo->keylength;
+ set_if_smaller(length, da->elements);
+ length=length * keyinfo->keylength;
+
+ get_key_full_length_rdonly(key_length, key);
+ while (_ma_ck_delete(info, keynr, key, key_length) == 0)
+ {
+ /*
+ nothing to do here.
+ _ma_ck_delete() will populate info->ft1_to_ft2 with deleted keys
+ */
+ }
+
+ /* creating pageful of keys */
+ bzero(info->buff, share->keypage_header);
+ _ma_store_keynr(share, info->buff, keynr);
+ _ma_store_page_used(share, info->buff, length + share->keypage_header);
+ memcpy(info->buff + share->keypage_header, key_ptr, length);
+ info->keyread_buff_used= info->page_changed=1; /* info->buff is used */
+ if ((root= _ma_new(info, DFLT_INIT_HITS, &page_link)) == HA_OFFSET_ERROR ||
+ _ma_write_keypage(info, keyinfo, root, page_link->write_lock,
+ DFLT_INIT_HITS, info->buff))
+ DBUG_RETURN(-1);
+
+ /* inserting the rest of key values */
+ end= (uchar*) dynamic_array_ptr(da, da->elements);
+ for (key_ptr+=length; key_ptr < end; key_ptr+=keyinfo->keylength)
+ if(_ma_ck_real_write_btree(info, keyinfo, key_ptr, 0, &root, SEARCH_SAME))
+ DBUG_RETURN(-1);
+
+ /* now, writing the word key entry */
+ ft_intXstore(key+key_length, - (int) da->elements);
+ _ma_dpointer(info, key+key_length+HA_FT_WLEN, root);
+
+ DBUG_RETURN(_ma_ck_real_write_btree(info,
+ share->keyinfo+keynr,
+ key, 0,
+ &share->state.key_root[keynr],
+ SEARCH_SAME));
+}
diff --git a/storage/maria/ma_ftdefs.h b/storage/maria/ma_ftdefs.h
new file mode 100644
index 00000000000..5a7357e451c
--- /dev/null
+++ b/storage/maria/ma_ftdefs.h
@@ -0,0 +1,152 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+/* some definitions for full-text indices */
+
+#include "ma_fulltext.h"
+#include <m_ctype.h>
+#include <my_tree.h>
+#include <queues.h>
+#include <mysql/plugin.h>
+
+#define true_word_char(ctype, character) \
+ ((ctype) & (_MY_U | _MY_L | _MY_NMR) || \
+ (character) == '_')
+#define misc_word_char(X) 0
+
+#define FT_MAX_WORD_LEN_FOR_SORT 31
+
+#define FTPARSER_MEMROOT_ALLOC_SIZE 65536
+
+#define COMPILE_STOPWORDS_IN
+
+/* Interested readers may consult SMART
+ (ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z)
+ for an excellent implementation of vector space model we use.
+ It also demonstrate the usage of different weghting techniques.
+ This code, though, is completely original and is not based on the
+ SMART code but was in some cases inspired by it.
+
+ NORM_PIVOT was taken from the article
+ A.Singhal, C.Buckley, M.Mitra, "Pivoted Document Length Normalization",
+ ACM SIGIR'96, 21-29, 1996
+ */
+
+#define LWS_FOR_QUERY LWS_TF
+#define LWS_IN_USE LWS_LOG
+#define PRENORM_IN_USE PRENORM_AVG
+#define NORM_IN_USE NORM_PIVOT
+#define GWS_IN_USE GWS_PROB
+/*==============================================================*/
+#define LWS_TF (count)
+#define LWS_BINARY (count>0)
+#define LWS_SQUARE (count*count)
+#define LWS_LOG (count?(log( (double) count)+1):0)
+/*--------------------------------------------------------------*/
+#define PRENORM_NONE (p->weight)
+#define PRENORM_MAX (p->weight/docstat.max)
+#define PRENORM_AUG (0.4+0.6*p->weight/docstat.max)
+#define PRENORM_AVG (p->weight/docstat.sum*docstat.uniq)
+#define PRENORM_AVGLOG ((1+log(p->weight))/(1+log(docstat.sum/docstat.uniq)))
+/*--------------------------------------------------------------*/
+#define NORM_NONE (1)
+#define NORM_SUM (docstat.nsum)
+#define NORM_COS (sqrt(docstat.nsum2))
+
+#define PIVOT_VAL (0.0115)
+#define NORM_PIVOT (1+PIVOT_VAL*docstat.uniq)
+/*---------------------------------------------------------------*/
+#define GWS_NORM (1/sqrt(sum2))
+#define GWS_GFIDF (sum/doc_cnt)
+/* Mysterious, but w/o (double) GWS_IDF performs better :-o */
+#define GWS_IDF log(aio->info->state->records/doc_cnt)
+#define GWS_IDF1 log((double)aio->info->state->records/doc_cnt)
+#define GWS_PROB ((aio->info->state->records > doc_cnt) ? log(((double)(aio->info->state->records-doc_cnt))/doc_cnt) : 0 )
+#define GWS_FREQ (1.0/doc_cnt)
+#define GWS_SQUARED pow(log((double)aio->info->state->records/doc_cnt),2)
+#define GWS_CUBIC pow(log((double)aio->info->state->records/doc_cnt),3)
+#define GWS_ENTROPY (1-(suml/sum-log(sum))/log(aio->info->state->records))
+/*=================================================================*/
+
+/* Boolean search operators */
+#define FTB_YES (ft_boolean_syntax[0])
+#define FTB_EGAL (ft_boolean_syntax[1])
+#define FTB_NO (ft_boolean_syntax[2])
+#define FTB_INC (ft_boolean_syntax[3])
+#define FTB_DEC (ft_boolean_syntax[4])
+#define FTB_LBR (ft_boolean_syntax[5])
+#define FTB_RBR (ft_boolean_syntax[6])
+#define FTB_NEG (ft_boolean_syntax[7])
+#define FTB_TRUNC (ft_boolean_syntax[8])
+#define FTB_LQUOT (ft_boolean_syntax[10])
+#define FTB_RQUOT (ft_boolean_syntax[11])
+
+typedef struct st_maria_ft_word {
+ uchar * pos;
+ uint len;
+ double weight;
+} FT_WORD;
+
+int is_stopword(char *word, uint len);
+
+uint _ma_ft_make_key(MARIA_HA *, uint , uchar *, FT_WORD *, my_off_t);
+
+uchar maria_ft_get_word(CHARSET_INFO *, uchar **, uchar *, FT_WORD *,
+ MYSQL_FTPARSER_BOOLEAN_INFO *);
+uchar maria_ft_simple_get_word(CHARSET_INFO *, uchar **, const uchar *,
+ FT_WORD *, my_bool);
+
+typedef struct _st_maria_ft_seg_iterator {
+ uint num, len;
+ HA_KEYSEG *seg;
+ const uchar *rec, *pos;
+} FT_SEG_ITERATOR;
+
+void _ma_ft_segiterator_init(MARIA_HA *, uint, const uchar *, FT_SEG_ITERATOR *);
+void _ma_ft_segiterator_dummy_init(const uchar *, uint, FT_SEG_ITERATOR *);
+uint _ma_ft_segiterator(FT_SEG_ITERATOR *);
+
+void maria_ft_parse_init(TREE *, CHARSET_INFO *);
+int maria_ft_parse(TREE *, uchar *, int, struct st_mysql_ftparser *parser,
+ MYSQL_FTPARSER_PARAM *, MEM_ROOT *);
+FT_WORD * maria_ft_linearize(TREE *, MEM_ROOT *);
+FT_WORD * _ma_ft_parserecord(MARIA_HA *, uint, const uchar *, MEM_ROOT *);
+uint _ma_ft_parse(TREE *, MARIA_HA *, uint, const uchar *,
+ MYSQL_FTPARSER_PARAM *, MEM_ROOT *);
+
+FT_INFO *maria_ft_init_nlq_search(MARIA_HA *, uint, uchar *, uint, uint, uchar *);
+FT_INFO *maria_ft_init_boolean_search(MARIA_HA *, uint, uchar *, uint, CHARSET_INFO *);
+
+extern const struct _ft_vft _ma_ft_vft_nlq;
+int maria_ft_nlq_read_next(FT_INFO *, char *);
+float maria_ft_nlq_find_relevance(FT_INFO *, uchar *, uint);
+void maria_ft_nlq_close_search(FT_INFO *);
+float maria_ft_nlq_get_relevance(FT_INFO *);
+my_off_t maria_ft_nlq_get_docid(FT_INFO *);
+void maria_ft_nlq_reinit_search(FT_INFO *);
+
+extern const struct _ft_vft _ma_ft_vft_boolean;
+int maria_ft_boolean_read_next(FT_INFO *, char *);
+float maria_ft_boolean_find_relevance(FT_INFO *, uchar *, uint);
+void maria_ft_boolean_close_search(FT_INFO *);
+float maria_ft_boolean_get_relevance(FT_INFO *);
+my_off_t maria_ft_boolean_get_docid(FT_INFO *);
+void maria_ft_boolean_reinit_search(FT_INFO *);
+extern MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info,
+ uint keynr,
+ uint paramnr);
+extern void maria_ftparser_call_deinitializer(MARIA_HA *info);
diff --git a/storage/maria/ma_fulltext.h b/storage/maria/ma_fulltext.h
new file mode 100644
index 00000000000..dc6cf9d1204
--- /dev/null
+++ b/storage/maria/ma_fulltext.h
@@ -0,0 +1,27 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+/* some definitions for full-text indices */
+
+#include "maria_def.h"
+#include "ft_global.h"
+
+int _ma_ft_cmp(MARIA_HA *, uint, const uchar *, const uchar *);
+int _ma_ft_add(MARIA_HA *, uint, uchar *, const uchar *, my_off_t);
+int _ma_ft_del(MARIA_HA *, uint, uchar *, const uchar *, my_off_t);
+
+uint _ma_ft_convert_to_ft2(MARIA_HA *, uint, uchar *);
diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c
new file mode 100644
index 00000000000..0e1135087c3
--- /dev/null
+++ b/storage/maria/ma_info.c
@@ -0,0 +1,141 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Return useful base information for an open table */
+
+#include "maria_def.h"
+#ifdef __WIN__
+#include <sys/stat.h>
+#endif
+
+ /* Get position to last record */
+
+MARIA_RECORD_POS maria_position(MARIA_HA *info)
+{
+ return info->cur_row.lastpos;
+}
+
+
+/* Get information about the table */
+/* if flag == 2 one get current info (no sync from database */
+
+int maria_status(MARIA_HA *info, register MARIA_INFO *x, uint flag)
+{
+ MY_STAT state;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("maria_status");
+
+ x->recpos= info->cur_row.lastpos;
+ if (flag == HA_STATUS_POS)
+ DBUG_RETURN(0); /* Compatible with ISAM */
+ if (!(flag & HA_STATUS_NO_LOCK))
+ {
+ pthread_mutex_lock(&share->intern_lock);
+ VOID(_ma_readinfo(info,F_RDLCK,0));
+ fast_ma_writeinfo(info);
+ pthread_mutex_unlock(&share->intern_lock);
+ }
+ if (flag & HA_STATUS_VARIABLE)
+ {
+ x->records = info->state->records;
+ x->deleted = info->state->del;
+ x->delete_length = info->state->empty;
+ x->data_file_length =info->state->data_file_length;
+ x->index_file_length=info->state->key_file_length;
+
+ x->keys = share->state.header.keys;
+ x->check_time = share->state.check_time;
+ x->mean_reclength = x->records ?
+ (ulong) ((x->data_file_length - x->delete_length) /x ->records) :
+ (ulong) share->min_pack_length;
+ }
+ if (flag & HA_STATUS_ERRKEY)
+ {
+ x->errkey= info->errkey;
+ x->dup_key_pos= info->dup_key_pos;
+ }
+ if (flag & HA_STATUS_CONST)
+ {
+ x->reclength = share->base.reclength;
+ x->max_data_file_length=share->base.max_data_file_length;
+ x->max_index_file_length=info->s->base.max_key_file_length;
+ x->filenr = info->dfile.file;
+ x->options = share->options;
+ x->create_time=share->state.create_time;
+ x->reflength= maria_get_pointer_length(share->base.max_data_file_length,
+ maria_data_pointer_size);
+ x->record_offset= (info->s->data_file_type == STATIC_RECORD ?
+ share->base.pack_reclength: 0);
+ x->sortkey= -1; /* No clustering */
+ x->rec_per_key = share->state.rec_per_key_part;
+ x->key_map = share->state.key_map;
+ x->data_file_name = share->data_file_name;
+ x->index_file_name = share->index_file_name;
+ x->data_file_type = share->data_file_type;
+ }
+ if ((flag & HA_STATUS_TIME) && !my_fstat(info->dfile.file, &state, MYF(0)))
+ x->update_time=state.st_mtime;
+ else
+ x->update_time=0;
+ if (flag & HA_STATUS_AUTO)
+ {
+ x->auto_increment= share->state.auto_increment+1;
+ if (!x->auto_increment) /* This shouldn't happen */
+ x->auto_increment= ~(ulonglong) 0;
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Write a message to the error log.
+
+ SYNOPSIS
+ _ma_report_error()
+ file_name Name of table file (e.g. index_file_name).
+ errcode Error number.
+
+ DESCRIPTION
+ This function supplies my_error() with a table name. Most error
+ messages need one. Since string arguments in error messages are limited
+ to 64 characters by convention, we ensure that in case of truncation,
+ that the end of the index file path is in the message. This contains
+ the most valuable information (the table name and the database name).
+
+ RETURN
+ void
+*/
+
+void _ma_report_error(int errcode, const char *file_name)
+{
+ uint length;
+ DBUG_ENTER("_ma_report_error");
+ DBUG_PRINT("enter",("errcode %d, table '%s'", errcode, file_name));
+
+ if ((length= strlen(file_name)) > 64)
+ {
+ /* we first remove the directory */
+ uint dir_length= dirname_length(file_name);
+ file_name+= dir_length;
+ if ((length-= dir_length) > 64)
+ {
+ /* still too long, chop start of table name */
+ file_name+= length - 64;
+ }
+ }
+
+ my_error(errcode, MYF(ME_NOREFRESH), file_name);
+ DBUG_VOID_RETURN;
+}
diff --git a/storage/maria/ma_init.c b/storage/maria/ma_init.c
new file mode 100644
index 00000000000..7cc648ae259
--- /dev/null
+++ b/storage/maria/ma_init.c
@@ -0,0 +1,69 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Initialize an maria-database */
+
+#include "maria_def.h"
+#include <ft_global.h>
+#include "ma_blockrec.h"
+#include "trnman_public.h"
+#include "ma_checkpoint.h"
+
+my_bool maria_inited= FALSE;
+pthread_mutex_t THR_LOCK_maria;
+
+/*
+ Initialize maria
+
+ SYNOPSIS
+ maria_init()
+
+ TODO
+ Open log files and do recovery if need
+
+ RETURN
+ 0 ok
+ # error number
+*/
+
+int maria_init(void)
+{
+ if (!maria_inited)
+ {
+ maria_inited= TRUE;
+ pthread_mutex_init(&THR_LOCK_maria,MY_MUTEX_INIT_SLOW);
+ _ma_init_block_record_data();
+ my_handler_error_register();
+ }
+ return 0;
+}
+
+
+void maria_end(void)
+{
+ if (maria_inited)
+ {
+ maria_inited= maria_multi_threaded= FALSE;
+ ft_free_stopwords();
+ ma_checkpoint_end();
+ trnman_destroy();
+ if (translog_status == TRANSLOG_OK)
+ translog_destroy();
+ end_pagecache(maria_log_pagecache, TRUE);
+ end_pagecache(maria_pagecache, TRUE);
+ ma_control_file_end();
+ pthread_mutex_destroy(&THR_LOCK_maria);
+ }
+}
diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c
new file mode 100644
index 00000000000..b13c24cce83
--- /dev/null
+++ b/storage/maria/ma_key.c
@@ -0,0 +1,572 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Functions to handle keys */
+
+#include "maria_def.h"
+#include "m_ctype.h"
+#include "ma_sp_defs.h"
+#ifdef HAVE_IEEEFP_H
+#include <ieeefp.h>
+#endif
+
+#define CHECK_KEYS /* Enable safety checks */
+
+#define FIX_LENGTH(cs, pos, length, char_length) \
+ do { \
+ if (length > char_length) \
+ char_length= my_charpos(cs, pos, pos+length, char_length); \
+ set_if_smaller(char_length,length); \
+ } while(0)
+
+static int _ma_put_key_in_record(MARIA_HA *info,uint keynr,uchar *record);
+
+/*
+ Make a intern key from a record
+
+ SYNOPSIS
+ _ma_make_key()
+ info MyiSAM handler
+ keynr key number
+ key Store created key here
+ record Record
+ filepos Position to record in the data file
+
+ RETURN
+ Length of key
+*/
+
+uint _ma_make_key(register MARIA_HA *info, uint keynr, uchar *key,
+ const uchar *record, MARIA_RECORD_POS filepos)
+{
+ const uchar *pos;
+ uchar *start;
+ reg1 HA_KEYSEG *keyseg;
+ my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT;
+ DBUG_ENTER("_ma_make_key");
+
+ if (info->s->keyinfo[keynr].flag & HA_SPATIAL)
+ {
+ /*
+ TODO: nulls processing
+ */
+#ifdef HAVE_SPATIAL
+ DBUG_RETURN(_ma_sp_make_key(info,keynr, key,record,filepos));
+#else
+ DBUG_ASSERT(0); /* maria_open should check that this never happens*/
+#endif
+ }
+
+ start=key;
+ for (keyseg=info->s->keyinfo[keynr].seg ; keyseg->type ;keyseg++)
+ {
+ enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type;
+ uint length=keyseg->length;
+ uint char_length;
+ CHARSET_INFO *cs=keyseg->charset;
+
+ if (keyseg->null_bit)
+ {
+ if (record[keyseg->null_pos] & keyseg->null_bit)
+ {
+ *key++= 0; /* NULL in key */
+ continue;
+ }
+ *key++=1; /* Not NULL */
+ }
+
+ char_length= ((!is_ft && cs && cs->mbmaxlen > 1) ? length/cs->mbmaxlen :
+ length);
+
+ pos= record+keyseg->start;
+ if (type == HA_KEYTYPE_BIT)
+ {
+ if (keyseg->bit_length)
+ {
+ uchar bits= get_rec_bits((uchar*) record + keyseg->bit_pos,
+ keyseg->bit_start, keyseg->bit_length);
+ *key++= (char) bits;
+ length--;
+ }
+ memcpy(key, pos, length);
+ key+= length;
+ continue;
+ }
+ if (keyseg->flag & HA_SPACE_PACK)
+ {
+ if (type != HA_KEYTYPE_NUM)
+ {
+ length= cs->cset->lengthsp(cs, (char*) pos, length);
+ }
+ else
+ {
+ const uchar *end= pos + length;
+ while (pos < end && pos[0] == ' ')
+ pos++;
+ length= (uint) (end-pos);
+ }
+ FIX_LENGTH(cs, pos, length, char_length);
+ store_key_length_inc(key,char_length);
+ memcpy(key, pos, (size_t) char_length);
+ key+=char_length;
+ continue;
+ }
+ if (keyseg->flag & HA_VAR_LENGTH_PART)
+ {
+ uint pack_length= (keyseg->bit_start == 1 ? 1 : 2);
+ uint tmp_length= (pack_length == 1 ? (uint) *(uchar*) pos :
+ uint2korr(pos));
+ pos+= pack_length; /* Skip VARCHAR length */
+ set_if_smaller(length,tmp_length);
+ FIX_LENGTH(cs, pos, length, char_length);
+ store_key_length_inc(key,char_length);
+ memcpy(key,pos,(size_t) char_length);
+ key+= char_length;
+ continue;
+ }
+ else if (keyseg->flag & HA_BLOB_PART)
+ {
+ uint tmp_length= _ma_calc_blob_length(keyseg->bit_start,pos);
+ memcpy_fixed(&pos,pos+keyseg->bit_start,sizeof(char*));
+ set_if_smaller(length,tmp_length);
+ FIX_LENGTH(cs, pos, length, char_length);
+ store_key_length_inc(key,char_length);
+ memcpy(key,pos,(size_t) char_length);
+ key+= char_length;
+ continue;
+ }
+ else if (keyseg->flag & HA_SWAP_KEY)
+ { /* Numerical column */
+#ifdef HAVE_ISNAN
+ if (type == HA_KEYTYPE_FLOAT)
+ {
+ float nr;
+ float4get(nr,pos);
+ if (isnan(nr))
+ {
+ /* Replace NAN with zero */
+ bzero(key,length);
+ key+=length;
+ continue;
+ }
+ }
+ else if (type == HA_KEYTYPE_DOUBLE)
+ {
+ double nr;
+ float8get(nr,pos);
+ if (isnan(nr))
+ {
+ bzero(key,length);
+ key+=length;
+ continue;
+ }
+ }
+#endif
+ pos+=length;
+ while (length--)
+ {
+ *key++ = *--pos;
+ }
+ continue;
+ }
+ FIX_LENGTH(cs, pos, length, char_length);
+ memcpy(key, pos, char_length);
+ if (length > char_length)
+ cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' ');
+ key+= length;
+ }
+ _ma_dpointer(info,key,filepos);
+ DBUG_PRINT("exit",("keynr: %d",keynr));
+ DBUG_DUMP("key",start,(uint) (key-start)+keyseg->length);
+ DBUG_EXECUTE("key",
+ _ma_print_key(DBUG_FILE,info->s->keyinfo[keynr].seg,start,
+ (uint) (key-start)););
+ DBUG_RETURN((uint) (key-start)); /* Return keylength */
+} /* _ma_make_key */
+
+
+/*
+ Pack a key to intern format from given format (c_rkey)
+
+ SYNOPSIS
+ _ma_pack_key()
+ info MARIA handler
+ uint keynr key number
+ key Store packed key here
+ old Not packed key
+ keypart_map bitmap of used keyparts
+ last_used_keyseg out parameter. May be NULL
+
+ RETURN
+ length of packed key
+
+ last_use_keyseg Store pointer to the keyseg after the last used one
+*/
+
+uint _ma_pack_key(register MARIA_HA *info, uint keynr, uchar *key,
+ const uchar *old, key_part_map keypart_map,
+ HA_KEYSEG **last_used_keyseg)
+{
+ uchar *start_key=key;
+ HA_KEYSEG *keyseg;
+ my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT;
+ DBUG_ENTER("_ma_pack_key");
+
+ /* "one part" rtree key is 2*SPDIMS part key in Maria */
+ if (info->s->keyinfo[keynr].key_alg == HA_KEY_ALG_RTREE)
+ keypart_map= (((key_part_map)1) << (2*SPDIMS)) - 1;
+
+ /* only key prefixes are supported */
+ DBUG_ASSERT(((keypart_map+1) & keypart_map) == 0);
+
+ for (keyseg=info->s->keyinfo[keynr].seg ; keyseg->type && keypart_map;
+ old+= keyseg->length, keyseg++)
+ {
+ enum ha_base_keytype type= (enum ha_base_keytype) keyseg->type;
+ uint length= keyseg->length;
+ uint char_length;
+ const uchar *pos;
+ CHARSET_INFO *cs=keyseg->charset;
+
+ keypart_map>>= 1;
+ if (keyseg->null_bit)
+ {
+ if (!(*key++= (char) 1-*old++)) /* Copy null marker */
+ {
+ if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART))
+ old+= 2;
+ continue; /* Found NULL */
+ }
+ }
+ char_length= ((!is_ft && cs && cs->mbmaxlen > 1) ? length/cs->mbmaxlen :
+ length);
+ pos= old;
+ if (keyseg->flag & HA_SPACE_PACK)
+ {
+ const uchar *end= pos + length;
+ if (type == HA_KEYTYPE_NUM)
+ {
+ while (pos < end && pos[0] == ' ')
+ pos++;
+ }
+ else if (type != HA_KEYTYPE_BINARY)
+ {
+ while (end > pos && end[-1] == ' ')
+ end--;
+ }
+ length=(uint) (end-pos);
+ FIX_LENGTH(cs, pos, length, char_length);
+ store_key_length_inc(key,char_length);
+ memcpy(key,pos,(size_t) char_length);
+ key+= char_length;
+ continue;
+ }
+ else if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART))
+ {
+ /* Length of key-part used with maria_rkey() always 2 */
+ uint tmp_length=uint2korr(pos);
+ pos+=2;
+ set_if_smaller(length,tmp_length); /* Safety */
+ FIX_LENGTH(cs, pos, length, char_length);
+ store_key_length_inc(key,char_length);
+ old+=2; /* Skip length */
+ memcpy(key, pos,(size_t) char_length);
+ key+= char_length;
+ continue;
+ }
+ else if (keyseg->flag & HA_SWAP_KEY)
+ { /* Numerical column */
+ pos+=length;
+ while (length--)
+ *key++ = *--pos;
+ continue;
+ }
+ FIX_LENGTH(cs, pos, length, char_length);
+ memcpy(key, pos, char_length);
+ if (length > char_length)
+ cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' ');
+ key+= length;
+ }
+ if (last_used_keyseg)
+ *last_used_keyseg= keyseg;
+
+ DBUG_PRINT("exit", ("length: %u", (uint) (key-start_key)));
+ DBUG_RETURN((uint) (key-start_key));
+} /* _ma_pack_key */
+
+
+
+/*
+ Store found key in record
+
+ SYNOPSIS
+ _ma_put_key_in_record()
+ info MARIA handler
+ keynr Key number that was used
+ record Store key here
+
+ Last read key is in info->lastkey
+
+ NOTES
+ Used when only-keyread is wanted
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr,
+ uchar *record)
+{
+ reg2 uchar *key;
+ uchar *pos,*key_end;
+ reg1 HA_KEYSEG *keyseg;
+ uchar *blob_ptr;
+ DBUG_ENTER("_ma_put_key_in_record");
+
+ blob_ptr= info->lastkey2; /* Place to put blob parts */
+ key=info->lastkey; /* KEy that was read */
+ key_end=key+info->lastkey_length;
+ for (keyseg=info->s->keyinfo[keynr].seg ; keyseg->type ;keyseg++)
+ {
+ if (keyseg->null_bit)
+ {
+ if (!*key++)
+ {
+ record[keyseg->null_pos]|= keyseg->null_bit;
+ continue;
+ }
+ record[keyseg->null_pos]&= ~keyseg->null_bit;
+ }
+ if (keyseg->type == HA_KEYTYPE_BIT)
+ {
+ uint length= keyseg->length;
+
+ if (keyseg->bit_length)
+ {
+ uchar bits= *key++;
+ set_rec_bits(bits, record + keyseg->bit_pos, keyseg->bit_start,
+ keyseg->bit_length);
+ length--;
+ }
+ else
+ {
+ clr_rec_bits(record + keyseg->bit_pos, keyseg->bit_start,
+ keyseg->bit_length);
+ }
+ memcpy(record + keyseg->start, key, length);
+ key+= length;
+ continue;
+ }
+ if (keyseg->flag & HA_SPACE_PACK)
+ {
+ uint length;
+ get_key_length(length,key);
+#ifdef CHECK_KEYS
+ if (length > keyseg->length || key+length > key_end)
+ goto err;
+#endif
+ pos= record+keyseg->start;
+ if (keyseg->type != (int) HA_KEYTYPE_NUM)
+ {
+ memcpy(pos,key,(size_t) length);
+ keyseg->charset->cset->fill(keyseg->charset,
+ (char*) pos + length,
+ keyseg->length - length,
+ ' ');
+ }
+ else
+ {
+ bfill(pos,keyseg->length-length,' ');
+ memcpy(pos+keyseg->length-length,key,(size_t) length);
+ }
+ key+=length;
+ continue;
+ }
+
+ if (keyseg->flag & HA_VAR_LENGTH_PART)
+ {
+ uint length;
+ get_key_length(length,key);
+#ifdef CHECK_KEYS
+ if (length > keyseg->length || key+length > key_end)
+ goto err;
+#endif
+ /* Store key length */
+ if (keyseg->bit_start == 1)
+ *(uchar*) (record+keyseg->start)= (uchar) length;
+ else
+ int2store(record+keyseg->start, length);
+ /* And key data */
+ memcpy(record+keyseg->start + keyseg->bit_start, key, length);
+ key+= length;
+ }
+ else if (keyseg->flag & HA_BLOB_PART)
+ {
+ uint length;
+ get_key_length(length,key);
+#ifdef CHECK_KEYS
+ if (length > keyseg->length || key+length > key_end)
+ goto err;
+#endif
+ memcpy(record+keyseg->start+keyseg->bit_start,
+ (char*) &blob_ptr,sizeof(char*));
+ memcpy(blob_ptr,key,length);
+ blob_ptr+=length;
+
+ /* The above changed info->lastkey2. Inform maria_rnext_same(). */
+ info->update&= ~HA_STATE_RNEXT_SAME;
+
+ _ma_store_blob_length(record+keyseg->start,
+ (uint) keyseg->bit_start,length);
+ key+=length;
+ }
+ else if (keyseg->flag & HA_SWAP_KEY)
+ {
+ uchar *to= record+keyseg->start+keyseg->length;
+ uchar *end= key+keyseg->length;
+#ifdef CHECK_KEYS
+ if (end > key_end)
+ goto err;
+#endif
+ do
+ {
+ *--to= *key++;
+ } while (key != end);
+ continue;
+ }
+ else
+ {
+#ifdef CHECK_KEYS
+ if (key+keyseg->length > key_end)
+ goto err;
+#endif
+ memcpy(record+keyseg->start, key, (size_t) keyseg->length);
+ key+= keyseg->length;
+ }
+ }
+ DBUG_RETURN(0);
+
+err:
+ DBUG_RETURN(1); /* Crashed row */
+} /* _ma_put_key_in_record */
+
+
+ /* Here when key reads are used */
+
+int _ma_read_key_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos)
+{
+ fast_ma_writeinfo(info);
+ if (filepos != HA_OFFSET_ERROR)
+ {
+ if (info->lastinx >= 0)
+ { /* Read only key */
+ if (_ma_put_key_in_record(info,(uint) info->lastinx,buf))
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ return -1;
+ }
+ info->update|= HA_STATE_AKTIV; /* We should find a record */
+ return 0;
+ }
+ my_errno=HA_ERR_WRONG_INDEX;
+ }
+ return(-1); /* Wrong data to read */
+}
+
+
+/*
+ Retrieve auto_increment info
+
+ SYNOPSIS
+ retrieve_auto_increment()
+ key Auto-increment key
+ key_type Key's type
+
+ NOTE
+ 'key' should in "record" format, that is, how it is packed in a record
+ (this matters with HA_SWAP_KEY).
+
+ IMPLEMENTATION
+ For signed columns we don't retrieve the auto increment value if it's
+ less than zero.
+*/
+
+ulonglong ma_retrieve_auto_increment(const uchar *key, uint8 key_type)
+{
+ ulonglong value= 0; /* Store unsigned values here */
+ longlong s_value= 0; /* Store signed values here */
+
+ switch (key_type) {
+ case HA_KEYTYPE_INT8:
+ s_value= (longlong) *(char*)key;
+ break;
+ case HA_KEYTYPE_BINARY:
+ value=(ulonglong) *(uchar*) key;
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ s_value= (longlong) sint2korr(key);
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ value=(ulonglong) uint2korr(key);
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ s_value= (longlong) sint4korr(key);
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ value=(ulonglong) uint4korr(key);
+ break;
+ case HA_KEYTYPE_INT24:
+ s_value= (longlong) sint3korr(key);
+ break;
+ case HA_KEYTYPE_UINT24:
+ value=(ulonglong) uint3korr(key);
+ break;
+ case HA_KEYTYPE_FLOAT: /* This shouldn't be used */
+ {
+ float f_1;
+ float4get(f_1,key);
+ /* Ignore negative values */
+ value = (f_1 < (float) 0.0) ? 0 : (ulonglong) f_1;
+ break;
+ }
+ case HA_KEYTYPE_DOUBLE: /* This shouldn't be used */
+ {
+ double f_1;
+ float8get(f_1,key);
+ /* Ignore negative values */
+ value = (f_1 < 0.0) ? 0 : (ulonglong) f_1;
+ break;
+ }
+ case HA_KEYTYPE_LONGLONG:
+ s_value= sint8korr(key);
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ value= uint8korr(key);
+ break;
+ default:
+ DBUG_ASSERT(0);
+ value=0; /* Error */
+ break;
+ }
+
+ /*
+ The following code works becasue if s_value < 0 then value is 0
+ and if s_value == 0 then value will contain either s_value or the
+ correct value.
+ */
+ return (s_value > 0) ? (ulonglong) s_value : value;
+}
diff --git a/storage/maria/ma_key_recover.c b/storage/maria/ma_key_recover.c
new file mode 100644
index 00000000000..81f97cecb87
--- /dev/null
+++ b/storage/maria/ma_key_recover.c
@@ -0,0 +1,1071 @@
+/* Copyright (C) 2007 Michael Widenius
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Redo of index */
+
+#include "maria_def.h"
+#include "ma_blockrec.h"
+#include "trnman.h"
+#include "ma_key_recover.h"
+
+/****************************************************************************
+ Some helper functions used both by key page loggin and block page loggin
+****************************************************************************/
+
+/**
+ @brief Unpin all pinned pages
+
+ @fn _ma_unpin_all_pages()
+ @param info Maria handler
+ @param undo_lsn LSN for undo pages. LSN_IMPOSSIBLE if we shouldn't write
+ undo (like on duplicate key errors)
+
+ @note
+ We unpin pages in the reverse order as they where pinned; This may not
+ be strictly necessary but may simplify things in the future.
+
+ @return
+ @retval 0 ok
+ @retval 1 error (fatal disk error)
+*/
+
+void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn)
+{
+ MARIA_PINNED_PAGE *page_link= ((MARIA_PINNED_PAGE*)
+ dynamic_array_ptr(&info->pinned_pages, 0));
+ MARIA_PINNED_PAGE *pinned_page= page_link + info->pinned_pages.elements;
+ DBUG_ENTER("_ma_unpin_all_pages");
+ DBUG_PRINT("info", ("undo_lsn: %lu", (ulong) undo_lsn));
+
+ if (!info->s->now_transactional)
+ DBUG_ASSERT(undo_lsn == LSN_IMPOSSIBLE || maria_in_recovery);
+
+ while (pinned_page-- != page_link)
+ {
+ DBUG_ASSERT(!pinned_page->changed ||
+ undo_lsn != LSN_IMPOSSIBLE || !info->s->now_transactional);
+ pagecache_unlock_by_link(info->s->pagecache, pinned_page->link,
+ pinned_page->unlock, PAGECACHE_UNPIN,
+ info->trn->rec_lsn, undo_lsn,
+ pinned_page->changed);
+ }
+
+ info->pinned_pages.elements= 0;
+ DBUG_VOID_RETURN;
+}
+
+
+my_bool _ma_write_clr(MARIA_HA *info, LSN undo_lsn,
+ enum translog_record_type undo_type,
+ my_bool store_checksum, ha_checksum checksum,
+ LSN *res_lsn, void *extra_msg)
+{
+ uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + CLR_TYPE_STORE_SIZE +
+ HA_CHECKSUM_STORE_SIZE+ KEY_NR_STORE_SIZE + PAGE_STORE_SIZE];
+ uchar *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
+ struct st_msg_to_write_hook_for_clr_end msg;
+ my_bool res;
+ DBUG_ENTER("_ma_write_clr");
+
+ /* undo_lsn must be first for compression to work */
+ lsn_store(log_data, undo_lsn);
+ clr_type_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE, undo_type);
+ log_pos= log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + CLR_TYPE_STORE_SIZE;
+
+ /* Extra_msg is handled in write_hook_for_clr_end() */
+ msg.undone_record_type= undo_type;
+ msg.previous_undo_lsn= undo_lsn;
+ msg.extra_msg= extra_msg;
+ msg.checksum_delta= 0;
+
+ if (store_checksum)
+ {
+ msg.checksum_delta= checksum;
+ ha_checksum_store(log_pos, checksum);
+ log_pos+= HA_CHECKSUM_STORE_SIZE;
+ }
+ else if (undo_type == LOGREC_UNDO_KEY_INSERT_WITH_ROOT ||
+ undo_type == LOGREC_UNDO_KEY_DELETE_WITH_ROOT)
+ {
+ /* Key root changed. Store new key root */
+ struct st_msg_to_write_hook_for_undo_key *undo_msg= extra_msg;
+ ulonglong page;
+ key_nr_store(log_pos, undo_msg->keynr);
+ page= (undo_msg->value == HA_OFFSET_ERROR ? IMPOSSIBLE_PAGE_NO :
+ undo_msg->value / info->s->block_size);
+ page_store(log_pos + KEY_NR_STORE_SIZE, page);
+ log_pos+= KEY_NR_STORE_SIZE + PAGE_STORE_SIZE;
+ }
+ if (undo_type == LOGREC_UNDO_ROW_DELETE ||
+ undo_type == LOGREC_UNDO_ROW_UPDATE)
+ {
+ /*
+ We need to store position to the row that was inserted to be
+ able to regenerate keys
+ */
+ MARIA_RECORD_POS rowid= info->cur_row.lastpos;
+ ulonglong page= ma_recordpos_to_page(rowid);
+ uint dir_entry= ma_recordpos_to_dir_entry(rowid);
+ page_store(log_pos, page);
+ dirpos_store(log_pos+ PAGE_STORE_SIZE, dir_entry);
+ log_pos+= PAGE_STORE_SIZE + DIRPOS_STORE_SIZE;
+ }
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data);
+
+ res= translog_write_record(res_lsn, LOGREC_CLR_END,
+ info->trn, info, log_array[TRANSLOG_INTERNAL_PARTS
+ + 0].length,
+ TRANSLOG_INTERNAL_PARTS + 1, log_array,
+ log_data + LSN_STORE_SIZE, &msg);
+ DBUG_RETURN(res);
+}
+
+
+/**
+ @brief Sets transaction's undo_lsn, first_undo_lsn if needed
+
+ @return Operation status, always 0 (success)
+*/
+
+my_bool write_hook_for_clr_end(enum translog_record_type type
+ __attribute__ ((unused)),
+ TRN *trn, MARIA_HA *tbl_info
+ __attribute__ ((unused)),
+ LSN *lsn __attribute__ ((unused)),
+ void *hook_arg)
+{
+ MARIA_SHARE *share= tbl_info->s;
+ struct st_msg_to_write_hook_for_clr_end *msg=
+ (struct st_msg_to_write_hook_for_clr_end *)hook_arg;
+ DBUG_ASSERT(trn->trid != 0);
+ trn->undo_lsn= msg->previous_undo_lsn;
+
+ switch (msg->undone_record_type) {
+ case LOGREC_UNDO_ROW_DELETE:
+ share->state.state.records++;
+ share->state.state.checksum+= msg->checksum_delta;
+ break;
+ case LOGREC_UNDO_ROW_INSERT:
+ share->state.state.records--;
+ share->state.state.checksum+= msg->checksum_delta;
+ break;
+ case LOGREC_UNDO_ROW_UPDATE:
+ share->state.state.checksum+= msg->checksum_delta;
+ break;
+ case LOGREC_UNDO_KEY_INSERT_WITH_ROOT:
+ case LOGREC_UNDO_KEY_DELETE_WITH_ROOT:
+ {
+ /* Update key root */
+ struct st_msg_to_write_hook_for_undo_key *extra_msg=
+ (struct st_msg_to_write_hook_for_undo_key *) msg->extra_msg;
+ *extra_msg->root= extra_msg->value;
+ break;
+ }
+ case LOGREC_UNDO_KEY_INSERT:
+ case LOGREC_UNDO_KEY_DELETE:
+ break;
+ default:
+ DBUG_ASSERT(0);
+ }
+ if (trn->undo_lsn == LSN_IMPOSSIBLE) /* has fully rolled back */
+ trn->first_undo_lsn= LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn);
+ return 0;
+}
+
+
+/**
+ @brief write hook for undo key
+*/
+
+my_bool write_hook_for_undo_key(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info,
+ LSN *lsn, void *hook_arg)
+{
+ struct st_msg_to_write_hook_for_undo_key *msg=
+ (struct st_msg_to_write_hook_for_undo_key *) hook_arg;
+
+ *msg->root= msg->value;
+ _ma_fast_unlock_key_del(tbl_info);
+ return write_hook_for_undo(type, trn, tbl_info, lsn, 0);
+}
+
+
+/**
+ Upates "auto_increment" and calls the generic UNDO_KEY hook
+
+ @return Operation status, always 0 (success)
+*/
+
+my_bool write_hook_for_undo_key_insert(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info,
+ LSN *lsn, void *hook_arg)
+{
+ struct st_msg_to_write_hook_for_undo_key *msg=
+ (struct st_msg_to_write_hook_for_undo_key *) hook_arg;
+ MARIA_SHARE *share= tbl_info->s;
+ if (msg->auto_increment > 0)
+ {
+ /*
+ Only reason to set it here is to have a mutex protect from checkpoint
+ reading at the same time (would see a corrupted value).
+ */
+ DBUG_PRINT("info",("auto_inc: %lu new auto_inc: %lu",
+ (ulong)share->state.auto_increment,
+ (ulong)msg->auto_increment));
+ set_if_bigger(share->state.auto_increment, msg->auto_increment);
+ }
+ return write_hook_for_undo_key(type, trn, tbl_info, lsn, hook_arg);
+}
+
+
+/*****************************************************************************
+ Functions for logging of key page changes
+*****************************************************************************/
+
+/**
+ @brief
+ Write log entry for page that has got data added or deleted at start of page
+*/
+
+my_bool _ma_log_prefix(MARIA_HA *info, my_off_t page,
+ uchar *buff, uint changed_length,
+ int move_length)
+{
+ uint translog_parts;
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 7 + 7], *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 3];
+ DBUG_ENTER("_ma_log_prefix");
+ DBUG_PRINT("enter", ("page: %lu changed_length: %u move_length: %d",
+ (ulong) page, changed_length, move_length));
+
+ page/= info->s->block_size;
+ log_pos= log_data + FILEID_STORE_SIZE;
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+
+ if (move_length < 0)
+ {
+ /* Delete prefix */
+ log_pos[0]= KEY_OP_DEL_PREFIX;
+ int2store(log_pos+1, -move_length);
+ log_pos+= 3;
+ if (changed_length)
+ {
+ /*
+ We don't need a KEY_OP_OFFSET as KEY_OP_DEL_PREFIX has an implicit
+ offset
+ */
+ log_pos[0]= KEY_OP_CHANGE;
+ int2store(log_pos+1, changed_length);
+ log_pos+= 3;
+ }
+ }
+ else
+ {
+ /* Add prefix */
+ DBUG_ASSERT(changed_length >0 && (int) changed_length >= move_length);
+ log_pos[0]= KEY_OP_ADD_PREFIX;
+ int2store(log_pos+1, move_length);
+ int2store(log_pos+3, changed_length);
+ log_pos+= 5;
+ }
+
+ translog_parts= 1;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
+ log_data);
+ if (changed_length)
+ {
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= ((char*) buff +
+ info->s->keypage_header);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= changed_length;
+ translog_parts= 2;
+ }
+
+#ifdef EXTRA_DEBUG_KEY_CHANGES
+ {
+ int page_length= _ma_get_page_used(info->s, buff);
+ ha_checksum crc;
+ crc= my_checksum(0, buff + LSN_STORE_SIZE, page_length - LSN_STORE_SIZE);
+ log_pos[0]= KEY_OP_CHECK;
+ int2store(log_pos+1, page_length);
+ int4store(log_pos+3, crc);
+
+ log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7;
+ changed_length+= 7;
+ translog_parts++;
+ }
+#endif
+
+ DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length + changed_length,
+ TRANSLOG_INTERNAL_PARTS + translog_parts,
+ log_array, log_data, NULL));
+}
+
+
+/**
+ @brief
+ Write log entry for page that has got data added or deleted at end of page
+*/
+
+my_bool _ma_log_suffix(MARIA_HA *info, my_off_t page,
+ uchar *buff, uint org_length, uint new_length)
+{
+ LSN lsn;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 3];
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 10 + 7], *log_pos;
+ int diff;
+ uint translog_parts, extra_length;
+ DBUG_ENTER("_ma_log_suffix");
+ DBUG_PRINT("enter", ("page: %lu org_length: %u new_length: %u",
+ (ulong) page, org_length, new_length));
+
+ page/= info->s->block_size;
+
+ log_pos= log_data + FILEID_STORE_SIZE;
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+
+ if ((diff= (int) (new_length - org_length)) < 0)
+ {
+ log_pos[0]= KEY_OP_DEL_SUFFIX;
+ int2store(log_pos+1, -diff);
+ log_pos+= 3;
+ translog_parts= 1;
+ extra_length= 0;
+ }
+ else
+ {
+ log_pos[0]= KEY_OP_ADD_SUFFIX;
+ int2store(log_pos+1, diff);
+ log_pos+= 3;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) buff + org_length;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= (uint) diff;
+ translog_parts= 2;
+ extra_length= (uint) diff;
+ }
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
+ log_data);
+
+#ifdef EXTRA_DEBUG_KEY_CHANGES
+ {
+ ha_checksum crc;
+ crc= my_checksum(0, buff + LSN_STORE_SIZE, new_length - LSN_STORE_SIZE);
+ log_pos[0]= KEY_OP_CHECK;
+ int2store(log_pos+1, new_length);
+ int4store(log_pos+3, crc);
+
+ log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7;
+ extra_length+= 7;
+ translog_parts++;
+ }
+#endif
+
+ DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length + extra_length,
+ TRANSLOG_INTERNAL_PARTS + translog_parts,
+ log_array, log_data, NULL));
+}
+
+
+/**
+ @brief Log that a key was added to the page
+
+ @param buff Page buffer
+ @param buff_length Original length of buff (before key was added)
+
+ @note
+ If handle_overflow is set, then we have to protect against
+ logging changes that is outside of the page.
+ This may happen during underflow() handling where the buffer
+ in memory temporary contains more data than block_size
+*/
+
+my_bool _ma_log_add(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint buff_length, uchar *key_pos,
+ uint changed_length, int move_length,
+ my_bool handle_overflow __attribute__ ((unused)))
+{
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3 + 3 + 3 + 3 + 7];
+ uchar *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 3];
+ uint offset= (uint) (key_pos - buff);
+ uint page_length= info->s->block_size - KEYPAGE_CHECKSUM_SIZE;
+ uint translog_parts;
+ DBUG_ENTER("_ma_log_add");
+ DBUG_PRINT("enter", ("page: %lu org_page_length: %u changed_length: %u "
+ "move_length: %d",
+ (ulong) page, buff_length, changed_length,
+ move_length));
+ DBUG_ASSERT(info->s->now_transactional);
+
+ /*
+ Write REDO entry that contains the logical operations we need
+ to do the page
+ */
+ log_pos= log_data + FILEID_STORE_SIZE;
+ page/= info->s->block_size;
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+
+ if (buff_length + move_length > page_length)
+ {
+ /*
+ Overflow. Cut either key or data from page end so that key fits
+ The code that splits the too big page will ignore logging any
+ data over page_length
+ */
+ DBUG_ASSERT(handle_overflow);
+ if (offset + changed_length > page_length)
+ {
+ changed_length= page_length - offset;
+ move_length= 0;
+ }
+ else
+ {
+ uint diff= buff_length + move_length - page_length;
+ log_pos[0]= KEY_OP_DEL_SUFFIX;
+ int2store(log_pos+1, diff);
+ log_pos+= 3;
+ buff_length= page_length - move_length;
+ }
+ }
+
+ if (offset == buff_length)
+ log_pos[0]= KEY_OP_ADD_SUFFIX;
+ else
+ {
+ log_pos[0]= KEY_OP_OFFSET;
+ int2store(log_pos+1, offset);
+ log_pos+= 3;
+ if (move_length)
+ {
+ log_pos[0]= KEY_OP_SHIFT;
+ int2store(log_pos+1, move_length);
+ log_pos+= 3;
+ }
+ log_pos[0]= KEY_OP_CHANGE;
+ }
+ int2store(log_pos+1, changed_length);
+ log_pos+= 3;
+ translog_parts= 2;
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
+ log_data);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= key_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= changed_length;
+
+#ifdef EXTRA_DEBUG_KEY_CHANGES
+ {
+ MARIA_SHARE *share= info->s;
+ ha_checksum crc;
+ uint save_page_length= _ma_get_page_used(share, buff);
+ uint new_length= buff_length + move_length;
+ _ma_store_page_used(share, buff, new_length);
+ crc= my_checksum(0, buff + LSN_STORE_SIZE, new_length - LSN_STORE_SIZE);
+ log_pos[0]= KEY_OP_CHECK;
+ int2store(log_pos+1, new_length);
+ int4store(log_pos+3, crc);
+
+ log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7;
+ changed_length+= 7;
+ translog_parts++;
+ _ma_store_page_used(share, buff, save_page_length);
+ }
+#endif
+
+ if (translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length + changed_length,
+ TRANSLOG_INTERNAL_PARTS + translog_parts,
+ log_array, log_data, NULL))
+ DBUG_RETURN(-1);
+ DBUG_RETURN(0);
+}
+
+
+/****************************************************************************
+ Redo of key pages
+****************************************************************************/
+
+/**
+ @brief Apply LOGREC_REDO_INDEX_NEW_PAGE
+
+ @param info Maria handler
+ @param header Header (without FILEID)
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn,
+ const uchar *header, uint length)
+{
+ ulonglong root_page= page_korr(header);
+ ulonglong free_page= page_korr(header + PAGE_STORE_SIZE);
+ uint key_nr= key_nr_korr(header + PAGE_STORE_SIZE * 2);
+ my_bool page_type_flag= header[PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE];
+ enum pagecache_page_lock unlock_method;
+ enum pagecache_page_pin unpin_method;
+ MARIA_PINNED_PAGE page_link;
+ my_off_t file_size;
+ uchar *buff;
+ uint result;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_apply_redo_index_new_page");
+ DBUG_PRINT("enter", ("root_page: %lu free_page: %lu",
+ (ulong) root_page, (ulong) free_page));
+
+ /* Set header to point at key data */
+
+ share->state.changed|= (STATE_CHANGED | STATE_NOT_OPTIMIZED_KEYS |
+ STATE_NOT_SORTED_PAGES);
+
+ header+= PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE + 1;
+ length-= PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE + 1;
+
+ /* free_page is 0 if we shouldn't set key_del */
+ if (free_page)
+ {
+ if (free_page != IMPOSSIBLE_PAGE_NO)
+ share->state.key_del= (my_off_t) free_page * share->block_size;
+ else
+ share->state.key_del= HA_OFFSET_ERROR;
+ }
+ file_size= (my_off_t) (root_page + 1) * share->block_size;
+
+ /* If root page */
+ if (page_type_flag &&
+ cmp_translog_addr(lsn, share->state.is_of_horizon) >= 0)
+ share->state.key_root[key_nr]= file_size - share->block_size;
+
+ if (file_size > info->state->key_file_length)
+ {
+ info->state->key_file_length= file_size;
+ buff= info->keyread_buff;
+ info->keyread_buff_used= 1;
+ unlock_method= PAGECACHE_LOCK_WRITE;
+ unpin_method= PAGECACHE_PIN;
+ }
+ else
+ {
+ if (!(buff= pagecache_read(share->pagecache, &share->kfile,
+ root_page, 0, 0,
+ PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
+ &page_link.link)))
+ {
+ if (my_errno != HA_ERR_FILE_TOO_SHORT &&
+ my_errno != HA_ERR_WRONG_CRC)
+ {
+ result= 1;
+ goto err;
+ }
+ buff= pagecache_block_link_to_buffer(page_link.link);
+ }
+ else if (lsn_korr(buff) >= lsn)
+ {
+ /* Already applied */
+ result= 0;
+ goto err;
+ }
+ unlock_method= PAGECACHE_LOCK_LEFT_WRITELOCKED;
+ unpin_method= PAGECACHE_PIN_LEFT_PINNED;
+ }
+
+ /* Write modified page */
+ bzero(buff, LSN_STORE_SIZE);
+ memcpy(buff + LSN_STORE_SIZE, header, length);
+ bzero(buff + LSN_STORE_SIZE + length,
+ share->block_size - LSN_STORE_SIZE - KEYPAGE_CHECKSUM_SIZE - length);
+ bfill(buff + share->block_size - KEYPAGE_CHECKSUM_SIZE,
+ KEYPAGE_CHECKSUM_SIZE, (uchar) 255);
+
+ result= 0;
+ if (unlock_method == PAGECACHE_LOCK_WRITE &&
+ pagecache_write(share->pagecache,
+ &share->kfile, root_page, 0,
+ buff, PAGECACHE_PLAIN_PAGE,
+ unlock_method, unpin_method,
+ PAGECACHE_WRITE_DELAY, &page_link.link,
+ LSN_IMPOSSIBLE))
+ result= 1;
+
+ /* Mark page to be unlocked and written at _ma_unpin_all_pages() */
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+ DBUG_RETURN(result);
+
+err:
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ DBUG_RETURN(result);
+}
+
+
+/**
+ @brief Apply LOGREC_REDO_INDEX_FREE_PAGE
+
+ @param info Maria handler
+ @param header Header (without FILEID)
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+uint _ma_apply_redo_index_free_page(MARIA_HA *info,
+ LSN lsn,
+ const uchar *header)
+{
+ ulonglong page= page_korr(header);
+ ulonglong free_page= page_korr(header + PAGE_STORE_SIZE);
+ my_off_t old_link;
+ MARIA_PINNED_PAGE page_link;
+ MARIA_SHARE *share= info->s;
+ uchar *buff;
+ int result;
+ DBUG_ENTER("_ma_apply_redo_index_free_page");
+ DBUG_PRINT("enter", ("page: %lu free_page: %lu",
+ (ulong) page, (ulong) free_page));
+
+ share->state.changed|= (STATE_CHANGED | STATE_NOT_OPTIMIZED_KEYS |
+ STATE_NOT_SORTED_PAGES);
+
+ share->state.key_del= (my_off_t) page * share->block_size;
+ old_link= ((free_page != IMPOSSIBLE_PAGE_NO) ?
+ (my_off_t) free_page * share->block_size :
+ HA_OFFSET_ERROR);
+ if (!(buff= pagecache_read(share->pagecache, &share->kfile,
+ page, 0, 0,
+ PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
+ &page_link.link)))
+ {
+ result= (uint) my_errno;
+ goto err;
+ }
+ if (lsn_korr(buff) >= lsn)
+ {
+ /* Already applied */
+ result= 0;
+ goto err;
+ }
+ /* Free page */
+ bzero(buff + LSN_STORE_SIZE, share->keypage_header - LSN_STORE_SIZE);
+ _ma_store_keynr(share, buff, (uchar) MARIA_DELETE_KEY_NR);
+ _ma_store_page_used(share, buff, share->keypage_header + 8);
+ mi_sizestore(buff + share->keypage_header, old_link);
+ share->state.changed|= STATE_NOT_SORTED_PAGES;
+
+#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
+ {
+ bzero(buff + share->keypage_header + 8,
+ share->block_size - share->keypage_header - 8 -
+ KEYPAGE_CHECKSUM_SIZE);
+ }
+#endif
+
+ /* Mark page to be unlocked and written at _ma_unpin_all_pages() */
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+ DBUG_RETURN(0);
+
+err:
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ DBUG_RETURN(result);
+}
+
+
+/**
+ @brief Apply LOGREC_REDO_INDEX
+
+ @fn ma_apply_redo_index()
+ @param info Maria handler
+ @param header Header (without FILEID)
+
+ @notes
+ Data for this part is a set of logical instructions of how to
+ construct the key page.
+
+ Information of the layout of the components for REDO_INDEX:
+
+ Name Parameters (in byte) Information
+ KEY_OP_OFFSET 2 Set position for next operations
+ KEY_OP_SHIFT 2 (signed int) How much to shift down or up
+ KEY_OP_CHANGE 2 length, data Data to replace at 'pos'
+ KEY_OP_ADD_PREFIX 2 move-length How much data should be moved up
+ 2 change-length Data to be replaced at page start
+ KEY_OP_DEL_PREFIX 2 length Bytes to be deleted at page start
+ KEY_OP_ADD_SUFFIX 2 length, data Add data to end of page
+ KEY_OP_DEL_SUFFIX 2 length Reduce page length with this
+ Sets position to start of page
+ KEY_OP_CHECK 6 page_length[2},CRC Used only when debugging
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+long my_counter= 0;
+
+uint _ma_apply_redo_index(MARIA_HA *info,
+ LSN lsn, const uchar *header, uint head_length)
+{
+ MARIA_SHARE *share= info->s;
+ ulonglong page= page_korr(header);
+ MARIA_PINNED_PAGE page_link;
+ uchar *buff;
+ const uchar *header_end= header + head_length;
+ uint page_offset= 0;
+ uint nod_flag, page_length, keypage_header;
+ int result;
+ uint org_page_length;
+ DBUG_ENTER("_ma_apply_redo_index");
+ DBUG_PRINT("enter", ("page: %lu", (ulong) page));
+
+ /* Set header to point at key data */
+ header+= PAGE_STORE_SIZE;
+
+ if (!(buff= pagecache_read(share->pagecache, &share->kfile,
+ page, 0, 0,
+ PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
+ &page_link.link)))
+ {
+ result= 1;
+ goto err;
+ }
+ if (lsn_korr(buff) >= lsn)
+ {
+ /* Already applied */
+ result= 0;
+ goto err;
+ }
+
+ _ma_get_used_and_nod(share, buff, page_length, nod_flag);
+ keypage_header= share->keypage_header;
+ org_page_length= page_length;
+ DBUG_PRINT("info", ("page_length: %u", page_length));
+
+ /* Apply modifications to page */
+ do
+ {
+ switch ((enum en_key_op) (*header++)) {
+ case KEY_OP_OFFSET: /* 1 */
+ page_offset= uint2korr(header);
+ header+= 2;
+ DBUG_ASSERT(page_offset >= keypage_header && page_offset <= page_length);
+ break;
+ case KEY_OP_SHIFT: /* 2 */
+ {
+ int length= sint2korr(header);
+ header+= 2;
+ DBUG_ASSERT(page_offset != 0 && page_offset <= page_length &&
+ page_length + length < share->block_size);
+
+ if (length < 0)
+ bmove(buff + page_offset, buff + page_offset - length,
+ page_length - page_offset + length);
+ else
+ bmove_upp(buff + page_length + length, buff + page_length,
+ page_length - page_offset);
+ page_length+= length;
+ break;
+ }
+ case KEY_OP_CHANGE: /* 3 */
+ {
+ uint length= uint2korr(header);
+ DBUG_ASSERT(page_offset != 0 && page_offset + length <= page_length);
+
+ memcpy(buff + page_offset, header + 2 , length);
+ header+= 2 + length;
+ break;
+ }
+ case KEY_OP_ADD_PREFIX: /* 4 */
+ {
+ uint insert_length= uint2korr(header);
+ uint changed_length= uint2korr(header+2);
+ DBUG_ASSERT(insert_length <= changed_length &&
+ page_length + changed_length <= share->block_size);
+
+ bmove_upp(buff + page_length + insert_length, buff + page_length,
+ page_length - keypage_header);
+ memcpy(buff + keypage_header, header + 4 , changed_length);
+ header+= 4 + changed_length;
+ page_length+= insert_length;
+ break;
+ }
+ case KEY_OP_DEL_PREFIX: /* 5 */
+ {
+ uint length= uint2korr(header);
+ header+= 2;
+ DBUG_ASSERT(length <= page_length - keypage_header);
+
+ bmove(buff + keypage_header, buff + keypage_header +
+ length, page_length - keypage_header - length);
+ page_length-= length;
+
+ page_offset= keypage_header; /* Prepare for change */
+ break;
+ }
+ case KEY_OP_ADD_SUFFIX: /* 6 */
+ {
+ uint insert_length= uint2korr(header);
+ DBUG_ASSERT(page_length + insert_length <= share->block_size);
+ memcpy(buff + page_length, header+2, insert_length);
+
+ page_length+= insert_length;
+ header+= 2 + insert_length;
+ break;
+ }
+ case KEY_OP_DEL_SUFFIX: /* 7 */
+ {
+ uint del_length= uint2korr(header);
+ header+= 2;
+ DBUG_ASSERT(page_length - del_length >= keypage_header);
+ page_length-= del_length;
+ break;
+ }
+ case KEY_OP_CHECK: /* 8 */
+ {
+#ifdef EXTRA_DEBUG_KEY_CHANGES
+ uint check_page_length;
+ ha_checksum crc;
+ check_page_length= uint2korr(header);
+ crc= uint4korr(header+2);
+ _ma_store_page_used(share, buff, page_length);
+ DBUG_ASSERT(check_page_length == page_length);
+ DBUG_ASSERT(crc == (uint32) my_checksum(0, buff + LSN_STORE_SIZE,
+ page_length- LSN_STORE_SIZE));
+#endif
+ header+= 6;
+ break;
+ }
+ case KEY_OP_NONE:
+ default:
+ DBUG_ASSERT(0);
+ result= 1;
+ goto err;
+ }
+ } while (header < header_end);
+ DBUG_ASSERT(header == header_end);
+
+ /* Write modified page */
+ _ma_store_page_used(share, buff, page_length);
+
+ /*
+ Clean old stuff up. Gives us better compression of we archive things
+ and makes things easer to debug
+ */
+ if (page_length < org_page_length)
+ bzero(buff + page_length, org_page_length-page_length);
+
+ /* Mark page to be unlocked and written at _ma_unpin_all_pages() */
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+ DBUG_RETURN(0);
+
+err:
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ DBUG_RETURN(result);
+}
+
+
+/****************************************************************************
+ Undo of key block changes
+****************************************************************************/
+
+/**
+ @brief Undo of insert of key (ie, delete the inserted key)
+*/
+
+my_bool _ma_apply_undo_key_insert(MARIA_HA *info, LSN undo_lsn,
+ const uchar *header, uint length)
+{
+ LSN lsn;
+ my_bool res;
+ uint keynr;
+ uchar key[HA_MAX_KEY_BUFF];
+ MARIA_SHARE *share= info->s;
+ my_off_t new_root;
+ struct st_msg_to_write_hook_for_undo_key msg;
+ DBUG_ENTER("_ma_apply_undo_key_insert");
+
+ share->state.changed|= (STATE_CHANGED | STATE_NOT_OPTIMIZED_KEYS |
+ STATE_NOT_SORTED_PAGES);
+ keynr= key_nr_korr(header);
+ length-= KEY_NR_STORE_SIZE;
+
+ /* We have to copy key as _ma_ck_real_delete() may change it */
+ memcpy(key, header + KEY_NR_STORE_SIZE, length);
+ DBUG_DUMP("key", key, length);
+
+ new_root= share->state.key_root[keynr];
+ res= _ma_ck_real_delete(info, share->keyinfo+keynr, key,
+ length - share->rec_reflength, &new_root);
+
+ msg.root= &share->state.key_root[keynr];
+ msg.value= new_root;
+ msg.keynr= keynr;
+
+ if (_ma_write_clr(info, undo_lsn, *msg.root == msg.value ?
+ LOGREC_UNDO_KEY_INSERT : LOGREC_UNDO_KEY_INSERT_WITH_ROOT,
+ 0, 0, &lsn, (void*) &msg))
+ res= 1;
+
+ _ma_unpin_all_pages_and_finalize_row(info, lsn);
+ DBUG_RETURN(res);
+}
+
+
+/**
+ @brief Undo of delete of key (ie, insert the deleted key)
+*/
+
+my_bool _ma_apply_undo_key_delete(MARIA_HA *info, LSN undo_lsn,
+ const uchar *header, uint length)
+{
+ LSN lsn;
+ my_bool res;
+ uint keynr;
+ uchar key[HA_MAX_KEY_BUFF];
+ MARIA_SHARE *share= info->s;
+ my_off_t new_root;
+ struct st_msg_to_write_hook_for_undo_key msg;
+ DBUG_ENTER("_ma_apply_undo_key_delete");
+
+ share->state.changed|= (STATE_CHANGED | STATE_NOT_OPTIMIZED_KEYS |
+ STATE_NOT_SORTED_PAGES);
+ keynr= key_nr_korr(header);
+ length-= KEY_NR_STORE_SIZE;
+
+ /* We have to copy key as _ma_ck_real_write_btree() may change it */
+ memcpy(key, header + KEY_NR_STORE_SIZE, length);
+ _ma_dpointer(info, key + length, info->cur_row.lastpos);
+ DBUG_DUMP("key", key, length + share->rec_reflength);
+
+ new_root= share->state.key_root[keynr];
+ res= _ma_ck_real_write_btree(info, share->keyinfo+keynr, key,
+ length,
+ &new_root,
+ share->keyinfo[keynr].write_comp_flag);
+
+ msg.root= &share->state.key_root[keynr];
+ msg.value= new_root;
+ msg.keynr= keynr;
+ if (_ma_write_clr(info, undo_lsn,
+ *msg.root == msg.value ?
+ LOGREC_UNDO_KEY_DELETE : LOGREC_UNDO_KEY_DELETE_WITH_ROOT,
+ 0, 0, &lsn,
+ (void*) &msg))
+ res= 1;
+
+ _ma_unpin_all_pages_and_finalize_row(info, lsn);
+ DBUG_RETURN(res);
+}
+
+
+/****************************************************************************
+ Handle some local variables
+****************************************************************************/
+
+/**
+ @brief lock key_del for other threads usage
+
+ @fn _ma_lock_key_del()
+ @param info Maria handler
+ @param insert_at_end Set to 1 if we are doing an insert
+
+ @notes
+ To allow higher concurrency in the common case where we do inserts
+ and we don't have any linked blocks we do the following:
+ - Mark in info->used_key_del that we are not using key_del
+ - Return at once (without marking key_del as used)
+
+ This is safe as we in this case don't write current_key_del into
+ the redo log and during recover we are not updating key_del.
+*/
+
+my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end)
+{
+ MARIA_SHARE *share= info->s;
+
+ if (info->used_key_del != 1)
+ {
+ pthread_mutex_lock(&share->intern_lock);
+ if (share->state.key_del == HA_OFFSET_ERROR && insert_at_end)
+ {
+ pthread_mutex_unlock(&share->intern_lock);
+ info->used_key_del= 2; /* insert-with-append */
+ return 1;
+ }
+#ifdef THREAD
+ while (share->used_key_del)
+ pthread_cond_wait(&share->intern_cond, &share->intern_lock);
+#endif
+ info->used_key_del= 1;
+ share->used_key_del= 1;
+ share->current_key_del= share->state.key_del;
+ pthread_mutex_unlock(&share->intern_lock);
+ }
+ return 0;
+}
+
+
+/**
+ @brief copy changes to key_del and unlock it
+*/
+
+void _ma_unlock_key_del(MARIA_HA *info)
+{
+ DBUG_ASSERT(info->used_key_del);
+ if (info->used_key_del == 1) /* Ignore insert-with-append */
+ {
+ MARIA_SHARE *share= info->s;
+ pthread_mutex_lock(&share->intern_lock);
+ share->used_key_del= 0;
+ share->state.key_del= info->s->current_key_del;
+ pthread_mutex_unlock(&share->intern_lock);
+ pthread_cond_signal(&share->intern_cond);
+ }
+ info->used_key_del= 0;
+}
diff --git a/storage/maria/ma_key_recover.h b/storage/maria/ma_key_recover.h
new file mode 100644
index 00000000000..87716dc6db1
--- /dev/null
+++ b/storage/maria/ma_key_recover.h
@@ -0,0 +1,103 @@
+/* Copyright (C) 2007 Michael Widenius
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ When we have finished the write/update/delete of a row, we have cleanups to
+ do. For now it is signalling to Checkpoint that all dirtied pages have
+ their rec_lsn set and page LSN set (_ma_unpin_all_pages() has been called),
+ and that bitmap pages are correct (_ma_bitmap_release_unused() has been
+ called).
+*/
+
+/* Struct for clr_end */
+
+struct st_msg_to_write_hook_for_clr_end
+{
+ LSN previous_undo_lsn;
+ enum translog_record_type undone_record_type;
+ ha_checksum checksum_delta;
+ void *extra_msg;
+};
+
+struct st_msg_to_write_hook_for_undo_key
+{
+ my_off_t *root;
+ my_off_t value;
+ uint keynr;
+ ulonglong auto_increment;
+};
+
+
+/* Function definitions for some redo functions */
+
+my_bool _ma_write_clr(MARIA_HA *info, LSN undo_lsn,
+ enum translog_record_type undo_type,
+ my_bool store_checksum, ha_checksum checksum,
+ LSN *res_lsn, void *extra_msg);
+my_bool write_hook_for_clr_end(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info, LSN *lsn,
+ void *hook_arg);
+extern my_bool write_hook_for_undo_key(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info,
+ LSN *lsn, void *hook_arg);
+extern my_bool write_hook_for_undo_key_insert(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info,
+ LSN *lsn, void *hook_arg);
+void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn);
+
+my_bool _ma_log_prefix(MARIA_HA *info, my_off_t page,
+ uchar *buff, uint changed_length,
+ int move_length);
+my_bool _ma_log_suffix(MARIA_HA *info, my_off_t page,
+ uchar *buff, uint org_length,
+ uint new_length);
+my_bool _ma_log_add(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint buff_length, uchar *key_pos,
+ uint changed_length, int move_length,
+ my_bool handle_overflow);
+
+uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn,
+ const uchar *header, uint length);
+uint _ma_apply_redo_index_free_page(MARIA_HA *info, LSN lsn,
+ const uchar *header);
+uint _ma_apply_redo_index(MARIA_HA *info,
+ LSN lsn, const uchar *header, uint length);
+
+my_bool _ma_apply_undo_key_insert(MARIA_HA *info, LSN undo_lsn,
+ const uchar *header, uint length);
+my_bool _ma_apply_undo_key_delete(MARIA_HA *info, LSN undo_lsn,
+ const uchar *header, uint length);
+
+static inline void _ma_finalize_row(MARIA_HA *info)
+{
+ info->trn->rec_lsn= LSN_IMPOSSIBLE;
+}
+
+/* unpinning is often the last operation before finalizing */
+
+static inline void _ma_unpin_all_pages_and_finalize_row(MARIA_HA *info,
+ LSN undo_lsn)
+{
+ _ma_unpin_all_pages(info, undo_lsn);
+ _ma_finalize_row(info);
+}
+
+extern my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end);
+extern void _ma_unlock_key_del(MARIA_HA *info);
+static inline void _ma_fast_unlock_key_del(MARIA_HA *info)
+{
+ if (info->used_key_del)
+ _ma_unlock_key_del(info);
+}
diff --git a/storage/maria/ma_keycache.c b/storage/maria/ma_keycache.c
new file mode 100644
index 00000000000..9295904dbcf
--- /dev/null
+++ b/storage/maria/ma_keycache.c
@@ -0,0 +1,164 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Key cache assignments
+*/
+
+#include "maria_def.h"
+
+/*
+ Assign pages of the index file for a table to a key cache
+
+ SYNOPSIS
+ maria_assign_to_pagecache()
+ info open table
+ key_map map of indexes to assign to the key cache
+ pagecache_ptr pointer to the key cache handle
+ assign_lock Mutex to lock during assignment
+
+ PREREQUESTS
+ One must have a READ lock or a WRITE lock on the table when calling
+ the function to ensure that there is no other writers to it.
+
+ The caller must also ensure that one doesn't call this function from
+ two different threads with the same table.
+
+ NOTES
+ At present pages for all indexes must be assigned to the same key cache.
+ In future only pages for indexes specified in the key_map parameter
+ of the table will be assigned to the specified key cache.
+
+ RETURN VALUE
+ 0 If a success
+ # Error code
+*/
+
+int maria_assign_to_pagecache(MARIA_HA *info,
+ ulonglong key_map __attribute__((unused)),
+ PAGECACHE *pagecache)
+{
+ int error= 0;
+ MARIA_SHARE* share= info->s;
+ DBUG_ENTER("maria_assign_to_pagecache");
+ DBUG_PRINT("enter",
+ ("old_pagecache_handle: 0x%lx new_pagecache_handle: 0x%lx",
+ (long) share->pagecache, (long) pagecache));
+
+ /*
+ Skip operation if we didn't change key cache. This can happen if we
+ call this for all open instances of the same table
+ */
+ if (share->pagecache == pagecache)
+ DBUG_RETURN(0);
+
+ /*
+ First flush all blocks for the table in the old key cache.
+ This is to ensure that the disk is consistent with the data pages
+ in memory (which may not be the case if the table uses delayed_key_write)
+
+ Note that some other read thread may still fill in the key cache with
+ new blocks during this call and after, but this doesn't matter as
+ all threads will start using the new key cache for their next call to
+ maria library and we know that there will not be any changed blocks
+ in the old key cache.
+ */
+
+ if (flush_pagecache_blocks(share->pagecache, &share->kfile, FLUSH_RELEASE))
+ {
+ error= my_errno;
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info); /* Mark that table must be checked */
+ }
+
+ /*
+ Flush the new key cache for this file. This is needed to ensure
+ that there is no old blocks (with outdated data) left in the new key
+ cache from an earlier assign_to_keycache operation
+
+ (This can never fail as there is never any not written data in the
+ new key cache)
+ */
+ (void) flush_pagecache_blocks(pagecache, &share->kfile, FLUSH_RELEASE);
+
+ /*
+ ensure that setting the key cache and changing the multi_pagecache
+ is done atomicly
+ */
+ pthread_mutex_lock(&share->intern_lock);
+ /*
+ Tell all threads to use the new key cache
+ This should be seen at the lastes for the next call to an maria function.
+ */
+ share->pagecache= pagecache;
+
+ /* store the key cache in the global hash structure for future opens */
+ if (multi_pagecache_set((uchar*) share->unique_file_name,
+ share->unique_name_length,
+ share->pagecache))
+ error= my_errno;
+ pthread_mutex_unlock(&share->intern_lock);
+ DBUG_RETURN(error);
+}
+
+
+/*
+ Change all MARIA entries that uses one key cache to another key cache
+
+ SYNOPSIS
+ maria_change_pagecache()
+ old_pagecache Old key cache
+ new_pagecache New key cache
+
+ NOTES
+ This is used when we delete one key cache.
+
+ To handle the case where some other threads tries to open an MARIA
+ table associated with the to-be-deleted key cache while this operation
+ is running, we have to call 'multi_pagecache_change()' from this
+ function while we have a lock on the MARIA table list structure.
+
+ This is safe as long as it's only MARIA that is using this specific
+ key cache.
+*/
+
+
+void maria_change_pagecache(PAGECACHE *old_pagecache,
+ PAGECACHE *new_pagecache)
+{
+ LIST *pos;
+ DBUG_ENTER("maria_change_pagecache");
+
+ /*
+ Lock list to ensure that no one can close the table while we manipulate it
+ */
+ pthread_mutex_lock(&THR_LOCK_maria);
+ for (pos=maria_open_list ; pos ; pos=pos->next)
+ {
+ MARIA_HA *info= (MARIA_HA*) pos->data;
+ MARIA_SHARE *share= info->s;
+ if (share->pagecache == old_pagecache)
+ maria_assign_to_pagecache(info, (ulonglong) ~0, new_pagecache);
+ }
+
+ /*
+ We have to do the following call while we have the lock on the
+ MARIA list structure to ensure that another thread is not trying to
+ open a new table that will be associted with the old key cache
+ */
+ multi_pagecache_change(old_pagecache, new_pagecache);
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ DBUG_VOID_RETURN;
+}
diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c
new file mode 100644
index 00000000000..a25820b81fb
--- /dev/null
+++ b/storage/maria/ma_locking.c
@@ -0,0 +1,582 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ locking of isam-tables.
+ reads info from a isam-table. Must be first request before doing any furter
+ calls to any isamfunktion. Is used to allow many process use the same
+ isamdatabase.
+*/
+
+#include "ma_ftdefs.h"
+
+ /* lock table by F_UNLCK, F_RDLCK or F_WRLCK */
+
+int maria_lock_database(MARIA_HA *info, int lock_type)
+{
+ int error;
+ uint count;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("maria_lock_database");
+ DBUG_PRINT("enter",("lock_type: %d old lock %d r_locks: %u w_locks: %u "
+ "global_changed: %d open_count: %u name: '%s'",
+ lock_type, info->lock_type, share->r_locks,
+ share->w_locks,
+ share->global_changed, share->state.open_count,
+ share->index_file_name));
+ if (share->options & HA_OPTION_READ_ONLY_DATA ||
+ info->lock_type == lock_type)
+ DBUG_RETURN(0);
+ if (lock_type == F_EXTRA_LCK) /* Used by TMP tables */
+ {
+ ++share->w_locks;
+ ++share->tot_locks;
+ info->lock_type= lock_type;
+ DBUG_RETURN(0);
+ }
+
+ error=0;
+ pthread_mutex_lock(&share->intern_lock);
+ if (share->kfile.file >= 0) /* May only be false on windows */
+ {
+ switch (lock_type) {
+ case F_UNLCK:
+ maria_ftparser_call_deinitializer(info);
+ if (info->lock_type == F_RDLCK)
+ {
+ count= --share->r_locks;
+ _ma_restore_status(info);
+ }
+ else
+ {
+ count= --share->w_locks;
+ _ma_update_status(info);
+ }
+ --share->tot_locks;
+ if (info->lock_type == F_WRLCK && !share->w_locks)
+ {
+ /* pages of transactional tables get flushed at Checkpoint */
+ if (!share->base.born_transactional && !share->temporary &&
+ _ma_flush_table_files(info,
+ share->delay_key_write ? MARIA_FLUSH_DATA :
+ MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
+ FLUSH_KEEP, FLUSH_KEEP))
+ error= my_errno;
+ }
+ if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
+ {
+ if (end_io_cache(&info->rec_cache))
+ {
+ error=my_errno;
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info);
+ }
+ }
+ if (!count)
+ {
+ DBUG_PRINT("info",("changed: %u w_locks: %u",
+ (uint) share->changed, share->w_locks));
+ if (share->changed && !share->w_locks)
+ {
+#ifdef HAVE_MMAP
+ if ((info->s->mmaped_length !=
+ info->s->state.state.data_file_length) &&
+ (info->s->nonmmaped_inserts > MAX_NONMAPPED_INSERTS))
+ {
+ if (info->s->concurrent_insert)
+ rw_wrlock(&info->s->mmap_lock);
+ _ma_remap_file(info, info->s->state.state.data_file_length);
+ info->s->nonmmaped_inserts= 0;
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->mmap_lock);
+ }
+#endif
+#ifdef EXTERNAL_LOCKING
+ share->state.process= share->last_process=share->this_process;
+ share->state.unique= info->last_unique= info->this_unique;
+ share->state.update_count= info->last_loop= ++info->this_loop;
+#endif
+ /* transactional tables rather flush their state at Checkpoint */
+ if (!share->base.born_transactional)
+ {
+ if (_ma_state_info_write_sub(share->kfile.file, &share->state, 1))
+ error= my_errno;
+ else
+ {
+ /* A value of 0 means below means "state flushed" */
+ share->changed= 0;
+ }
+ }
+ if (maria_flush)
+ {
+ if (_ma_sync_table_files(info))
+ error= my_errno;
+ }
+ else
+ share->not_flushed=1;
+ if (error)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info);
+ }
+ }
+ }
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ info->lock_type= F_UNLCK;
+ /*
+ Verify that user of the table cleaned up after itself. Not in
+ recovery, as for example maria_extra(HA_EXTRA_PREPARE_FOR_RENAME) may
+ call us here, with transactionality temporarily disabled.
+ */
+ DBUG_ASSERT(maria_in_recovery ||
+ share->now_transactional == share->base.born_transactional);
+ break;
+ case F_RDLCK:
+ if (info->lock_type == F_WRLCK)
+ {
+ /*
+ Change RW to READONLY
+
+ mysqld does not turn write locks to read locks,
+ so we're never here in mysqld.
+ */
+ share->w_locks--;
+ share->r_locks++;
+ info->lock_type=lock_type;
+ break;
+ }
+#ifdef MARIA_EXTERNAL_LOCKING
+ if (!share->r_locks && !share->w_locks)
+ {
+ /* note that a transactional table should not do this */
+ if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
+ {
+ error=my_errno;
+ break;
+ }
+ }
+#endif
+ VOID(_ma_test_if_changed(info));
+ share->r_locks++;
+ share->tot_locks++;
+ info->lock_type=lock_type;
+ break;
+ case F_WRLCK:
+ if (info->lock_type == F_RDLCK)
+ { /* Change READONLY to RW */
+ if (share->r_locks == 1)
+ {
+ share->r_locks--;
+ share->w_locks++;
+ info->lock_type=lock_type;
+ break;
+ }
+ }
+#ifdef MARIA_EXTERNAL_LOCKING
+ if (!(share->options & HA_OPTION_READ_ONLY_DATA))
+ {
+ if (!share->w_locks)
+ {
+ if (!share->r_locks)
+ {
+ /*
+ Note that transactional tables should not do this.
+ If we enabled this code, we should make sure to skip it if
+ born_transactional is true. We should not test
+ now_transactional to decide if we can call
+ _ma_state_info_read_dsk(), because it can temporarily be 0
+ (TRUNCATE on a partitioned table) and thus it would make a state
+ modification below without mutex, confusing a concurrent
+ checkpoint running.
+ Even if this code was enabled only for non-transactional tables:
+ in scenario LOCK TABLE t1 WRITE; INSERT INTO t1; DELETE FROM t1;
+ state on disk read by DELETE is obsolete as it was not flushed
+ at the end of INSERT. MyISAM same. It however causes no issue as
+ maria_delete_all_rows() calls _ma_reset_status() thus is not
+ influenced by the obsolete read values.
+ */
+ if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
+ {
+ error=my_errno;
+ break;
+ }
+ }
+ }
+ }
+#endif /* defined(MARIA_EXTERNAL_LOCKING) */
+ VOID(_ma_test_if_changed(info));
+
+ info->lock_type=lock_type;
+ info->invalidator=info->s->invalidator;
+ share->w_locks++;
+ share->tot_locks++;
+ break;
+ default:
+ DBUG_ASSERT(0);
+ break; /* Impossible */
+ }
+ }
+#ifdef __WIN__
+ else
+ {
+ /*
+ Check for bad file descriptors if this table is part
+ of a merge union. Failing to capture this may cause
+ a crash on windows if the table is renamed and
+ later on referenced by the merge table.
+ */
+ if( info->owned_by_merge && (info->s)->kfile.file < 0 )
+ {
+ error = HA_ERR_NO_SUCH_TABLE;
+ }
+ }
+#endif
+ pthread_mutex_unlock(&share->intern_lock);
+ DBUG_RETURN(error);
+} /* maria_lock_database */
+
+
+/****************************************************************************
+ The following functions are called by thr_lock() in threaded applications
+****************************************************************************/
+
+/*
+ Create a copy of the current status for the table
+
+ SYNOPSIS
+ _ma_get_status()
+ param Pointer to Myisam handler
+ concurrent_insert Set to 1 if we are going to do concurrent inserts
+ (THR_WRITE_CONCURRENT_INSERT was used)
+*/
+
+void _ma_get_status(void* param, int concurrent_insert)
+{
+ MARIA_HA *info=(MARIA_HA*) param;
+ DBUG_ENTER("_ma_get_status");
+ DBUG_PRINT("info",("key_file: %ld data_file: %ld concurrent_insert: %d",
+ (long) info->s->state.state.key_file_length,
+ (long) info->s->state.state.data_file_length,
+ concurrent_insert));
+#ifndef DBUG_OFF
+ if (info->state->key_file_length > info->s->state.state.key_file_length ||
+ info->state->data_file_length > info->s->state.state.data_file_length)
+ DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld",
+ (long) info->state->key_file_length,
+ (long) info->state->data_file_length));
+#endif
+ info->save_state=info->s->state.state;
+ info->state= &info->save_state;
+ info->append_insert_at_end= concurrent_insert;
+ DBUG_VOID_RETURN;
+}
+
+
+void _ma_update_status(void* param)
+{
+ MARIA_HA *info=(MARIA_HA*) param;
+ MARIA_SHARE *share= info->s;
+ /*
+ Because someone may have closed the table we point at, we only
+ update the state if its our own state. This isn't a problem as
+ we are always pointing at our own lock or at a read lock.
+ (This is enforced by thr_multi_lock.c)
+ */
+ if (info->state == &info->save_state)
+ {
+#ifndef DBUG_OFF
+ DBUG_PRINT("info",("updating status: key_file: %ld data_file: %ld",
+ (long) info->state->key_file_length,
+ (long) info->state->data_file_length));
+ if (info->state->key_file_length < share->state.state.key_file_length ||
+ info->state->data_file_length < share->state.state.data_file_length)
+ DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld",
+ (long) share->state.state.key_file_length,
+ (long) share->state.state.data_file_length));
+#endif
+ /*
+ we are going to modify the state without lock's log, this would break
+ recovery if done with a transactional table.
+ */
+ DBUG_ASSERT(!info->s->base.born_transactional);
+ share->state.state= *info->state;
+ info->state= &share->state.state;
+ }
+ info->append_insert_at_end= 0;
+}
+
+
+void _ma_restore_status(void *param)
+{
+ MARIA_HA *info= (MARIA_HA*) param;
+ info->state= &info->s->state.state;
+ info->append_insert_at_end= 0;
+}
+
+
+void _ma_copy_status(void* to,void *from)
+{
+ ((MARIA_HA*) to)->state= &((MARIA_HA*) from)->save_state;
+}
+
+
+/*
+ Check if should allow concurrent inserts
+
+ IMPLEMENTATION
+ Allow concurrent inserts if we don't have a hole in the table or
+ if there is no active write lock and there is active read locks and
+ maria_concurrent_insert == 2. In this last case the new
+ row('s) are inserted at end of file instead of filling up the hole.
+
+ The last case is to allow one to inserts into a heavily read-used table
+ even if there is holes.
+
+ NOTES
+ If there is a an rtree indexes in the table, concurrent inserts are
+ disabled in maria_open()
+
+ RETURN
+ 0 ok to use concurrent inserts
+ 1 not ok
+*/
+
+my_bool _ma_check_status(void *param)
+{
+ MARIA_HA *info=(MARIA_HA*) param;
+ /*
+ The test for w_locks == 1 is here because this thread has already done an
+ external lock (in other words: w_locks == 1 means no other threads has
+ a write lock)
+ */
+ DBUG_PRINT("info",("dellink: %ld r_locks: %u w_locks: %u",
+ (long) info->s->state.dellink, (uint) info->s->r_locks,
+ (uint) info->s->w_locks));
+ return (my_bool) !(info->s->state.dellink == HA_OFFSET_ERROR ||
+ (maria_concurrent_insert == 2 && info->s->r_locks &&
+ info->s->w_locks == 1));
+}
+
+
+/****************************************************************************
+ ** functions to read / write the state
+****************************************************************************/
+
+int _ma_readinfo(register MARIA_HA *info __attribute__ ((unused)),
+ int lock_type __attribute__ ((unused)),
+ int check_keybuffer __attribute__ ((unused)))
+{
+#ifdef MARIA_EXTERNAL_LOCKING
+ DBUG_ENTER("_ma_readinfo");
+
+ if (info->lock_type == F_UNLCK)
+ {
+ MARIA_SHARE *share= info->s;
+ if (!share->tot_locks)
+ {
+ /* should not be done for transactional tables */
+ if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
+ {
+ if (!my_errno)
+ my_errno= HA_ERR_FILE_TOO_SHORT;
+ DBUG_RETURN(1);
+ }
+ }
+ if (check_keybuffer)
+ VOID(_ma_test_if_changed(info));
+ info->invalidator=info->s->invalidator;
+ }
+ else if (lock_type == F_WRLCK && info->lock_type == F_RDLCK)
+ {
+ my_errno=EACCES; /* Not allowed to change */
+ DBUG_RETURN(-1); /* when have read_lock() */
+ }
+ DBUG_RETURN(0);
+#else
+ return 0;
+#endif /* defined(MARIA_EXTERNAL_LOCKING) */
+} /* _ma_readinfo */
+
+
+/*
+ Every isam-function that uppdates the isam-database MUST end with this
+ request
+
+ NOTES
+ my_errno is not changed if this succeeds!
+*/
+
+int _ma_writeinfo(register MARIA_HA *info, uint operation)
+{
+ int error,olderror;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_writeinfo");
+ DBUG_PRINT("info",("operation: %u tot_locks: %u", operation,
+ share->tot_locks));
+
+ error=0;
+ if (share->tot_locks == 0 && !share->base.born_transactional)
+ {
+ /* transactional tables flush their state at Checkpoint */
+ if (operation)
+ { /* Two threads can't be here */
+ olderror= my_errno; /* Remember last error */
+
+#ifdef EXTERNAL_LOCKING
+ /*
+ The following only makes sense if we want to be allow two different
+ processes access the same table at the same time
+ */
+ share->state.process= share->last_process= share->this_process;
+ share->state.unique= info->last_unique= info->this_unique;
+ share->state.update_count= info->last_loop= ++info->this_loop;
+#endif
+
+ if ((error= _ma_state_info_write_sub(share->kfile.file,
+ &share->state, 1)))
+ olderror=my_errno;
+#ifdef __WIN__
+ if (maria_flush)
+ {
+ _commit(share->kfile.file);
+ _commit(info->dfile.file);
+ }
+#endif
+ my_errno=olderror;
+ }
+ }
+ else if (operation)
+ share->changed= 1; /* Mark keyfile changed */
+ DBUG_RETURN(error);
+} /* _ma_writeinfo */
+
+
+/*
+ Test if an external process has changed the database
+ (Should be called after readinfo)
+*/
+
+int _ma_test_if_changed(register MARIA_HA *info)
+{
+#ifdef EXTERNAL_LOCKING
+ MARIA_SHARE *share= info->s;
+ if (share->state.process != share->last_process ||
+ share->state.unique != info->last_unique ||
+ share->state.update_count != info->last_loop)
+ { /* Keyfile has changed */
+ DBUG_PRINT("info",("index file changed"));
+ if (share->state.process != share->this_process)
+ VOID(flush_pagecache_blocks(share->pagecache, &share->kfile,
+ FLUSH_RELEASE));
+ share->last_process=share->state.process;
+ info->last_unique= share->state.unique;
+ info->last_loop= share->state.update_count;
+ info->update|= HA_STATE_WRITTEN; /* Must use file on next */
+ info->data_changed= 1; /* For maria_is_changed */
+ return 1;
+ }
+#endif
+ return (!(info->update & HA_STATE_AKTIV) ||
+ (info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED |
+ HA_STATE_KEY_CHANGED)));
+} /* _ma_test_if_changed */
+
+
+/*
+ Put a mark in the .MYI file that someone is updating the table
+
+
+ DOCUMENTATION
+
+ state.open_count in the .MYI file is used the following way:
+ - For the first change of the .MYI file in this process open_count is
+ incremented by _ma_mark_file_changed(). (We have a write lock on the file
+ when this happens)
+ - In maria_close() it's decremented by _ma_decrement_open_count() if it
+ was incremented in the same process.
+
+ This mean that if we are the only process using the file, the open_count
+ tells us if the MARIA file wasn't properly closed. (This is true if
+ my_disable_locking is set).
+
+ open_count is not maintained on disk for transactional or temporary tables.
+*/
+
+
+int _ma_mark_file_changed(MARIA_HA *info)
+{
+ uchar buff[3];
+ register MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_mark_file_changed");
+
+ if (!(share->state.changed & STATE_CHANGED) || ! share->global_changed)
+ {
+ share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
+ STATE_NOT_OPTIMIZED_KEYS);
+ if (!share->global_changed)
+ {
+ share->global_changed=1;
+ share->state.open_count++;
+ }
+ /*
+ temp tables don't need an open_count as they are removed on crash;
+ transactional tables are fixed by log-based recovery, so don't need an
+ open_count either (and we thus avoid the disk write below).
+ */
+ if (!(share->temporary | share->base.born_transactional))
+ {
+ mi_int2store(buff,share->state.open_count);
+ buff[2]=1; /* Mark that it's changed */
+ DBUG_RETURN(my_pwrite(share->kfile.file, buff, sizeof(buff),
+ sizeof(share->state.header),
+ MYF(MY_NABP)));
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ This is only called by close or by extra(HA_FLUSH) if the OS has the pwrite()
+ call. In these context the following code should be safe!
+ */
+
+int _ma_decrement_open_count(MARIA_HA *info)
+{
+ uchar buff[2];
+ register MARIA_SHARE *share= info->s;
+ int lock_error=0,write_error=0;
+ if (share->global_changed)
+ {
+ uint old_lock=info->lock_type;
+ share->global_changed=0;
+ lock_error=maria_lock_database(info,F_WRLCK);
+ /* Its not fatal even if we couldn't get the lock ! */
+ if (share->state.open_count > 0)
+ {
+ share->state.open_count--;
+ if (!(share->temporary | share->base.born_transactional))
+ {
+ mi_int2store(buff,share->state.open_count);
+ write_error= my_pwrite(share->kfile.file, buff, sizeof(buff),
+ sizeof(share->state.header),
+ MYF(MY_NABP));
+ }
+ }
+ if (!lock_error)
+ lock_error=maria_lock_database(info,old_lock);
+ }
+ return test(lock_error || write_error);
+}
diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c
new file mode 100644
index 00000000000..c0a79c0be91
--- /dev/null
+++ b/storage/maria/ma_loghandler.c
@@ -0,0 +1,7637 @@
+/* Copyright (C) 2007 MySQL AB & Sanja Belkin
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+#include "trnman.h"
+#include "ma_blockrec.h" /* for some constants and in-write hooks */
+#include "ma_key_recover.h" /* For some in-write hooks */
+
+/**
+ @file
+ @brief Module which writes and reads to a transaction log
+*/
+
+/* 0xFF can never be valid first byte of a chunk */
+#define TRANSLOG_FILLER 0xFF
+
+/* number of opened log files in the pagecache (should be at least 2) */
+#define OPENED_FILES_NUM 3
+#define CACHED_FILES_NUM 5
+#define CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT 7
+#if CACHED_FILES_NUM > CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT
+#include <hash.h>
+#include <m_ctype.h>
+#endif
+
+/* transaction log file descriptor */
+typedef struct st_translog_file
+{
+ uint32 number;
+ PAGECACHE_FILE handler;
+ my_bool was_recovered;
+ my_bool is_sync;
+} TRANSLOG_FILE;
+
+/* records buffer size (should be TRANSLOG_PAGE_SIZE * n) */
+#define TRANSLOG_WRITE_BUFFER (1024*1024)
+/* min chunk length */
+#define TRANSLOG_MIN_CHUNK 3
+/*
+ Number of buffers used by loghandler
+
+ Should be at least 4, because one thread can block up to 2 buffers in
+ normal circumstances (less then half of one and full other, or just
+ switched one and other), But if we met end of the file in the middle and
+ have to switch buffer it will be 3. + 1 buffer for flushing/writing.
+ We have a bigger number here for higher concurrency.
+*/
+#define TRANSLOG_BUFFERS_NO 5
+/* number of bytes (+ header) which can be unused on first page in sequence */
+#define TRANSLOG_MINCHUNK_CONTENT 1
+/* version of log file */
+#define TRANSLOG_VERSION_ID 10000 /* 1.00.00 */
+
+#define TRANSLOG_PAGE_FLAGS 6 /* transaction log page flags offset */
+
+/* Maximum length of compressed LSNs (the worst case of whole LSN storing) */
+#define COMPRESSED_LSN_MAX_STORE_SIZE (2 + LSN_STORE_SIZE)
+#define MAX_NUMBER_OF_LSNS_PER_RECORD 2
+
+
+/* log write buffer descriptor */
+struct st_translog_buffer
+{
+ LSN last_lsn;
+ /* This buffer offset in the file */
+ TRANSLOG_ADDRESS offset;
+ /*
+ Next buffer offset in the file (it is not always offset + size,
+ in case of flush by LSN it can be offset + size - TRANSLOG_PAGE_SIZE)
+ */
+ TRANSLOG_ADDRESS next_buffer_offset;
+ /*
+ How much is written (or will be written when copy_to_buffer_in_progress
+ become 0) to this buffer
+ */
+ translog_size_t size;
+ /* File handler for this buffer */
+ TRANSLOG_FILE *file;
+ /* Threads which are waiting for buffer filling/freeing */
+ pthread_cond_t waiting_filling_buffer;
+ /* Number of records which are in copy progress */
+ uint copy_to_buffer_in_progress;
+ /* list of waiting buffer ready threads */
+ struct st_my_thread_var *waiting_flush;
+ /*
+ Pointer on the buffer which overlap with this one (due to flush of
+ loghandler, the last page of that buffer is the same as the first page
+ of this buffer) and have to be written first (because contain old
+ content of page which present in both buffers)
+ */
+ struct st_translog_buffer *overlay;
+#ifndef DBUG_OFF
+ uint buffer_no;
+#endif
+ /*
+ Lock for the buffer.
+
+ Current buffer also lock the whole handler (if one want lock the handler
+ one should lock the current buffer).
+
+ Buffers are locked only in one direction (with overflow and beginning
+ from the first buffer). If we keep lock on buffer N we can lock only
+ buffer N+1 (never N-1).
+
+ One thread do not lock more then 2 buffer in a time, so to make dead
+ lock it should be N thread (where N equal number of buffers) takes one
+ buffer and try to lock next. But it is impossible because there is only
+ 2 cases when thread take 2 buffers: 1) one thread finishes current
+ buffer (where horizon is) and start next (to which horizon moves). 2)
+ flush start from buffer after current (oldest) and go till the current
+ crabbing by buffer sequence. And there is only one flush in a moment
+ (they are serialised).
+
+ Because of above and number of buffers equal 5 we can't get dead lock (it is
+ impossible to get all 5 buffers locked simultaneously).
+ */
+ pthread_mutex_t mutex;
+ /* Cache for current log. */
+ uchar buffer[TRANSLOG_WRITE_BUFFER];
+};
+
+
+struct st_buffer_cursor
+{
+ /* pointer into the buffer */
+ uchar *ptr;
+ /* current buffer */
+ struct st_translog_buffer *buffer;
+ /* How many bytes we wrote on the current page */
+ uint16 current_page_fill;
+ /*
+ How many times we write the page on the disk during flushing process
+ (for sector protection).
+ */
+ uint16 write_counter;
+ /* previous write offset */
+ uint16 previous_offset;
+ /* Number of current buffer */
+ uint8 buffer_no;
+ /*
+ True if it is just filling buffer after advancing the pointer to
+ the horizon.
+ */
+ my_bool chaser;
+ /*
+ Is current page of the cursor already finished (sector protection
+ should be applied if it is needed)
+ */
+ my_bool protected;
+};
+
+
+struct st_translog_descriptor
+{
+ /* *** Parameters of the log handler *** */
+
+ /* Page cache for the log reads */
+ PAGECACHE *pagecache;
+ uint flags;
+ /* File open flags */
+ uint open_flags;
+ /* max size of one log size (for new logs creation) */
+ uint32 log_file_max_size;
+ uint32 server_version;
+ /* server ID (used for replication) */
+ uint32 server_id;
+ /* Loghandler's buffer capacity in case of chunk 2 filling */
+ uint32 buffer_capacity_chunk_2;
+ /*
+ Half of the buffer capacity in case of chunk 2 filling,
+ used to decide will we write a record in one group or many.
+ It is written to the variable just to avoid devision every
+ time we need it.
+ */
+ uint32 half_buffer_capacity_chunk_2;
+ /* Page overhead calculated by flags (whether CRC is enabled, etc) */
+ uint16 page_overhead;
+ /*
+ Page capacity ("useful load") calculated by flags
+ (TRANSLOG_PAGE_SIZE - page_overhead-1)
+ */
+ uint16 page_capacity_chunk_2;
+ /* Path to the directory where we store log store files */
+ char directory[FN_REFLEN];
+
+ /* *** Current state of the log handler *** */
+ /* list of opened files */
+ DYNAMIC_ARRAY open_files;
+ /* min/max number of file in the array */
+ uint32 max_file, min_file;
+ /* the opened files list guard */
+ rw_lock_t open_files_lock;
+
+ /*
+ File descriptor of the directory where we store log files for syncing
+ it.
+ */
+ File directory_fd;
+ /* buffers for log writing */
+ struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO];
+ /*
+ horizon - visible end of the log (here is absolute end of the log:
+ position where next chunk can start
+ */
+ TRANSLOG_ADDRESS horizon;
+ /* horizon buffer cursor */
+ struct st_buffer_cursor bc;
+ /* maximum LSN of the current (not finished) file */
+ LSN max_lsn;
+
+ /*
+ Last flushed LSN (protected by log_flush_lock).
+ Pointers in the log ordered like this:
+ last_lsn_checked <= flushed <= sent_to_disk <= in_buffers_only <=
+ max_lsn <= horizon
+ */
+ LSN flushed;
+ /* Last LSN sent to the disk (but maybe not written yet) */
+ LSN sent_to_disk;
+ TRANSLOG_ADDRESS previous_flush_horizon;
+ /* All what is after this address is not sent to disk yet */
+ TRANSLOG_ADDRESS in_buffers_only;
+ /* protection of sent_to_file and in_buffers_only */
+ pthread_mutex_t sent_to_disk_lock;
+ /*
+ Protect flushed (see above) and for flush serialization (will
+ be removed in v1.5
+ */
+ pthread_mutex_t log_flush_lock;
+
+ /* Protects changing of headers of finished files (max_lsn) */
+ pthread_mutex_t file_header_lock;
+
+ /*
+ Sorted array (with protection) of files where we started writing process
+ and so we can't give last LSN yet
+ */
+ pthread_mutex_t unfinished_files_lock;
+ DYNAMIC_ARRAY unfinished_files;
+
+ /*
+ minimum number of still need file calculeted during last
+ translog_purge call
+ */
+ uint32 min_need_file;
+ /* Purger data: minimum file in the log (or 0 if unknown) */
+ uint32 min_file_number;
+ /* Protect purger from many calls and it's data */
+ pthread_mutex_t purger_lock;
+ /* last low water mark checked */
+ LSN last_lsn_checked;
+};
+
+static struct st_translog_descriptor log_descriptor;
+
+ulong log_purge_type= TRANSLOG_PURGE_IMMIDIATE;
+ulong log_file_size= TRANSLOG_FILE_SIZE;
+ulong sync_log_dir= TRANSLOG_SYNC_DIR_NEWFILE;
+
+/* Marker for end of log */
+static uchar end_of_log= 0;
+#define END_OF_LOG &end_of_log
+
+enum enum_translog_status translog_status= TRANSLOG_UNINITED;
+
+/* chunk types */
+#define TRANSLOG_CHUNK_LSN 0x00 /* 0 chunk refer as LSN (head or tail */
+#define TRANSLOG_CHUNK_FIXED (1 << 6) /* 1 (pseudo)fixed record (also LSN) */
+#define TRANSLOG_CHUNK_NOHDR (2 << 6) /* 2 no head chunk (till page end) */
+#define TRANSLOG_CHUNK_LNGTH (3 << 6) /* 3 chunk with chunk length */
+#define TRANSLOG_CHUNK_TYPE (3 << 6) /* Mask to get chunk type */
+#define TRANSLOG_REC_TYPE 0x3F /* Mask to get record type */
+
+/* compressed (relative) LSN constants */
+#define TRANSLOG_CLSN_LEN_BITS 0xC0 /* Mask to get compressed LSN length */
+
+
+
+#include <my_atomic.h>
+/* an array that maps id of a MARIA_SHARE to this MARIA_SHARE */
+static MARIA_SHARE **id_to_share= NULL;
+/* lock for id_to_share */
+static my_atomic_rwlock_t LOCK_id_to_share;
+
+static my_bool translog_dummy_callback(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar* data_ptr);
+static my_bool translog_page_validator(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar* data_ptr);
+
+static my_bool translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner);
+static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected);
+
+
+/*
+ Initialize log_record_type_descriptors
+*/
+
+LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES];
+
+
+#ifndef DBUG_OFF
+
+#define translog_buffer_lock_assert_owner(B) \
+ safe_mutex_assert_owner(&(B)->mutex);
+void translog_lock_assert_owner()
+{
+ translog_buffer_lock_assert_owner(log_descriptor.bc.buffer);
+}
+
+/**
+ @brief check the description table validity
+
+ @param num how many records should be filled
+*/
+
+static void check_translog_description_table(int num)
+{
+ int i;
+ DBUG_ENTER("check_translog_description_table");
+ DBUG_PRINT("enter", ("last record: %d", num));
+ DBUG_ASSERT(num > 0);
+ /* last is reserved for extending the table */
+ DBUG_ASSERT(num < LOGREC_NUMBER_OF_TYPES - 1);
+ DBUG_ASSERT(log_record_type_descriptor[0].rclass == LOGRECTYPE_NOT_ALLOWED);
+
+ for (i= 0; i <= num; i++)
+ {
+ DBUG_PRINT("info",
+ ("record type: %d class: %d fixed: %u header: %u LSNs: %u "
+ "name: %s",
+ i, log_record_type_descriptor[i].rclass,
+ (uint)log_record_type_descriptor[i].fixed_length,
+ (uint)log_record_type_descriptor[i].read_header_len,
+ (uint)log_record_type_descriptor[i].compressed_LSN,
+ log_record_type_descriptor[i].name));
+ switch (log_record_type_descriptor[i].rclass) {
+ case LOGRECTYPE_NOT_ALLOWED:
+ DBUG_ASSERT(i == 0);
+ break;
+ case LOGRECTYPE_VARIABLE_LENGTH:
+ DBUG_ASSERT(log_record_type_descriptor[i].fixed_length == 0);
+ DBUG_ASSERT((log_record_type_descriptor[i].compressed_LSN == 0) ||
+ ((log_record_type_descriptor[i].compressed_LSN == 1) &&
+ (log_record_type_descriptor[i].read_header_len >=
+ LSN_STORE_SIZE)) ||
+ ((log_record_type_descriptor[i].compressed_LSN == 2) &&
+ (log_record_type_descriptor[i].read_header_len >=
+ LSN_STORE_SIZE * 2)));
+ break;
+ case LOGRECTYPE_PSEUDOFIXEDLENGTH:
+ DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
+ log_record_type_descriptor[i].read_header_len);
+ DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN > 0);
+ DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN <= 2);
+ break;
+ case LOGRECTYPE_FIXEDLENGTH:
+ DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
+ log_record_type_descriptor[i].read_header_len);
+ DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN == 0);
+ break;
+ default:
+ DBUG_ASSERT(0);
+ }
+ }
+ for (i= num + 1; i < LOGREC_NUMBER_OF_TYPES; i++)
+ {
+ DBUG_ASSERT(log_record_type_descriptor[i].rclass ==
+ LOGRECTYPE_NOT_ALLOWED);
+ }
+ DBUG_VOID_RETURN;
+}
+#else
+#define translog_buffer_lock_assert_owner(B)
+#define translog_lock_assert_owner()
+#endif
+
+static LOG_DESC INIT_LOGREC_RESERVED_FOR_CHUNKS23=
+{LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0,
+ "reserved", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL };
+
+static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_HEAD=
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
+ write_hook_for_redo, NULL, 0,
+ "redo_insert_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_TAIL=
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
+ write_hook_for_redo, NULL, 0,
+ "redo_insert_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+/* Use this entry next time we need to add a new entry */
+static LOG_DESC INIT_LOGREC_REDO_NOT_USED=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, write_hook_for_redo, NULL, 0,
+ "redo_insert_row_blob", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOBS=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE, NULL,
+ write_hook_for_redo, NULL, 0,
+ "redo_insert_row_blobs", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_HEAD=
+{LOGRECTYPE_FIXEDLENGTH,
+ FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
+ FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
+ NULL, write_hook_for_redo, NULL, 0,
+ "redo_purge_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_TAIL=
+{LOGRECTYPE_FIXEDLENGTH,
+ FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
+ FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
+ NULL, write_hook_for_redo, NULL, 0,
+ "redo_purge_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_FREE_BLOCKS=
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE,
+ NULL, write_hook_for_redo, NULL, 0,
+ "redo_free_blocks", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL=
+{LOGRECTYPE_FIXEDLENGTH,
+ FILEID_STORE_SIZE + PAGE_STORE_SIZE,
+ FILEID_STORE_SIZE + PAGE_STORE_SIZE,
+ NULL, write_hook_for_redo, NULL, 0,
+ "redo_free_head_or_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+/* not yet used; for when we have versioning */
+static LOG_DESC INIT_LOGREC_REDO_DELETE_ROW=
+{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
+ "redo_delete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+/** @todo RECOVERY BUG unused, remove? */
+static LOG_DESC INIT_LOGREC_REDO_UPDATE_ROW_HEAD=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
+ "redo_update_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_INDEX=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
+ "redo_index", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_INDEX_NEW_PAGE=
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE + 1,
+ NULL, write_hook_for_redo, NULL, 0,
+ "redo_index_new_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_INDEX_FREE_PAGE=
+{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
+ FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
+ NULL, write_hook_for_redo, NULL, 0,
+ "redo_index_free_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_UNDELETE_ROW=
+{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
+ "redo_undelete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_CLR_END=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ CLR_TYPE_STORE_SIZE, NULL, write_hook_for_clr_end, NULL, 1,
+ "clr_end", LOGREC_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_PURGE_END=
+{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
+ "purge_end", LOGREC_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_UNDO_ROW_INSERT=
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
+ NULL, write_hook_for_undo_row_insert, NULL, 1,
+ "undo_row_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_UNDO_ROW_DELETE=
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
+ NULL, write_hook_for_undo_row_delete, NULL, 1,
+ "undo_row_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_UNDO_ROW_UPDATE=
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
+ NULL, write_hook_for_undo_row_update, NULL, 1,
+ "undo_row_update", LOGREC_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT=
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
+ NULL, write_hook_for_undo_key_insert, NULL, 1,
+ "undo_key_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
+
+/* This will never be in the log, only in the clr */
+static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT=
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
+ NULL, write_hook_for_undo_key, NULL, 1,
+ "undo_key_insert_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE=
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
+ NULL, write_hook_for_undo_key, NULL, 1,
+ "undo_key_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT=
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
+ NULL, write_hook_for_undo_key, NULL, 1,
+ "undo_key_delete_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_PREPARE=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
+ "prepare", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE, NULL, NULL, NULL, 1,
+ "prepare_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_COMMIT=
+{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL,
+ NULL, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL,
+ NULL};
+
+static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE=
+{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
+ "commit_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_CHECKPOINT=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
+ "checkpoint", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_CREATE_TABLE=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, 1 + 2, NULL, NULL, NULL, 0,
+"redo_create_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_RENAME_TABLE=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
+ "redo_rename_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_DROP_TABLE=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
+ "redo_drop_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_DELETE_ALL=
+{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
+ NULL, write_hook_for_redo_delete_all, NULL, 0,
+ "redo_delete_all", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_REPAIR_TABLE=
+{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + 4 + 8, FILEID_STORE_SIZE + 4 + 8,
+ NULL, NULL, NULL, 0,
+ "redo_repair_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_FILE_ID=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, 2, NULL, write_hook_for_file_id, NULL, 0,
+ "file_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_LONG_TRANSACTION_ID=
+{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0,
+ "long_transaction_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_INCOMPLETE_LOG=
+{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
+ NULL, NULL, NULL, 0,
+ "incomplete_log", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_INCOMPLETE_GROUP=
+{LOGRECTYPE_FIXEDLENGTH, 0, 0,
+ NULL, NULL, NULL, 0,
+ "incomplete_group", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
+
+const myf log_write_flags= MY_WME | MY_NABP | MY_WAIT_IF_FULL;
+
+void translog_table_init()
+{
+ int i;
+ log_record_type_descriptor[LOGREC_RESERVED_FOR_CHUNKS23]=
+ INIT_LOGREC_RESERVED_FOR_CHUNKS23;
+ log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_HEAD]=
+ INIT_LOGREC_REDO_INSERT_ROW_HEAD;
+ log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_TAIL]=
+ INIT_LOGREC_REDO_INSERT_ROW_TAIL;
+ log_record_type_descriptor[LOGREC_REDO_NOT_USED]=
+ INIT_LOGREC_REDO_NOT_USED;
+ log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_BLOBS]=
+ INIT_LOGREC_REDO_INSERT_ROW_BLOBS;
+ log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_HEAD]=
+ INIT_LOGREC_REDO_PURGE_ROW_HEAD;
+ log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_TAIL]=
+ INIT_LOGREC_REDO_PURGE_ROW_TAIL;
+ log_record_type_descriptor[LOGREC_REDO_FREE_BLOCKS]=
+ INIT_LOGREC_REDO_FREE_BLOCKS;
+ log_record_type_descriptor[LOGREC_REDO_FREE_HEAD_OR_TAIL]=
+ INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL;
+ log_record_type_descriptor[LOGREC_REDO_DELETE_ROW]=
+ INIT_LOGREC_REDO_DELETE_ROW;
+ log_record_type_descriptor[LOGREC_REDO_UPDATE_ROW_HEAD]=
+ INIT_LOGREC_REDO_UPDATE_ROW_HEAD;
+ log_record_type_descriptor[LOGREC_REDO_INDEX]=
+ INIT_LOGREC_REDO_INDEX;
+ log_record_type_descriptor[LOGREC_REDO_INDEX_NEW_PAGE]=
+ INIT_LOGREC_REDO_INDEX_NEW_PAGE;
+ log_record_type_descriptor[LOGREC_REDO_INDEX_FREE_PAGE]=
+ INIT_LOGREC_REDO_INDEX_FREE_PAGE;
+ log_record_type_descriptor[LOGREC_REDO_UNDELETE_ROW]=
+ INIT_LOGREC_REDO_UNDELETE_ROW;
+ log_record_type_descriptor[LOGREC_CLR_END]=
+ INIT_LOGREC_CLR_END;
+ log_record_type_descriptor[LOGREC_PURGE_END]=
+ INIT_LOGREC_PURGE_END;
+ log_record_type_descriptor[LOGREC_UNDO_ROW_INSERT]=
+ INIT_LOGREC_UNDO_ROW_INSERT;
+ log_record_type_descriptor[LOGREC_UNDO_ROW_DELETE]=
+ INIT_LOGREC_UNDO_ROW_DELETE;
+ log_record_type_descriptor[LOGREC_UNDO_ROW_UPDATE]=
+ INIT_LOGREC_UNDO_ROW_UPDATE;
+ log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT]=
+ INIT_LOGREC_UNDO_KEY_INSERT;
+ log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT_WITH_ROOT]=
+ INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT;
+ log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE]=
+ INIT_LOGREC_UNDO_KEY_DELETE;
+ log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE_WITH_ROOT]=
+ INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT;
+ log_record_type_descriptor[LOGREC_PREPARE]=
+ INIT_LOGREC_PREPARE;
+ log_record_type_descriptor[LOGREC_PREPARE_WITH_UNDO_PURGE]=
+ INIT_LOGREC_PREPARE_WITH_UNDO_PURGE;
+ log_record_type_descriptor[LOGREC_COMMIT]=
+ INIT_LOGREC_COMMIT;
+ log_record_type_descriptor[LOGREC_COMMIT_WITH_UNDO_PURGE]=
+ INIT_LOGREC_COMMIT_WITH_UNDO_PURGE;
+ log_record_type_descriptor[LOGREC_CHECKPOINT]=
+ INIT_LOGREC_CHECKPOINT;
+ log_record_type_descriptor[LOGREC_REDO_CREATE_TABLE]=
+ INIT_LOGREC_REDO_CREATE_TABLE;
+ log_record_type_descriptor[LOGREC_REDO_RENAME_TABLE]=
+ INIT_LOGREC_REDO_RENAME_TABLE;
+ log_record_type_descriptor[LOGREC_REDO_DROP_TABLE]=
+ INIT_LOGREC_REDO_DROP_TABLE;
+ log_record_type_descriptor[LOGREC_REDO_DELETE_ALL]=
+ INIT_LOGREC_REDO_DELETE_ALL;
+ log_record_type_descriptor[LOGREC_REDO_REPAIR_TABLE]=
+ INIT_LOGREC_REDO_REPAIR_TABLE;
+ log_record_type_descriptor[LOGREC_FILE_ID]=
+ INIT_LOGREC_FILE_ID;
+ log_record_type_descriptor[LOGREC_LONG_TRANSACTION_ID]=
+ INIT_LOGREC_LONG_TRANSACTION_ID;
+ log_record_type_descriptor[LOGREC_INCOMPLETE_LOG]=
+ INIT_LOGREC_INCOMPLETE_LOG;
+ log_record_type_descriptor[LOGREC_INCOMPLETE_GROUP]=
+ INIT_LOGREC_INCOMPLETE_GROUP;
+ for (i= LOGREC_INCOMPLETE_GROUP + 1;
+ i < LOGREC_NUMBER_OF_TYPES;
+ i++)
+ log_record_type_descriptor[i].rclass= LOGRECTYPE_NOT_ALLOWED;
+#ifndef DBUG_OFF
+ check_translog_description_table(LOGREC_INCOMPLETE_GROUP);
+#endif
+};
+
+
+/* all possible flags page overheads */
+static uint page_overhead[TRANSLOG_FLAGS_NUM];
+
+typedef struct st_translog_validator_data
+{
+ TRANSLOG_ADDRESS *addr;
+ my_bool was_recovered;
+} TRANSLOG_VALIDATOR_DATA;
+
+
+const char *maria_data_root;
+
+
+/*
+ Check cursor/buffer consistence
+
+ SYNOPSIS
+ translog_check_cursor
+ cursor cursor which will be checked
+*/
+
+static void translog_check_cursor(struct st_buffer_cursor *cursor
+ __attribute__((unused)))
+{
+ DBUG_ASSERT(cursor->chaser ||
+ ((ulong) (cursor->ptr - cursor->buffer->buffer) ==
+ cursor->buffer->size));
+ DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
+ DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE ==
+ cursor->current_page_fill % TRANSLOG_PAGE_SIZE);
+ DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
+}
+
+
+/**
+ @brief switch the loghandler in read only mode in case of write error
+*/
+
+void translog_stop_writing()
+{
+ translog_status= (translog_status == TRANSLOG_SHUTDOWN ?
+ TRANSLOG_UNINITED :
+ TRANSLOG_READONLY);
+ log_descriptor.open_flags= O_BINARY | O_RDONLY;
+}
+
+
+/*
+ @brief Get file name of the log by log number
+
+ @param file_no Number of the log we want to open
+ @param path Pointer to buffer where file name will be
+ stored (must be FN_REFLEN bytes at least)
+
+ @return pointer to path
+*/
+
+char *translog_filename_by_fileno(uint32 file_no, char *path)
+{
+ char buff[11], *end;
+ uint length;
+ DBUG_ENTER("translog_filename_by_fileno");
+ DBUG_ASSERT(file_no <= 0xfffffff);
+
+ /* log_descriptor.directory is already formated */
+ end= strxmov(path, log_descriptor.directory, "maria_log.0000000", NullS);
+ length= (uint) (int10_to_str(file_no, buff, 10) - buff);
+ strmov(end - length +1, buff);
+
+ DBUG_PRINT("info", ("Path: '%s' path: 0x%lx", path, (ulong) path));
+ DBUG_RETURN(path);
+}
+
+
+/**
+ @brief Create log file with given number without cache
+
+ @param file_no Number of the log we want to open
+
+ retval -1 error
+ retval # file descriptor number
+*/
+
+static File create_logfile_by_number_no_cache(uint32 file_no)
+{
+ File file;
+ char path[FN_REFLEN];
+ DBUG_ENTER("create_logfile_by_number_no_cache");
+
+ if (translog_status != TRANSLOG_OK)
+ DBUG_RETURN(-1);
+
+ /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
+ if ((file= my_create(translog_filename_by_fileno(file_no, path),
+ 0, O_BINARY | O_RDWR, MYF(MY_WME))) < 0)
+ {
+ DBUG_PRINT("error", ("Error %d during creating file '%s'", errno, path));
+ translog_stop_writing();
+ DBUG_RETURN(-1);
+ }
+ if (sync_log_dir >= TRANSLOG_SYNC_DIR_NEWFILE &&
+ my_sync(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD)))
+ {
+ DBUG_PRINT("error", ("Error %d during syncing directory '%s'",
+ errno, log_descriptor.directory));
+ translog_stop_writing();
+ DBUG_RETURN(-1);
+ }
+ DBUG_PRINT("info", ("File: '%s' handler: %d", path, file));
+ DBUG_RETURN(file);
+}
+
+/**
+ @brief Open (not create) log file with given number without cache
+
+ @param file_no Number of the log we want to open
+
+ retval -1 error
+ retval # file descriptor number
+*/
+
+static File open_logfile_by_number_no_cache(uint32 file_no)
+{
+ File file;
+ char path[FN_REFLEN];
+ DBUG_ENTER("open_logfile_by_number_no_cache");
+
+ /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
+ /* TODO: use my_create() */
+ if ((file= my_open(translog_filename_by_fileno(file_no, path),
+ log_descriptor.open_flags,
+ MYF(MY_WME))) < 0)
+ {
+ DBUG_PRINT("error", ("Error %d during opening file '%s'", errno, path));
+ DBUG_RETURN(-1);
+ }
+ DBUG_PRINT("info", ("File: '%s' handler: %d", path, file));
+ DBUG_RETURN(file);
+}
+
+
+/**
+ @brief get file descriptor by given number using cache
+
+ @param file_no Number of the log we want to open
+
+ retval # file descriptor
+*/
+
+static TRANSLOG_FILE *get_logfile_by_number(uint32 file_no)
+{
+ TRANSLOG_FILE *file;
+ DBUG_ENTER("get_logfile_by_number");
+ rw_rdlock(&log_descriptor.open_files_lock);
+ DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
+ log_descriptor.open_files.elements);
+ DBUG_ASSERT(log_descriptor.max_file >= file_no);
+ DBUG_ASSERT(log_descriptor.min_file <= file_no);
+ DBUG_ASSERT(log_descriptor.max_file - file_no <
+ log_descriptor.open_files.elements);
+ file= *dynamic_element(&log_descriptor.open_files,
+ log_descriptor.max_file - file_no, TRANSLOG_FILE **);
+ rw_unlock(&log_descriptor.open_files_lock);
+ DBUG_PRINT("info", ("File 0x%lx File no: %lu, File handler: %d",
+ (ulong)file, (ulong)file_no,
+ (file ? file->handler.file : -1)));
+ DBUG_ASSERT(!file || file->number == file_no);
+ DBUG_RETURN(file);
+}
+
+
+/**
+ @brief get current file descriptor
+
+ retval # file descriptor
+*/
+
+static TRANSLOG_FILE *get_current_logfile()
+{
+ TRANSLOG_FILE *file;
+ rw_rdlock(&log_descriptor.open_files_lock);
+ DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
+ log_descriptor.open_files.elements);
+ file= *dynamic_element(&log_descriptor.open_files, 0, TRANSLOG_FILE **);
+ rw_unlock(&log_descriptor.open_files_lock);
+ return (file);
+}
+
+uchar NEAR maria_trans_file_magic[]=
+{ (uchar) 254, (uchar) 254, (uchar) 11, '\001', 'M', 'A', 'R', 'I', 'A',
+ 'L', 'O', 'G' };
+#define LOG_HEADER_DATA_SIZE (sizeof(maria_trans_file_magic) + \
+ 8 + 4 + 4 + 4 + 2 + 3 + \
+ LSN_STORE_SIZE)
+
+
+/*
+ Write log file page header in the just opened new log file
+
+ SYNOPSIS
+ translog_write_file_header();
+
+ NOTES
+ First page is just a marker page; We don't store any real log data in it.
+
+ RETURN
+ 0 OK
+ 1 ERROR
+*/
+
+static my_bool translog_write_file_header()
+{
+ TRANSLOG_FILE *file;
+ ulonglong timestamp;
+ uchar page_buff[TRANSLOG_PAGE_SIZE], *page= page_buff;
+ my_bool rc;
+ DBUG_ENTER("translog_write_file_header");
+
+ /* file tag */
+ memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic));
+ page+= sizeof(maria_trans_file_magic);
+ /* timestamp */
+ timestamp= my_getsystime();
+ int8store(page, timestamp);
+ page+= 8;
+ /* maria version */
+ int4store(page, TRANSLOG_VERSION_ID);
+ page+= 4;
+ /* mysql version (MYSQL_VERSION_ID) */
+ int4store(page, log_descriptor.server_version);
+ page+= 4;
+ /* server ID */
+ int4store(page, log_descriptor.server_id);
+ page+= 4;
+ /* loghandler page_size/DISK_DRIVE_SECTOR_SIZE */
+ int2store(page, TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE);
+ page+= 2;
+ /* file number */
+ int3store(page, LSN_FILE_NO(log_descriptor.horizon));
+ page+= 3;
+ lsn_store(page, LSN_IMPOSSIBLE);
+ page+= LSN_STORE_SIZE;
+ memset(page, TRANSLOG_FILLER, sizeof(page_buff) - (page- page_buff));
+
+ file= get_current_logfile();
+ rc= my_pwrite(file->handler.file, page_buff, sizeof(page_buff), 0,
+ log_write_flags) != 0;
+ /*
+ Dropping the flag in such way can make false alarm: signalling than the
+ file in not sync when it is sync, but the situation is quite rare and
+ protections with mutexes give much more overhead to the whole engine
+ */
+ file->is_sync= 0;
+ DBUG_RETURN(rc);
+}
+
+/*
+ @brief write the new LSN on the given file header
+
+ @param file The file descriptor
+ @param lsn That LSN which should be written
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool translog_max_lsn_to_header(File file, LSN lsn)
+{
+ uchar lsn_buff[LSN_STORE_SIZE];
+ DBUG_ENTER("translog_max_lsn_to_header");
+ DBUG_PRINT("enter", ("File descriptor: %ld "
+ "lsn: (%lu,0x%lx)",
+ (long) file,
+ LSN_IN_PARTS(lsn)));
+
+ lsn_store(lsn_buff, lsn);
+
+ DBUG_RETURN(my_pwrite(file, lsn_buff,
+ LSN_STORE_SIZE,
+ (LOG_HEADER_DATA_SIZE - LSN_STORE_SIZE),
+ log_write_flags) != 0 ||
+ my_sync(file, MYF(MY_WME)) != 0);
+}
+
+
+/*
+ Information from transaction log file header
+*/
+
+typedef struct st_loghandler_file_info
+{
+ /*
+ LSN_IMPOSSIBLE for current file (not finished file).
+ Maximum LSN of the record which parts stored in the
+ file.
+ */
+ LSN max_lsn;
+ ulonglong timestamp; /* Time stamp */
+ ulong maria_version; /* Version of maria loghandler */
+ ulong mysql_version; /* Version of mysql server */
+ ulong server_id; /* Server ID */
+ uint page_size; /* Loghandler page size */
+ uint file_number; /* Number of the file (from the file header) */
+} LOGHANDLER_FILE_INFO;
+
+/*
+ @brief Read hander file information from loghandler file
+
+ @param desc header information descriptor to be filled with information
+ @param file file descriptor to read
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool translog_read_file_header(LOGHANDLER_FILE_INFO *desc, File file)
+{
+ uchar page_buff[LOG_HEADER_DATA_SIZE], *ptr;
+ DBUG_ENTER("translog_read_file_header");
+
+ if (my_pread(file, page_buff,
+ sizeof(page_buff), 0, MYF(MY_FNABP | MY_WME)))
+ {
+ DBUG_PRINT("info", ("log read fail error: %d", my_errno));
+ DBUG_RETURN(1);
+ }
+ ptr= page_buff + sizeof(maria_trans_file_magic);
+ desc->timestamp= uint8korr(ptr);
+ ptr+= 8;
+ desc->maria_version= uint4korr(ptr);
+ ptr+= 4;
+ desc->mysql_version= uint4korr(ptr);
+ ptr+= 4;
+ desc->server_id= uint4korr(ptr);
+ ptr+= 4;
+ desc->page_size= uint2korr(ptr);
+ ptr+= 2;
+ desc->file_number= uint3korr(ptr);
+ ptr+=3;
+ desc->max_lsn= lsn_korr(ptr);
+ DBUG_PRINT("info", ("timestamp: %llu maria ver: %lu mysql ver: %lu "
+ "server id %lu page size %u file number %lu "
+ "max lsn: (%lu,0x%lx)",
+ (ulonglong) desc->timestamp,
+ (ulong) desc->maria_version,
+ (ulong) desc->mysql_version,
+ (ulong) desc->server_id,
+ desc->page_size, (ulong) desc->file_number,
+ LSN_IN_PARTS(desc->max_lsn)));
+ DBUG_RETURN(0);
+}
+
+
+/*
+ @brief set the lsn to the files from_file - to_file if it is greater
+ then written in the file
+
+ @param from_file first file number (min)
+ @param to_file last file number (max)
+ @param lsn the lsn for writing
+ @param is_locked true if current thread locked the log handler
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool translog_set_lsn_for_files(uint32 from_file, uint32 to_file,
+ LSN lsn, my_bool is_locked)
+{
+ uint32 file;
+ DBUG_ENTER("translog_set_lsn_for_files");
+ DBUG_PRINT("enter", ("From: %lu to: %lu lsn: (%lu,0x%lx) locked: %d",
+ (ulong) from_file, (ulong) to_file,
+ LSN_IN_PARTS(lsn),
+ is_locked));
+ DBUG_ASSERT(from_file <= to_file);
+ DBUG_ASSERT(from_file > 0); /* we have not file 0 */
+
+ /* Checks the current file (not finished yet file) */
+ if (!is_locked)
+ translog_lock();
+ if (to_file == (uint32) LSN_FILE_NO(log_descriptor.horizon))
+ {
+ if (likely(cmp_translog_addr(lsn, log_descriptor.max_lsn) > 0))
+ log_descriptor.max_lsn= lsn;
+ to_file--;
+ }
+ if (!is_locked)
+ translog_unlock();
+
+ /* Checks finished files if they are */
+ pthread_mutex_lock(&log_descriptor.file_header_lock);
+ for (file= from_file; file <= to_file; file++)
+ {
+ LOGHANDLER_FILE_INFO info;
+ File fd= open_logfile_by_number_no_cache(file);
+ if (fd < 0 ||
+ translog_read_file_header(&info, fd) ||
+ (cmp_translog_addr(lsn, info.max_lsn) > 0 &&
+ translog_max_lsn_to_header(fd, lsn)))
+ {
+ translog_stop_writing();
+ DBUG_RETURN(1);
+ }
+ }
+ pthread_mutex_unlock(&log_descriptor.file_header_lock);
+
+ DBUG_RETURN(0);
+}
+
+
+/* descriptor of file in unfinished_files */
+struct st_file_counter
+{
+ uint32 file; /* file number */
+ uint32 counter; /* counter for started writes */
+};
+
+
+/*
+ @brief mark file "in progress" (for multi-group records)
+
+ @param file log file number
+*/
+
+static void translog_mark_file_unfinished(uint32 file)
+{
+ int place, i;
+ struct st_file_counter fc, *fc_ptr;
+ fc.file= file; fc.counter= 1;
+
+ DBUG_ENTER("translog_mark_file_unfinished");
+ DBUG_PRINT("enter", ("file: %lu", (ulong) file));
+
+ pthread_mutex_lock(&log_descriptor.unfinished_files_lock);
+
+ if (log_descriptor.unfinished_files.elements == 0)
+ {
+ insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
+ DBUG_PRINT("info", ("The first element inserted"));
+ goto end;
+ }
+
+ for (place= log_descriptor.unfinished_files.elements - 1;
+ place >= 0;
+ place--)
+ {
+ fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
+ place, struct st_file_counter *);
+ if (fc_ptr->file <= file)
+ break;
+ }
+
+ if (place >= 0 && fc_ptr->file == file)
+ {
+ fc_ptr->counter++;
+ DBUG_PRINT("info", ("counter increased"));
+ goto end;
+ }
+
+ if (place == (int)log_descriptor.unfinished_files.elements)
+ {
+ insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
+ DBUG_PRINT("info", ("The last element inserted"));
+ goto end;
+ }
+ /* shift and assign new element */
+ insert_dynamic(&log_descriptor.unfinished_files,
+ (uchar*)
+ dynamic_element(&log_descriptor.unfinished_files,
+ log_descriptor.unfinished_files.elements- 1,
+ struct st_file_counter *));
+ for(i= log_descriptor.unfinished_files.elements - 1; i > place; i--)
+ {
+ /* we do not use set_dynamic() to avoid unneeded checks */
+ memcpy(dynamic_element(&log_descriptor.unfinished_files,
+ i, struct st_file_counter *),
+ dynamic_element(&log_descriptor.unfinished_files,
+ i + 1, struct st_file_counter *),
+ sizeof(struct st_file_counter));
+ }
+ memcpy(dynamic_element(&log_descriptor.unfinished_files,
+ place + 1, struct st_file_counter *),
+ &fc, sizeof(struct st_file_counter));
+end:
+ pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);
+ DBUG_VOID_RETURN;
+}
+
+
+
+/*
+ @brief remove file mark "in progress" (for multi-group records)
+
+ @param file log file number
+*/
+
+static void translog_mark_file_finished(uint32 file)
+{
+ int i;
+ struct st_file_counter *fc_ptr;
+ DBUG_ENTER("translog_mark_file_finished");
+ DBUG_PRINT("enter", ("file: %lu", (ulong) file));
+
+ LINT_INIT(fc_ptr);
+
+ pthread_mutex_lock(&log_descriptor.unfinished_files_lock);
+
+ DBUG_ASSERT(log_descriptor.unfinished_files.elements > 0);
+ for (i= 0;
+ i < (int) log_descriptor.unfinished_files.elements;
+ i++)
+ {
+ fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
+ i, struct st_file_counter *);
+ if (fc_ptr->file == file)
+ {
+ break;
+ }
+ }
+ DBUG_ASSERT(i < (int) log_descriptor.unfinished_files.elements);
+
+ if (! --fc_ptr->counter)
+ delete_dynamic_element(&log_descriptor.unfinished_files, i);
+ pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ @brief get max LSN of the record which parts stored in this file
+
+ @param file file number
+
+ @return requested LSN or LSN_IMPOSSIBLE/LSN_ERROR
+ @retval LSN_IMPOSSIBLE File is still not finished
+ @retval LSN_ERROR Error opening file
+ @retval # LSN of the record which parts stored in this file
+*/
+
+LSN translog_get_file_max_lsn_stored(uint32 file)
+{
+ uint32 limit= FILENO_IMPOSSIBLE;
+ DBUG_ENTER("translog_get_file_max_lsn_stored");
+ DBUG_PRINT("enter", ("file: %lu", (ulong)file));
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+
+ pthread_mutex_lock(&log_descriptor.unfinished_files_lock);
+
+ /* find file with minimum file number "in progress" */
+ if (log_descriptor.unfinished_files.elements > 0)
+ {
+ struct st_file_counter *fc_ptr;
+ fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
+ 0, struct st_file_counter *);
+ limit= fc_ptr->file; /* minimal file number "in progress" */
+ }
+ pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);
+
+ /*
+ if there is no "in progress file" then unfinished file is in progress
+ for sure
+ */
+ if (limit == FILENO_IMPOSSIBLE)
+ {
+ TRANSLOG_ADDRESS horizon= translog_get_horizon();
+ limit= LSN_FILE_NO(horizon);
+ }
+
+ if (file >= limit)
+ {
+ DBUG_PRINT("info", ("The file in in progress"));
+ DBUG_RETURN(LSN_IMPOSSIBLE);
+ }
+
+ {
+ LOGHANDLER_FILE_INFO info;
+ File fd= open_logfile_by_number_no_cache(file);
+ if (fd < 0 ||
+ translog_read_file_header(&info, fd))
+ {
+ DBUG_PRINT("error", ("Can't read file header"));
+ DBUG_RETURN(LSN_ERROR);
+ }
+ DBUG_PRINT("info", ("Max lsn: (%lu,0x%lx)",
+ LSN_IN_PARTS(info.max_lsn)));
+ DBUG_RETURN(info.max_lsn);
+ }
+}
+
+/*
+ Initialize transaction log file buffer
+
+ SYNOPSIS
+ translog_buffer_init()
+ buffer The buffer to initialize
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_buffer_init(struct st_translog_buffer *buffer)
+{
+ DBUG_ENTER("translog_buffer_init");
+ buffer->last_lsn= LSN_IMPOSSIBLE;
+ /* This Buffer File */
+ buffer->file= NULL;
+ buffer->overlay= 0;
+ /* cache for current log */
+ memset(buffer->buffer, TRANSLOG_FILLER, TRANSLOG_WRITE_BUFFER);
+ /* Buffer size */
+ buffer->size= 0;
+ /* cond of thread which is waiting for buffer filling */
+ if (pthread_cond_init(&buffer->waiting_filling_buffer, 0))
+ DBUG_RETURN(1);
+ /* Number of records which are in copy progress */
+ buffer->copy_to_buffer_in_progress= 0;
+ /* list of waiting buffer ready threads */
+ buffer->waiting_flush= 0;
+ /* lock for the buffer. Current buffer also lock the handler */
+ if (pthread_mutex_init(&buffer->mutex, MY_MUTEX_INIT_FAST))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ @brief close transaction log file by descriptor
+
+ @param file pagegecache file descriptor reference
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool translog_close_log_file(TRANSLOG_FILE *file)
+{
+ int rc= 0;
+ flush_pagecache_blocks(log_descriptor.pagecache, &file->handler,
+ FLUSH_RELEASE);
+ /*
+ Sync file when we close it
+ TODO: sync only we have changed the log
+ */
+ if (!file->is_sync)
+ rc= my_sync(file->handler.file, MYF(MY_WME));
+ rc|= my_close(file->handler.file, MYF(MY_WME));
+ my_free(file, MYF(0));
+ return test(rc);
+}
+
+
+/**
+ @brief Dummy function for write failure (the log to not use
+ pagecache writing)
+*/
+
+void translog_dummy_write_failure(uchar *data __attribute__((unused)))
+{
+ return;
+}
+
+
+/**
+ @brief Initializes TRANSLOG_FILE structure
+
+ @param file reference on the file to initialize
+ @param number file number
+ @param is_sync is file synced on disk
+*/
+
+static void translog_file_init(TRANSLOG_FILE *file, uint32 number,
+ my_bool is_sync)
+{
+ pagecache_file_init(file->handler, &translog_page_validator,
+ &translog_dummy_callback,
+ &translog_dummy_write_failure, file);
+ file->number= number;
+ file->was_recovered= 0;
+ file->is_sync= is_sync;
+}
+
+
+/**
+ @brief Create and fill header of new file.
+
+ @note the caller must call it right after it has increased
+ log_descriptor.horizon to the new file
+ (log_descriptor.horizon+= LSN_ONE_FILE)
+
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool translog_create_new_file()
+{
+ TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(sizeof(TRANSLOG_FILE),
+ MYF(0));
+
+ TRANSLOG_FILE *old= get_current_logfile();
+ uint32 file_no= LSN_FILE_NO(log_descriptor.horizon);
+ DBUG_ENTER("translog_create_new_file");
+
+ if (file == NULL)
+ goto error;
+
+ /*
+ Writes max_lsn to the file header before finishing it (there is no need
+ to lock file header buffer because it is still unfinished file, so only
+ one thread can finish the file and nobody interested of LSN of current
+ (unfinished) file, because no one can purge it).
+ */
+ if (translog_max_lsn_to_header(old->handler.file, log_descriptor.max_lsn))
+ goto error;
+
+ rw_wrlock(&log_descriptor.open_files_lock);
+ DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
+ log_descriptor.open_files.elements);
+ DBUG_ASSERT(file_no == log_descriptor.max_file + 1);
+ if (allocate_dynamic(&log_descriptor.open_files,
+ log_descriptor.max_file - log_descriptor.min_file + 2))
+ goto error_lock;
+ if ((file->handler.file=
+ create_logfile_by_number_no_cache(file_no)) == -1)
+ goto error_lock;
+ translog_file_init(file, file_no, 0);
+
+ /* this call just expand the array */
+ insert_dynamic(&log_descriptor.open_files, (uchar*)&file);
+ log_descriptor.max_file++;
+ {
+ char *start= (char*) dynamic_element(&log_descriptor.open_files, 0,
+ TRANSLOG_FILE**);
+ memmove(start + sizeof(TRANSLOG_FILE*), start,
+ sizeof(TRANSLOG_FILE*) *
+ (log_descriptor.max_file - log_descriptor.min_file + 1 - 1));
+ }
+ /* can't fail we because we expanded array */
+ set_dynamic(&log_descriptor.open_files, (uchar*)&file, 0);
+ DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
+ log_descriptor.open_files.elements);
+ rw_unlock(&log_descriptor.open_files_lock);
+
+ DBUG_PRINT("info", ("file_no: %lu", (ulong)file_no));
+
+ if (translog_write_file_header())
+ DBUG_RETURN(1);
+
+ if (ma_control_file_write_and_force(LSN_IMPOSSIBLE, file_no,
+ CONTROL_FILE_UPDATE_ONLY_LOGNO))
+ {
+ translog_stop_writing();
+ DBUG_RETURN(1);
+ }
+
+ DBUG_RETURN(0);
+
+error_lock:
+ rw_unlock(&log_descriptor.open_files_lock);
+error:
+ translog_stop_writing();
+ DBUG_RETURN(1);
+}
+
+
+/**
+ @brief Locks the loghandler buffer.
+
+ @param buffer This buffer which should be locked
+
+ @note See comment before buffer 'mutex' variable.
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool translog_buffer_lock(struct st_translog_buffer *buffer)
+{
+ my_bool res;
+ DBUG_ENTER("translog_buffer_lock");
+ DBUG_PRINT("enter",
+ ("Lock buffer #%u: (0x%lx)", (uint) buffer->buffer_no,
+ (ulong) buffer));
+ res= (pthread_mutex_lock(&buffer->mutex) != 0);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Unlock the loghandler buffer
+
+ SYNOPSIS
+ translog_buffer_unlock()
+ buffer This buffer which should be unlocked
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_buffer_unlock(struct st_translog_buffer *buffer)
+{
+ my_bool res;
+ DBUG_ENTER("translog_buffer_unlock");
+ DBUG_PRINT("enter", ("Unlock buffer... #%u (0x%lx)",
+ (uint) buffer->buffer_no, (ulong) buffer));
+
+ res= (pthread_mutex_unlock(&buffer->mutex) != 0);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Write a header on the page
+
+ SYNOPSIS
+ translog_new_page_header()
+ horizon Where to write the page
+ cursor Where to write the page
+
+ NOTE
+ - space for page header should be checked before
+*/
+
+static void translog_new_page_header(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor)
+{
+ uchar *ptr;
+
+ DBUG_ENTER("translog_new_page_header");
+ DBUG_ASSERT(cursor->ptr);
+
+ cursor->protected= 0;
+
+ ptr= cursor->ptr;
+ /* Page number */
+ int3store(ptr, LSN_OFFSET(*horizon) / TRANSLOG_PAGE_SIZE);
+ ptr+= 3;
+ /* File number */
+ int3store(ptr, LSN_FILE_NO(*horizon));
+ ptr+= 3;
+ DBUG_ASSERT(TRANSLOG_PAGE_FLAGS == (ptr - cursor->ptr));
+ cursor->ptr[TRANSLOG_PAGE_FLAGS]= (uchar) log_descriptor.flags;
+ ptr++;
+ if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
+ {
+#ifndef DBUG_OFF
+ DBUG_PRINT("info", ("write 0x11223344 CRC to (%lu,0x%lx)",
+ LSN_IN_PARTS(*horizon)));
+ /* This will be overwritten by real CRC; This is just for debugging */
+ int4store(ptr, 0x11223344);
+#endif
+ /* CRC will be put when page is finished */
+ ptr+= CRC_SIZE;
+ }
+ if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
+ {
+ /*
+ The time() works like "random" values producer because it is enough to
+ have such "random" for this purpose and it will not interfere with
+ higher level pseudo random value generator
+ */
+ uint16 tmp_time= time(NULL);
+ ptr[0]= tmp_time & 0xFF;
+ ptr+= TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
+ }
+ {
+ uint len= (ptr - cursor->ptr);
+ (*horizon)+= len; /* increasing the offset part of the address */
+ cursor->current_page_fill= len;
+ if (!cursor->chaser)
+ cursor->buffer->size+= len;
+ }
+ cursor->ptr= ptr;
+ DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu)",
+ (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ cursor->chaser, (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer)));
+ translog_check_cursor(cursor);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Put sector protection on the page image
+
+ SYNOPSIS
+ translog_put_sector_protection()
+ page reference on the page content
+ cursor cursor of the buffer
+
+ NOTES
+ We put a sector protection on all following sectors on the page,
+ except the first sector that is protected by page header.
+*/
+
+static void translog_put_sector_protection(uchar *page,
+ struct st_buffer_cursor *cursor)
+{
+ uchar *table= page + log_descriptor.page_overhead -
+ TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
+ uint i, offset;
+ uint16 last_protected_sector= ((cursor->previous_offset - 1) /
+ DISK_DRIVE_SECTOR_SIZE);
+ uint16 start_sector= cursor->previous_offset / DISK_DRIVE_SECTOR_SIZE;
+ uint8 value= table[0] + cursor->write_counter;
+ DBUG_ENTER("translog_put_sector_protection");
+
+ if (start_sector == 0)
+ {
+ /* First sector is protected by file & page numbers in the page header. */
+ start_sector= 1;
+ }
+
+ DBUG_PRINT("enter", ("Write counter:%u value:%u offset:%u, "
+ "last protected:%u start sector:%u",
+ (uint) cursor->write_counter,
+ (uint) value,
+ (uint) cursor->previous_offset,
+ (uint) last_protected_sector, (uint) start_sector));
+ if (last_protected_sector == start_sector)
+ {
+ i= last_protected_sector;
+ offset= last_protected_sector * DISK_DRIVE_SECTOR_SIZE;
+ /* restore data, because we modified sector which was protected */
+ if (offset < cursor->previous_offset)
+ page[offset]= table[i];
+ }
+ for (i= start_sector, offset= start_sector * DISK_DRIVE_SECTOR_SIZE;
+ i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
+ i++, (offset+= DISK_DRIVE_SECTOR_SIZE))
+ {
+ DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x",
+ i, offset, (uint) page[offset]));
+ table[i]= page[offset];
+ page[offset]= value;
+ DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x",
+ i, offset, (uint) page[offset]));
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Calculate CRC32 of given area
+
+ SYNOPSIS
+ translog_crc()
+ area Pointer of the area beginning
+ length The Area length
+
+ RETURN
+ CRC32
+*/
+
+static uint32 translog_crc(uchar *area, uint length)
+{
+ DBUG_ENTER("translog_crc");
+ DBUG_RETURN(crc32(0L, (unsigned char*) area, length));
+}
+
+
+/*
+ Finish current page with zeros
+
+ SYNOPSIS
+ translog_finish_page()
+ horizon \ horizon & buffer pointers
+ cursor /
+*/
+
+static void translog_finish_page(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor)
+{
+ uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_fill;
+ uchar *page= cursor->ptr - cursor->current_page_fill;
+ DBUG_ENTER("translog_finish_page");
+ DBUG_PRINT("enter", ("Buffer: #%u 0x%lx "
+ "Buffer addr: (%lu,0x%lx) "
+ "Page addr: (%lu,0x%lx) "
+ "size:%lu (%lu) Pg:%u left:%u",
+ (uint) cursor->buffer_no, (ulong) cursor->buffer,
+ LSN_IN_PARTS(cursor->buffer->offset),
+ (ulong) LSN_FILE_NO(*horizon),
+ (ulong) (LSN_OFFSET(*horizon) -
+ cursor->current_page_fill),
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr -cursor->buffer->buffer),
+ (uint) cursor->current_page_fill, (uint) left));
+ DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset));
+ translog_check_cursor(cursor);
+ if (cursor->protected)
+ {
+ DBUG_PRINT("info", ("Already protected and finished"));
+ DBUG_VOID_RETURN;
+ }
+ cursor->protected= 1;
+
+ DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
+ if (left != 0)
+ {
+ DBUG_PRINT("info", ("left: %u", (uint) left));
+ memset(cursor->ptr, TRANSLOG_FILLER, left);
+ cursor->ptr+= left;
+ (*horizon)+= left; /* offset increasing */
+ if (!cursor->chaser)
+ cursor->buffer->size+= left;
+ /* We are finishing the page so reset the counter */
+ cursor->current_page_fill= 0;
+ DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx "
+ "chaser: %d Size: %lu (%lu)",
+ (uint) cursor->buffer->buffer_no,
+ (ulong) cursor->buffer, cursor->chaser,
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer)));
+ translog_check_cursor(cursor);
+ }
+ /*
+ When we are finishing the page other thread might not finish the page
+ header yet (in case if we started from the middle of the page) so we
+ have to read log_descriptor.flags but not the flags from the page.
+ */
+ if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
+ {
+ translog_put_sector_protection(page, cursor);
+ DBUG_PRINT("info", ("drop write_counter"));
+ cursor->write_counter= 0;
+ cursor->previous_offset= 0;
+ }
+ if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
+ {
+ uint32 crc= translog_crc(page + log_descriptor.page_overhead,
+ TRANSLOG_PAGE_SIZE -
+ log_descriptor.page_overhead);
+ DBUG_PRINT("info", ("CRC: %lx", (ulong) crc));
+ /* We have page number, file number and flag before crc */
+ int4store(page + 3 + 3 + 1, crc);
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ @brief Wait until all threads have finished filling this buffer.
+
+ @param buffer This buffer should be check
+*/
+
+static void translog_wait_for_writers(struct st_translog_buffer *buffer)
+{
+ DBUG_ENTER("translog_wait_for_writers");
+ DBUG_PRINT("enter", ("Buffer #%u 0x%lx copies in progress: %u",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (int) buffer->copy_to_buffer_in_progress));
+ translog_buffer_lock_assert_owner(buffer);
+
+ while (buffer->copy_to_buffer_in_progress)
+ {
+ DBUG_PRINT("info", ("wait for writers... buffer: #%u 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer));
+ DBUG_ASSERT(buffer->file != NULL);
+ pthread_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
+ DBUG_PRINT("info", ("wait for writers done buffer: #%u 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer));
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+
+ Wait for buffer to become free
+
+ SYNOPSIS
+ translog_wait_for_buffer_free()
+ buffer The buffer we are waiting for
+
+ NOTE
+ - this buffer should be locked
+*/
+
+static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer)
+{
+ DBUG_ENTER("translog_wait_for_buffer_free");
+ DBUG_PRINT("enter", ("Buffer: #%u 0x%lx copies in progress: %u "
+ "File: %d size: %lu",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (int) buffer->copy_to_buffer_in_progress,
+ (buffer->file ? buffer->file->handler.file : -1),
+ (ulong) buffer->size));
+
+ translog_wait_for_writers(buffer);
+
+ while (buffer->file != NULL)
+ {
+ DBUG_PRINT("info", ("wait for writers... buffer: #%u 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer));
+ pthread_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
+ DBUG_PRINT("info", ("wait for writers done. buffer: #%u 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer));
+ }
+ DBUG_ASSERT(buffer->copy_to_buffer_in_progress == 0);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Initialize the cursor for a buffer
+
+ SYNOPSIS
+ translog_cursor_init()
+ buffer The buffer
+ cursor It's cursor
+ buffer_no Number of buffer
+*/
+
+static void translog_cursor_init(struct st_buffer_cursor *cursor,
+ struct st_translog_buffer *buffer,
+ uint8 buffer_no)
+{
+ DBUG_ENTER("translog_cursor_init");
+ cursor->ptr= buffer->buffer;
+ cursor->buffer= buffer;
+ cursor->buffer_no= buffer_no;
+ cursor->current_page_fill= 0;
+ cursor->chaser= (cursor != &log_descriptor.bc);
+ cursor->write_counter= 0;
+ cursor->previous_offset= 0;
+ cursor->protected= 0;
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ @brief Initialize buffer for the current file, and a cursor for this buffer.
+
+ @param buffer The buffer
+ @param cursor It's cursor
+ @param buffer_no Number of buffer
+*/
+
+static void translog_start_buffer(struct st_translog_buffer *buffer,
+ struct st_buffer_cursor *cursor,
+ uint buffer_no)
+{
+ DBUG_ENTER("translog_start_buffer");
+ DBUG_PRINT("enter",
+ ("Assign buffer: #%u (0x%lx) offset: 0x%lx(%lu)",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) LSN_OFFSET(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon)));
+ DBUG_ASSERT(buffer_no == buffer->buffer_no);
+ buffer->last_lsn= LSN_IMPOSSIBLE;
+ buffer->offset= log_descriptor.horizon;
+ buffer->next_buffer_offset= LSN_IMPOSSIBLE;
+ buffer->file= get_current_logfile();
+ buffer->overlay= 0;
+ buffer->size= 0;
+ translog_cursor_init(cursor, buffer, buffer_no);
+ DBUG_PRINT("info", ("file: #%ld (%d) init cursor #%u: 0x%lx "
+ "chaser: %d Size: %lu (%lu)",
+ (long) (buffer->file ? buffer->file->number : 0),
+ (buffer->file ? buffer->file->handler.file : -1),
+ (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ cursor->chaser, (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer)));
+ translog_check_cursor(cursor);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ @brief Switch to the next buffer in a chain.
+
+ @param horizon \ Pointers on current position in file and buffer
+ @param cursor /
+ @param new_file Also start new file
+
+ @note
+ - loghandler should be locked
+ - after return new and old buffer still are locked
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor,
+ my_bool new_file)
+{
+ uint old_buffer_no= cursor->buffer_no;
+ uint new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
+ struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no;
+ my_bool chasing= cursor->chaser;
+ DBUG_ENTER("translog_buffer_next");
+
+ DBUG_PRINT("info", ("horizon: (%lu,0x%lx) chasing: %d",
+ LSN_IN_PARTS(log_descriptor.horizon), chasing));
+
+ DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0);
+
+ translog_finish_page(horizon, cursor);
+
+ if (!chasing)
+ {
+ translog_buffer_lock(new_buffer);
+ translog_wait_for_buffer_free(new_buffer);
+ }
+ else
+ DBUG_ASSERT(new_buffer->file != NULL);
+
+ if (new_file)
+ {
+
+ /* move the horizon to the next file and its header page */
+ (*horizon)+= LSN_ONE_FILE;
+ (*horizon)= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE);
+ if (!chasing && translog_create_new_file())
+ {
+ DBUG_RETURN(1);
+ }
+ }
+
+ /* prepare next page */
+ if (chasing)
+ translog_cursor_init(cursor, new_buffer, new_buffer_no);
+ else
+ {
+ translog_lock_assert_owner();
+ translog_start_buffer(new_buffer, cursor, new_buffer_no);
+ }
+ log_descriptor.buffers[old_buffer_no].next_buffer_offset= new_buffer->offset;
+ translog_new_page_header(horizon, cursor);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Sets max LSN sent to file, and address from which data is only in the buffer
+
+ SYNOPSIS
+ translog_set_sent_to_disk()
+ lsn LSN to assign
+ in_buffers to assign to in_buffers_only
+
+ TODO: use atomic operations if possible (64bit architectures?)
+*/
+
+static void translog_set_sent_to_disk(LSN lsn, TRANSLOG_ADDRESS in_buffers)
+{
+ DBUG_ENTER("translog_set_sent_to_disk");
+ pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
+ DBUG_PRINT("enter", ("lsn: (%lu,0x%lx) in_buffers: (%lu,0x%lx) "
+ "in_buffers_only: (%lu,0x%lx)",
+ LSN_IN_PARTS(lsn),
+ LSN_IN_PARTS(in_buffers),
+ LSN_IN_PARTS(log_descriptor.in_buffers_only)));
+ DBUG_ASSERT(cmp_translog_addr(lsn, log_descriptor.sent_to_disk) >= 0);
+ log_descriptor.sent_to_disk= lsn;
+ /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
+ if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
+ {
+ log_descriptor.in_buffers_only= in_buffers;
+ DBUG_PRINT("info", ("set new in_buffers_only"));
+ }
+ pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Sets address from which data is only in the buffer
+
+ SYNOPSIS
+ translog_set_only_in_buffers()
+ lsn LSN to assign
+ in_buffers to assign to in_buffers_only
+*/
+
+static void translog_set_only_in_buffers(TRANSLOG_ADDRESS in_buffers)
+{
+ DBUG_ENTER("translog_set_only_in_buffers");
+ pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
+ DBUG_PRINT("enter", ("in_buffers: (%lu,0x%lx) "
+ "in_buffers_only: (%lu,0x%lx)",
+ LSN_IN_PARTS(in_buffers),
+ LSN_IN_PARTS(log_descriptor.in_buffers_only)));
+ /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
+ if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
+ {
+ if (translog_status != TRANSLOG_OK)
+ DBUG_VOID_RETURN;
+ log_descriptor.in_buffers_only= in_buffers;
+ DBUG_PRINT("info", ("set new in_buffers_only"));
+ }
+ pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Gets address from which data is only in the buffer
+
+ SYNOPSIS
+ translog_only_in_buffers()
+
+ RETURN
+ address from which data is only in the buffer
+*/
+
+static TRANSLOG_ADDRESS translog_only_in_buffers()
+{
+ register TRANSLOG_ADDRESS addr;
+ DBUG_ENTER("translog_only_in_buffers");
+ pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
+ addr= log_descriptor.in_buffers_only;
+ pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
+ DBUG_RETURN(addr);
+}
+
+
+/*
+ Get max LSN sent to file
+
+ SYNOPSIS
+ translog_get_sent_to_disk()
+
+ RETURN
+ max LSN send to file
+*/
+
+static LSN translog_get_sent_to_disk()
+{
+ register LSN lsn;
+ DBUG_ENTER("translog_get_sent_to_disk");
+ pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
+ lsn= log_descriptor.sent_to_disk;
+ pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
+ DBUG_RETURN(lsn);
+}
+
+
+/*
+ Get first chunk address on the given page
+
+ SYNOPSIS
+ translog_get_first_chunk_offset()
+ page The page where to find first chunk
+
+ RETURN
+ first chunk offset
+*/
+
+static my_bool translog_get_first_chunk_offset(uchar *page)
+{
+ DBUG_ENTER("translog_get_first_chunk_offset");
+ DBUG_ASSERT(page[TRANSLOG_PAGE_FLAGS] < TRANSLOG_FLAGS_NUM);
+ DBUG_RETURN(page_overhead[page[TRANSLOG_PAGE_FLAGS]]);
+}
+
+
+/*
+ Write coded length of record
+
+ SYNOPSIS
+ translog_write_variable_record_1group_code_len
+ dst Destination buffer pointer
+ length Length which should be coded
+ header_len Calculated total header length
+*/
+
+static void
+translog_write_variable_record_1group_code_len(uchar *dst,
+ translog_size_t length,
+ uint16 header_len)
+{
+ switch (header_len) {
+ case 6: /* (5 + 1) */
+ DBUG_ASSERT(length <= 250);
+ *dst= (uint8) length;
+ return;
+ case 8: /* (5 + 3) */
+ DBUG_ASSERT(length <= 0xFFFF);
+ *dst= 251;
+ int2store(dst + 1, length);
+ return;
+ case 9: /* (5 + 4) */
+ DBUG_ASSERT(length <= (ulong) 0xFFFFFF);
+ *dst= 252;
+ int3store(dst + 1, length);
+ return;
+ case 10: /* (5 + 5) */
+ *dst= 253;
+ int4store(dst + 1, length);
+ return;
+ default:
+ DBUG_ASSERT(0);
+ }
+ return;
+}
+
+
+/*
+ Decode record data length and advance given pointer to the next field
+
+ SYNOPSIS
+ translog_variable_record_1group_decode_len()
+ src The pointer to the pointer to the length beginning
+
+ RETURN
+ decoded length
+*/
+
+static translog_size_t translog_variable_record_1group_decode_len(uchar **src)
+{
+ uint8 first= (uint8) (**src);
+ switch (first) {
+ case 251:
+ (*src)+= 3;
+ return (uint2korr((*src) - 2));
+ case 252:
+ (*src)+= 4;
+ return (uint3korr((*src) - 3));
+ case 253:
+ (*src)+= 5;
+ return (uint4korr((*src) - 4));
+ case 254:
+ case 255:
+ DBUG_ASSERT(0); /* reserved for future use */
+ return (0);
+ default:
+ (*src)++;
+ return (first);
+ }
+}
+
+
+/*
+ Get total length of this chunk (not only body)
+
+ SYNOPSIS
+ translog_get_total_chunk_length()
+ page The page where chunk placed
+ offset Offset of the chunk on this place
+
+ RETURN
+ total length of the chunk
+*/
+
+static uint16 translog_get_total_chunk_length(uchar *page, uint16 offset)
+{
+ DBUG_ENTER("translog_get_total_chunk_length");
+ switch (page[offset] & TRANSLOG_CHUNK_TYPE) {
+ case TRANSLOG_CHUNK_LSN:
+ {
+ /* 0 chunk referred as LSN (head or tail) */
+ translog_size_t rec_len;
+ uchar *start= page + offset;
+ uchar *ptr= start + 1 + 2; /* chunk type and short trid */
+ uint16 chunk_len, header_len, page_rest;
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
+ rec_len= translog_variable_record_1group_decode_len(&ptr);
+ chunk_len= uint2korr(ptr);
+ header_len= (ptr -start) + 2;
+ DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u",
+ (ulong) rec_len, (uint) chunk_len, (uint) header_len));
+ if (chunk_len)
+ {
+ DBUG_PRINT("info", ("chunk len: %u + %u = %u",
+ (uint) header_len, (uint) chunk_len,
+ (uint) (chunk_len + header_len)));
+ DBUG_RETURN(chunk_len + header_len);
+ }
+ page_rest= TRANSLOG_PAGE_SIZE - offset;
+ DBUG_PRINT("info", ("page_rest %u", (uint) page_rest));
+ if (rec_len + header_len < page_rest)
+ DBUG_RETURN(rec_len + header_len);
+ DBUG_RETURN(page_rest);
+ }
+ case TRANSLOG_CHUNK_FIXED:
+ {
+ uchar *ptr;
+ uint type= page[offset] & TRANSLOG_REC_TYPE;
+ uint length;
+ int i;
+ /* 1 (pseudo)fixed record (also LSN) */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED"));
+ DBUG_ASSERT(log_record_type_descriptor[type].rclass ==
+ LOGRECTYPE_FIXEDLENGTH ||
+ log_record_type_descriptor[type].rclass ==
+ LOGRECTYPE_PSEUDOFIXEDLENGTH);
+ if (log_record_type_descriptor[type].rclass == LOGRECTYPE_FIXEDLENGTH)
+ {
+ DBUG_PRINT("info",
+ ("Fixed length: %u",
+ (uint) (log_record_type_descriptor[type].fixed_length + 3)));
+ DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3);
+ }
+
+ ptr= page + offset + 3; /* first compressed LSN */
+ length= log_record_type_descriptor[type].fixed_length + 3;
+ for (i= 0; i < log_record_type_descriptor[type].compressed_LSN; i++)
+ {
+ /* first 2 bits is length - 2 */
+ uint len= (((uint8) (*ptr)) >> 6) + 2;
+ if (ptr[0] == 0 && ((uint8) ptr[1]) == 1)
+ len+= LSN_STORE_SIZE; /* case of full LSN storing */
+ ptr+= len;
+ /* subtract saved bytes */
+ length-= (LSN_STORE_SIZE - len);
+ }
+ DBUG_PRINT("info", ("Pseudo-fixed length: %u", length));
+ DBUG_RETURN(length);
+ }
+ case TRANSLOG_CHUNK_NOHDR:
+ /* 2 no header chunk (till page end) */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR length: %u",
+ (uint) (TRANSLOG_PAGE_SIZE - offset)));
+ DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset);
+ case TRANSLOG_CHUNK_LNGTH: /* 3 chunk with chunk length */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH"));
+ DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3);
+ DBUG_PRINT("info", ("length: %u", uint2korr(page + offset + 1) + 3));
+ DBUG_RETURN(uint2korr(page + offset + 1) + 3);
+ default:
+ DBUG_ASSERT(0);
+ DBUG_RETURN(0);
+ }
+}
+
+
+/*
+ Flush given buffer
+
+ SYNOPSIS
+ translog_buffer_flush()
+ buffer This buffer should be flushed
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_buffer_flush(struct st_translog_buffer *buffer)
+{
+ uint32 i, pg;
+ TRANSLOG_FILE *file;
+ DBUG_ENTER("translog_buffer_flush");
+ DBUG_ASSERT(buffer->file != NULL);
+ DBUG_PRINT("enter",
+ ("Buffer: #%u 0x%lx file: %d offset: (%lu,0x%lx) size: %lu",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ buffer->file->handler.file,
+ LSN_IN_PARTS(buffer->offset),
+ (ulong) buffer->size));
+ translog_buffer_lock_assert_owner(buffer);
+
+
+ translog_wait_for_writers(buffer);
+
+ if (buffer->overlay && buffer->overlay->file == buffer->file &&
+ cmp_translog_addr(buffer->overlay->offset + buffer->overlay->size,
+ buffer->offset) > 0)
+ {
+ /*
+ This can't happen for normal translog_flush,
+ only during destroying the loghandler
+ */
+ struct st_translog_buffer *overlay= buffer->overlay;
+ TRANSLOG_ADDRESS buffer_offset= buffer->offset;
+ TRANSLOG_FILE *fl= buffer->file;
+ translog_buffer_unlock(buffer);
+ translog_buffer_lock(overlay);
+ /* rechecks under mutex protection that overlay is still our overlay */
+ if (buffer->overlay->file == fl &&
+ cmp_translog_addr(buffer->overlay->offset + buffer->overlay->size,
+ buffer_offset) > 0)
+ {
+ translog_wait_for_buffer_free(overlay);
+ }
+ translog_buffer_unlock(overlay);
+ translog_buffer_lock(buffer);
+ if (buffer->file != NULL && buffer_offset == buffer->offset)
+ {
+ /*
+ This means that somebody else flushed the buffer while we was
+ waiting for overlay then for locking buffer again.
+ It is possible for single request for flush and destroying the
+ loghandler.
+ */
+ DBUG_RETURN(0);
+ }
+ }
+
+ /*
+ Send page by page in the pagecache what we are going to write on the
+ disk
+ */
+ file= buffer->file;
+ for (i= 0, pg= LSN_OFFSET(buffer->offset) / TRANSLOG_PAGE_SIZE;
+ i < buffer->size;
+ i+= TRANSLOG_PAGE_SIZE, pg++)
+ {
+ TRANSLOG_ADDRESS addr= (buffer->offset + i);
+ TRANSLOG_VALIDATOR_DATA data;
+ data.addr= &addr;
+ DBUG_ASSERT(log_descriptor.pagecache->block_size == TRANSLOG_PAGE_SIZE);
+ DBUG_ASSERT(i + TRANSLOG_PAGE_SIZE <= buffer->size);
+ if (translog_status != TRANSLOG_OK && translog_status != TRANSLOG_SHUTDOWN)
+ DBUG_RETURN(1);
+ if (pagecache_inject(log_descriptor.pagecache,
+ &file->handler, pg, 3,
+ buffer->buffer + i,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED, 0,
+ LSN_IMPOSSIBLE))
+ {
+ DBUG_PRINT("error", ("Can't write page (%lu,0x%lx) to pagecache",
+ (ulong) buffer->file,
+ (ulong) (LSN_OFFSET(buffer->offset)+ i)));
+ translog_stop_writing();
+ DBUG_RETURN(1);
+ }
+ }
+ file->is_sync= 0;
+ if (my_pwrite(file->handler.file, (char*) buffer->buffer,
+ buffer->size, LSN_OFFSET(buffer->offset),
+ log_write_flags))
+ {
+ DBUG_PRINT("error", ("Can't write buffer (%lu,0x%lx) size %lu "
+ "to the disk (%d)",
+ (ulong) file->handler.file,
+ (ulong) LSN_OFFSET(buffer->offset),
+ (ulong) buffer->size, errno));
+ translog_stop_writing();
+ DBUG_RETURN(1);
+ }
+ /*
+ Dropping the flag in such way can make false alarm: signalling than the
+ file in not sync when it is sync, but the situation is quite rare and
+ protections with mutexes give much more overhead to the whole engine
+ */
+ file->is_sync= 0;
+
+ if (LSN_OFFSET(buffer->last_lsn) != 0) /* if buffer->last_lsn is set */
+ translog_set_sent_to_disk(buffer->last_lsn,
+ buffer->next_buffer_offset);
+ else
+ translog_set_only_in_buffers(buffer->next_buffer_offset);
+ /* Free buffer */
+ buffer->file= NULL;
+ buffer->overlay= 0;
+ pthread_cond_broadcast(&buffer->waiting_filling_buffer);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Recover page with sector protection (wipe out failed chunks)
+
+ SYNOPSYS
+ translog_recover_page_up_to_sector()
+ page reference on the page
+ offset offset of failed sector
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset)
+{
+ uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end;
+ DBUG_ENTER("translog_recover_page_up_to_sector");
+ DBUG_PRINT("enter", ("offset: %u first chunk: %u",
+ (uint) offset, (uint) chunk_offset));
+
+ while (page[chunk_offset] != TRANSLOG_FILLER && chunk_offset < offset)
+ {
+ uint16 chunk_length;
+ if ((chunk_length=
+ translog_get_total_chunk_length(page, chunk_offset)) == 0)
+ {
+ DBUG_PRINT("error", ("cant get chunk length (offset %u)",
+ (uint) chunk_offset));
+ DBUG_RETURN(1);
+ }
+ DBUG_PRINT("info", ("chunk: offset: %u length %u",
+ (uint) chunk_offset, (uint) chunk_length));
+ if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE)
+ {
+ DBUG_PRINT("error", ("damaged chunk (offset %u) in trusted area",
+ (uint) chunk_offset));
+ DBUG_RETURN(1);
+ }
+ chunk_offset+= chunk_length;
+ }
+
+ valid_chunk_end= chunk_offset;
+ /* end of trusted area - sector parsing */
+ while (page[chunk_offset] != TRANSLOG_FILLER)
+ {
+ uint16 chunk_length;
+ if ((chunk_length=
+ translog_get_total_chunk_length(page, chunk_offset)) == 0)
+ break;
+
+ DBUG_PRINT("info", ("chunk: offset: %u length %u",
+ (uint) chunk_offset, (uint) chunk_length));
+ if (((ulong) chunk_offset) + ((ulong) chunk_length) >
+ (uint) (offset + DISK_DRIVE_SECTOR_SIZE))
+ break;
+
+ chunk_offset+= chunk_length;
+ valid_chunk_end= chunk_offset;
+ }
+ DBUG_PRINT("info", ("valid chunk end offset: %u", (uint) valid_chunk_end));
+
+ memset(page + valid_chunk_end, TRANSLOG_FILLER,
+ TRANSLOG_PAGE_SIZE - valid_chunk_end);
+
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Dummy write callback.
+*/
+
+static my_bool
+translog_dummy_callback(uchar *page __attribute__((unused)),
+ pgcache_page_no_t page_no __attribute__((unused)),
+ uchar* data_ptr __attribute__((unused)))
+{
+ return 0;
+}
+
+
+/**
+ @brief Checks and removes sector protection.
+
+ @param page reference on the page content.
+ @param file transaction log descriptor.
+
+ @retvat 0 OK
+ @retval 1 Error
+*/
+
+static my_bool
+translog_check_sector_protection(uchar *page, TRANSLOG_FILE *file)
+{
+ uint i, offset;
+ uchar *table= page + page_overhead[page[TRANSLOG_PAGE_FLAGS]] -
+ TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE; ;
+ uint8 current= table[0];
+ DBUG_ENTER("translog_check_sector_protection");
+
+ for (i= 1, offset= DISK_DRIVE_SECTOR_SIZE;
+ i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
+ i++, offset+= DISK_DRIVE_SECTOR_SIZE)
+ {
+ /*
+ TODO: add chunk counting for "suspecting" sectors (difference is
+ more than 1-2), if difference more then present chunks then it is
+ the problem.
+ */
+ uint8 test= page[offset];
+ DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx "
+ "read: 0x%x stored: 0x%x%x",
+ i, offset, (ulong) current,
+ (uint) uint2korr(page + offset), (uint) table[i],
+ (uint) table[i + 1]));
+ /*
+ 3 is minimal possible record length. So we can have "distance"
+ between 2 sectors value more then DISK_DRIVE_SECTOR_SIZE / 3
+ only if it is old value, i.e. the sector was not written.
+ */
+ if (((test < current) &&
+ (0xFFL - current + test > DISK_DRIVE_SECTOR_SIZE / 3)) ||
+ ((test >= current) &&
+ (test - current > DISK_DRIVE_SECTOR_SIZE / 3)))
+ {
+ if (translog_recover_page_up_to_sector(page, offset))
+ DBUG_RETURN(1);
+ file->was_recovered= 1;
+ DBUG_RETURN(0);
+ }
+
+ /* Restore value on the page */
+ page[offset]= table[i];
+ current= test;
+ DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx "
+ "read: 0x%x stored: 0x%x",
+ i, offset, (ulong) current,
+ (uint) page[offset], (uint) table[i]));
+ }
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Log page validator (read callback)
+
+ @param page The page data to check
+ @param page_no The page number (<offset>/<page length>)
+ @param data_ptr Read callback data pointer (pointer to TRANSLOG_FILE)
+
+
+ @todo: add turning loghandler to read-only mode after merging with
+ that patch.
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool translog_page_validator(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar* data_ptr)
+{
+ uint this_page_page_overhead;
+ uint flags;
+ uchar *page_pos;
+ TRANSLOG_FILE *data= (TRANSLOG_FILE *) data_ptr;
+#ifndef DBUG_OFF
+ uint32 offset= page_no * TRANSLOG_PAGE_SIZE;
+#endif
+ DBUG_ENTER("translog_page_validator");
+
+ data->was_recovered= 0;
+
+ if (uint3korr(page) != page_no ||
+ uint3korr(page + 3) != data->number)
+ {
+ DBUG_PRINT("error", ("Page (%lu,0x%lx): "
+ "page address written in the page is incorrect: "
+ "File %lu instead of %lu or page %lu instead of %lu",
+ (ulong) data->number, (ulong) offset,
+ (ulong) uint3korr(page + 3), (ulong) data->number,
+ (ulong) uint3korr(page),
+ (ulong) page_no));
+ DBUG_RETURN(1);
+ }
+ flags= (uint)(page[TRANSLOG_PAGE_FLAGS]);
+ this_page_page_overhead= page_overhead[flags];
+ if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
+ TRANSLOG_RECORD_CRC))
+ {
+ DBUG_PRINT("error", ("Page (%lu,0x%lx): "
+ "Garbage in the page flags field detected : %x",
+ (ulong) data->number, (ulong) offset,
+ (uint) flags));
+ DBUG_RETURN(1);
+ }
+ page_pos= page + (3 + 3 + 1);
+ if (flags & TRANSLOG_PAGE_CRC)
+ {
+ uint32 crc= translog_crc(page + this_page_page_overhead,
+ TRANSLOG_PAGE_SIZE -
+ this_page_page_overhead);
+ if (crc != uint4korr(page_pos))
+ {
+ DBUG_PRINT("error", ("Page (%lu,0x%lx): "
+ "CRC mismatch: calculated: %lx on the page %lx",
+ (ulong) data->number, (ulong) offset,
+ (ulong) crc, (ulong) uint4korr(page_pos)));
+ DBUG_RETURN(1);
+ }
+ page_pos+= CRC_SIZE; /* Skip crc */
+ }
+ if (flags & TRANSLOG_SECTOR_PROTECTION &&
+ translog_check_sector_protection(page, data))
+ {
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Locks the loghandler.
+
+ @note See comment before buffer 'mutex' variable.
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool translog_lock()
+{
+ uint8 current_buffer;
+ DBUG_ENTER("translog_lock");
+
+ /*
+ Locking the loghandler mean locking current buffer, but it can change
+ during locking, so we should check it
+ */
+ for (;;)
+ {
+ /*
+ log_descriptor.bc.buffer_no is only one byte so its reading is
+ an atomic operation
+ */
+ current_buffer= log_descriptor.bc.buffer_no;
+ if (translog_buffer_lock(log_descriptor.buffers + current_buffer))
+ DBUG_RETURN(1);
+ if (log_descriptor.bc.buffer_no == current_buffer)
+ break;
+ translog_buffer_unlock(log_descriptor.buffers + current_buffer);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Unlock the loghandler
+
+ SYNOPSIS
+ translog_unlock()
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+my_bool translog_unlock()
+{
+ DBUG_ENTER("translog_unlock");
+ translog_buffer_unlock(log_descriptor.bc.buffer);
+
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Get log page by file number and offset of the beginning of the page
+
+ @param data validator data, which contains the page address
+ @param buffer buffer for page placing
+ (might not be used in some cache implementations)
+ @param direct_link if it is not NULL then caller can accept direct
+ link to the page cache
+
+ @retval NULL Error
+ @retval # pointer to the page cache which should be used to read this page
+*/
+
+static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer,
+ PAGECACHE_BLOCK_LINK **direct_link)
+{
+ TRANSLOG_ADDRESS addr= *(data->addr), in_buffers;
+ uint32 file_no= LSN_FILE_NO(addr);
+ TRANSLOG_FILE *file;
+ DBUG_ENTER("translog_get_page");
+ DBUG_PRINT("enter", ("File: %lu Offset: %lu(0x%lx)",
+ (ulong) file_no,
+ (ulong) LSN_OFFSET(addr),
+ (ulong) LSN_OFFSET(addr)));
+
+ /* it is really page address */
+ DBUG_ASSERT(LSN_OFFSET(addr) % TRANSLOG_PAGE_SIZE == 0);
+
+ if (direct_link)
+ *direct_link= NULL;
+
+ in_buffers= translog_only_in_buffers();
+ DBUG_PRINT("info", ("in_buffers: (%lu,0x%lx)",
+ LSN_IN_PARTS(in_buffers)));
+ if (in_buffers != LSN_IMPOSSIBLE &&
+ cmp_translog_addr(addr, in_buffers) >= 0)
+ {
+ translog_lock();
+ /* recheck with locked loghandler */
+ in_buffers= translog_only_in_buffers();
+ if (cmp_translog_addr(addr, in_buffers) >= 0)
+ {
+ uint16 buffer_no= log_descriptor.bc.buffer_no;
+#ifndef DBUG_OFF
+ uint16 buffer_start= buffer_no;
+#endif
+ struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
+ struct st_translog_buffer *curr_buffer= log_descriptor.bc.buffer;
+ for (;;)
+ {
+ /*
+ if the page is in the buffer and it is the last version of the
+ page (in case of division the page by buffer flush)
+ */
+ if (curr_buffer->file != NULL &&
+ cmp_translog_addr(addr, curr_buffer->offset) >= 0 &&
+ cmp_translog_addr(addr,
+ (curr_buffer->next_buffer_offset ?
+ curr_buffer->next_buffer_offset:
+ curr_buffer->offset + curr_buffer->size)) < 0)
+ {
+ int is_last_unfinished_page;
+ uint last_protected_sector= 0;
+ uchar *from, *table= NULL;
+ TRANSLOG_FILE file_copy;
+ translog_wait_for_writers(curr_buffer);
+ DBUG_ASSERT(LSN_FILE_NO(addr) == LSN_FILE_NO(curr_buffer->offset));
+ from= curr_buffer->buffer + (addr - curr_buffer->offset);
+ memcpy(buffer, from, TRANSLOG_PAGE_SIZE);
+ /*
+ We can use copy then in translog_page_validator() because it
+ do not put it permanently somewhere.
+ We have to use copy because after releasing log lock we can't
+ guaranty that the file still be present (in real life it will be
+ present but theoretically possible that it will be released
+ already from last files cache);
+ */
+ file_copy= *(curr_buffer->file);
+ file_copy.handler.callback_data= (uchar*) &file_copy;
+ is_last_unfinished_page= ((log_descriptor.bc.buffer ==
+ curr_buffer) &&
+ (log_descriptor.bc.ptr >= from) &&
+ (log_descriptor.bc.ptr <
+ from + TRANSLOG_PAGE_SIZE));
+ if (is_last_unfinished_page &&
+ (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION))
+ {
+ last_protected_sector= ((log_descriptor.bc.previous_offset - 1) /
+ DISK_DRIVE_SECTOR_SIZE);
+ table= buffer + log_descriptor.page_overhead -
+ TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
+ }
+
+ DBUG_ASSERT(buffer_unlock == curr_buffer);
+ translog_buffer_unlock(buffer_unlock);
+ if (is_last_unfinished_page)
+ {
+ uint i;
+ /*
+ This is last unfinished page => we should not check CRC and
+ remove only that protection which already installed (no need
+ to check it)
+
+ We do not check the flag of sector protection, because if
+ (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) is
+ not set then last_protected_sector will be 0 so following loop
+ will be never executed
+ */
+ DBUG_PRINT("info", ("This is last unfinished page, "
+ "last protected sector %u",
+ last_protected_sector));
+ for (i= 1; i <= last_protected_sector; i++)
+ {
+ uint offset= i * DISK_DRIVE_SECTOR_SIZE;
+ DBUG_PRINT("info", ("Sector %u: 0x%02x <- 0x%02x",
+ i, buffer[offset],
+ table[i]));
+ buffer[offset]= table[i];
+ }
+ }
+ else
+ {
+ /*
+ This IF should be true because we use in-memory data which
+ supposed to be correct.
+ */
+ if (translog_page_validator((uchar*) buffer,
+ LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
+ (uchar*) &file_copy))
+ {
+ DBUG_ASSERT(0);
+ buffer= NULL;
+ }
+ }
+ DBUG_RETURN(buffer);
+ }
+ buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
+ curr_buffer= log_descriptor.buffers + buffer_no;
+ translog_buffer_lock(curr_buffer);
+ translog_buffer_unlock(buffer_unlock);
+ buffer_unlock= curr_buffer;
+ /* we can't make a full circle */
+ DBUG_ASSERT(buffer_start != buffer_no);
+ }
+ }
+ translog_unlock();
+ }
+ file= get_logfile_by_number(file_no);
+ buffer=
+ (uchar*) pagecache_read(log_descriptor.pagecache, &file->handler,
+ LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
+ 3, (direct_link ? NULL : (char*) buffer),
+ PAGECACHE_PLAIN_PAGE,
+ (direct_link ?
+ PAGECACHE_LOCK_READ :
+ PAGECACHE_LOCK_LEFT_UNLOCKED),
+ direct_link);
+ DBUG_PRINT("info", ("Direct link is assigned to : 0x%lx * 0x%lx",
+ (ulong) direct_link,
+ (ulong)(direct_link ? *direct_link : NULL)));
+ data->was_recovered= file->was_recovered;
+ DBUG_RETURN(buffer);
+}
+
+
+/**
+ @brief free direct log page link
+
+ @param direct_link the direct log page link to be freed
+
+*/
+
+static void translog_free_link(PAGECACHE_BLOCK_LINK *direct_link)
+{
+ DBUG_ENTER("translog_free_link");
+ DBUG_PRINT("info", ("Direct link: 0x%lx",
+ (ulong) direct_link));
+ if (direct_link)
+ pagecache_unlock_by_link(log_descriptor.pagecache, direct_link,
+ PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN,
+ LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0);
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ @brief Finds last full page of the given log file.
+
+ @param addr address structure to fill with data, which contain
+ file number of the log file
+ @param last_page_ok Result of the check whether last page OK.
+ (for now only we check only that file length
+ divisible on page length).
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr,
+ my_bool *last_page_ok)
+{
+ MY_STAT stat_buff, *local_stat;
+ char path[FN_REFLEN];
+ uint32 rec_offset;
+ uint32 file_no= LSN_FILE_NO(*addr);
+ DBUG_ENTER("translog_get_last_page_addr");
+
+ if (!(local_stat= my_stat(translog_filename_by_fileno(file_no, path),
+ &stat_buff, MYF(MY_WME))))
+ DBUG_RETURN(1);
+ DBUG_PRINT("info", ("File size: %lu", (ulong) local_stat->st_size));
+ if (local_stat->st_size > TRANSLOG_PAGE_SIZE)
+ {
+ rec_offset= (((local_stat->st_size / TRANSLOG_PAGE_SIZE) - 1) *
+ TRANSLOG_PAGE_SIZE);
+ *last_page_ok= (local_stat->st_size == rec_offset + TRANSLOG_PAGE_SIZE);
+ }
+ else
+ {
+ *last_page_ok= 0;
+ rec_offset= 0;
+ }
+ *addr= MAKE_LSN(file_no, rec_offset);
+ DBUG_PRINT("info", ("Last page: 0x%lx ok: %d", (ulong) rec_offset,
+ *last_page_ok));
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Get number bytes for record length storing
+
+ @param length Record length which will be encoded
+
+ @return 1,3,4,5 - number of bytes to store given length
+*/
+
+static uint translog_variable_record_length_bytes(translog_size_t length)
+{
+ if (length < 250)
+ return 1;
+ if (length < 0xFFFF)
+ return 3;
+ if (length < (ulong) 0xFFFFFF)
+ return 4;
+ return 5;
+}
+
+
+/**
+ @brief Gets header of this chunk.
+
+ @param chunk The pointer to the chunk beginning
+
+ @retval # total length of the chunk
+ @retval 0 Error
+*/
+
+static uint16 translog_get_chunk_header_length(uchar *chunk)
+{
+ DBUG_ENTER("translog_get_chunk_header_length");
+ switch (*chunk & TRANSLOG_CHUNK_TYPE) {
+ case TRANSLOG_CHUNK_LSN:
+ {
+ /* 0 chunk referred as LSN (head or tail) */
+ translog_size_t rec_len;
+ uchar *start= chunk;
+ uchar *ptr= start + 1 + 2;
+ uint16 chunk_len, header_len;
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
+ rec_len= translog_variable_record_1group_decode_len(&ptr);
+ chunk_len= uint2korr(ptr);
+ header_len= (ptr - start) +2;
+ DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u",
+ (ulong) rec_len, (uint) chunk_len, (uint) header_len));
+ if (chunk_len)
+ {
+ /* TODO: fine header end */
+ /*
+ The last chunk of multi-group record can be base for it header
+ calculation (we skip to the first group to read the header) so if we
+ stuck here something is wrong.
+ */
+ DBUG_ASSERT(0);
+ DBUG_RETURN(0); /* Keep compiler happy */
+ }
+ DBUG_RETURN(header_len);
+ }
+ case TRANSLOG_CHUNK_FIXED:
+ {
+ /* 1 (pseudo)fixed record (also LSN) */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3"));
+ DBUG_RETURN(3);
+ }
+ case TRANSLOG_CHUNK_NOHDR:
+ /* 2 no header chunk (till page end) */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 1"));
+ DBUG_RETURN(1);
+ break;
+ case TRANSLOG_CHUNK_LNGTH:
+ /* 3 chunk with chunk length */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3"));
+ DBUG_RETURN(3);
+ break;
+ default:
+ DBUG_ASSERT(0);
+ DBUG_RETURN(0); /* Keep compiler happy */
+ }
+}
+
+
+/**
+ @brief Truncate the log to the given address. Used during the startup if the
+ end of log if corrupted.
+
+ @param addr new horizon
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool translog_truncate_log(TRANSLOG_ADDRESS addr)
+{
+ uchar *page;
+ TRANSLOG_ADDRESS current_page;
+ uint32 next_page_offset, page_rest;
+ uint32 i;
+ File fd;
+ TRANSLOG_VALIDATOR_DATA data;
+ char path[FN_REFLEN];
+ uchar page_buff[TRANSLOG_PAGE_SIZE];
+ DBUG_ENTER("translog_truncate_log");
+ /* TODO: write warning to the client */
+ DBUG_PRINT("warning", ("removing all records from (%lx,0x%lx) "
+ "till (%lx,0x%lx)",
+ LSN_IN_PARTS(addr),
+ LSN_IN_PARTS(log_descriptor.horizon)));
+ DBUG_ASSERT(cmp_translog_addr(addr, log_descriptor.horizon) < 0);
+ /* remove files between the address and horizon */
+ for (i= LSN_FILE_NO(addr) + 1; i <= LSN_FILE_NO(log_descriptor.horizon); i++)
+ if (my_delete(translog_filename_by_fileno(i, path), MYF(MY_WME)))
+ {
+ translog_unlock();
+ DBUG_RETURN(1);
+ }
+
+ /* truncate the last file up to the last page */
+ next_page_offset= LSN_OFFSET(addr);
+ next_page_offset= (next_page_offset -
+ ((next_page_offset - 1) % TRANSLOG_PAGE_SIZE + 1) +
+ TRANSLOG_PAGE_SIZE);
+ page_rest= next_page_offset - LSN_OFFSET(addr);
+ memset(page_buff, TRANSLOG_FILLER, page_rest);
+ if ((fd= open_logfile_by_number_no_cache(LSN_FILE_NO(addr))) < 0 ||
+ my_chsize(fd, next_page_offset, TRANSLOG_FILLER, MYF(MY_WME)) ||
+ (page_rest && my_pwrite(fd, page_buff, page_rest, LSN_OFFSET(addr),
+ log_write_flags)) ||
+ my_sync(fd, MYF(MY_WME)) ||
+ my_close(fd, MYF(MY_WME)) ||
+ (sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS &&
+ my_sync(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD))))
+ DBUG_RETURN(1);
+
+ /* fix the horizon */
+ log_descriptor.horizon= addr;
+ /* fix the buffer data */
+ current_page= MAKE_LSN(LSN_FILE_NO(addr), (next_page_offset -
+ TRANSLOG_PAGE_SIZE));
+ data.addr= &current_page;
+ if ((page= translog_get_page(&data, log_descriptor.buffers->buffer, NULL)) ==
+ NULL)
+ DBUG_RETURN(1);
+ if (page != log_descriptor.buffers->buffer)
+ memcpy(log_descriptor.buffers->buffer, page, TRANSLOG_PAGE_SIZE);
+ log_descriptor.bc.buffer->offset= current_page;
+ log_descriptor.bc.buffer->size= LSN_OFFSET(addr) - LSN_OFFSET(current_page);
+ log_descriptor.bc.ptr=
+ log_descriptor.buffers->buffer + log_descriptor.bc.buffer->size;
+ log_descriptor.bc.current_page_fill= log_descriptor.bc.buffer->size;
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Check log files presence
+
+ @retval 0 no log files.
+ @retval 1 there is at least 1 log file in the directory
+*/
+
+my_bool translog_is_log_files()
+{
+ MY_DIR *dirp;
+ uint i;
+ my_bool rc= FALSE;
+
+ /* Finds and removes transaction log files */
+ if (!(dirp = my_dir(log_descriptor.directory, MYF(MY_DONT_SORT))))
+ return 1;
+
+ for (i= 0; i < dirp->number_off_files; i++)
+ {
+ char *file= dirp->dir_entry[i].name;
+ if (strncmp(file, "maria_log.", 10) == 0 &&
+ file[10] >= '0' && file[10] <= '9' &&
+ file[11] >= '0' && file[11] <= '9' &&
+ file[12] >= '0' && file[12] <= '9' &&
+ file[13] >= '0' && file[13] <= '9' &&
+ file[14] >= '0' && file[14] <= '9' &&
+ file[15] >= '0' && file[15] <= '9' &&
+ file[16] >= '0' && file[16] <= '9' &&
+ file[17] >= '0' && file[17] <= '9' &&
+ file[18] == '\0')
+ {
+ rc= TRUE;
+ break;
+ }
+ }
+ my_dirend(dirp);
+ return FALSE;
+}
+
+
+/**
+ @brief Initialize transaction log
+
+ @param directory Directory where log files are put
+ @param log_file_max_size max size of one log size (for new logs creation)
+ @param server_version version of MySQL server (MYSQL_VERSION_ID)
+ @param server_id server ID (replication & Co)
+ @param pagecache Page cache for the log reads
+ @param flags flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION
+ TRANSLOG_RECORD_CRC)
+ @param read_only Put transaction log in read-only mode
+ @param init_table_func function to initialize record descriptors table
+
+ @todo
+ Free used resources in case of error.
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool translog_init_with_table(const char *directory,
+ uint32 log_file_max_size,
+ uint32 server_version,
+ uint32 server_id, PAGECACHE *pagecache,
+ uint flags, my_bool readonly,
+ void (*init_table_func)())
+{
+ int i;
+ int old_log_was_recovered= 0, logs_found= 0;
+ uint old_flags= flags;
+ uint32 start_file_num= 1;
+ TRANSLOG_ADDRESS sure_page, last_page, last_valid_page;
+ my_bool version_changed= 0;
+ DBUG_ENTER("translog_init_with_table");
+
+ id_to_share= NULL;
+
+ (*init_table_func)();
+
+ if (readonly)
+ log_descriptor.open_flags= O_BINARY | O_RDONLY;
+ else
+ log_descriptor.open_flags= O_BINARY | O_RDWR;
+ if (pthread_mutex_init(&log_descriptor.sent_to_disk_lock,
+ MY_MUTEX_INIT_FAST) ||
+ pthread_mutex_init(&log_descriptor.file_header_lock,
+ MY_MUTEX_INIT_FAST) ||
+ pthread_mutex_init(&log_descriptor.unfinished_files_lock,
+ MY_MUTEX_INIT_FAST) ||
+ pthread_mutex_init(&log_descriptor.purger_lock,
+ MY_MUTEX_INIT_FAST) ||
+ pthread_mutex_init(&log_descriptor.log_flush_lock,
+ MY_MUTEX_INIT_FAST) ||
+ my_rwlock_init(&log_descriptor.open_files_lock,
+ NULL) ||
+ my_init_dynamic_array(&log_descriptor.open_files,
+ sizeof(TRANSLOG_FILE*), 10, 10) ||
+ my_init_dynamic_array(&log_descriptor.unfinished_files,
+ sizeof(struct st_file_counter),
+ 10, 10))
+ DBUG_RETURN(1);
+ log_descriptor.min_need_file= 0;
+ log_descriptor.min_file_number= 0;
+ log_descriptor.last_lsn_checked= LSN_IMPOSSIBLE;
+
+ /* Directory to store files */
+ unpack_dirname(log_descriptor.directory, directory);
+
+ if ((log_descriptor.directory_fd= my_open(log_descriptor.directory,
+ O_RDONLY, MYF(MY_WME))) < 0)
+ {
+ my_errno= errno;
+ DBUG_PRINT("error", ("Error %d during opening directory '%s'",
+ errno, log_descriptor.directory));
+ DBUG_RETURN(1);
+ }
+
+ log_descriptor.in_buffers_only= LSN_IMPOSSIBLE;
+ DBUG_ASSERT(log_file_max_size % TRANSLOG_PAGE_SIZE == 0 &&
+ log_file_max_size >= TRANSLOG_MIN_FILE_SIZE);
+ /* max size of one log size (for new logs creation) */
+ log_file_size= log_descriptor.log_file_max_size=
+ log_file_max_size;
+ /* server version */
+ log_descriptor.server_version= server_version;
+ /* server ID */
+ log_descriptor.server_id= server_id;
+ /* Page cache for the log reads */
+ log_descriptor.pagecache= pagecache;
+ /* Flags */
+ DBUG_ASSERT((flags &
+ ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
+ TRANSLOG_RECORD_CRC)) == 0);
+ log_descriptor.flags= flags;
+ for (i= 0; i < TRANSLOG_FLAGS_NUM; i++)
+ {
+ page_overhead[i]= 7;
+ if (i & TRANSLOG_PAGE_CRC)
+ page_overhead[i]+= CRC_SIZE;
+ if (i & TRANSLOG_SECTOR_PROTECTION)
+ page_overhead[i]+= TRANSLOG_PAGE_SIZE /
+ DISK_DRIVE_SECTOR_SIZE;
+ }
+ log_descriptor.page_overhead= page_overhead[flags];
+ log_descriptor.page_capacity_chunk_2=
+ TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1;
+ compile_time_assert(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0);
+ log_descriptor.buffer_capacity_chunk_2=
+ (TRANSLOG_WRITE_BUFFER / TRANSLOG_PAGE_SIZE) *
+ log_descriptor.page_capacity_chunk_2;
+ log_descriptor.half_buffer_capacity_chunk_2=
+ log_descriptor.buffer_capacity_chunk_2 / 2;
+ DBUG_PRINT("info",
+ ("Overhead: %u pc2: %u bc2: %u, bc2/2: %u",
+ log_descriptor.page_overhead,
+ log_descriptor.page_capacity_chunk_2,
+ log_descriptor.buffer_capacity_chunk_2,
+ log_descriptor.half_buffer_capacity_chunk_2));
+
+ /*
+ last_logno and last_checkpoint_lsn were set in
+ ma_control_file_create_or_open()
+ */
+ logs_found= (last_logno != FILENO_IMPOSSIBLE);
+
+
+ /* Just to init it somehow (hack for bootstrap)*/
+ {
+ TRANSLOG_FILE *file= 0;
+ log_descriptor.min_file = log_descriptor.max_file= 1;
+ insert_dynamic(&log_descriptor.open_files, (uchar *)&file);
+ translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
+ pop_dynamic(&log_descriptor.open_files);
+ }
+
+ /* Buffers for log writing */
+ for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
+ {
+ if (translog_buffer_init(log_descriptor.buffers + i))
+ DBUG_RETURN(1);
+#ifndef DBUG_OFF
+ log_descriptor.buffers[i].buffer_no= (uint8) i;
+#endif
+ DBUG_PRINT("info", ("translog_buffer buffer #%u: 0x%lx",
+ i, (ulong) log_descriptor.buffers + i));
+ }
+
+ /*
+ last_logno and last_checkpoint_lsn were set in
+ ma_control_file_create_or_open()
+ */
+ logs_found= (last_logno != FILENO_IMPOSSIBLE);
+
+ translog_status= (readonly ? TRANSLOG_READONLY : TRANSLOG_OK);
+
+ if (logs_found)
+ {
+ my_bool pageok;
+ DBUG_PRINT("info", ("log found..."));
+ /*
+ TODO: scan directory for maria_log.XXXXXXXX files and find
+ highest XXXXXXXX & set logs_found
+ TODO: check that last checkpoint within present log addresses space
+
+ find the log end
+ */
+ if (LSN_FILE_NO(last_checkpoint_lsn) == FILENO_IMPOSSIBLE)
+ {
+ DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0);
+ /* there was no checkpoints we will read from the beginning */
+ sure_page= (LSN_ONE_FILE | TRANSLOG_PAGE_SIZE);
+ }
+ else
+ {
+ sure_page= last_checkpoint_lsn;
+ DBUG_ASSERT(LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE != 0);
+ sure_page-= LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE;
+ }
+ /* Set horizon to the beginning of the last file first */
+ log_descriptor.horizon= last_page= MAKE_LSN(last_logno, 0);
+ if (translog_get_last_page_addr(&last_page, &pageok))
+ {
+ if (!translog_is_log_files())
+ {
+ /* files was deleted, just start from the next log number */
+ start_file_num= last_logno + 1;
+ logs_found= 0;
+ }
+ else
+ DBUG_RETURN(1);
+ }
+ else if (LSN_OFFSET(last_page) == 0)
+ {
+ if (LSN_FILE_NO(last_page) == 1)
+ {
+ logs_found= 0; /* file #1 has no pages */
+ DBUG_PRINT("info", ("log found. But is is empty => no log assumed"));
+ }
+ else
+ {
+ last_page-= LSN_ONE_FILE;
+ if (translog_get_last_page_addr(&last_page, &pageok))
+ DBUG_RETURN(1);
+ }
+ }
+ if (logs_found)
+ {
+ uint32 i;
+ log_descriptor.min_file= translog_first_file(log_descriptor.horizon, 1);
+ log_descriptor.max_file= last_logno;
+ /* Open all files */
+ if (allocate_dynamic(&log_descriptor.open_files,
+ log_descriptor.max_file -
+ log_descriptor.min_file + 1))
+ DBUG_RETURN(1);
+ for (i = log_descriptor.max_file; i >= log_descriptor.min_file; i--)
+ {
+ /*
+ We can't allocate all file together because they will be freed
+ one by one
+ */
+ TRANSLOG_FILE *file= (TRANSLOG_FILE *)my_malloc(sizeof(TRANSLOG_FILE),
+ MYF(0));
+ if (file == NULL ||
+ (file->handler.file=
+ open_logfile_by_number_no_cache(i)) < 0)
+ {
+ int j;
+ for (j= i - log_descriptor.min_file - 1; j > 0; j--)
+ {
+ TRANSLOG_FILE *el=
+ *dynamic_element(&log_descriptor.open_files, j,
+ TRANSLOG_FILE **);
+ my_close(el->handler.file, MYF(MY_WME));
+ my_free(el, MYF(0));
+ }
+ if (file)
+ {
+ free(file);
+ DBUG_RETURN(1);
+ }
+ else
+ DBUG_RETURN(1);
+ }
+ translog_file_init(file, i, 1);
+ /* we allocated space so it can't fail */
+ insert_dynamic(&log_descriptor.open_files, (uchar *)&file);
+ }
+ DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
+ log_descriptor.open_files.elements);
+ }
+ }
+ else if (readonly)
+ {
+ /* There is no logs and there is read-only mode => nothing to read */
+ DBUG_PRINT("error", ("No logs and read-only mode"));
+ DBUG_RETURN(1);
+ }
+
+ if (logs_found)
+ {
+ TRANSLOG_ADDRESS current_page= sure_page;
+ my_bool pageok;
+
+ DBUG_PRINT("info", ("The log is really present"));
+ DBUG_ASSERT(sure_page <= last_page);
+
+ /* TODO: check page size */
+
+ last_valid_page= LSN_IMPOSSIBLE;
+ /*
+ Scans and validate pages. We need it to show "outside" only for sure
+ valid part of the log. If the log was damaged then fixed we have to
+ cut off damaged part before some other process start write something
+ in the log.
+ */
+ do
+ {
+ TRANSLOG_ADDRESS current_file_last_page;
+ current_file_last_page= current_page;
+ if (translog_get_last_page_addr(&current_file_last_page, &pageok))
+ DBUG_RETURN(1);
+ if (!pageok)
+ {
+ DBUG_PRINT("error", ("File %lu have no complete last page",
+ (ulong) LSN_FILE_NO(current_file_last_page)));
+ old_log_was_recovered= 1;
+ /* This file is not written till the end so it should be last */
+ last_page= current_file_last_page;
+ /* TODO: issue warning */
+ }
+ do
+ {
+ TRANSLOG_VALIDATOR_DATA data;
+ uchar buffer[TRANSLOG_PAGE_SIZE], *page;
+ data.addr= &current_page;
+ if ((page= translog_get_page(&data, buffer, NULL)) == NULL)
+ DBUG_RETURN(1);
+ if (data.was_recovered)
+ {
+ DBUG_PRINT("error", ("file no: %lu (%d) "
+ "rec_offset: 0x%lx (%lu) (%d)",
+ (ulong) LSN_FILE_NO(current_page),
+ (uint3korr(page + 3) !=
+ LSN_FILE_NO(current_page)),
+ (ulong) LSN_OFFSET(current_page),
+ (ulong) (LSN_OFFSET(current_page) /
+ TRANSLOG_PAGE_SIZE),
+ (uint3korr(page) !=
+ LSN_OFFSET(current_page) /
+ TRANSLOG_PAGE_SIZE)));
+ old_log_was_recovered= 1;
+ break;
+ }
+ old_flags= page[TRANSLOG_PAGE_FLAGS];
+ last_valid_page= current_page;
+ current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */
+ } while (current_page <= current_file_last_page);
+ current_page+= LSN_ONE_FILE;
+ current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE);
+ } while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) &&
+ !old_log_was_recovered);
+ if (last_valid_page == LSN_IMPOSSIBLE)
+ {
+ /* Panic!!! Even page which should be valid is invalid */
+ /* TODO: issue error */
+ DBUG_RETURN(1);
+ }
+ DBUG_PRINT("info", ("Last valid page is in file: %lu "
+ "offset: %lu (0x%lx) "
+ "Logs found: %d was recovered: %d "
+ "flags match: %d",
+ (ulong) LSN_FILE_NO(last_valid_page),
+ (ulong) LSN_OFFSET(last_valid_page),
+ (ulong) LSN_OFFSET(last_valid_page),
+ logs_found, old_log_was_recovered,
+ (old_flags == flags)));
+
+ /* TODO: check server ID */
+ if (logs_found && !old_log_was_recovered && old_flags == flags)
+ {
+ TRANSLOG_VALIDATOR_DATA data;
+ uchar buffer[TRANSLOG_PAGE_SIZE], *page;
+ uint16 chunk_offset;
+ data.addr= &last_valid_page;
+ /* continue old log */
+ DBUG_ASSERT(LSN_FILE_NO(last_valid_page)==
+ LSN_FILE_NO(log_descriptor.horizon));
+ if ((page= translog_get_page(&data, buffer, NULL)) == NULL ||
+ (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
+ DBUG_RETURN(1);
+
+ /* Puts filled part of old page in the buffer */
+ log_descriptor.horizon= last_valid_page;
+ translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
+ /*
+ Free space if filled with TRANSLOG_FILLER and first uchar of
+ real chunk can't be TRANSLOG_FILLER
+ */
+ while (chunk_offset < TRANSLOG_PAGE_SIZE &&
+ page[chunk_offset] != TRANSLOG_FILLER)
+ {
+ uint16 chunk_length;
+ if ((chunk_length=
+ translog_get_total_chunk_length(page, chunk_offset)) == 0)
+ DBUG_RETURN(1);
+ DBUG_PRINT("info", ("chunk: offset: %u length: %u",
+ (uint) chunk_offset, (uint) chunk_length));
+ chunk_offset+= chunk_length;
+
+ /* chunk can't cross the page border */
+ DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE);
+ }
+ memcpy(log_descriptor.buffers->buffer, page, chunk_offset);
+ log_descriptor.bc.buffer->size+= chunk_offset;
+ log_descriptor.bc.ptr+= chunk_offset;
+ log_descriptor.bc.current_page_fill= chunk_offset;
+ log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
+ (chunk_offset +
+ LSN_OFFSET(last_valid_page)));
+ DBUG_PRINT("info", ("Move Page #%u: 0x%lx chaser: %d Size: %lu (%lu)",
+ (uint) log_descriptor.bc.buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ log_descriptor.bc.chaser,
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (log_descriptor.bc.ptr - log_descriptor.bc.
+ buffer->buffer)));
+ translog_check_cursor(&log_descriptor.bc);
+ }
+ if (!old_log_was_recovered && old_flags == flags)
+ {
+ LOGHANDLER_FILE_INFO info;
+ /*
+ Accessing &log_descriptor.open_files without mutex is safe
+ because it is initialization
+ */
+ if (translog_read_file_header(&info,
+ (*dynamic_element(&log_descriptor.
+ open_files,
+ 0, TRANSLOG_FILE **))->
+ handler.file))
+ DBUG_RETURN(1);
+ version_changed= (info.maria_version != TRANSLOG_VERSION_ID);
+ }
+ }
+ DBUG_PRINT("info", ("Logs found: %d was recovered: %d",
+ logs_found, old_log_was_recovered));
+ if (!logs_found)
+ {
+ TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(sizeof(TRANSLOG_FILE),
+ MYF(0));
+ DBUG_PRINT("info", ("The log is not found => we will create new log"));
+ if (file == NULL)
+ DBUG_RETURN(1);
+ /* Start new log system from scratch */
+ log_descriptor.horizon= MAKE_LSN(start_file_num,
+ TRANSLOG_PAGE_SIZE); /* header page */
+ if ((file->handler.file=
+ create_logfile_by_number_no_cache(start_file_num)) == -1)
+ DBUG_RETURN(1);
+ translog_file_init(file, start_file_num, 0);
+ if (insert_dynamic(&log_descriptor.open_files, (uchar*)&file))
+ DBUG_RETURN(1);
+ log_descriptor.min_file= log_descriptor.max_file= start_file_num;
+ if (translog_write_file_header())
+ DBUG_RETURN(1);
+ DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
+ log_descriptor.open_files.elements);
+
+ if (ma_control_file_write_and_force(LSN_IMPOSSIBLE, 1,
+ CONTROL_FILE_UPDATE_ONLY_LOGNO))
+ DBUG_RETURN(1);
+ /* assign buffer 0 */
+ translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
+ translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
+ }
+ else if ((old_log_was_recovered || old_flags != flags || version_changed) &&
+ !readonly)
+ {
+ /* leave the damaged file untouched */
+ log_descriptor.horizon+= LSN_ONE_FILE;
+ /* header page */
+ log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
+ TRANSLOG_PAGE_SIZE);
+ if (translog_create_new_file())
+ DBUG_RETURN(1);
+ /*
+ Buffer system left untouched after recovery => we should init it
+ (starting from buffer 0)
+ */
+ translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
+ translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
+ }
+
+ /* all LSNs that are on disk are flushed */
+ log_descriptor.sent_to_disk=
+ log_descriptor.flushed= log_descriptor.horizon;
+ log_descriptor.in_buffers_only= log_descriptor.bc.buffer->offset;
+ log_descriptor.max_lsn= LSN_IMPOSSIBLE; /* set to 0 */
+ log_descriptor.previous_flush_horizon= log_descriptor.horizon;
+ /*
+ Now 'flushed' is set to 'horizon' value, but 'horizon' is (potentially)
+ address of the next LSN and we want indicate that all LSNs that are
+ already on the disk are flushed so we need decrease horizon on 1 (we are
+ sure that there is no LSN on the disk which is greater then 'flushed'
+ and there will not be LSN created that is equal or less then the value
+ of the 'flushed').
+ */
+ log_descriptor.flushed--; /* offset decreased */
+ log_descriptor.sent_to_disk--; /* offset decreased */
+ /*
+ Log records will refer to a MARIA_SHARE by a unique 2-byte id; set up
+ structures for generating 2-byte ids:
+ */
+ my_atomic_rwlock_init(&LOCK_id_to_share);
+ id_to_share= (MARIA_SHARE **) my_malloc(SHARE_ID_MAX * sizeof(MARIA_SHARE*),
+ MYF(MY_WME | MY_ZEROFILL));
+ if (unlikely(!id_to_share))
+ DBUG_RETURN(1);
+ id_to_share--; /* min id is 1 */
+
+ /* Check the last LSN record integrity */
+ if (logs_found)
+ {
+ TRANSLOG_SCANNER_DATA scanner;
+ TRANSLOG_ADDRESS page_addr;
+ LSN last_lsn= LSN_IMPOSSIBLE;
+ /*
+ take very last page address and try to find LSN record on it
+ if it fail take address of previous page and so on
+ */
+ page_addr= (log_descriptor.horizon -
+ ((log_descriptor.horizon - 1) % TRANSLOG_PAGE_SIZE + 1));
+ if (translog_scanner_init(page_addr, 1, &scanner, 1))
+ DBUG_RETURN(1);
+ scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
+ for (;;)
+ {
+ uint chunk_type;
+ chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
+ DBUG_PRINT("info", ("type: %x byte: %x", (uint) chunk_type,
+ (uint) scanner.page[scanner.page_offset]));
+ while (chunk_type != TRANSLOG_CHUNK_LSN &&
+ chunk_type != TRANSLOG_CHUNK_FIXED &&
+ scanner.page != END_OF_LOG &&
+ scanner.page[scanner.page_offset] != TRANSLOG_FILLER &&
+ scanner.page_addr == page_addr)
+ {
+ if (translog_get_next_chunk(&scanner))
+ {
+ translog_destroy_scanner(&scanner);
+ DBUG_RETURN(1);
+ }
+ if (scanner.page != END_OF_LOG)
+ {
+ chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
+ DBUG_PRINT("info", ("type: %x byte: %x", (uint) chunk_type,
+ (uint) scanner.page[scanner.page_offset]));
+ }
+ }
+ if (chunk_type == TRANSLOG_CHUNK_LSN ||
+ chunk_type == TRANSLOG_CHUNK_FIXED)
+ {
+ last_lsn= scanner.page_addr + scanner.page_offset;
+ if (translog_get_next_chunk(&scanner))
+ {
+ translog_destroy_scanner(&scanner);
+ DBUG_RETURN(1);
+ }
+ if (scanner.page == END_OF_LOG)
+ break; /* it was the last record */
+ chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
+ DBUG_PRINT("info", ("type: %x byte: %x", (uint) chunk_type,
+ (uint) scanner.page[scanner.page_offset]));
+ continue; /* try to find other record on this page */
+ }
+
+ if (last_lsn != LSN_IMPOSSIBLE)
+ break; /* there is no more records on the page */
+
+ /* We have to make step back */
+ if (unlikely(LSN_OFFSET(page_addr) == TRANSLOG_PAGE_SIZE))
+ {
+ uint32 file_no= LSN_FILE_NO(page_addr);
+ bool last_page_ok;
+ /* it is beginning of the current file */
+ if (unlikely(file_no == 1))
+ {
+ /*
+ It is beginning of the log => there is no LSNs in the log =>
+ There is no harm in leaving it "as-is".
+ */
+ DBUG_RETURN(0);
+ }
+ file_no--;
+ page_addr= MAKE_LSN(file_no, TRANSLOG_PAGE_SIZE);
+ translog_get_last_page_addr(&page_addr, &last_page_ok);
+ /* page should be OK as it is not the last file */
+ DBUG_ASSERT(last_page_ok);
+ }
+ else
+ {
+ page_addr-= TRANSLOG_PAGE_SIZE;
+ }
+ translog_destroy_scanner(&scanner);
+ if (translog_scanner_init(page_addr, 1, &scanner, 1))
+ DBUG_RETURN(1);
+ scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
+ }
+ translog_destroy_scanner(&scanner);
+
+ /* Now scanner points to the last LSN chunk, lets check it */
+ {
+ TRANSLOG_HEADER_BUFFER rec;
+ translog_size_t rec_len;
+ int len;
+ uchar buffer[1];
+ DBUG_PRINT("info", ("going to check the last found record (%lu,0x%lx)",
+ LSN_IN_PARTS(last_lsn)));
+
+ len=
+ translog_read_record_header(last_lsn, &rec);
+ if (unlikely (len == RECHEADER_READ_ERROR ||
+ len == RECHEADER_READ_EOF))
+ {
+ DBUG_PRINT("error", ("unexpected end of log or record during "
+ "reading record header: (%lu,0x%lx) len: %d",
+ LSN_IN_PARTS(last_lsn), len));
+ if (readonly)
+ log_descriptor.horizon= last_lsn;
+ else if (translog_truncate_log(last_lsn))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ DBUG_ASSERT(last_lsn == rec.lsn);
+ if (likely(rec.record_length != 0))
+ {
+ /*
+ Reading the last byte of record will trigger scanning all
+ record chunks for now
+ */
+ rec_len= translog_read_record(rec.lsn, rec.record_length - 1, 1,
+ buffer, NULL);
+ if (rec_len != 1)
+ {
+ DBUG_PRINT("error", ("unexpected end of log or record during "
+ "reading record body: (%lu,0x%lx) len: %d",
+ LSN_IN_PARTS(rec.lsn),
+ len));
+ if (readonly)
+ log_descriptor.horizon= last_lsn;
+ else if (translog_truncate_log(last_lsn))
+ DBUG_RETURN(1);
+ }
+ }
+ }
+ }
+ }
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ @brief Free transaction log file buffer.
+
+ @param buffer_no The buffer to free
+*/
+
+static void translog_buffer_destroy(struct st_translog_buffer *buffer)
+{
+ DBUG_ENTER("translog_buffer_destroy");
+ DBUG_PRINT("enter",
+ ("Buffer #%u: 0x%lx file: %d offset: (%lu,0x%lx) size: %lu",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (buffer->file ? buffer->file->handler.file : -1),
+ LSN_IN_PARTS(buffer->offset),
+ (ulong) buffer->size));
+ if (buffer->file != NULL)
+ {
+ /*
+ We ignore errors here, because we can't do something about it
+ (it is shutting down)
+ */
+ translog_buffer_lock(buffer);
+ translog_buffer_flush(buffer);
+ translog_buffer_unlock(buffer);
+ }
+ DBUG_PRINT("info", ("Destroy mutex: 0x%lx", (ulong) &buffer->mutex));
+ pthread_mutex_destroy(&buffer->mutex);
+ pthread_cond_destroy(&buffer->waiting_filling_buffer);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Free log handler resources
+
+ SYNOPSIS
+ translog_destroy()
+*/
+
+void translog_destroy()
+{
+ TRANSLOG_FILE **file;
+ uint i;
+ DBUG_ENTER("translog_destroy");
+
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+ translog_lock();
+ translog_status= (translog_status == TRANSLOG_READONLY ?
+ TRANSLOG_UNINITED :
+ TRANSLOG_SHUTDOWN);
+ if (log_descriptor.bc.buffer->file != NULL)
+ translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc);
+ translog_unlock();
+
+ for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
+ {
+ struct st_translog_buffer *buffer= log_descriptor.buffers + i;
+ translog_buffer_destroy(buffer);
+ }
+ translog_status= TRANSLOG_UNINITED;
+
+ /* close files */
+ while ((file= (TRANSLOG_FILE **)pop_dynamic(&log_descriptor.open_files)))
+ translog_close_log_file(*file);
+ pthread_mutex_destroy(&log_descriptor.sent_to_disk_lock);
+ pthread_mutex_destroy(&log_descriptor.file_header_lock);
+ pthread_mutex_destroy(&log_descriptor.unfinished_files_lock);
+ pthread_mutex_destroy(&log_descriptor.purger_lock);
+ pthread_mutex_destroy(&log_descriptor.log_flush_lock);
+ rwlock_destroy(&log_descriptor.open_files_lock);
+ delete_dynamic(&log_descriptor.open_files);
+ delete_dynamic(&log_descriptor.unfinished_files);
+
+ my_close(log_descriptor.directory_fd, MYF(MY_WME));
+ my_atomic_rwlock_destroy(&LOCK_id_to_share);
+ my_free((uchar*)(id_to_share + 1), MYF(MY_ALLOW_ZERO_PTR));
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ @brief Starts new page.
+
+ @param horizon \ Position in file and buffer where we are
+ @param cursor /
+ @param prev_buffer Buffer which should be flushed will be assigned here.
+ This is always set (to NULL if nothing to flush).
+
+ @note We do not want to flush the buffer immediately because we want to
+ let caller of this function first advance 'horizon' pointer and unlock the
+ loghandler and only then flush the log which can take some time.
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor,
+ struct st_translog_buffer **prev_buffer)
+{
+ struct st_translog_buffer *buffer= cursor->buffer;
+ DBUG_ENTER("translog_page_next");
+
+ if ((cursor->ptr + TRANSLOG_PAGE_SIZE >
+ cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER) ||
+ (LSN_OFFSET(*horizon) >
+ log_descriptor.log_file_max_size - TRANSLOG_PAGE_SIZE))
+ {
+ DBUG_PRINT("info", ("Switch to next buffer Buffer Size: %lu (%lu) => %d "
+ "File size: %lu max: %lu => %d",
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer),
+ (cursor->ptr + TRANSLOG_PAGE_SIZE >
+ cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER),
+ (ulong) LSN_OFFSET(*horizon),
+ (ulong) log_descriptor.log_file_max_size,
+ (LSN_OFFSET(*horizon) >
+ (log_descriptor.log_file_max_size -
+ TRANSLOG_PAGE_SIZE))));
+ if (translog_buffer_next(horizon, cursor,
+ LSN_OFFSET(*horizon) >
+ (log_descriptor.log_file_max_size -
+ TRANSLOG_PAGE_SIZE)))
+ DBUG_RETURN(1);
+ *prev_buffer= buffer;
+ DBUG_PRINT("info", ("Buffer #%u (0x%lu): have to be flushed",
+ (uint) buffer->buffer_no, (ulong) buffer));
+ }
+ else
+ {
+ DBUG_PRINT("info", ("Use the same buffer #%u (0x%lu): "
+ "Buffer Size: %lu (%lu)",
+ (uint) buffer->buffer_no,
+ (ulong) buffer,
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer)));
+ translog_finish_page(horizon, cursor);
+ translog_new_page_header(horizon, cursor);
+ *prev_buffer= NULL;
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Write data of given length to the current page
+
+ SYNOPSIS
+ translog_write_data_on_page()
+ horizon \ Pointers on file and buffer
+ cursor /
+ length IN length of the chunk
+ buffer buffer with data
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor,
+ translog_size_t length,
+ uchar *buffer)
+{
+ DBUG_ENTER("translog_write_data_on_page");
+ DBUG_PRINT("enter", ("Chunk length: %lu Page size %u",
+ (ulong) length, (uint) cursor->current_page_fill));
+ DBUG_ASSERT(length > 0);
+ DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
+ DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
+ TRANSLOG_WRITE_BUFFER);
+
+ memcpy(cursor->ptr, buffer, length);
+ cursor->ptr+= length;
+ (*horizon)+= length; /* adds offset */
+ cursor->current_page_fill+= length;
+ if (!cursor->chaser)
+ cursor->buffer->size+= length;
+ DBUG_PRINT("info", ("Write data buffer #%u: 0x%lx "
+ "chaser: %d Size: %lu (%lu)",
+ (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ cursor->chaser, (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer)));
+ translog_check_cursor(cursor);
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Write data from parts of given length to the current page
+
+ SYNOPSIS
+ translog_write_parts_on_page()
+ horizon \ Pointers on file and buffer
+ cursor /
+ length IN length of the chunk
+ parts IN/OUT chunk source
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor,
+ translog_size_t length,
+ struct st_translog_parts *parts)
+{
+ translog_size_t left= length;
+ uint cur= (uint) parts->current;
+ DBUG_ENTER("translog_write_parts_on_page");
+ DBUG_PRINT("enter", ("Chunk length: %lu parts: %u of %u. Page size: %u "
+ "Buffer size: %lu (%lu)",
+ (ulong) length,
+ (uint) (cur + 1), (uint) parts->elements,
+ (uint) cursor->current_page_fill,
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer)));
+ DBUG_ASSERT(length > 0);
+ DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
+ DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
+ TRANSLOG_WRITE_BUFFER);
+
+ do
+ {
+ translog_size_t len;
+ LEX_STRING *part;
+ uchar *buff;
+
+ DBUG_ASSERT(cur < parts->elements);
+ part= parts->parts + cur;
+ buff= (uchar*) part->str;
+ DBUG_PRINT("info", ("Part: %u Length: %lu left: %lu buff: 0x%lx",
+ (uint) (cur + 1), (ulong) part->length, (ulong) left,
+ (ulong) buff));
+
+ if (part->length > left)
+ {
+ /* we should write less then the current part */
+ len= left;
+ part->length-= len;
+ part->str+= len;
+ DBUG_PRINT("info", ("Set new part: %u Length: %lu",
+ (uint) (cur + 1), (ulong) part->length));
+ }
+ else
+ {
+ len= part->length;
+ cur++;
+ DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len));
+ }
+ DBUG_PRINT("info", ("copy: 0x%lx <- 0x%lx %u",
+ (ulong) cursor->ptr, (ulong)buff, (uint)len));
+ if (likely(len))
+ {
+ memcpy(cursor->ptr, buff, len);
+ left-= len;
+ cursor->ptr+= len;
+ }
+ } while (left);
+
+ DBUG_PRINT("info", ("Horizon: (%lu,0x%lx) Length %lu(0x%lx)",
+ LSN_IN_PARTS(*horizon),
+ (ulong) length, (ulong) length));
+ parts->current= cur;
+ (*horizon)+= length; /* offset increasing */
+ cursor->current_page_fill+= length;
+ if (!cursor->chaser)
+ cursor->buffer->size+= length;
+ /*
+ We do not not updating parts->total_record_length here because it is
+ need only before writing record to have total length
+ */
+ DBUG_PRINT("info", ("Write parts buffer #%u: 0x%lx "
+ "chaser: %d Size: %lu (%lu) "
+ "Horizon: (%lu,0x%lx) buff offset: 0x%lx",
+ (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ cursor->chaser, (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer),
+ LSN_IN_PARTS(*horizon),
+ (ulong) (LSN_OFFSET(cursor->buffer->offset) +
+ cursor->buffer->size)));
+ translog_check_cursor(cursor);
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Put 1 group chunk type 0 header into parts array
+
+ SYNOPSIS
+ translog_write_variable_record_1group_header()
+ parts Descriptor of record source parts
+ type The log record type
+ short_trid Short transaction ID or 0 if it has no sense
+ header_length Calculated header length of chunk type 0
+ chunk0_header Buffer for the chunk header writing
+*/
+
+static void
+translog_write_variable_record_1group_header(struct st_translog_parts *parts,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ uint16 header_length,
+ uchar *chunk0_header)
+{
+ LEX_STRING *part;
+ DBUG_ASSERT(parts->current != 0); /* first part is left for header */
+ part= parts->parts + (--parts->current);
+ parts->total_record_length+= (part->length= header_length);
+ part->str= (char*)chunk0_header;
+ /* puts chunk type */
+ *chunk0_header= (uchar) (type | TRANSLOG_CHUNK_LSN);
+ int2store(chunk0_header + 1, short_trid);
+ /* puts record length */
+ translog_write_variable_record_1group_code_len(chunk0_header + 3,
+ parts->record_length,
+ header_length);
+ /* puts 0 as chunk length which indicate 1 group record */
+ int2store(chunk0_header + header_length - 2, 0);
+}
+
+
+/*
+ Increase number of writers for this buffer
+
+ SYNOPSIS
+ translog_buffer_increase_writers()
+ buffer target buffer
+*/
+
+static inline void
+translog_buffer_increase_writers(struct st_translog_buffer *buffer)
+{
+ DBUG_ENTER("translog_buffer_increase_writers");
+ translog_buffer_lock_assert_owner(buffer);
+ buffer->copy_to_buffer_in_progress++;
+ DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u 0x%lx progress: %d",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ buffer->copy_to_buffer_in_progress));
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Decrease number of writers for this buffer
+
+ SYNOPSIS
+ translog_buffer_decrease_writers()
+ buffer target buffer
+*/
+
+
+static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer)
+{
+ DBUG_ENTER("translog_buffer_decrease_writers");
+ translog_buffer_lock_assert_owner(buffer);
+ buffer->copy_to_buffer_in_progress--;
+ DBUG_PRINT("info",
+ ("copy_to_buffer_in_progress. Buffer #%u 0x%lx progress: %d",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ buffer->copy_to_buffer_in_progress));
+ if (buffer->copy_to_buffer_in_progress == 0)
+ pthread_cond_broadcast(&buffer->waiting_filling_buffer);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Put chunk 2 from new page beginning
+
+ SYNOPSIS
+ translog_write_variable_record_chunk2_page()
+ parts Descriptor of record source parts
+ horizon \ Pointers on file position and buffer
+ cursor /
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool
+translog_write_variable_record_chunk2_page(struct st_translog_parts *parts,
+ TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor)
+{
+ struct st_translog_buffer *buffer_to_flush;
+ int rc;
+ uchar chunk2_header[1];
+ DBUG_ENTER("translog_write_variable_record_chunk2_page");
+ chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;
+
+ LINT_INIT(buffer_to_flush);
+ rc= translog_page_next(horizon, cursor, &buffer_to_flush);
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+ if (rc)
+ DBUG_RETURN(1);
+
+ /* Puts chunk type */
+ translog_write_data_on_page(horizon, cursor, 1, chunk2_header);
+ /* Puts chunk body */
+ translog_write_parts_on_page(horizon, cursor,
+ log_descriptor.page_capacity_chunk_2, parts);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Put chunk 3 of requested length in the buffer from new page beginning
+
+ SYNOPSIS
+ translog_write_variable_record_chunk3_page()
+ parts Descriptor of record source parts
+ length Length of this chunk
+ horizon \ Pointers on file position and buffer
+ cursor /
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool
+translog_write_variable_record_chunk3_page(struct st_translog_parts *parts,
+ uint16 length,
+ TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor)
+{
+ struct st_translog_buffer *buffer_to_flush;
+ LEX_STRING *part;
+ int rc;
+ uchar chunk3_header[1 + 2];
+ DBUG_ENTER("translog_write_variable_record_chunk3_page");
+
+ LINT_INIT(buffer_to_flush);
+ rc= translog_page_next(horizon, cursor, &buffer_to_flush);
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+ if (rc)
+ DBUG_RETURN(1);
+ if (length == 0)
+ {
+ /* It was call to write page header only (no data for chunk 3) */
+ DBUG_PRINT("info", ("It is a call to make page header only"));
+ DBUG_RETURN(0);
+ }
+
+ DBUG_ASSERT(parts->current != 0); /* first part is left for header */
+ part= parts->parts + (--parts->current);
+ parts->total_record_length+= (part->length= 1 + 2);
+ part->str= (char*)chunk3_header;
+ /* Puts chunk type */
+ *chunk3_header= (uchar) (TRANSLOG_CHUNK_LNGTH);
+ /* Puts chunk length */
+ int2store(chunk3_header + 1, length);
+
+ translog_write_parts_on_page(horizon, cursor, length + 1 + 2, parts);
+ DBUG_RETURN(0);
+}
+
+/*
+ Move log pointer (horizon) on given number pages starting from next page,
+ and given offset on the last page
+
+ SYNOPSIS
+ translog_advance_pointer()
+ pages Number of full pages starting from the next one
+ last_page_data Plus this data on the last page
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_advance_pointer(uint pages, uint16 last_page_data)
+{
+ translog_size_t last_page_offset= (log_descriptor.page_overhead +
+ last_page_data);
+ translog_size_t offset= (TRANSLOG_PAGE_SIZE -
+ log_descriptor.bc.current_page_fill +
+ pages * TRANSLOG_PAGE_SIZE + last_page_offset);
+ translog_size_t buffer_end_offset, file_end_offset, min_offset;
+ DBUG_ENTER("translog_advance_pointer");
+ DBUG_PRINT("enter", ("Pointer: (%lu, 0x%lx) + %u + %u pages + %u + %u",
+ LSN_IN_PARTS(log_descriptor.horizon),
+ (uint) (TRANSLOG_PAGE_SIZE -
+ log_descriptor.bc.current_page_fill),
+ pages, (uint) log_descriptor.page_overhead,
+ (uint) last_page_data));
+ translog_lock_assert_owner();
+
+ /*
+ The loop will be executed 1-3 times. Usually we advance the
+ pointer to fill only the current buffer (if we have more then 1/2 of
+ buffer free or 2 buffers (rest of current and all next). In case of
+ really huge record end where we write last group with "table of
+ content" of all groups and ignore buffer borders we can occupy
+ 3 buffers.
+ */
+ for (;;)
+ {
+ uint8 new_buffer_no;
+ struct st_translog_buffer *new_buffer;
+ struct st_translog_buffer *old_buffer;
+ buffer_end_offset= TRANSLOG_WRITE_BUFFER - log_descriptor.bc.buffer->size;
+ file_end_offset= (log_descriptor.log_file_max_size -
+ LSN_OFFSET(log_descriptor.horizon));
+ DBUG_PRINT("info", ("offset: %lu buffer_end_offs: %lu, "
+ "file_end_offs: %lu",
+ (ulong) offset, (ulong) buffer_end_offset,
+ (ulong) file_end_offset));
+ DBUG_PRINT("info", ("Buff #%u %u (0x%lx) offset 0x%lx + size 0x%lx = "
+ "0x%lx (0x%lx)",
+ (uint) log_descriptor.bc.buffer->buffer_no,
+ (uint) log_descriptor.bc.buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ (ulong) LSN_OFFSET(log_descriptor.bc.buffer->offset),
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (LSN_OFFSET(log_descriptor.bc.buffer->offset) +
+ log_descriptor.bc.buffer->size),
+ (ulong) LSN_OFFSET(log_descriptor.horizon)));
+ DBUG_ASSERT(LSN_OFFSET(log_descriptor.bc.buffer->offset) +
+ log_descriptor.bc.buffer->size ==
+ LSN_OFFSET(log_descriptor.horizon));
+
+ if (offset <= buffer_end_offset && offset <= file_end_offset)
+ break;
+ old_buffer= log_descriptor.bc.buffer;
+ new_buffer_no= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO;
+ new_buffer= log_descriptor.buffers + new_buffer_no;
+
+ translog_buffer_lock(new_buffer);
+ translog_wait_for_buffer_free(new_buffer);
+
+ min_offset= min(buffer_end_offset, file_end_offset);
+ /* TODO: check is it ptr or size enough */
+ log_descriptor.bc.buffer->size+= min_offset;
+ log_descriptor.bc.ptr+= min_offset;
+ DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu)",
+ (uint) log_descriptor.bc.buffer->buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ log_descriptor.bc.chaser,
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
+ buffer->buffer)));
+ DBUG_ASSERT((ulong) (log_descriptor.bc.ptr -
+ log_descriptor.bc.buffer->buffer) ==
+ log_descriptor.bc.buffer->size);
+ DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
+ log_descriptor.bc.buffer_no);
+ translog_buffer_increase_writers(log_descriptor.bc.buffer);
+
+ if (file_end_offset <= buffer_end_offset)
+ {
+ log_descriptor.horizon+= LSN_ONE_FILE;
+ log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
+ TRANSLOG_PAGE_SIZE);
+ DBUG_PRINT("info", ("New file: %lu",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon)));
+ if (translog_create_new_file())
+ {
+ DBUG_RETURN(1);
+ }
+ }
+ else
+ {
+ DBUG_PRINT("info", ("The same file"));
+ log_descriptor.horizon+= min_offset; /* offset increasing */
+ }
+ translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
+ old_buffer->next_buffer_offset= new_buffer->offset;
+ if (translog_buffer_unlock(old_buffer))
+ DBUG_RETURN(1);
+ offset-= min_offset;
+ }
+ log_descriptor.bc.ptr+= offset;
+ log_descriptor.bc.buffer->size+= offset;
+ translog_buffer_increase_writers(log_descriptor.bc.buffer);
+ log_descriptor.horizon+= offset; /* offset increasing */
+ log_descriptor.bc.current_page_fill= last_page_offset;
+ DBUG_PRINT("info", ("drop write_counter"));
+ log_descriptor.bc.write_counter= 0;
+ log_descriptor.bc.previous_offset= 0;
+ DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu) "
+ "offset: %u last page: %u",
+ (uint) log_descriptor.bc.buffer->buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ log_descriptor.bc.chaser,
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (log_descriptor.bc.ptr -
+ log_descriptor.bc.buffer->
+ buffer), (uint) offset,
+ (uint) last_page_offset));
+ DBUG_PRINT("info",
+ ("pointer moved to: (%lu, 0x%lx)",
+ LSN_IN_PARTS(log_descriptor.horizon)));
+ translog_check_cursor(&log_descriptor.bc);
+ log_descriptor.bc.protected= 0;
+ DBUG_RETURN(0);
+}
+
+
+
+/*
+ Get page rest
+
+ SYNOPSIS
+ translog_get_current_page_rest()
+
+ NOTE loghandler should be locked
+
+ RETURN
+ number of bytes left on the current page
+*/
+
+static uint translog_get_current_page_rest()
+{
+ return (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill);
+}
+
+
+/*
+ Get buffer rest in full pages
+
+ SYNOPSIS
+ translog_get_current_buffer_rest()
+
+ NOTE loghandler should be locked
+
+ RETURN
+ number of full pages left on the current buffer
+*/
+
+static uint translog_get_current_buffer_rest()
+{
+ return ((log_descriptor.bc.buffer->buffer + TRANSLOG_WRITE_BUFFER -
+ log_descriptor.bc.ptr) /
+ TRANSLOG_PAGE_SIZE);
+}
+
+/*
+ Calculate possible group size without first (current) page
+
+ SYNOPSIS
+ translog_get_current_group_size()
+
+ NOTE loghandler should be locked
+
+ RETURN
+ group size without first (current) page
+*/
+
+static translog_size_t translog_get_current_group_size()
+{
+ /* buffer rest in full pages */
+ translog_size_t buffer_rest= translog_get_current_buffer_rest();
+ DBUG_ENTER("translog_get_current_group_size");
+ DBUG_PRINT("info", ("buffer_rest in pages: %u", buffer_rest));
+
+ buffer_rest*= log_descriptor.page_capacity_chunk_2;
+ /* in case of only half of buffer free we can write this and next buffer */
+ if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2)
+ {
+ DBUG_PRINT("info", ("buffer_rest: %lu -> add %lu",
+ (ulong) buffer_rest,
+ (ulong) log_descriptor.buffer_capacity_chunk_2));
+ buffer_rest+= log_descriptor.buffer_capacity_chunk_2;
+ }
+
+ DBUG_PRINT("info", ("buffer_rest: %lu", (ulong) buffer_rest));
+
+ DBUG_RETURN(buffer_rest);
+}
+
+
+/**
+ @brief Write variable record in 1 group.
+
+ @param lsn LSN of the record will be written here
+ @param type the log record type
+ @param short_trid Short transaction ID or 0 if it has no sense
+ @param parts Descriptor of record source parts
+ @param buffer_to_flush Buffer which have to be flushed if it is not 0
+ @param header_length Calculated header length of chunk type 0
+ @param trn Transaction structure pointer for hooks by
+ record log type, for short_id
+ @param hook_arg Argument which will be passed to pre-write and
+ in-write hooks of this record.
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool
+translog_write_variable_record_1group(LSN *lsn,
+ enum translog_record_type type,
+ MARIA_HA *tbl_info,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ struct st_translog_buffer
+ *buffer_to_flush, uint16 header_length,
+ TRN *trn, void *hook_arg)
+{
+ TRANSLOG_ADDRESS horizon;
+ struct st_buffer_cursor cursor;
+ int rc= 0;
+ uint i;
+ translog_size_t record_rest, full_pages, first_page;
+ uint additional_chunk3_page= 0;
+ uchar chunk0_header[1 + 2 + 5 + 2];
+ DBUG_ENTER("translog_write_variable_record_1group");
+ translog_lock_assert_owner();
+
+ *lsn= horizon= log_descriptor.horizon;
+ if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
+ *lsn, TRUE) ||
+ (log_record_type_descriptor[type].inwrite_hook &&
+ (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
+ lsn, hook_arg)))
+ {
+ translog_unlock();
+ DBUG_RETURN(1);
+ }
+ cursor= log_descriptor.bc;
+ cursor.chaser= 1;
+
+ /* Advance pointer to be able unlock the loghandler */
+ first_page= translog_get_current_page_rest();
+ record_rest= parts->record_length - (first_page - header_length);
+ full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
+ record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
+
+ if (record_rest + 1 == log_descriptor.page_capacity_chunk_2)
+ {
+ DBUG_PRINT("info", ("2 chunks type 3 is needed"));
+ /* We will write 2 chunks type 3 at the end of this group */
+ additional_chunk3_page= 1;
+ record_rest= 1;
+ }
+
+ DBUG_PRINT("info", ("first_page: %u (%u) full_pages: %u (%lu) "
+ "additional: %u (%u) rest %u = %u",
+ first_page, first_page - header_length,
+ full_pages,
+ (ulong) full_pages *
+ log_descriptor.page_capacity_chunk_2,
+ additional_chunk3_page,
+ additional_chunk3_page *
+ (log_descriptor.page_capacity_chunk_2 - 1),
+ record_rest, parts->record_length));
+ /* record_rest + 3 is chunk type 3 overhead + record_rest */
+ rc|= translog_advance_pointer(full_pages + additional_chunk3_page,
+ (record_rest ? record_rest + 3 : 0));
+ log_descriptor.bc.buffer->last_lsn= *lsn;
+
+ rc|= translog_unlock();
+
+ /*
+ Check if we switched buffer and need process it (current buffer is
+ unlocked already => we will not delay other threads
+ */
+ if (buffer_to_flush != NULL)
+ {
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+ if (rc)
+ DBUG_RETURN(1);
+
+ translog_write_variable_record_1group_header(parts, type, short_trid,
+ header_length, chunk0_header);
+
+ /* fill the pages */
+ translog_write_parts_on_page(&horizon, &cursor, first_page, parts);
+
+
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
+ LSN_IN_PARTS(log_descriptor.horizon),
+ LSN_IN_PARTS(horizon)));
+
+ for (i= 0; i < full_pages; i++)
+ {
+ if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
+ DBUG_RETURN(1);
+
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
+ LSN_IN_PARTS(log_descriptor.horizon),
+ LSN_IN_PARTS(horizon)));
+ }
+
+ if (additional_chunk3_page)
+ {
+ if (translog_write_variable_record_chunk3_page(parts,
+ log_descriptor.
+ page_capacity_chunk_2 - 2,
+ &horizon, &cursor))
+ DBUG_RETURN(1);
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
+ LSN_IN_PARTS(log_descriptor.horizon),
+ LSN_IN_PARTS(horizon)));
+ DBUG_ASSERT(cursor.current_page_fill == TRANSLOG_PAGE_SIZE);
+ }
+
+ if (translog_write_variable_record_chunk3_page(parts,
+ record_rest,
+ &horizon, &cursor))
+ DBUG_RETURN(1);
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon),
+ (ulong) LSN_FILE_NO(horizon),
+ (ulong) LSN_OFFSET(horizon)));
+
+ if (!(rc= translog_buffer_lock(cursor.buffer)))
+ {
+ translog_buffer_decrease_writers(cursor.buffer);
+ }
+ rc|= translog_buffer_unlock(cursor.buffer);
+ DBUG_RETURN(rc);
+}
+
+
+/**
+ @brief Write variable record in 1 chunk.
+
+ @param lsn LSN of the record will be written here
+ @param type the log record type
+ @param short_trid Short transaction ID or 0 if it has no sense
+ @param parts Descriptor of record source parts
+ @param buffer_to_flush Buffer which have to be flushed if it is not 0
+ @param header_length Calculated header length of chunk type 0
+ @param trn Transaction structure pointer for hooks by
+ record log type, for short_id
+ @param hook_arg Argument which will be passed to pre-write and
+ in-write hooks of this record.
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool
+translog_write_variable_record_1chunk(LSN *lsn,
+ enum translog_record_type type,
+ MARIA_HA *tbl_info,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ struct st_translog_buffer
+ *buffer_to_flush, uint16 header_length,
+ TRN *trn, void *hook_arg)
+{
+ int rc;
+ uchar chunk0_header[1 + 2 + 5 + 2];
+ DBUG_ENTER("translog_write_variable_record_1chunk");
+ translog_lock_assert_owner();
+
+ translog_write_variable_record_1group_header(parts, type, short_trid,
+ header_length, chunk0_header);
+
+ *lsn= log_descriptor.horizon;
+ if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
+ *lsn, TRUE) ||
+ (log_record_type_descriptor[type].inwrite_hook &&
+ (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
+ lsn, hook_arg)))
+ {
+ translog_unlock();
+ DBUG_RETURN(1);
+ }
+
+ rc= translog_write_parts_on_page(&log_descriptor.horizon,
+ &log_descriptor.bc,
+ parts->total_record_length, parts);
+ log_descriptor.bc.buffer->last_lsn= *lsn;
+ rc|= translog_unlock();
+
+ /*
+ check if we switched buffer and need process it (current buffer is
+ unlocked already => we will not delay other threads
+ */
+ if (buffer_to_flush != NULL)
+ {
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+
+ DBUG_RETURN(rc);
+}
+
+
+/*
+ @brief Calculates and write LSN difference (compressed LSN).
+
+ @param base_lsn LSN from which we calculate difference
+ @param lsn LSN for codding
+ @param dst Result will be written to dst[-pack_length] .. dst[-1]
+
+ @note To store an LSN in a compact way we will use the following compression:
+ If a log record has LSN1, and it contains the LSN2 as a back reference,
+ Instead of LSN2 we write LSN1-LSN2, encoded as:
+ two bits the number N (see below)
+ 14 bits
+ N bytes
+ That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
+ is stored in the first two bits.
+
+ @note function made to write the result in backward direction with no
+ special sense or tricks both directions are equal in complicity
+
+ @retval # pointer on coded LSN
+*/
+
+static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst)
+{
+ uint64 diff;
+ DBUG_ENTER("translog_put_LSN_diff");
+ DBUG_PRINT("enter", ("Base: (0x%lu,0x%lx) val: (0x%lu,0x%lx) dst: 0x%lx",
+ LSN_IN_PARTS(base_lsn), LSN_IN_PARTS(lsn),
+ (ulong) dst));
+ DBUG_ASSERT(base_lsn > lsn);
+ diff= base_lsn - lsn;
+ DBUG_PRINT("info", ("Diff: 0x%llx", (ulonglong) diff));
+ if (diff <= 0x3FFF)
+ {
+ dst-= 2;
+ /*
+ Note we store this high uchar first to ensure that first uchar has
+ 0 in the 3 upper bits.
+ */
+ dst[0]= diff >> 8;
+ dst[1]= (diff & 0xFF);
+ }
+ else if (diff <= 0x3FFFFFL)
+ {
+ dst-= 3;
+ dst[0]= 0x40 | (diff >> 16);
+ int2store(dst + 1, diff & 0xFFFF);
+ }
+ else if (diff <= 0x3FFFFFFFL)
+ {
+ dst-= 4;
+ dst[0]= 0x80 | (diff >> 24);
+ int3store(dst + 1, diff & 0xFFFFFFL);
+ }
+ else if (diff <= LL(0x3FFFFFFFFF))
+
+ {
+ dst-= 5;
+ dst[0]= 0xC0 | (diff >> 32);
+ int4store(dst + 1, diff & 0xFFFFFFFFL);
+ }
+ else
+ {
+ /*
+ It is full LSN after special 1 diff (which is impossible
+ in real life)
+ */
+ dst-= 2 + LSN_STORE_SIZE;
+ dst[0]= 0;
+ dst[1]= 1;
+ lsn_store(dst + 2, lsn);
+ }
+ DBUG_PRINT("info", ("new dst: 0x%lx", (ulong) dst));
+ DBUG_RETURN(dst);
+}
+
+
+/*
+ Get LSN from LSN-difference (compressed LSN)
+
+ SYNOPSIS
+ translog_get_LSN_from_diff()
+ base_lsn LSN from which we calculate difference
+ src pointer to coded lsn
+ dst pointer to buffer where to write 7byte LSN
+
+ NOTE:
+ To store an LSN in a compact way we will use the following compression:
+
+ If a log record has LSN1, and it contains the lSN2 as a back reference,
+ Instead of LSN2 we write LSN1-LSN2, encoded as:
+
+ two bits the number N (see below)
+ 14 bits
+ N bytes
+
+ That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
+ is stored in the first two bits.
+
+ RETURN
+ pointer to buffer after decoded LSN
+*/
+
+static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst)
+{
+ LSN lsn;
+ uint32 diff;
+ uint32 first_byte;
+ uint32 file_no, rec_offset;
+ uint8 code;
+ DBUG_ENTER("translog_get_LSN_from_diff");
+ DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx) src: 0x%lx dst 0x%lx",
+ LSN_IN_PARTS(base_lsn), (ulong) src, (ulong) dst));
+ first_byte= *((uint8*) src);
+ code= first_byte >> 6; /* Length is in 2 most significant bits */
+ first_byte&= 0x3F;
+ src++; /* Skip length + encode */
+ file_no= LSN_FILE_NO(base_lsn); /* Assume relative */
+ DBUG_PRINT("info", ("code: %u first byte: %lu",
+ (uint) code, (ulong) first_byte));
+ switch (code) {
+ case 0:
+ if (first_byte == 0 && *((uint8*)src) == 1)
+ {
+ /*
+ It is full LSN after special 1 diff (which is impossible
+ in real life)
+ */
+ memcpy(dst, src + 1, LSN_STORE_SIZE);
+ DBUG_PRINT("info", ("Special case of full LSN, new src: 0x%lx",
+ (ulong) (src + 1 + LSN_STORE_SIZE)));
+ DBUG_RETURN(src + 1 + LSN_STORE_SIZE);
+ }
+ rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) + *((uint8*)src));
+ break;
+ case 1:
+ diff= uint2korr(src);
+ rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) + diff);
+ break;
+ case 2:
+ diff= uint3korr(src);
+ rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) + diff);
+ break;
+ case 3:
+ {
+ ulonglong base_offset= LSN_OFFSET(base_lsn);
+ diff= uint4korr(src);
+ if (diff > LSN_OFFSET(base_lsn))
+ {
+ /* take 1 from file offset */
+ first_byte++;
+ base_offset+= LL(0x100000000);
+ }
+ file_no= LSN_FILE_NO(base_lsn) - first_byte;
+ rec_offset= base_offset - diff;
+ break;
+ }
+ default:
+ DBUG_ASSERT(0);
+ DBUG_RETURN(NULL);
+ }
+ lsn= MAKE_LSN(file_no, rec_offset);
+ src+= code + 1;
+ lsn_store(dst, lsn);
+ DBUG_PRINT("info", ("new src: 0x%lx", (ulong) src));
+ DBUG_RETURN(src);
+}
+
+
+/**
+ @brief Encodes relative LSNs listed in the parameters.
+
+ @param parts Parts list with encoded LSN(s)
+ @param base_lsn LSN which is base for encoding
+ @param lsns number of LSN(s) to encode
+ @param compressed_LSNs buffer which can be used for storing compressed LSN(s)
+*/
+
+static void translog_relative_LSN_encode(struct st_translog_parts *parts,
+ LSN base_lsn,
+ uint lsns, uchar *compressed_LSNs)
+{
+ LEX_STRING *part;
+ uint lsns_len= lsns * LSN_STORE_SIZE;
+ char buffer_src[MAX_NUMBER_OF_LSNS_PER_RECORD * LSN_STORE_SIZE];
+ char *buffer= buffer_src;
+
+ DBUG_ENTER("translog_relative_LSN_encode");
+
+ DBUG_ASSERT(parts->current != 0);
+ part= parts->parts + parts->current;
+
+ /* collect all LSN(s) in one chunk if it (they) is (are) divided */
+ if (part->length < lsns_len)
+ {
+ uint copied= part->length;
+ LEX_STRING *next_part;
+ DBUG_PRINT("info", ("Using buffer: 0x%lx", (ulong) compressed_LSNs));
+ memcpy(buffer, (uchar*)part->str, part->length);
+ next_part= parts->parts + parts->current + 1;
+ do
+ {
+ DBUG_ASSERT(next_part < parts->parts + parts->elements);
+ if ((next_part->length + copied) < lsns_len)
+ {
+ memcpy(buffer + copied, (uchar*)next_part->str,
+ next_part->length);
+ copied+= next_part->length;
+ next_part->length= 0; next_part->str= 0;
+ /* delete_dynamic_element(&parts->parts, parts->current + 1); */
+ next_part++;
+ parts->current++;
+ part= parts->parts + parts->current;
+ }
+ else
+ {
+ uint len= lsns_len - copied;
+ memcpy(buffer + copied, (uchar*)next_part->str, len);
+ copied= lsns_len;
+ next_part->str+= len;
+ next_part->length-= len;
+ }
+ } while (copied < lsns_len);
+ }
+ else
+ {
+ buffer= part->str;
+ part->str+= lsns_len;
+ part->length-= lsns_len;
+ parts->current--;
+ part= parts->parts + parts->current;
+ }
+
+ {
+ /* Compress */
+ LSN ref;
+ int economy;
+ uchar *src_ptr;
+ uchar *dst_ptr= compressed_LSNs + (MAX_NUMBER_OF_LSNS_PER_RECORD *
+ COMPRESSED_LSN_MAX_STORE_SIZE);
+ /*
+ We write the result in backward direction with no special sense or
+ tricks both directions are equal in complicity
+ */
+ for (src_ptr= buffer + lsns_len - LSN_STORE_SIZE;
+ src_ptr >= (uchar*) buffer;
+ src_ptr-= LSN_STORE_SIZE)
+ {
+ ref= lsn_korr(src_ptr);
+ dst_ptr= translog_put_LSN_diff(base_lsn, ref, dst_ptr);
+ }
+ part->length= (uint)((compressed_LSNs +
+ (MAX_NUMBER_OF_LSNS_PER_RECORD *
+ COMPRESSED_LSN_MAX_STORE_SIZE)) -
+ dst_ptr);
+ parts->record_length-= (economy= lsns_len - part->length);
+ DBUG_PRINT("info", ("new length of LSNs: %lu economy: %d",
+ (ulong)part->length, economy));
+ parts->total_record_length-= economy;
+ part->str= (char*)dst_ptr;
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ @brief Write multi-group variable-size record.
+
+ @param lsn LSN of the record will be written here
+ @param type the log record type
+ @param short_trid Short transaction ID or 0 if it has no sense
+ @param parts Descriptor of record source parts
+ @param buffer_to_flush Buffer which have to be flushed if it is not 0
+ @param header_length Header length calculated for 1 group
+ @param buffer_rest Beginning from which we plan to write in full pages
+ @param trn Transaction structure pointer for hooks by
+ record log type, for short_id
+ @param hook_arg Argument which will be passed to pre-write and
+ in-write hooks of this record.
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool
+translog_write_variable_record_mgroup(LSN *lsn,
+ enum translog_record_type type,
+ MARIA_HA *tbl_info,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ struct st_translog_buffer
+ *buffer_to_flush,
+ uint16 header_length,
+ translog_size_t buffer_rest,
+ TRN *trn, void *hook_arg)
+{
+ TRANSLOG_ADDRESS horizon;
+ struct st_buffer_cursor cursor;
+ int rc= 0;
+ uint i, chunk2_page, full_pages;
+ uint curr_group= 0;
+ translog_size_t record_rest, first_page, chunk3_pages, chunk0_pages= 1;
+ translog_size_t done= 0;
+ struct st_translog_group_descriptor group;
+ DYNAMIC_ARRAY groups;
+ uint16 chunk3_size;
+ uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1;
+ uint16 last_page_capacity;
+ my_bool new_page_before_chunk0= 1, first_chunk0= 1;
+ uchar chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1];
+ uchar chunk2_header[1];
+ uint header_fixed_part= header_length + 2;
+ uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1);
+ uint file_of_the_first_group;
+ DBUG_ENTER("translog_write_variable_record_mgroup");
+ translog_lock_assert_owner();
+
+ chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;
+
+ if (my_init_dynamic_array(&groups,
+ sizeof(struct st_translog_group_descriptor),
+ 10, 10))
+ {
+ translog_unlock();
+ DBUG_PRINT("error", ("init array failed"));
+ DBUG_RETURN(1);
+ }
+
+ first_page= translog_get_current_page_rest();
+ record_rest= parts->record_length - (first_page - 1);
+ DBUG_PRINT("info", ("Record Rest: %lu", (ulong) record_rest));
+
+ if (record_rest < buffer_rest)
+ {
+ /*
+ The record (group 1 type) is larger than the free space on the page
+ - we need to split it in two. But when we split it in two, the first
+ part is big enough to hold all the data of the record (because the
+ header of the first part of the split is smaller than the header of
+ the record as a whole when it takes only one chunk)
+ */
+ DBUG_PRINT("info", ("too many free space because changing header"));
+ buffer_rest-= log_descriptor.page_capacity_chunk_2;
+ DBUG_ASSERT(record_rest >= buffer_rest);
+ }
+
+ file_of_the_first_group= LSN_FILE_NO(log_descriptor.horizon);
+ translog_mark_file_unfinished(file_of_the_first_group);
+ do
+ {
+ group.addr= horizon= log_descriptor.horizon;
+ cursor= log_descriptor.bc;
+ cursor.chaser= 1;
+ if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255)
+ {
+ /* sizeof(uint8) == 256 is max number of chunk in multi-chunks group */
+ full_pages= 255;
+ buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2;
+ }
+ /*
+ group chunks =
+ full pages + first page (which actually can be full, too).
+ But here we assign number of chunks - 1
+ */
+ group.num= full_pages;
+ if (insert_dynamic(&groups, (uchar*) &group))
+ {
+ DBUG_PRINT("error", ("insert into array failed"));
+ goto err_unlock;
+ }
+
+ DBUG_PRINT("info", ("chunk: #%u first_page: %u (%u) "
+ "full_pages: %lu (%lu) "
+ "Left %lu",
+ groups.elements,
+ first_page, first_page - 1,
+ (ulong) full_pages,
+ (ulong) (full_pages *
+ log_descriptor.page_capacity_chunk_2),
+ (ulong)(parts->record_length - (first_page - 1 +
+ buffer_rest) -
+ done)));
+ rc|= translog_advance_pointer(full_pages, 0);
+
+ rc|= translog_unlock();
+
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ buffer_to_flush= NULL;
+ }
+ if (rc)
+ {
+ DBUG_PRINT("error", ("flush of unlock buffer failed"));
+ goto err;
+ }
+
+ translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
+ translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) "
+ "Left %lu",
+ LSN_IN_PARTS(log_descriptor.horizon),
+ LSN_IN_PARTS(horizon),
+ (ulong) (parts->record_length - (first_page - 1) -
+ done)));
+
+ for (i= 0; i < full_pages; i++)
+ {
+ if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
+ goto err;
+
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) "
+ "local: (%lu,0x%lx) "
+ "Left: %lu",
+ LSN_IN_PARTS(log_descriptor.horizon),
+ LSN_IN_PARTS(horizon),
+ (ulong) (parts->record_length - (first_page - 1) -
+ i * log_descriptor.page_capacity_chunk_2 -
+ done)));
+ }
+
+ done+= (first_page - 1 + buffer_rest);
+
+ /* TODO: make separate function for following */
+ rc= translog_page_next(&horizon, &cursor, &buffer_to_flush);
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ buffer_to_flush= NULL;
+ }
+ if (rc)
+ {
+ DBUG_PRINT("error", ("flush of unlock buffer failed"));
+ goto err;
+ }
+ rc= translog_buffer_lock(cursor.buffer);
+ if (!rc)
+ translog_buffer_decrease_writers(cursor.buffer);
+ rc|= translog_buffer_unlock(cursor.buffer);
+ if (rc)
+ goto err;
+
+ translog_lock();
+
+ first_page= translog_get_current_page_rest();
+ buffer_rest= translog_get_current_group_size();
+ } while (first_page + buffer_rest < (uint) (parts->record_length - done));
+
+ group.addr= horizon= log_descriptor.horizon;
+ cursor= log_descriptor.bc;
+ cursor.chaser= 1;
+ group.num= 0; /* 0 because it does not matter */
+ if (insert_dynamic(&groups, (uchar*) &group))
+ {
+ DBUG_PRINT("error", ("insert into array failed"));
+ goto err_unlock;
+ }
+ record_rest= parts->record_length - done;
+ DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest));
+ if (first_page <= record_rest + 1)
+ {
+ chunk2_page= 1;
+ record_rest-= (first_page - 1);
+ full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
+ record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
+ last_page_capacity= page_capacity;
+ }
+ else
+ {
+ chunk2_page= full_pages= 0;
+ last_page_capacity= first_page;
+ }
+ chunk3_size= 0;
+ chunk3_pages= 0;
+ if (last_page_capacity > record_rest + 1 && record_rest != 0)
+ {
+ if (last_page_capacity >
+ record_rest + header_fixed_part + groups.elements * (7 + 1))
+ {
+ /* 1 record of type 0 */
+ chunk3_pages= 0;
+ }
+ else
+ {
+ chunk3_pages= 1;
+ if (record_rest + 2 == last_page_capacity)
+ {
+ chunk3_size= record_rest - 1;
+ record_rest= 1;
+ }
+ else
+ {
+ chunk3_size= record_rest;
+ record_rest= 0;
+ }
+ }
+ }
+ /*
+ A first non-full page will hold type 0 chunk only if it fit in it with
+ all its headers
+ */
+ while (page_capacity <
+ record_rest + header_fixed_part +
+ (groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1))
+ chunk0_pages++;
+ DBUG_PRINT("info", ("chunk0_pages: %u groups %u groups per full page: %u "
+ "Group on last page: %u",
+ chunk0_pages, groups.elements,
+ groups_per_page,
+ (groups.elements -
+ ((page_capacity - header_fixed_part) / (7 + 1)) *
+ (chunk0_pages - 1))));
+ DBUG_PRINT("info", ("first_page: %u chunk2: %u full_pages: %u (%lu) "
+ "chunk3: %u (%u) rest: %u",
+ first_page,
+ chunk2_page, full_pages,
+ (ulong) full_pages *
+ log_descriptor.page_capacity_chunk_2,
+ chunk3_pages, (uint) chunk3_size, (uint) record_rest));
+ rc= translog_advance_pointer(full_pages + chunk3_pages +
+ (chunk0_pages - 1),
+ record_rest + header_fixed_part +
+ (groups.elements -
+ ((page_capacity -
+ header_fixed_part) / (7 + 1)) *
+ (chunk0_pages - 1)) * (7 + 1));
+ rc|= translog_unlock();
+ if (rc)
+ goto err;
+
+ if (chunk2_page)
+ {
+ DBUG_PRINT("info", ("chunk 2 to finish first page"));
+ translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
+ translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) "
+ "Left: %lu",
+ LSN_IN_PARTS(log_descriptor.horizon),
+ LSN_IN_PARTS(horizon),
+ (ulong) (parts->record_length - (first_page - 1) -
+ done)));
+ }
+ else if (chunk3_pages)
+ {
+ DBUG_PRINT("info", ("chunk 3"));
+ DBUG_ASSERT(full_pages == 0);
+ uchar chunk3_header[3];
+ chunk3_pages= 0;
+ chunk3_header[0]= TRANSLOG_CHUNK_LNGTH;
+ int2store(chunk3_header + 1, chunk3_size);
+ translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header);
+ translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts);
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) "
+ "Left: %lu",
+ LSN_IN_PARTS(log_descriptor.horizon),
+ LSN_IN_PARTS(horizon),
+ (ulong) (parts->record_length - chunk3_size - done)));
+ }
+ else
+ {
+ DBUG_PRINT("info", ("no new_page_before_chunk0"));
+ new_page_before_chunk0= 0;
+ }
+
+ for (i= 0; i < full_pages; i++)
+ {
+ DBUG_ASSERT(chunk2_page != 0);
+ if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
+ goto err;
+
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) "
+ "Left: %lu",
+ LSN_IN_PARTS(log_descriptor.horizon),
+ LSN_IN_PARTS(horizon),
+ (ulong) (parts->record_length - (first_page - 1) -
+ i * log_descriptor.page_capacity_chunk_2 -
+ done)));
+ }
+
+ if (chunk3_pages &&
+ translog_write_variable_record_chunk3_page(parts,
+ chunk3_size,
+ &horizon, &cursor))
+ goto err;
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
+ LSN_IN_PARTS(log_descriptor.horizon),
+ LSN_IN_PARTS(horizon)));
+
+ *chunk0_header= (uchar) (type |TRANSLOG_CHUNK_LSN);
+ int2store(chunk0_header + 1, short_trid);
+ translog_write_variable_record_1group_code_len(chunk0_header + 3,
+ parts->record_length,
+ header_length);
+ do
+ {
+ int limit;
+ if (new_page_before_chunk0)
+ {
+ rc= translog_page_next(&horizon, &cursor, &buffer_to_flush);
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ buffer_to_flush= NULL;
+ }
+ if (rc)
+ {
+ DBUG_PRINT("error", ("flush of unlock buffer failed"));
+ goto err;
+ }
+ }
+ new_page_before_chunk0= 1;
+
+ if (first_chunk0)
+ {
+ first_chunk0= 0;
+ *lsn= horizon;
+ if (log_record_type_descriptor[type].inwrite_hook &&
+ (*log_record_type_descriptor[type].inwrite_hook) (type, trn,
+ tbl_info,
+ lsn, hook_arg))
+ goto err;
+ }
+
+ /*
+ A first non-full page will hold type 0 chunk only if it fit in it with
+ all its headers => the fist page is full or number of groups less then
+ possible number of full page.
+ */
+ limit= (groups_per_page < groups.elements - curr_group ?
+ groups_per_page : groups.elements - curr_group);
+ DBUG_PRINT("info", ("Groups: %u curr: %u limit: %u",
+ (uint) groups.elements, (uint) curr_group,
+ (uint) limit));
+
+ if (chunk0_pages == 1)
+ {
+ DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) + %u = %u",
+ (uint) limit, (uint) record_rest,
+ (uint) (2 + limit * (7 + 1) + record_rest)));
+ int2store(chunk0_header + header_length - 2,
+ 2 + limit * (7 + 1) + record_rest);
+ }
+ else
+ {
+ DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) = %u",
+ (uint) limit, (uint) (2 + limit * (7 + 1))));
+ int2store(chunk0_header + header_length - 2, 2 + limit * (7 + 1));
+ }
+ int2store(chunk0_header + header_length, groups.elements - curr_group);
+ translog_write_data_on_page(&horizon, &cursor, header_fixed_part,
+ chunk0_header);
+ for (i= curr_group; i < limit + curr_group; i++)
+ {
+ struct st_translog_group_descriptor *grp_ptr;
+ grp_ptr= dynamic_element(&groups, i,
+ struct st_translog_group_descriptor *);
+ lsn_store(group_desc, grp_ptr->addr);
+ group_desc[7]= grp_ptr->num;
+ translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc);
+ }
+
+ if (chunk0_pages == 1 && record_rest != 0)
+ translog_write_parts_on_page(&horizon, &cursor, record_rest, parts);
+
+ chunk0_pages--;
+ curr_group+= limit;
+
+ } while (chunk0_pages != 0);
+ rc= translog_buffer_lock(cursor.buffer);
+ if (cmp_translog_addr(cursor.buffer->last_lsn, *lsn) < 0)
+ cursor.buffer->last_lsn= *lsn;
+ translog_buffer_decrease_writers(cursor.buffer);
+ rc|= translog_buffer_unlock(cursor.buffer);
+
+ if (translog_set_lsn_for_files(file_of_the_first_group, LSN_FILE_NO(*lsn),
+ *lsn, FALSE))
+ goto err;
+
+ translog_mark_file_finished(file_of_the_first_group);
+
+ delete_dynamic(&groups);
+ DBUG_RETURN(rc);
+
+err_unlock:
+
+ translog_unlock();
+
+err:
+
+ translog_mark_file_finished(file_of_the_first_group);
+
+ delete_dynamic(&groups);
+ DBUG_RETURN(1);
+}
+
+
+/**
+ @brief Write the variable length log record.
+
+ @param lsn LSN of the record will be written here
+ @param type the log record type
+ @param short_trid Short transaction ID or 0 if it has no sense
+ @param parts Descriptor of record source parts
+ @param trn Transaction structure pointer for hooks by
+ record log type, for short_id
+ @param hook_arg Argument which will be passed to pre-write and
+ in-write hooks of this record.
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool translog_write_variable_record(LSN *lsn,
+ enum translog_record_type type,
+ MARIA_HA *tbl_info,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ TRN *trn, void *hook_arg)
+{
+ struct st_translog_buffer *buffer_to_flush= NULL;
+ uint header_length1= 1 + 2 + 2 +
+ translog_variable_record_length_bytes(parts->record_length);
+ ulong buffer_rest;
+ uint page_rest;
+ /* Max number of such LSNs per record is 2 */
+ uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
+ COMPRESSED_LSN_MAX_STORE_SIZE];
+ my_bool res;
+ DBUG_ENTER("translog_write_variable_record");
+
+ translog_lock();
+ DBUG_PRINT("info", ("horizon: (%lu,0x%lx)",
+ LSN_IN_PARTS(log_descriptor.horizon)));
+ page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
+ DBUG_PRINT("info", ("header length: %u page_rest: %u",
+ header_length1, page_rest));
+
+ /*
+ header and part which we should read have to fit in one chunk
+ TODO: allow to divide readable header
+ */
+ if (page_rest <
+ (header_length1 + log_record_type_descriptor[type].read_header_len))
+ {
+ DBUG_PRINT("info",
+ ("Next page, size: %u header: %u + %u",
+ log_descriptor.bc.current_page_fill,
+ header_length1,
+ log_record_type_descriptor[type].read_header_len));
+ translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
+ &buffer_to_flush);
+ /* Chunk 2 header is 1 byte, so full page capacity will be one uchar more */
+ page_rest= log_descriptor.page_capacity_chunk_2 + 1;
+ DBUG_PRINT("info", ("page_rest: %u", page_rest));
+ }
+
+ /*
+ To minimize compressed size we will compress always relative to
+ very first chunk address (log_descriptor.horizon for now)
+ */
+ if (log_record_type_descriptor[type].compressed_LSN > 0)
+ {
+ translog_relative_LSN_encode(parts, log_descriptor.horizon,
+ log_record_type_descriptor[type].
+ compressed_LSN, compressed_LSNs);
+ /* recalculate header length after compression */
+ header_length1= 1 + 2 + 2 +
+ translog_variable_record_length_bytes(parts->record_length);
+ DBUG_PRINT("info", ("after compressing LSN(s) header length: %u "
+ "record length: %lu",
+ header_length1, (ulong)parts->record_length));
+ }
+
+ /* TODO: check space on current page for header + few bytes */
+ if (page_rest >= parts->record_length + header_length1)
+ {
+ /* following function makes translog_unlock(); */
+ res= translog_write_variable_record_1chunk(lsn, type, tbl_info,
+ short_trid,
+ parts, buffer_to_flush,
+ header_length1, trn, hook_arg);
+ DBUG_RETURN(res);
+ }
+
+ buffer_rest= translog_get_current_group_size();
+
+ if (buffer_rest >= parts->record_length + header_length1 - page_rest)
+ {
+ /* following function makes translog_unlock(); */
+ res= translog_write_variable_record_1group(lsn, type, tbl_info,
+ short_trid,
+ parts, buffer_to_flush,
+ header_length1, trn, hook_arg);
+ DBUG_RETURN(res);
+ }
+ /* following function makes translog_unlock(); */
+ res= translog_write_variable_record_mgroup(lsn, type, tbl_info,
+ short_trid,
+ parts, buffer_to_flush,
+ header_length1,
+ buffer_rest, trn, hook_arg);
+ DBUG_RETURN(res);
+}
+
+
+/**
+ @brief Write the fixed and pseudo-fixed log record.
+
+ @param lsn LSN of the record will be written here
+ @param type the log record type
+ @param short_trid Short transaction ID or 0 if it has no sense
+ @param parts Descriptor of record source parts
+ @param trn Transaction structure pointer for hooks by
+ record log type, for short_id
+ @param hook_arg Argument which will be passed to pre-write and
+ in-write hooks of this record.
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool translog_write_fixed_record(LSN *lsn,
+ enum translog_record_type type,
+ MARIA_HA *tbl_info,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ TRN *trn, void *hook_arg)
+{
+ struct st_translog_buffer *buffer_to_flush= NULL;
+ uchar chunk1_header[1 + 2];
+ /* Max number of such LSNs per record is 2 */
+ uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
+ COMPRESSED_LSN_MAX_STORE_SIZE];
+ LEX_STRING *part;
+ int rc;
+ DBUG_ENTER("translog_write_fixed_record");
+ DBUG_ASSERT((log_record_type_descriptor[type].rclass ==
+ LOGRECTYPE_FIXEDLENGTH &&
+ parts->record_length ==
+ log_record_type_descriptor[type].fixed_length) ||
+ (log_record_type_descriptor[type].rclass ==
+ LOGRECTYPE_PSEUDOFIXEDLENGTH &&
+ parts->record_length ==
+ log_record_type_descriptor[type].fixed_length));
+
+ translog_lock();
+ DBUG_PRINT("info", ("horizon: (%lu,0x%lx)",
+ LSN_IN_PARTS(log_descriptor.horizon)));
+
+ DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE);
+ DBUG_PRINT("info",
+ ("Page size: %u record: %u next cond: %d",
+ log_descriptor.bc.current_page_fill,
+ (parts->record_length +
+ log_record_type_descriptor[type].compressed_LSN * 2 + 3),
+ ((((uint) log_descriptor.bc.current_page_fill) +
+ (parts->record_length +
+ log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
+ TRANSLOG_PAGE_SIZE)));
+ /*
+ check that there is enough place on current page.
+ NOTE: compressing may increase page LSN size on two bytes for every LSN
+ */
+ if ((((uint) log_descriptor.bc.current_page_fill) +
+ (parts->record_length +
+ log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
+ TRANSLOG_PAGE_SIZE)
+ {
+ DBUG_PRINT("info", ("Next page"));
+ translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
+ &buffer_to_flush);
+ }
+
+ *lsn= log_descriptor.horizon;
+ if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
+ *lsn, TRUE) ||
+ (log_record_type_descriptor[type].inwrite_hook &&
+ (*log_record_type_descriptor[type].inwrite_hook) (type, trn, tbl_info,
+ lsn, hook_arg)))
+ {
+ rc= 1;
+ goto err;
+ }
+
+ /* compress LSNs */
+ if (log_record_type_descriptor[type].rclass ==
+ LOGRECTYPE_PSEUDOFIXEDLENGTH)
+ {
+ DBUG_ASSERT(log_record_type_descriptor[type].compressed_LSN > 0);
+ translog_relative_LSN_encode(parts, *lsn,
+ log_record_type_descriptor[type].
+ compressed_LSN, compressed_LSNs);
+ }
+
+ /*
+ Write the whole record at once (we know that there is enough place on
+ the destination page)
+ */
+ DBUG_ASSERT(parts->current != 0); /* first part is left for header */
+ part= parts->parts + (--parts->current);
+ parts->total_record_length+= (part->length= 1 + 2);
+ part->str= (char*)chunk1_header;
+ *chunk1_header= (uchar) (type | TRANSLOG_CHUNK_FIXED);
+ int2store(chunk1_header + 1, short_trid);
+
+ rc= translog_write_parts_on_page(&log_descriptor.horizon,
+ &log_descriptor.bc,
+ parts->total_record_length, parts);
+
+ log_descriptor.bc.buffer->last_lsn= *lsn;
+
+err:
+ rc|= translog_unlock();
+
+ /*
+ check if we switched buffer and need process it (current buffer is
+ unlocked already => we will not delay other threads
+ */
+ if (buffer_to_flush != NULL)
+ {
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+
+ DBUG_RETURN(rc);
+}
+
+
+/**
+ @brief Writes the log record
+
+ If share has no 2-byte-id yet, gives an id to the share and logs
+ LOGREC_FILE_ID. If transaction has not logged LOGREC_LONG_TRANSACTION_ID
+ yet, logs it.
+
+ @param lsn LSN of the record will be written here
+ @param type the log record type
+ @param trn Transaction structure pointer for hooks by
+ record log type, for short_id
+ @param tbl_info MARIA_HA of table or NULL
+ @param rec_len record length or 0 (count it)
+ @param part_no number of parts or 0 (count it)
+ @param parts_data zero ended (in case of number of parts is 0)
+ array of LEX_STRINGs (parts), first
+ TRANSLOG_INTERNAL_PARTS positions in the log
+ should be unused (need for loghandler)
+ @param store_share_id if tbl_info!=NULL then share's id will
+ automatically be stored in the two first bytes
+ pointed (so pointer is assumed to be !=NULL)
+ @param hook_arg argument which will be passed to pre-write and
+ in-write hooks of this record.
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool translog_write_record(LSN *lsn,
+ enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info,
+ translog_size_t rec_len,
+ uint part_no,
+ LEX_STRING *parts_data,
+ uchar *store_share_id,
+ void *hook_arg)
+{
+ struct st_translog_parts parts;
+ LEX_STRING *part;
+ int rc;
+ uint short_trid= trn->short_id;
+ DBUG_ENTER("translog_write_record");
+ DBUG_PRINT("enter", ("type: %u ShortTrID: %u rec_len: %lu",
+ (uint) type, (uint) short_trid, (ulong) rec_len));
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+ if (unlikely(translog_status != TRANSLOG_OK))
+ {
+ DBUG_PRINT("error", ("Transaction log is write protected"));
+ DBUG_RETURN(1);
+ }
+
+
+ if (tbl_info)
+ {
+ MARIA_SHARE *share= tbl_info->s;
+ DBUG_ASSERT(share->now_transactional);
+ if (unlikely(share->id == 0))
+ {
+ /*
+ First log write for this MARIA_SHARE; give it a short id.
+ When the lock manager is enabled and needs a short id, it should be
+ assigned in the lock manager (because row locks will be taken before
+ log records are written; for example SELECT FOR UPDATE takes locks but
+ writes no log record.
+ */
+ if (unlikely(translog_assign_id_to_share(tbl_info, trn)))
+ DBUG_RETURN(1);
+ }
+ fileid_store(store_share_id, share->id);
+ }
+ if (unlikely(!(trn->first_undo_lsn & TRANSACTION_LOGGED_LONG_ID)))
+ {
+ LSN dummy_lsn;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
+ uchar log_data[6];
+ int6store(log_data, trn->trid);
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+ trn->first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID; /* no recursion */
+ if (unlikely(translog_write_record(&dummy_lsn, LOGREC_LONG_TRANSACTION_ID,
+ trn, NULL, sizeof(log_data),
+ sizeof(log_array)/sizeof(log_array[0]),
+ log_array, NULL, NULL)))
+ DBUG_RETURN(1);
+ }
+
+ parts.parts= parts_data;
+
+ /* count parts if they are not counted by upper level */
+ if (part_no == 0)
+ {
+ for (part_no= TRANSLOG_INTERNAL_PARTS;
+ parts_data[part_no].length != 0;
+ part_no++);
+ }
+ parts.elements= part_no;
+ parts.current= TRANSLOG_INTERNAL_PARTS;
+
+ /* clear TRANSLOG_INTERNAL_PARTS */
+ compile_time_assert(TRANSLOG_INTERNAL_PARTS != 0);
+ parts_data[0].str= 0;
+ parts_data[0].length= 0;
+
+ /* count length of the record */
+ if (rec_len == 0)
+ {
+ for(part= parts_data + TRANSLOG_INTERNAL_PARTS;\
+ part < parts_data + part_no;
+ part++)
+ {
+ rec_len+= part->length;
+ }
+ }
+ parts.record_length= rec_len;
+
+#ifndef DBUG_OFF
+ {
+ uint i;
+ uint len= 0;
+#ifdef HAVE_purify
+ ha_checksum checksum= 0;
+#endif
+ for (i= TRANSLOG_INTERNAL_PARTS; i < part_no; i++)
+ {
+#ifdef HAVE_purify
+ /* Find unitialized bytes early */
+ checksum+= my_checksum(checksum, parts_data[i].str,
+ parts_data[i].length);
+#endif
+ len+= parts_data[i].length;
+ }
+ DBUG_ASSERT(len == rec_len);
+ }
+#endif
+ /*
+ Start total_record_length from record_length then overhead will
+ be add
+ */
+ parts.total_record_length= parts.record_length;
+ DBUG_PRINT("info", ("record length: %lu", (ulong) parts.record_length));
+
+ /* process this parts */
+ if (!(rc= (log_record_type_descriptor[type].prewrite_hook &&
+ (*log_record_type_descriptor[type].prewrite_hook) (type, trn,
+ tbl_info,
+ hook_arg))))
+ {
+ switch (log_record_type_descriptor[type].rclass) {
+ case LOGRECTYPE_VARIABLE_LENGTH:
+ rc= translog_write_variable_record(lsn, type, tbl_info,
+ short_trid, &parts, trn, hook_arg);
+ break;
+ case LOGRECTYPE_PSEUDOFIXEDLENGTH:
+ case LOGRECTYPE_FIXEDLENGTH:
+ rc= translog_write_fixed_record(lsn, type, tbl_info,
+ short_trid, &parts, trn, hook_arg);
+ break;
+ case LOGRECTYPE_NOT_ALLOWED:
+ default:
+ DBUG_ASSERT(0);
+ rc= 1;
+ }
+ }
+
+ DBUG_PRINT("info", ("LSN: (%lu,0x%lx)", LSN_IN_PARTS(*lsn)));
+ DBUG_RETURN(rc);
+}
+
+
+/*
+ Decode compressed (relative) LSN(s)
+
+ SYNOPSIS
+ translog_relative_lsn_decode()
+ base_lsn LSN for encoding
+ src Decode LSN(s) from here
+ dst Put decoded LSNs here
+ lsns number of LSN(s)
+
+ RETURN
+ position in sources after decoded LSN(s)
+*/
+
+static uchar *translog_relative_LSN_decode(LSN base_lsn,
+ uchar *src, uchar *dst, uint lsns)
+{
+ uint i;
+ for (i= 0; i < lsns; i++, dst+= LSN_STORE_SIZE)
+ {
+ src= translog_get_LSN_from_diff(base_lsn, src, dst);
+ }
+ return src;
+}
+
+/**
+ @brief Get header of fixed/pseudo length record and call hook for
+ it processing
+
+ @param page Pointer to the buffer with page where LSN chunk is
+ placed
+ @param page_offset Offset of the first chunk in the page
+ @param buff Buffer to be filled with header data
+
+ @return Length of header or operation status
+ @retval # number of bytes in TRANSLOG_HEADER_BUFFER::header where
+ stored decoded part of the header
+*/
+
+static int translog_fixed_length_header(uchar *page,
+ translog_size_t page_offset,
+ TRANSLOG_HEADER_BUFFER *buff)
+{
+ struct st_log_record_type_descriptor *desc=
+ log_record_type_descriptor + buff->type;
+ uchar *src= page + page_offset + 3;
+ uchar *dst= buff->header;
+ uchar *start= src;
+ uint lsns= desc->compressed_LSN;
+ uint length= desc->fixed_length;
+
+ DBUG_ENTER("translog_fixed_length_header");
+
+ buff->record_length= length;
+
+ if (desc->rclass == LOGRECTYPE_PSEUDOFIXEDLENGTH)
+ {
+ DBUG_ASSERT(lsns > 0);
+ src= translog_relative_LSN_decode(buff->lsn, src, dst, lsns);
+ lsns*= LSN_STORE_SIZE;
+ dst+= lsns;
+ length-= lsns;
+ buff->compressed_LSN_economy= (lsns - (src - start));
+ }
+ else
+ buff->compressed_LSN_economy= 0;
+
+ memcpy(dst, src, length);
+ buff->non_header_data_start_offset= page_offset +
+ ((src + length) - (page + page_offset));
+ buff->non_header_data_len= 0;
+ DBUG_RETURN(buff->record_length);
+}
+
+
+/*
+ Free resources used by TRANSLOG_HEADER_BUFFER
+
+ SYNOPSIS
+ translog_free_record_header();
+*/
+
+void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff)
+{
+ DBUG_ENTER("translog_free_record_header");
+ if (buff->groups_no != 0)
+ {
+ my_free((uchar*) buff->groups, MYF(0));
+ buff->groups_no= 0;
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ @brief Returns the current horizon at the end of the current log
+
+ @return Horizon
+ @retval LSN_ERROR error
+ @retvar # Horizon
+*/
+
+TRANSLOG_ADDRESS translog_get_horizon()
+{
+ TRANSLOG_ADDRESS res;
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+ translog_lock();
+ res= log_descriptor.horizon;
+ translog_unlock();
+ return res;
+}
+
+
+/**
+ @brief Returns the current horizon at the end of the current log, caller is
+ assumed to already hold the lock
+
+ @return Horizon
+ @retval LSN_ERROR error
+ @retvar # Horizon
+*/
+
+TRANSLOG_ADDRESS translog_get_horizon_no_lock()
+{
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+ translog_lock_assert_owner();
+ return log_descriptor.horizon;
+}
+
+
+/*
+ Set last page in the scanner data structure
+
+ SYNOPSIS
+ translog_scanner_set_last_page()
+ scanner Information about current chunk during scanning
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA *scanner)
+{
+ my_bool page_ok;
+ if (LSN_FILE_NO(scanner->page_addr) == LSN_FILE_NO(scanner->horizon))
+ {
+ /* It is last file => we can easy find last page address by horizon */
+ uint pagegrest= LSN_OFFSET(scanner->horizon) % TRANSLOG_PAGE_SIZE;
+ scanner->last_file_page= (scanner->horizon -
+ (pagegrest ? pagegrest : TRANSLOG_PAGE_SIZE));
+ return (0);
+ }
+ scanner->last_file_page= scanner->page_addr;
+ return (translog_get_last_page_addr(&scanner->last_file_page, &page_ok));
+}
+
+
+/**
+ @brief Get page from page cache according to requested method
+
+ @param scanner The scanner data
+
+ @return operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool
+translog_scanner_get_page(TRANSLOG_SCANNER_DATA *scanner)
+{
+ TRANSLOG_VALIDATOR_DATA data;
+ DBUG_ENTER("translog_scanner_get_page");
+ data.addr= &scanner->page_addr;
+ data.was_recovered= 0;
+ DBUG_RETURN((scanner->page=
+ translog_get_page(&data, scanner->buffer,
+ (scanner->use_direct_link ?
+ &scanner->direct_link :
+ NULL))) ==
+ NULL);
+}
+
+
+/**
+ @brief Initialize reader scanner.
+
+ @param lsn LSN with which it have to be inited
+ @param fixed_horizon true if it is OK do not read records which was written
+ after scanning beginning
+ @param scanner scanner which have to be inited
+ @param use_direct prefer using direct lings from page handler
+ where it is possible.
+
+ @note If direct link was used translog_destroy_scanner should be
+ called after it using
+
+ @return status of the operation
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool translog_scanner_init(LSN lsn,
+ my_bool fixed_horizon,
+ TRANSLOG_SCANNER_DATA *scanner,
+ my_bool use_direct)
+{
+ TRANSLOG_VALIDATOR_DATA data;
+ DBUG_ENTER("translog_scanner_init");
+ DBUG_PRINT("enter", ("Scanner: 0x%lx LSN: (0x%lu,0x%lx)",
+ (ulong) scanner, LSN_IN_PARTS(lsn)));
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+
+ data.addr= &scanner->page_addr;
+ data.was_recovered= 0;
+
+ scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
+
+ scanner->fixed_horizon= fixed_horizon;
+ scanner->use_direct_link= use_direct;
+ scanner->direct_link= NULL;
+
+ scanner->horizon= translog_get_horizon();
+ DBUG_PRINT("info", ("horizon: (0x%lu,0x%lx)",
+ LSN_IN_PARTS(scanner->horizon)));
+
+ /* lsn < horizon */
+ DBUG_ASSERT(lsn <= scanner->horizon);
+
+ scanner->page_addr= lsn;
+ scanner->page_addr-= scanner->page_offset; /*decrease offset */
+
+ if (translog_scanner_set_last_page(scanner))
+ DBUG_RETURN(1);
+
+ if (translog_scanner_get_page(scanner))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Destroy scanner object;
+
+ @param scanner The scanner object to destroy
+*/
+
+void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner)
+{
+ DBUG_ENTER("translog_destroy_scanner");
+ DBUG_PRINT("enter", ("Scanner: 0x%lx", (ulong)scanner));
+ translog_free_link(scanner->direct_link);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Checks End of the Log
+
+ SYNOPSIS
+ translog_scanner_eol()
+ scanner Information about current chunk during scanning
+
+ RETURN
+ 1 End of the Log
+ 0 OK
+*/
+
+static my_bool translog_scanner_eol(TRANSLOG_SCANNER_DATA *scanner)
+{
+ DBUG_ENTER("translog_scanner_eol");
+ DBUG_PRINT("enter",
+ ("Horizon: (%lu, 0x%lx) Current: (%lu, 0x%lx+0x%x=0x%lx)",
+ LSN_IN_PARTS(scanner->horizon),
+ LSN_IN_PARTS(scanner->page_addr),
+ (uint) scanner->page_offset,
+ (ulong) (LSN_OFFSET(scanner->page_addr) + scanner->page_offset)));
+ if (scanner->horizon > (scanner->page_addr +
+ scanner->page_offset))
+ {
+ DBUG_PRINT("info", ("Horizon is not reached"));
+ DBUG_RETURN(0);
+ }
+ if (scanner->fixed_horizon)
+ {
+ DBUG_PRINT("info", ("Horizon is fixed and reached"));
+ DBUG_RETURN(1);
+ }
+ scanner->horizon= translog_get_horizon();
+ DBUG_PRINT("info",
+ ("Horizon is re-read, EOL: %d",
+ scanner->horizon <= (scanner->page_addr +
+ scanner->page_offset)));
+ DBUG_RETURN(scanner->horizon <= (scanner->page_addr +
+ scanner->page_offset));
+}
+
+
+/**
+ @brief Cheks End of the Page
+
+ @param scanner Information about current chunk during scanning
+
+ @retval 1 End of the Page
+ @retval 0 OK
+*/
+
+static my_bool translog_scanner_eop(TRANSLOG_SCANNER_DATA *scanner)
+{
+ DBUG_ENTER("translog_scanner_eop");
+ DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE ||
+ scanner->page[scanner->page_offset] == TRANSLOG_FILLER);
+}
+
+
+/**
+ @brief Checks End of the File (i.e. we are scanning last page, which do not
+ mean end of this page)
+
+ @param scanner Information about current chunk during scanning
+
+ @retval 1 End of the File
+ @retval 0 OK
+*/
+
+static my_bool translog_scanner_eof(TRANSLOG_SCANNER_DATA *scanner)
+{
+ DBUG_ENTER("translog_scanner_eof");
+ DBUG_ASSERT(LSN_FILE_NO(scanner->page_addr) ==
+ LSN_FILE_NO(scanner->last_file_page));
+ DBUG_PRINT("enter", ("curr Page: 0x%lx last page: 0x%lx "
+ "normal EOF: %d",
+ (ulong) LSN_OFFSET(scanner->page_addr),
+ (ulong) LSN_OFFSET(scanner->last_file_page),
+ LSN_OFFSET(scanner->page_addr) ==
+ LSN_OFFSET(scanner->last_file_page)));
+ /*
+ TODO: detect damaged file EOF,
+ TODO: issue warning if damaged file EOF detected
+ */
+ DBUG_RETURN(scanner->page_addr ==
+ scanner->last_file_page);
+}
+
+/*
+ Move scanner to the next chunk
+
+ SYNOPSIS
+ translog_get_next_chunk()
+ scanner Information about current chunk during scanning
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool
+translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner)
+{
+ uint16 len;
+ DBUG_ENTER("translog_get_next_chunk");
+
+ if (translog_scanner_eop(scanner))
+ len= TRANSLOG_PAGE_SIZE - scanner->page_offset;
+ else if ((len= translog_get_total_chunk_length(scanner->page,
+ scanner->page_offset)) == 0)
+ DBUG_RETURN(1);
+ scanner->page_offset+= len;
+
+ if (translog_scanner_eol(scanner))
+ {
+ scanner->page= END_OF_LOG;
+ scanner->page_offset= 0;
+ DBUG_RETURN(0);
+ }
+ if (translog_scanner_eop(scanner))
+ {
+ /* before reading next page we should unpin current one if it was pinned */
+ translog_free_link(scanner->direct_link);
+ if (translog_scanner_eof(scanner))
+ {
+ DBUG_PRINT("info", ("horizon: (%lu,0x%lx) pageaddr: (%lu,0x%lx)",
+ LSN_IN_PARTS(scanner->horizon),
+ LSN_IN_PARTS(scanner->page_addr)));
+ /* if it is log end it have to be caught before */
+ DBUG_ASSERT(LSN_FILE_NO(scanner->horizon) >
+ LSN_FILE_NO(scanner->page_addr));
+ scanner->page_addr+= LSN_ONE_FILE;
+ scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr,
+ TRANSLOG_PAGE_SIZE);
+ if (translog_scanner_set_last_page(scanner))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */
+ }
+
+ if (translog_scanner_get_page(scanner))
+ DBUG_RETURN(1);
+
+ scanner->page_offset= translog_get_first_chunk_offset(scanner->page);
+ if (translog_scanner_eol(scanner))
+ {
+ scanner->page= END_OF_LOG;
+ scanner->page_offset= 0;
+ DBUG_RETURN(0);
+ }
+ DBUG_ASSERT(scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Get header of variable length record and call hook for it processing
+
+ @param page Pointer to the buffer with page where LSN chunk is
+ placed
+ @param page_offset Offset of the first chunk in the page
+ @param buff Buffer to be filled with header data
+ @param scanner If present should be moved to the header page if
+ it differ from LSN page
+
+ @return Length of header or operation status
+ @retval RECHEADER_READ_ERROR error
+ @retval RECHEADER_READ_EOF End of the log reached during the read
+ @retval # number of bytes in
+ TRANSLOG_HEADER_BUFFER::header where
+ stored decoded part of the header
+*/
+
+static int
+translog_variable_length_header(uchar *page, translog_size_t page_offset,
+ TRANSLOG_HEADER_BUFFER *buff,
+ TRANSLOG_SCANNER_DATA *scanner)
+{
+ struct st_log_record_type_descriptor *desc= (log_record_type_descriptor +
+ buff->type);
+ uchar *src= page + page_offset + 1 + 2;
+ uchar *dst= buff->header;
+ LSN base_lsn;
+ uint lsns= desc->compressed_LSN;
+ uint16 chunk_len;
+ uint16 length= desc->read_header_len;
+ uint16 buffer_length= length;
+ uint16 body_len;
+ TRANSLOG_SCANNER_DATA internal_scanner;
+ DBUG_ENTER("translog_variable_length_header");
+
+ buff->record_length= translog_variable_record_1group_decode_len(&src);
+ chunk_len= uint2korr(src);
+ DBUG_PRINT("info", ("rec len: %lu chunk len: %u length: %u bufflen: %u",
+ (ulong) buff->record_length, (uint) chunk_len,
+ (uint) length, (uint) buffer_length));
+ if (chunk_len == 0)
+ {
+ uint16 page_rest;
+ DBUG_PRINT("info", ("1 group"));
+ src+= 2;
+ page_rest= TRANSLOG_PAGE_SIZE - (src - page);
+
+ base_lsn= buff->lsn;
+ body_len= min(page_rest, buff->record_length);
+ }
+ else
+ {
+ uint grp_no, curr;
+ uint header_to_skip;
+ uint16 page_rest;
+
+ DBUG_PRINT("info", ("multi-group"));
+ grp_no= buff->groups_no= uint2korr(src + 2);
+ if (!(buff->groups=
+ (TRANSLOG_GROUP*) my_malloc(sizeof(TRANSLOG_GROUP) * grp_no,
+ MYF(0))))
+ DBUG_RETURN(RECHEADER_READ_ERROR);
+ DBUG_PRINT("info", ("Groups: %u", (uint) grp_no));
+ src+= (2 + 2);
+ page_rest= TRANSLOG_PAGE_SIZE - (src - page);
+ curr= 0;
+ header_to_skip= src - (page + page_offset);
+ buff->chunk0_pages= 0;
+
+ for (;;)
+ {
+ uint i, read_length= grp_no;
+
+ buff->chunk0_pages++;
+ if (page_rest < grp_no * (7 + 1))
+ read_length= page_rest / (7 + 1);
+ DBUG_PRINT("info", ("Read chunk0 page#%u read: %u left: %u "
+ "start from: %u",
+ buff->chunk0_pages, read_length, grp_no, curr));
+ for (i= 0; i < read_length; i++, curr++)
+ {
+ DBUG_ASSERT(curr < buff->groups_no);
+ buff->groups[curr].addr= lsn_korr(src + i * (7 + 1));
+ buff->groups[curr].num= src[i * (7 + 1) + 7];
+ DBUG_PRINT("info", ("group #%u (%lu,0x%lx) chunks: %u",
+ curr,
+ LSN_IN_PARTS(buff->groups[curr].addr),
+ (uint) buff->groups[curr].num));
+ }
+ grp_no-= read_length;
+ if (grp_no == 0)
+ {
+ if (scanner)
+ {
+ buff->chunk0_data_addr= scanner->page_addr;
+ /* offset increased */
+ buff->chunk0_data_addr+= (page_offset + header_to_skip +
+ read_length * (7 + 1));
+ }
+ else
+ {
+ buff->chunk0_data_addr= buff->lsn;
+ /* offset increased */
+ buff->chunk0_data_addr+= (header_to_skip + read_length * (7 + 1));
+ }
+ buff->chunk0_data_len= chunk_len - 2 - read_length * (7 + 1);
+ DBUG_PRINT("info", ("Data address: (%lu,0x%lx) len: %u",
+ LSN_IN_PARTS(buff->chunk0_data_addr),
+ buff->chunk0_data_len));
+ break;
+ }
+ if (scanner == NULL)
+ {
+ DBUG_PRINT("info", ("use internal scanner for header reading"));
+ scanner= &internal_scanner;
+ if (translog_scanner_init(buff->lsn, 1, scanner, 0))
+ DBUG_RETURN(RECHEADER_READ_ERROR);
+ }
+ if (translog_get_next_chunk(scanner))
+ {
+ if (scanner == &internal_scanner)
+ translog_destroy_scanner(scanner);
+ DBUG_RETURN(RECHEADER_READ_ERROR);
+ }
+ if (scanner->page == END_OF_LOG)
+ {
+ if (scanner == &internal_scanner)
+ translog_destroy_scanner(scanner);
+ DBUG_RETURN(RECHEADER_READ_EOF);
+ }
+ page= scanner->page;
+ page_offset= scanner->page_offset;
+ src= page + page_offset + header_to_skip;
+ chunk_len= uint2korr(src - 2 - 2);
+ DBUG_PRINT("info", ("Chunk len: %u", (uint) chunk_len));
+ page_rest= TRANSLOG_PAGE_SIZE - (src - page);
+ }
+
+ if (scanner == NULL)
+ {
+ DBUG_PRINT("info", ("use internal scanner"));
+ scanner= &internal_scanner;
+ }
+ else
+ {
+ translog_destroy_scanner(scanner);
+ }
+ base_lsn= buff->groups[0].addr;
+ translog_scanner_init(base_lsn, 1, scanner, scanner == &internal_scanner);
+ /* first group chunk is always chunk type 2 */
+ page= scanner->page;
+ page_offset= scanner->page_offset;
+ src= page + page_offset + 1;
+ page_rest= TRANSLOG_PAGE_SIZE - (src - page);
+ body_len= page_rest;
+ if (scanner == &internal_scanner)
+ translog_destroy_scanner(scanner);
+ }
+ if (lsns)
+ {
+ uchar *start= src;
+ src= translog_relative_LSN_decode(base_lsn, src, dst, lsns);
+ lsns*= LSN_STORE_SIZE;
+ dst+= lsns;
+ length-= lsns;
+ buff->record_length+= (buff->compressed_LSN_economy=
+ (lsns - (src - start)));
+ DBUG_PRINT("info", ("lsns: %u length: %u economy: %d new length: %lu",
+ lsns / LSN_STORE_SIZE, (uint) length,
+ (int) buff->compressed_LSN_economy,
+ (ulong) buff->record_length));
+ body_len-= (src - start);
+ }
+ else
+ buff->compressed_LSN_economy= 0;
+
+ DBUG_ASSERT(body_len >= length);
+ body_len-= length;
+ memcpy(dst, src, length);
+ buff->non_header_data_start_offset= src + length - page;
+ buff->non_header_data_len= body_len;
+ DBUG_PRINT("info", ("non_header_data_start_offset: %u len: %u buffer: %u",
+ buff->non_header_data_start_offset,
+ buff->non_header_data_len, buffer_length));
+ DBUG_RETURN(buffer_length);
+}
+
+
+/**
+ @brief Read record header from the given buffer
+
+ @param page page content buffer
+ @param page_offset offset of the chunk in the page
+ @param buff destination buffer
+ @param scanner If this is set the scanner will be moved to the
+ record header page (differ from LSN page in case of
+ multi-group records)
+
+ @return Length of header or operation status
+ @retval RECHEADER_READ_ERROR error
+ @retval # number of bytes in
+ TRANSLOG_HEADER_BUFFER::header where
+ stored decoded part of the header
+*/
+
+int translog_read_record_header_from_buffer(uchar *page,
+ uint16 page_offset,
+ TRANSLOG_HEADER_BUFFER *buff,
+ TRANSLOG_SCANNER_DATA *scanner)
+{
+ translog_size_t res;
+ DBUG_ENTER("translog_read_record_header_from_buffer");
+ DBUG_ASSERT((page[page_offset] & TRANSLOG_CHUNK_TYPE) ==
+ TRANSLOG_CHUNK_LSN ||
+ (page[page_offset] & TRANSLOG_CHUNK_TYPE) ==
+ TRANSLOG_CHUNK_FIXED);
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+ buff->type= (page[page_offset] & TRANSLOG_REC_TYPE);
+ buff->short_trid= uint2korr(page + page_offset + 1);
+ DBUG_PRINT("info", ("Type %u, Short TrID %u, LSN (%lu,0x%lx)",
+ (uint) buff->type, (uint)buff->short_trid,
+ LSN_IN_PARTS(buff->lsn)));
+ /* Read required bytes from the header and call hook */
+ switch (log_record_type_descriptor[buff->type].rclass) {
+ case LOGRECTYPE_VARIABLE_LENGTH:
+ res= translog_variable_length_header(page, page_offset, buff,
+ scanner);
+ break;
+ case LOGRECTYPE_PSEUDOFIXEDLENGTH:
+ case LOGRECTYPE_FIXEDLENGTH:
+ res= translog_fixed_length_header(page, page_offset, buff);
+ break;
+ default:
+ DBUG_ASSERT(0); /* we read some junk (got no LSN) */
+ res= RECHEADER_READ_ERROR;
+ }
+ DBUG_RETURN(res);
+}
+
+
+/**
+ @brief Read record header and some fixed part of a record (the part depend
+ on record type).
+
+ @param lsn log record serial number (address of the record)
+ @param buff log record header buffer
+
+ @note Some type of record can be read completely by this call
+ @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
+ LSN can be translated to absolute one), some fields can be added (like
+ actual header length in the record if the header has variable length)
+
+ @return Length of header or operation status
+ @retval RECHEADER_READ_ERROR error
+ @retval # number of bytes in
+ TRANSLOG_HEADER_BUFFER::header where
+ stored decoded part of the header
+*/
+
+int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff)
+{
+ uchar buffer[TRANSLOG_PAGE_SIZE], *page;
+ translog_size_t res, page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
+ PAGECACHE_BLOCK_LINK *direct_link;
+ TRANSLOG_ADDRESS addr;
+ TRANSLOG_VALIDATOR_DATA data;
+ DBUG_ENTER("translog_read_record_header");
+ DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)", LSN_IN_PARTS(lsn)));
+ DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0);
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+
+ buff->lsn= lsn;
+ buff->groups_no= 0;
+ data.addr= &addr;
+ data.was_recovered= 0;
+ addr= lsn;
+ addr-= page_offset; /* offset decreasing */
+ res= (!(page= translog_get_page(&data, buffer, &direct_link))) ?
+ RECHEADER_READ_ERROR :
+ translog_read_record_header_from_buffer(page, page_offset, buff, 0);
+ translog_free_link(direct_link);
+ DBUG_RETURN(res);
+}
+
+
+/**
+ @brief Read record header and some fixed part of a record (the part depend
+ on record type).
+
+ @param scan scanner position to read
+ @param buff log record header buffer
+ @param move_scanner request to move scanner to the header position
+
+ @note Some type of record can be read completely by this call
+ @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
+ LSN can be translated to absolute one), some fields can be added (like
+ actual header length in the record if the header has variable length)
+
+ @return Length of header or operation status
+ @retval RECHEADER_READ_ERROR error
+ @retval # number of bytes in
+ TRANSLOG_HEADER_BUFFER::header where stored
+ decoded part of the header
+*/
+
+int translog_read_record_header_scan(TRANSLOG_SCANNER_DATA *scanner,
+ TRANSLOG_HEADER_BUFFER *buff,
+ my_bool move_scanner)
+{
+ translog_size_t res;
+ DBUG_ENTER("translog_read_record_header_scan");
+ DBUG_PRINT("enter", ("Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) "
+ "Lst: (%lu,0x%lx) Offset: %u(%x) fixed %d",
+ LSN_IN_PARTS(scanner->page_addr),
+ LSN_IN_PARTS(scanner->horizon),
+ LSN_IN_PARTS(scanner->last_file_page),
+ (uint) scanner->page_offset,
+ (uint) scanner->page_offset, scanner->fixed_horizon));
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+ buff->groups_no= 0;
+ buff->lsn= scanner->page_addr;
+ buff->lsn+= scanner->page_offset; /* offset increasing */
+ res= translog_read_record_header_from_buffer(scanner->page,
+ scanner->page_offset,
+ buff,
+ (move_scanner ?
+ scanner : 0));
+ DBUG_RETURN(res);
+}
+
+
+/**
+ @brief Read record header and some fixed part of the next record (the part
+ depend on record type).
+
+ @param scanner data for scanning if lsn is NULL scanner data
+ will be used for continue scanning.
+ The scanner can be NULL.
+
+ @param buff log record header buffer
+
+ @return Length of header or operation status
+ @retval RECHEADER_READ_ERROR error
+ @retval RECHEADER_READ_EOF EOF
+ @retval # number of bytes in
+ TRANSLOG_HEADER_BUFFER::header where
+ stored decoded part of the header
+*/
+
+int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner,
+ TRANSLOG_HEADER_BUFFER *buff)
+{
+ uint8 chunk_type;
+ translog_size_t res;
+ buff->groups_no= 0; /* to be sure that we will free it right */
+
+ DBUG_ENTER("translog_read_next_record_header");
+ DBUG_PRINT("enter", ("scanner: 0x%lx", (ulong) scanner));
+ DBUG_PRINT("info", ("Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) "
+ "Lst: (%lu,0x%lx) Offset: %u(%x) fixed: %d",
+ LSN_IN_PARTS(scanner->page_addr),
+ LSN_IN_PARTS(scanner->horizon),
+ LSN_IN_PARTS(scanner->last_file_page),
+ (uint) scanner->page_offset,
+ (uint) scanner->page_offset, scanner->fixed_horizon));
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+
+ do
+ {
+ if (translog_get_next_chunk(scanner))
+ DBUG_RETURN(RECHEADER_READ_ERROR);
+ if (scanner->page == END_OF_LOG)
+ {
+ DBUG_PRINT("info", ("End of file from the scanner"));
+ /* Last record was read */
+ buff->lsn= LSN_IMPOSSIBLE;
+ DBUG_RETURN(RECHEADER_READ_EOF);
+ }
+ chunk_type= scanner->page[scanner->page_offset] & TRANSLOG_CHUNK_TYPE;
+ DBUG_PRINT("info", ("Page: (%lu,0x%lx) offset: %lu type: %x byte: %x",
+ LSN_IN_PARTS(scanner->page_addr),
+ (ulong) scanner->page_offset,
+ (uint) chunk_type,
+ (uint) scanner->page[scanner->page_offset]));
+ } while (chunk_type != TRANSLOG_CHUNK_LSN &&
+ chunk_type != TRANSLOG_CHUNK_FIXED &&
+ scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
+
+ if (scanner->page[scanner->page_offset] == TRANSLOG_FILLER)
+ {
+ DBUG_PRINT("info", ("End of file"));
+ /* Last record was read */
+ buff->lsn= LSN_IMPOSSIBLE;
+ /* Return 'end of log' marker */
+ res= RECHEADER_READ_EOF;
+ }
+ else
+ res= translog_read_record_header_scan(scanner, buff, 0);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Moves record data reader to the next chunk and fill the data reader
+ information about that chunk.
+
+ SYNOPSIS
+ translog_record_read_next_chunk()
+ data data cursor
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_record_read_next_chunk(TRANSLOG_READER_DATA *data)
+{
+ translog_size_t new_current_offset= data->current_offset + data->chunk_size;
+ uint16 chunk_header_len, chunk_len;
+ uint8 type;
+ DBUG_ENTER("translog_record_read_next_chunk");
+
+ if (data->eor)
+ {
+ DBUG_PRINT("info", ("end of the record flag set"));
+ DBUG_RETURN(1);
+ }
+
+ if (data->header.groups_no &&
+ data->header.groups_no - 1 != data->current_group &&
+ data->header.groups[data->current_group].num == data->current_chunk)
+ {
+ /* Goto next group */
+ data->current_group++;
+ data->current_chunk= 0;
+ DBUG_PRINT("info", ("skip to group: #%u", data->current_group));
+ translog_destroy_scanner(&data->scanner);
+ translog_scanner_init(data->header.groups[data->current_group].addr,
+ 1, &data->scanner, 1);
+ }
+ else
+ {
+ data->current_chunk++;
+ if (translog_get_next_chunk(&data->scanner))
+ DBUG_RETURN(1);
+ if (data->scanner.page == END_OF_LOG)
+ {
+ /*
+ Actually it should not happened, but we want to quit nicely in case
+ of a truncated log
+ */
+ DBUG_RETURN(1);
+ }
+ }
+ type= data->scanner.page[data->scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
+
+ if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no)
+ {
+ DBUG_PRINT("info",
+ ("Last chunk: data len: %u offset: %u group: %u of %u",
+ data->header.chunk0_data_len, data->scanner.page_offset,
+ data->current_group, data->header.groups_no - 1));
+ DBUG_ASSERT(data->header.groups_no - 1 == data->current_group);
+ DBUG_ASSERT(data->header.lsn ==
+ data->scanner.page_addr + data->scanner.page_offset);
+ translog_destroy_scanner(&data->scanner);
+ translog_scanner_init(data->header.chunk0_data_addr, 1, &data->scanner, 1);
+ data->chunk_size= data->header.chunk0_data_len;
+ data->body_offset= data->scanner.page_offset;
+ data->current_offset= new_current_offset;
+ data->eor= 1;
+ DBUG_RETURN(0);
+ }
+
+ if (type == TRANSLOG_CHUNK_LSN || type == TRANSLOG_CHUNK_FIXED)
+ {
+ data->eor= 1;
+ DBUG_RETURN(1); /* End of record */
+ }
+
+ chunk_header_len=
+ translog_get_chunk_header_length(data->scanner.page +
+ data->scanner.page_offset);
+ chunk_len= translog_get_total_chunk_length(data->scanner.page,
+ data->scanner.page_offset);
+ data->chunk_size= chunk_len - chunk_header_len;
+ data->body_offset= data->scanner.page_offset + chunk_header_len;
+ data->current_offset= new_current_offset;
+ DBUG_PRINT("info", ("grp: %u chunk: %u body_offset: %u chunk_size: %u "
+ "current_offset: %lu",
+ (uint) data->current_group,
+ (uint) data->current_chunk,
+ (uint) data->body_offset,
+ (uint) data->chunk_size, (ulong) data->current_offset));
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Initialize record reader data from LSN
+
+ SYNOPSIS
+ translog_init_reader_data()
+ lsn reference to LSN we should start from
+ data reader data to initialize
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_init_reader_data(LSN lsn,
+ TRANSLOG_READER_DATA *data)
+{
+ int read_header;
+ DBUG_ENTER("translog_init_reader_data");
+ if (translog_scanner_init(lsn, 1, &data->scanner, 1) ||
+ ((read_header=
+ translog_read_record_header_scan(&data->scanner, &data->header, 1))
+ == RECHEADER_READ_ERROR))
+ DBUG_RETURN(1);
+ data->read_header= read_header;
+ data->body_offset= data->header.non_header_data_start_offset;
+ data->chunk_size= data->header.non_header_data_len;
+ data->current_offset= data->read_header;
+ data->current_group= 0;
+ data->current_chunk= 0;
+ data->eor= 0;
+ DBUG_PRINT("info", ("read_header: %u "
+ "body_offset: %u chunk_size: %u current_offset: %lu",
+ (uint) data->read_header,
+ (uint) data->body_offset,
+ (uint) data->chunk_size, (ulong) data->current_offset));
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Destroy reader data object
+*/
+
+static void translog_destroy_reader_data(TRANSLOG_READER_DATA *data)
+{
+ translog_destroy_scanner(&data->scanner);
+}
+
+
+/*
+ Read a part of the record.
+
+ SYNOPSIS
+ translog_read_record_header()
+ lsn log record serial number (address of the record)
+ offset From the beginning of the record beginning (read
+ by translog_read_record_header).
+ length Length of record part which have to be read.
+ buffer Buffer where to read the record part (have to be at
+ least 'length' bytes length)
+
+ RETURN
+ length of data actually read
+*/
+
+translog_size_t translog_read_record(LSN lsn,
+ translog_size_t offset,
+ translog_size_t length,
+ uchar *buffer,
+ TRANSLOG_READER_DATA *data)
+{
+ translog_size_t requested_length= length;
+ translog_size_t end= offset + length;
+ TRANSLOG_READER_DATA internal_data;
+ DBUG_ENTER("translog_read_record");
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+
+ if (data == NULL)
+ {
+ DBUG_ASSERT(lsn != LSN_IMPOSSIBLE);
+ data= &internal_data;
+ }
+ if (lsn ||
+ (offset < data->current_offset &&
+ !(offset < data->read_header && offset + length < data->read_header)))
+ {
+ if (translog_init_reader_data(lsn, data))
+ DBUG_RETURN(0);
+ }
+ DBUG_PRINT("info", ("Offset: %lu length: %lu "
+ "Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) "
+ "Lst: (%lu,0x%lx) Offset: %u(%x) fixed: %d",
+ (ulong) offset, (ulong) length,
+ LSN_IN_PARTS(data->scanner.page_addr),
+ LSN_IN_PARTS(data->scanner.horizon),
+ LSN_IN_PARTS(data->scanner.last_file_page),
+ (uint) data->scanner.page_offset,
+ (uint) data->scanner.page_offset,
+ data->scanner.fixed_horizon));
+ if (offset < data->read_header)
+ {
+ uint16 len= min(data->read_header, end) - offset;
+ DBUG_PRINT("info",
+ ("enter header offset: %lu length: %lu",
+ (ulong) offset, (ulong) length));
+ memcpy(buffer, data->header.header + offset, len);
+ length-= len;
+ if (length == 0)
+ {
+ translog_destroy_reader_data(data);
+ DBUG_RETURN(requested_length);
+ }
+ offset+= len;
+ buffer+= len;
+ DBUG_PRINT("info",
+ ("len: %u offset: %lu curr: %lu length: %lu",
+ len, (ulong) offset, (ulong) data->current_offset,
+ (ulong) length));
+ }
+ /* TODO: find first page which we should read by offset */
+
+ /* read the record chunk by chunk */
+ for(;;)
+ {
+ uint page_end= data->current_offset + data->chunk_size;
+ DBUG_PRINT("info",
+ ("enter body offset: %lu curr: %lu "
+ "length: %lu page_end: %lu",
+ (ulong) offset, (ulong) data->current_offset, (ulong) length,
+ (ulong) page_end));
+ if (offset < page_end)
+ {
+ uint len= page_end - offset;
+ set_if_smaller(len, length); /* in case we read beyond record's end */
+ DBUG_ASSERT(offset >= data->current_offset);
+ memcpy(buffer,
+ data->scanner.page + data->body_offset +
+ (offset - data->current_offset), len);
+ length-= len;
+ if (length == 0)
+ {
+ translog_destroy_reader_data(data);
+ DBUG_RETURN(requested_length);
+ }
+ offset+= len;
+ buffer+= len;
+ DBUG_PRINT("info",
+ ("len: %u offset: %lu curr: %lu length: %lu",
+ len, (ulong) offset, (ulong) data->current_offset,
+ (ulong) length));
+ }
+ if (translog_record_read_next_chunk(data))
+ {
+ translog_destroy_reader_data(data);
+ DBUG_RETURN(requested_length - length);
+ }
+ }
+}
+
+
+/*
+ @brief Force skipping to the next buffer
+
+ @todo Do not copy old page content if all page protections are switched off
+ (because we do not need calculate something or change old parts of the page)
+*/
+
+static void translog_force_current_buffer_to_finish()
+{
+ TRANSLOG_ADDRESS new_buff_beginning;
+ uint16 old_buffer_no= log_descriptor.bc.buffer_no;
+ uint16 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
+ struct st_translog_buffer *new_buffer= (log_descriptor.buffers +
+ new_buffer_no);
+ struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer;
+ uchar *data= log_descriptor.bc.ptr - log_descriptor.bc.current_page_fill;
+ uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
+ uint16 current_page_fill, write_counter, previous_offset;
+ DBUG_ENTER("translog_force_current_buffer_to_finish");
+ DBUG_PRINT("enter", ("Buffer #%u 0x%lx "
+ "Buffer addr: (%lu,0x%lx) "
+ "Page addr: (%lu,0x%lx) "
+ "size: %lu (%lu) Pg: %u left: %u",
+ (uint) log_descriptor.bc.buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ LSN_IN_PARTS(log_descriptor.bc.buffer->offset),
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) (LSN_OFFSET(log_descriptor.horizon) -
+ log_descriptor.bc.current_page_fill),
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
+ buffer->buffer),
+ (uint) log_descriptor.bc.current_page_fill,
+ (uint) left));
+
+ LINT_INIT(current_page_fill);
+ new_buff_beginning= log_descriptor.bc.buffer->offset;
+ new_buff_beginning+= log_descriptor.bc.buffer->size; /* increase offset */
+
+ DBUG_ASSERT(log_descriptor.bc.ptr !=NULL);
+ DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) ==
+ LSN_FILE_NO(log_descriptor.bc.buffer->offset));
+ translog_check_cursor(&log_descriptor.bc);
+ DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
+ if (left != 0)
+ {
+ /*
+ TODO: if 'left' is so small that can't hold any other record
+ then do not move the page
+ */
+ DBUG_PRINT("info", ("left: %u", (uint) left));
+
+ /* decrease offset */
+ new_buff_beginning-= log_descriptor.bc.current_page_fill;
+ current_page_fill= log_descriptor.bc.current_page_fill;
+
+ memset(log_descriptor.bc.ptr, TRANSLOG_FILLER, left);
+ log_descriptor.bc.buffer->size+= left;
+ DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx "
+ "Size: %lu",
+ (uint) log_descriptor.bc.buffer->buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ (ulong) log_descriptor.bc.buffer->size));
+ DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
+ log_descriptor.bc.buffer_no);
+ }
+ else
+ {
+ log_descriptor.bc.current_page_fill= 0;
+ }
+
+ translog_buffer_lock(new_buffer);
+ translog_wait_for_buffer_free(new_buffer);
+
+ write_counter= log_descriptor.bc.write_counter;
+ previous_offset= log_descriptor.bc.previous_offset;
+ translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
+ /* Fix buffer offset (which was incorrectly set to horizon) */
+ log_descriptor.bc.buffer->offset= new_buff_beginning;
+ log_descriptor.bc.write_counter= write_counter;
+ log_descriptor.bc.previous_offset= previous_offset;
+
+ /*
+ Advances this log pointer, increases writers and let other threads to
+ write to the log while we process old page content
+ */
+ if (left)
+ {
+ log_descriptor.bc.ptr+= current_page_fill;
+ log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_fill=
+ current_page_fill;
+ new_buffer->overlay= old_buffer;
+ }
+ else
+ translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
+ translog_buffer_increase_writers(new_buffer);
+ translog_buffer_unlock(new_buffer);
+
+ /*
+ We have to wait until all writers finish before start changing the
+ pages by applying protection and copying the page content in the
+ new buffer.
+ */
+ translog_wait_for_writers(old_buffer);
+
+ if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
+ {
+ translog_put_sector_protection(data, &log_descriptor.bc);
+ if (left)
+ {
+ log_descriptor.bc.write_counter++;
+ log_descriptor.bc.previous_offset= current_page_fill;
+ }
+ else
+ {
+ DBUG_PRINT("info", ("drop write_counter"));
+ log_descriptor.bc.write_counter= 0;
+ log_descriptor.bc.previous_offset= 0;
+ }
+ }
+
+ if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
+ {
+ uint32 crc= translog_crc(data + log_descriptor.page_overhead,
+ TRANSLOG_PAGE_SIZE -
+ log_descriptor.page_overhead);
+ DBUG_PRINT("info", ("CRC: 0x%lx", (ulong) crc));
+ int4store(data + 3 + 3 + 1, crc);
+ }
+
+ if (left)
+ {
+ /*
+ TODO: do not copy beginning of the page if we have no CRC or sector
+ checks on
+ */
+ memcpy(new_buffer->buffer, data, current_page_fill);
+ }
+ old_buffer->next_buffer_offset= new_buffer->offset;
+
+ translog_buffer_lock(new_buffer);
+ translog_buffer_decrease_writers(new_buffer);
+ translog_buffer_unlock(new_buffer);
+
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ @brief Flush the log up to given LSN (included)
+
+ @param lsn log record serial number up to which (inclusive)
+ the log has to be flushed
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+
+ @todo LOG: when a log write fails, we should not write to this log anymore
+ (if we add more log records to this log they will be unreadable: we will hit
+ the broken log record): all translog_flush() should be made to fail (because
+ translog_flush() is when a a transaction wants something durable and we
+ cannot make anything durable as log is corrupted). For that, a "my_bool
+ st_translog_descriptor::write_error" could be set to 1 when a
+ translog_write_record() or translog_flush() fails, and translog_flush()
+ would test this var (and translog_write_record() could also test this var if
+ it wants, though it's not absolutely needed).
+ Then, either shut Maria down immediately, or switch to a new log (but if we
+ get write error after write error, that would create too many logs).
+ A popular open-source transactional engine intentionally crashes as soon as
+ a log flush fails (we however don't want to crash the entire mysqld, but
+ stopping all engine's operations immediately would make sense).
+ Same applies to translog_write_record().
+
+ @todo: remove serialization and make group commit.
+*/
+
+my_bool translog_flush(TRANSLOG_ADDRESS lsn)
+{
+ LSN old_flushed, sent_to_disk;
+ TRANSLOG_ADDRESS flush_horizon;
+ int rc= 0;
+ /* We can't have more different files then buffers */
+ TRANSLOG_FILE *file_handlers[TRANSLOG_BUFFERS_NO];
+ int current_file_handler= -1;
+ uint32 prev_file= 0;
+ my_bool full_circle= 0;
+ DBUG_ENTER("translog_flush");
+ DBUG_PRINT("enter", ("Flush up to LSN: (%lu,0x%lx)", LSN_IN_PARTS(lsn)));
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+ LINT_INIT(sent_to_disk);
+
+ pthread_mutex_lock(&log_descriptor.log_flush_lock);
+ translog_lock();
+ flush_horizon= LSN_IMPOSSIBLE;
+ old_flushed= log_descriptor.flushed;
+ for (;;)
+ {
+ uint16 buffer_no= log_descriptor.bc.buffer_no;
+ uint16 buffer_start= buffer_no;
+ struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
+ struct st_translog_buffer *buffer= log_descriptor.bc.buffer;
+ if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0)
+ {
+ DBUG_PRINT("info", ("already flushed: (%lu,0x%lx)",
+ LSN_IN_PARTS(log_descriptor.flushed)));
+ translog_unlock();
+ goto out;
+ }
+ /* send to the file if it is not sent */
+ if (translog_status != TRANSLOG_OK)
+ {
+ rc= 1;
+ goto out;
+ }
+ sent_to_disk= translog_get_sent_to_disk();
+ if (cmp_translog_addr(sent_to_disk, lsn) >= 0 || full_circle)
+ break;
+
+ do
+ {
+ buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
+ buffer= log_descriptor.buffers + buffer_no;
+ translog_buffer_lock(buffer);
+ translog_buffer_unlock(buffer_unlock);
+ buffer_unlock= buffer;
+ if (buffer->file != NULL)
+ {
+ buffer_unlock= NULL;
+ if (buffer_start == buffer_no)
+ {
+ /* we made a circle */
+ full_circle= 1;
+ translog_force_current_buffer_to_finish();
+ }
+ break;
+ }
+ } while ((buffer_start != buffer_no) &&
+ cmp_translog_addr(log_descriptor.flushed, lsn) < 0);
+ if (buffer_unlock != NULL && buffer_unlock != buffer)
+ translog_buffer_unlock(buffer_unlock);
+
+ if (prev_file != LSN_FILE_NO(buffer->offset))
+ {
+ TRANSLOG_FILE *file;
+ uint32 fn= LSN_FILE_NO(buffer->offset);
+ prev_file= fn;
+ file= get_logfile_by_number(fn);
+ if (!file->is_sync)
+ {
+ current_file_handler++;
+ file_handlers[current_file_handler]= file;
+ }
+ /* We sync file when we are closing it => do nothing if file closed */
+ }
+ DBUG_ASSERT(flush_horizon <= buffer->offset + buffer->size);
+ flush_horizon= buffer->offset + buffer->size;
+ rc= translog_buffer_flush(buffer);
+ translog_buffer_unlock(buffer);
+ if (rc)
+ goto out; /* rc is 1 */
+ translog_lock();
+ }
+ translog_unlock();
+
+ {
+ TRANSLOG_FILE **cur= file_handlers;
+ TRANSLOG_FILE **end= file_handlers + current_file_handler;
+ for (; cur <= end; cur++)
+ {
+ (*cur)->is_sync= 1;
+ if (my_sync((*cur)->handler.file, MYF(MY_WME)))
+ {
+ rc= 1;
+ translog_stop_writing();
+ goto out;
+ }
+ }
+ }
+ log_descriptor.flushed= sent_to_disk;
+ /*
+ If we should flush (due to directory flush mode) and
+ previous flush horizon was not within one page border with this one.
+ */
+ if (sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS &&
+ (LSN_FILE_NO(log_descriptor.previous_flush_horizon) !=
+ LSN_FILE_NO(flush_horizon) ||
+ ((LSN_OFFSET(log_descriptor.previous_flush_horizon) - 1) /
+ TRANSLOG_PAGE_SIZE) !=
+ ((LSN_OFFSET(flush_horizon) - 1) / TRANSLOG_PAGE_SIZE)))
+ rc|= my_sync(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD));
+ log_descriptor.previous_flush_horizon= flush_horizon;
+out:
+ pthread_mutex_unlock(&log_descriptor.log_flush_lock);
+ DBUG_RETURN(rc);
+}
+
+
+/**
+ @brief Gives a 2-byte-id to MARIA_SHARE and logs this fact
+
+ If a MARIA_SHARE does not yet have a 2-byte-id (unique over all currently
+ open MARIA_SHAREs), give it one and record this assignment in the log
+ (LOGREC_FILE_ID log record).
+
+ @param tbl_info table
+ @param trn calling transaction
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+
+ @note Can be called even if share already has an id (then will do nothing)
+*/
+
+int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
+{
+ MARIA_SHARE *share= tbl_info->s;
+ /*
+ If you give an id to a non-BLOCK_RECORD table, you also need to release
+ this id somewhere. Then you can change the assertion.
+ */
+ DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
+ /* re-check under mutex to avoid having 2 ids for the same share */
+ pthread_mutex_lock(&share->intern_lock);
+ if (likely(share->id == 0))
+ {
+ /* Inspired by set_short_trid() of trnman.c */
+ uint i= share->kfile.file % SHARE_ID_MAX + 1;
+ do
+ {
+ my_atomic_rwlock_wrlock(&LOCK_id_to_share);
+ for ( ; i <= SHARE_ID_MAX ; i++) /* the range is [1..SHARE_ID_MAX] */
+ {
+ void *tmp= NULL;
+ if (id_to_share[i] == NULL &&
+ my_atomic_casptr((void **)&id_to_share[i], &tmp, share))
+ {
+ share->id= (uint16)i;
+ break;
+ }
+ }
+ my_atomic_rwlock_wrunlock(&LOCK_id_to_share);
+ i= 1; /* scan the whole array */
+ } while (share->id == 0);
+ DBUG_PRINT("info", ("id_to_share: 0x%lx -> %u", (ulong)share, share->id));
+ LSN lsn;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ uchar log_data[FILEID_STORE_SIZE];
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+ /*
+ open_file_name is an unresolved name (symlinks are not resolved, datadir
+ is not realpath-ed, etc) which is good: the log can be moved to another
+ directory and continue working.
+ */
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= share->open_file_name;
+ /**
+ @todo if we had the name's length in MARIA_SHARE we could avoid this
+ strlen()
+ */
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length=
+ strlen(share->open_file_name) + 1;
+ if (unlikely(translog_write_record(&lsn, LOGREC_FILE_ID, trn, tbl_info,
+ sizeof(log_data) +
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 1].length,
+ sizeof(log_array)/sizeof(log_array[0]),
+ log_array, log_data, NULL)))
+ return 1;
+ }
+ pthread_mutex_unlock(&share->intern_lock);
+ return 0;
+}
+
+
+/**
+ @brief Recycles a MARIA_SHARE's short id.
+
+ @param share table
+
+ @note Must be called only if share has an id (i.e. id != 0)
+*/
+
+void translog_deassign_id_from_share(MARIA_SHARE *share)
+{
+ DBUG_PRINT("info", ("id_to_share: 0x%lx id %u -> 0",
+ (ulong)share, share->id));
+ /*
+ We don't need any mutex as we are called only when closing the last
+ instance of the table or at the end of REPAIR: no writes can be
+ happening. But a Checkpoint may be reading share->id, so we require this
+ mutex:
+ */
+ safe_mutex_assert_owner(&share->intern_lock);
+ my_atomic_rwlock_rdlock(&LOCK_id_to_share);
+ my_atomic_storeptr((void **)&id_to_share[share->id], 0);
+ my_atomic_rwlock_rdunlock(&LOCK_id_to_share);
+ share->id= 0;
+ /* useless but safety: */
+ share->lsn_of_file_id= LSN_IMPOSSIBLE;
+}
+
+
+void translog_assign_id_to_share_from_recovery(MARIA_SHARE *share,
+ uint16 id)
+{
+ DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
+ DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
+ DBUG_ASSERT(share->id == 0);
+ DBUG_ASSERT(id_to_share[id] == NULL);
+ id_to_share[share->id= id]= share;
+}
+
+
+/**
+ @brief check if such log file exists
+
+ @param file_no number of the file to test
+
+ @retval 0 no such file
+ @retval 1 there is file with such number
+*/
+
+my_bool translog_is_file(uint file_no)
+{
+ MY_STAT stat_buff;
+ char path[FN_REFLEN];
+ return (test(my_stat(translog_filename_by_fileno(file_no, path),
+ &stat_buff, MYF(0))));
+}
+
+
+/**
+ @brief returns minimum log file number
+
+ @param horizon the end of the log
+ @param is_protected true if it is under purge_log protection
+
+ @retval minimum file number
+ @retval 0 no files found
+*/
+
+static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected)
+{
+ uint min_file= 0, max_file;
+ DBUG_ENTER("translog_first_file");
+ if (!is_protected)
+ pthread_mutex_lock(&log_descriptor.purger_lock);
+ if (log_descriptor.min_file_number &&
+ translog_is_file(log_descriptor.min_file_number))
+ {
+ DBUG_PRINT("info", ("cached %lu",
+ (ulong) log_descriptor.min_file_number));
+ if (!is_protected)
+ pthread_mutex_unlock(&log_descriptor.purger_lock);
+ DBUG_RETURN(log_descriptor.min_file_number);
+ }
+
+ max_file= LSN_FILE_NO(horizon);
+
+ /* binary search for last file */
+ while (min_file != max_file && min_file != (max_file - 1))
+ {
+ uint test= (min_file + max_file) / 2;
+ DBUG_PRINT("info", ("min_file: %u test: %u max_file: %u",
+ min_file, test, max_file));
+ if (test == max_file)
+ test--;
+ if (translog_is_file(test))
+ max_file= test;
+ else
+ min_file= test;
+ }
+ log_descriptor.min_file_number= max_file;
+ if (!is_protected)
+ pthread_mutex_unlock(&log_descriptor.purger_lock);
+ DBUG_PRINT("info", ("first file :%lu", (ulong) max_file));
+ DBUG_ASSERT(max_file >= 1);
+ DBUG_RETURN(max_file);
+}
+
+
+/**
+ @brief returns the most close LSN higher the given chunk address
+
+ @param addr the chunk address to start from
+ @param horizon the horizon if it is known or LSN_IMPOSSIBLE
+
+ @retval LSN_ERROR Error
+ @retval LSN_IMPOSSIBLE no LSNs after the address
+ @retval # LSN of the most close LSN higher the given chunk address
+*/
+
+LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon)
+{
+ uint chunk_type;
+ TRANSLOG_SCANNER_DATA scanner;
+ LSN result;
+ DBUG_ENTER("translog_next_LSN");
+
+ if (horizon == LSN_IMPOSSIBLE)
+ horizon= translog_get_horizon();
+
+ if (addr == horizon)
+ DBUG_RETURN(LSN_IMPOSSIBLE);
+
+ translog_scanner_init(addr, 0, &scanner, 1);
+ /*
+ addr can point not to a chunk beginning but page end so next
+ page beginning.
+ */
+ if (addr % TRANSLOG_PAGE_SIZE == 0)
+ {
+ /*
+ We are emulating the page end which cased such horizon value to
+ trigger translog_scanner_eop().
+
+ We can't just increase addr on page header overhead because it
+ can be file end so we allow translog_get_next_chunk() to skip
+ to the next page in correct way
+ */
+ scanner.page_addr-= TRANSLOG_PAGE_SIZE;
+ scanner.page_offset= TRANSLOG_PAGE_SIZE;
+#ifndef DBUG_OFF
+ scanner.page= NULL; /* prevent using incorrect page content */
+#endif
+ }
+ /* addr can point not to a chunk beginning but to a page end */
+ if (translog_scanner_eop(&scanner))
+ {
+ if (translog_get_next_chunk(&scanner))
+ {
+ result= LSN_ERROR;
+ goto out;
+ }
+ if (scanner.page == END_OF_LOG)
+ {
+ result= LSN_IMPOSSIBLE;
+ goto out;
+ }
+ }
+
+ chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
+ DBUG_PRINT("info", ("type: %x byte: %x", (uint) chunk_type,
+ (uint) scanner.page[scanner.page_offset]));
+ while (chunk_type != TRANSLOG_CHUNK_LSN &&
+ chunk_type != TRANSLOG_CHUNK_FIXED &&
+ scanner.page[scanner.page_offset] != TRANSLOG_FILLER)
+ {
+ if (translog_get_next_chunk(&scanner))
+ {
+ result= LSN_ERROR;
+ goto out;
+ }
+ if (scanner.page == END_OF_LOG)
+ {
+ result= LSN_IMPOSSIBLE;
+ goto out;
+ }
+ chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
+ DBUG_PRINT("info", ("type: %x byte: %x", (uint) chunk_type,
+ (uint) scanner.page[scanner.page_offset]));
+ }
+
+ if (scanner.page[scanner.page_offset] == TRANSLOG_FILLER)
+ result= LSN_IMPOSSIBLE; /* reached page filler */
+ else
+ result= scanner.page_addr + scanner.page_offset;
+out:
+ translog_destroy_scanner(&scanner);
+ DBUG_RETURN(result);
+}
+
+
+/**
+ @brief returns the LSN of the first record starting in this log
+
+ @retval LSN_ERROR Error
+ @retval LSN_IMPOSSIBLE no log or the log is empty
+ @retval # LSN of the first record
+*/
+
+LSN translog_first_lsn_in_log()
+{
+ TRANSLOG_ADDRESS addr, horizon= translog_get_horizon();
+ TRANSLOG_VALIDATOR_DATA data;
+ uint file;
+ uint16 chunk_offset;
+ uchar *page;
+ DBUG_ENTER("translog_first_lsn_in_log");
+ DBUG_PRINT("info", ("Horizon: (%lu,0x%lx)", LSN_IN_PARTS(addr)));
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+
+ if (!(file= translog_first_file(horizon, 0)))
+ {
+ /* log has no records yet */
+ DBUG_RETURN(LSN_IMPOSSIBLE);
+ }
+
+ addr= MAKE_LSN(file, TRANSLOG_PAGE_SIZE); /* the first page of the file */
+ data.addr= &addr;
+ {
+ uchar buffer[TRANSLOG_PAGE_SIZE];
+ if ((page= translog_get_page(&data, buffer, NULL)) == NULL ||
+ (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
+ DBUG_RETURN(LSN_ERROR);
+ }
+ addr+= chunk_offset;
+
+ DBUG_RETURN(translog_next_LSN(addr, horizon));
+}
+
+
+/**
+ @brief Returns theoretical first LSN if first log is present
+
+ @retval LSN_ERROR Error
+ @retval LSN_IMPOSSIBLE no log
+ @retval # LSN of the first record
+*/
+
+LSN translog_first_theoretical_lsn()
+{
+ TRANSLOG_ADDRESS addr= translog_get_horizon();
+ uchar buffer[TRANSLOG_PAGE_SIZE], *page;
+ TRANSLOG_VALIDATOR_DATA data;
+ DBUG_ENTER("translog_first_theoretical_lsn");
+ DBUG_PRINT("info", ("Horizon: (%lu,0x%lx)", LSN_IN_PARTS(addr)));
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+
+ if (!translog_is_file(1))
+ DBUG_RETURN(LSN_IMPOSSIBLE);
+ if (addr == MAKE_LSN(1, TRANSLOG_PAGE_SIZE))
+ {
+ /* log has no records yet */
+ DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
+ log_descriptor.page_overhead));
+ }
+
+ addr= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* the first page of the file */
+ data.addr= &addr;
+ if ((page= translog_get_page(&data, buffer, NULL)) == NULL)
+ DBUG_RETURN(LSN_ERROR);
+
+ DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
+ page_overhead[page[TRANSLOG_PAGE_FLAGS]]));
+}
+
+
+/**
+ @brief Checks given low water mark and purge files if it is need
+
+ @param low the last (minimum) address which is need
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool translog_purge(TRANSLOG_ADDRESS low)
+{
+ uint32 last_need_file= LSN_FILE_NO(low);
+ TRANSLOG_ADDRESS horizon= translog_get_horizon();
+ int rc= 0;
+ DBUG_ENTER("translog_purge");
+ DBUG_PRINT("enter", ("low: (%lu,0x%lx)", LSN_IN_PARTS(low)));
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+
+ pthread_mutex_lock(&log_descriptor.purger_lock);
+ if (LSN_FILE_NO(log_descriptor.last_lsn_checked) < last_need_file)
+ {
+ uint32 i;
+ uint32 min_file= translog_first_file(horizon, 1);
+ DBUG_ASSERT(min_file != 0); /* log is already started */
+ for(i= min_file; i < last_need_file && rc == 0; i++)
+ {
+ LSN lsn= translog_get_file_max_lsn_stored(i);
+ if (lsn == LSN_IMPOSSIBLE)
+ break; /* files are still in writing */
+ if (lsn == LSN_ERROR)
+ {
+ rc= 1;
+ break;
+ }
+ if (cmp_translog_addr(lsn, low) >= 0)
+ break;
+
+ DBUG_PRINT("info", ("purge file %lu", (ulong) i));
+
+ /* remove file descriptor from the cache */
+ /*
+ log_descriptor.min_file can be changed only here during execution
+ and the function is serialized, so we can access it without problems
+ */
+ if (i >= log_descriptor.min_file)
+ {
+ TRANSLOG_FILE *file;
+ rw_wrlock(&log_descriptor.open_files_lock);
+ DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
+ log_descriptor.open_files.elements);
+ DBUG_ASSERT(log_descriptor.min_file == i);
+ file= *((TRANSLOG_FILE **)pop_dynamic(&log_descriptor.open_files));
+ DBUG_PRINT("info", ("Files : %d", log_descriptor.open_files.elements));
+ DBUG_ASSERT(i == file->number);
+ log_descriptor.min_file++;
+ DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
+ log_descriptor.open_files.elements);
+ rw_unlock(&log_descriptor.open_files_lock);
+ translog_close_log_file(file);
+ }
+ if (log_purge_type == TRANSLOG_PURGE_IMMIDIATE)
+ {
+ char path[FN_REFLEN], *file_name;
+ file_name= translog_filename_by_fileno(i, path);
+ rc= test(my_delete(file_name, MYF(MY_WME)));
+ }
+ }
+ if (unlikely(rc == 1))
+ log_descriptor.min_need_file= 0; /* impossible value */
+ else
+ log_descriptor.min_need_file= i;
+ }
+
+ pthread_mutex_unlock(&log_descriptor.purger_lock);
+ DBUG_RETURN(rc);
+}
+
+
+/**
+ @brief Purges files by stored min need file in case of
+ "ondemend" purge type
+
+ @note This function do real work only if it is "ondemend" purge type
+ and translog_purge() was called at least once and last time without
+ errors
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool translog_purge_at_flush()
+{
+ uint32 i, min_file;
+ int rc= 0;
+ DBUG_ENTER("translog_purge_at_flush");
+ DBUG_ASSERT(translog_status == TRANSLOG_OK ||
+ translog_status == TRANSLOG_READONLY);
+
+ if (unlikely(translog_status == TRANSLOG_READONLY))
+ {
+ DBUG_PRINT("info", ("The log is read onlyu => exit"));
+ DBUG_RETURN(0);
+ }
+
+ if (log_purge_type != TRANSLOG_PURGE_ONDEMAND)
+ {
+ DBUG_PRINT("info", ("It is not \"at_flush\" => exit"));
+ DBUG_RETURN(0);
+ }
+
+ pthread_mutex_lock(&log_descriptor.purger_lock);
+
+ if (unlikely(log_descriptor.min_need_file == 0))
+ {
+ DBUG_PRINT("info", ("No info about min need file => exit"));
+ pthread_mutex_unlock(&log_descriptor.purger_lock);
+ DBUG_RETURN(0);
+ }
+
+ min_file= translog_first_file(translog_get_horizon(), 1);
+ DBUG_ASSERT(min_file != 0); /* log is already started */
+ for(i= min_file; i < log_descriptor.min_need_file && rc == 0; i++)
+ {
+ char path[FN_REFLEN], *file_name;
+ DBUG_PRINT("info", ("purge file %lu\n", (ulong) i));
+ file_name= translog_filename_by_fileno(i, path);
+ rc= test(my_delete(file_name, MYF(MY_WME)));
+ }
+
+ pthread_mutex_unlock(&log_descriptor.purger_lock);
+ DBUG_RETURN(rc);
+}
+
+
+/**
+ @brief Gets min file number
+
+ @param horizon the end of the log
+
+ @retval minimum file number
+ @retval 0 no files found
+*/
+
+uint32 translog_get_first_file(TRANSLOG_ADDRESS horizon)
+{
+ return translog_first_file(horizon, 0);
+}
+
+
+/**
+ @brief Gets min file number which is needed
+
+ @retval minimum file number
+ @retval 0 unknown
+*/
+
+uint32 translog_get_first_needed_file()
+{
+ uint32 file_no;
+ pthread_mutex_lock(&log_descriptor.purger_lock);
+ file_no= log_descriptor.min_need_file;
+ pthread_mutex_unlock(&log_descriptor.purger_lock);
+ return file_no;
+}
+
+
+/**
+ @brief Gets transaction log file size
+
+ @return transaction log file size
+*/
+
+uint32 translog_get_file_size()
+{
+ uint32 res;
+ translog_lock();
+ res= log_descriptor.log_file_max_size;
+ translog_unlock();
+ return (res);
+}
+
+
+/**
+ @brief Sets transaction log file size
+
+ @return Returns actually set transaction log size
+*/
+
+void translog_set_file_size(uint32 size)
+{
+ struct st_translog_buffer *old_buffer= NULL;
+ DBUG_ENTER("translog_set_file_size");
+ translog_lock();
+ DBUG_PRINT("enter", ("Size: %lu", (ulong) size));
+ DBUG_ASSERT(size % TRANSLOG_PAGE_SIZE == 0 &&
+ size >= TRANSLOG_MIN_FILE_SIZE);
+ log_descriptor.log_file_max_size= size;
+ /* if current file longer then finish it*/
+ if (LSN_OFFSET(log_descriptor.horizon) >= log_descriptor.log_file_max_size)
+ {
+ old_buffer= log_descriptor.bc.buffer;
+ translog_buffer_next(&log_descriptor.horizon, &log_descriptor.bc, 1);
+ translog_buffer_unlock(old_buffer);
+ }
+ translog_unlock();
+ if (old_buffer)
+ {
+ translog_buffer_lock(old_buffer);
+ translog_buffer_flush(old_buffer);
+ translog_buffer_unlock(old_buffer);
+ }
+ DBUG_VOID_RETURN;
+}
+
diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h
new file mode 100644
index 00000000000..28a687fa49d
--- /dev/null
+++ b/storage/maria/ma_loghandler.h
@@ -0,0 +1,437 @@
+/* Copyright (C) 2007 MySQL AB & Sanja Belkin
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _ma_loghandler_h
+#define _ma_loghandler_h
+
+/* transaction log default cache size (TODO: make it global variable) */
+#define TRANSLOG_PAGECACHE_SIZE (1024*1024*2)
+/* transaction log default file size */
+#define TRANSLOG_FILE_SIZE (1024*1024*1024)
+/* minimum possible transaction log size */
+#define TRANSLOG_MIN_FILE_SIZE (1024*1024*8)
+/* transaction log default flags (TODO: make it global variable) */
+#define TRANSLOG_DEFAULT_FLAGS 0
+
+/*
+ Transaction log flags.
+
+ We allow all kind protections to be switched on together for people who
+ really unsure in their hardware/OS.
+*/
+#define TRANSLOG_PAGE_CRC 1
+#define TRANSLOG_SECTOR_PROTECTION (1<<1)
+#define TRANSLOG_RECORD_CRC (1<<2)
+#define TRANSLOG_FLAGS_NUM ((TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | \
+ TRANSLOG_RECORD_CRC) + 1)
+
+#define RECHEADER_READ_ERROR -1
+#define RECHEADER_READ_EOF -2
+
+/*
+ Page size in transaction log
+ It should be Power of 2 and multiple of DISK_DRIVE_SECTOR_SIZE
+ (DISK_DRIVE_SECTOR_SIZE * 2^N)
+*/
+#define TRANSLOG_PAGE_SIZE (8*1024)
+
+#include "ma_loghandler_lsn.h"
+#include "trnman_public.h"
+
+/* short transaction ID type */
+typedef uint16 SHORT_TRANSACTION_ID;
+
+struct st_maria_handler;
+
+/* Changing one of the "SIZE" below will break backward-compatibility! */
+/* Length of CRC at end of pages */
+#define ROW_EXTENT_PAGE_SIZE 5
+#define ROW_EXTENT_COUNT_SIZE 2
+/* Size of file id in logs */
+#define FILEID_STORE_SIZE 2
+/* Size of page reference in log */
+#define PAGE_STORE_SIZE ROW_EXTENT_PAGE_SIZE
+/* Size of page ranges in log */
+#define PAGERANGE_STORE_SIZE ROW_EXTENT_COUNT_SIZE
+#define DIRPOS_STORE_SIZE 1
+#define CLR_TYPE_STORE_SIZE 1
+/* If table has live checksum we store its changes in UNDOs */
+#define HA_CHECKSUM_STORE_SIZE 4
+#define KEY_NR_STORE_SIZE 1
+#define PAGE_LENGTH_STORE_SIZE 2
+
+/* Store methods to match the above sizes */
+#define fileid_store(T,A) int2store(T,A)
+#define page_store(T,A) int5store(T,A)
+#define dirpos_store(T,A) ((*(uchar*) (T)) = A)
+#define pagerange_store(T,A) int2store(T,A)
+#define clr_type_store(T,A) ((*(uchar*) (T)) = A)
+#define key_nr_store(T, A) ((*(uchar*) (T)) = A)
+#define ha_checksum_store(T,A) int4store(T,A)
+#define fileid_korr(P) uint2korr(P)
+#define page_korr(P) uint5korr(P)
+#define dirpos_korr(P) (*(uchar *) (P))
+#define pagerange_korr(P) uint2korr(P)
+#define clr_type_korr(P) (*(uchar *) (P))
+#define key_nr_korr(P) (*(uchar *) (P))
+#define ha_checksum_korr(P) uint4korr(P)
+
+/*
+ Length of disk drive sector size (we assume that writing it
+ to disk is an atomic operation)
+*/
+#define DISK_DRIVE_SECTOR_SIZE 512
+
+/* position reserved in an array of parts of a log record */
+#define TRANSLOG_INTERNAL_PARTS 2
+
+/* types of records in the transaction log */
+/* TODO: Set numbers for these when we have all entries figured out */
+
+enum translog_record_type
+{
+ LOGREC_RESERVED_FOR_CHUNKS23= 0,
+ LOGREC_REDO_INSERT_ROW_HEAD,
+ LOGREC_REDO_INSERT_ROW_TAIL,
+ LOGREC_REDO_NOT_USED, /* Reserver for next tag */
+ LOGREC_REDO_INSERT_ROW_BLOBS,
+ LOGREC_REDO_PURGE_ROW_HEAD,
+ LOGREC_REDO_PURGE_ROW_TAIL,
+ LOGREC_REDO_FREE_BLOCKS,
+ LOGREC_REDO_FREE_HEAD_OR_TAIL,
+ LOGREC_REDO_DELETE_ROW,
+ LOGREC_REDO_UPDATE_ROW_HEAD,
+ LOGREC_REDO_INDEX,
+ LOGREC_REDO_INDEX_NEW_PAGE,
+ LOGREC_REDO_INDEX_FREE_PAGE,
+ LOGREC_REDO_UNDELETE_ROW,
+ LOGREC_CLR_END,
+ LOGREC_PURGE_END,
+ LOGREC_UNDO_ROW_INSERT,
+ LOGREC_UNDO_ROW_DELETE,
+ LOGREC_UNDO_ROW_UPDATE,
+ LOGREC_UNDO_KEY_INSERT,
+ LOGREC_UNDO_KEY_INSERT_WITH_ROOT,
+ LOGREC_UNDO_KEY_DELETE,
+ LOGREC_UNDO_KEY_DELETE_WITH_ROOT,
+ LOGREC_PREPARE,
+ LOGREC_PREPARE_WITH_UNDO_PURGE,
+ LOGREC_COMMIT,
+ LOGREC_COMMIT_WITH_UNDO_PURGE,
+ LOGREC_CHECKPOINT,
+ LOGREC_REDO_CREATE_TABLE,
+ LOGREC_REDO_RENAME_TABLE,
+ LOGREC_REDO_DROP_TABLE,
+ LOGREC_REDO_DELETE_ALL,
+ LOGREC_REDO_REPAIR_TABLE,
+ LOGREC_FILE_ID,
+ LOGREC_LONG_TRANSACTION_ID,
+ LOGREC_INCOMPLETE_LOG,
+ LOGREC_INCOMPLETE_GROUP,
+ LOGREC_RESERVED_FUTURE_EXTENSION= 63
+};
+#define LOGREC_NUMBER_OF_TYPES 64 /* Maximum, can't be extended */
+
+/* Type of operations in LOGREC_REDO_INDEX */
+
+enum en_key_op
+{
+ KEY_OP_NONE, /* Not used */
+ KEY_OP_OFFSET, /* Set current position */
+ KEY_OP_SHIFT, /* Shift up/or down at current position */
+ KEY_OP_CHANGE, /* Change data at current position */
+ KEY_OP_ADD_PREFIX, /* Insert data at start of page */
+ KEY_OP_DEL_PREFIX, /* Delete data at start of page */
+ KEY_OP_ADD_SUFFIX, /* Insert data at end of page */
+ KEY_OP_DEL_SUFFIX, /* Delete data at end of page */
+ KEY_OP_CHECK /* For debugging; CRC of used part of page */
+};
+
+/* Size of log file; One log file is restricted to 4G */
+typedef uint32 translog_size_t;
+
+#define TRANSLOG_RECORD_HEADER_MAX_SIZE 1024
+
+typedef struct st_translog_group_descriptor
+{
+ TRANSLOG_ADDRESS addr;
+ uint8 num;
+} TRANSLOG_GROUP;
+
+
+typedef struct st_translog_header_buffer
+{
+ /* LSN of the read record */
+ LSN lsn;
+ /* array of groups descriptors, can be used only if groups_no > 0 */
+ TRANSLOG_GROUP *groups;
+ /* short transaction ID or 0 if it has no sense for the record */
+ SHORT_TRANSACTION_ID short_trid;
+ /*
+ The Record length in buffer (including read header, but excluding
+ hidden part of record (type, short TrID, length)
+ */
+ translog_size_t record_length;
+ /*
+ Buffer for write decoded header of the record (depend on the record
+ type)
+ */
+ uchar header[TRANSLOG_RECORD_HEADER_MAX_SIZE];
+ /* number of groups listed in */
+ uint groups_no;
+ /* in multi-group number of chunk0 pages (valid only if groups_no > 0) */
+ uint chunk0_pages;
+ /* type of the read record */
+ enum translog_record_type type;
+ /* chunk 0 data address (valid only if groups_no > 0) */
+ TRANSLOG_ADDRESS chunk0_data_addr;
+ /*
+ Real compressed LSN(s) size economy (<number of LSN(s)>*7 - <real_size>)
+ */
+ int16 compressed_LSN_economy;
+ /* short transaction ID or 0 if it has no sense for the record */
+ uint16 non_header_data_start_offset;
+ /* non read body data length in this first chunk */
+ uint16 non_header_data_len;
+ /* chunk 0 data size (valid only if groups_no > 0) */
+ uint16 chunk0_data_len;
+} TRANSLOG_HEADER_BUFFER;
+
+
+typedef struct st_translog_scanner_data
+{
+ uchar buffer[TRANSLOG_PAGE_SIZE]; /* buffer for page content */
+ TRANSLOG_ADDRESS page_addr; /* current page address */
+ /* end of the log which we saw last time */
+ TRANSLOG_ADDRESS horizon;
+ TRANSLOG_ADDRESS last_file_page; /* Last page on in this file */
+ uchar *page; /* page content pointer */
+ /* direct link on the current page or NULL if not supported/requested */
+ PAGECACHE_BLOCK_LINK *direct_link;
+ /* offset of the chunk in the page */
+ translog_size_t page_offset;
+ /* set horizon only once at init */
+ my_bool fixed_horizon;
+ /* try to get direct link on the page if it is possible */
+ my_bool use_direct_link;
+} TRANSLOG_SCANNER_DATA;
+
+
+typedef struct st_translog_reader_data
+{
+ TRANSLOG_HEADER_BUFFER header; /* Header */
+ TRANSLOG_SCANNER_DATA scanner; /* chunks scanner */
+ translog_size_t body_offset; /* current chunk body offset */
+ /* data offset from the record beginning */
+ translog_size_t current_offset;
+ /* number of bytes read in header */
+ uint16 read_header;
+ uint16 chunk_size; /* current chunk size */
+ uint current_group; /* current group */
+ uint current_chunk; /* current chunk in the group */
+ my_bool eor; /* end of the record */
+} TRANSLOG_READER_DATA;
+
+C_MODE_START
+
+/* Records types for unittests */
+#define LOGREC_FIXED_RECORD_0LSN_EXAMPLE 1
+#define LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE 2
+#define LOGREC_FIXED_RECORD_1LSN_EXAMPLE 3
+#define LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE 4
+#define LOGREC_FIXED_RECORD_2LSN_EXAMPLE 5
+#define LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE 6
+
+extern void translog_example_table_init();
+extern void translog_table_init();
+#define translog_init(D,M,V,I,C,F,R) \
+ translog_init_with_table(D,M,V,I,C,F,R,&translog_table_init)
+extern my_bool translog_init_with_table(const char *directory,
+ uint32 log_file_max_size,
+ uint32 server_version,
+ uint32 server_id,
+ PAGECACHE *pagecache,
+ uint flags,
+ my_bool readonly,
+ void (*init_table_func)());
+
+extern my_bool
+translog_write_record(LSN *lsn, enum translog_record_type type, TRN *trn,
+ MARIA_HA *tbl_info,
+ translog_size_t rec_len, uint part_no,
+ LEX_STRING *parts_data, uchar *store_share_id,
+ void *hook_arg);
+
+extern void translog_destroy();
+
+extern int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff);
+
+extern void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff);
+
+extern translog_size_t translog_read_record(LSN lsn,
+ translog_size_t offset,
+ translog_size_t length,
+ uchar *buffer,
+ struct st_translog_reader_data
+ *data);
+
+extern my_bool translog_flush(TRANSLOG_ADDRESS lsn);
+
+extern my_bool translog_scanner_init(LSN lsn,
+ my_bool fixed_horizon,
+ struct st_translog_scanner_data *scanner,
+ my_bool use_direct_link);
+extern void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner);
+
+extern int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner,
+ TRANSLOG_HEADER_BUFFER *buff);
+extern LSN translog_get_file_max_lsn_stored(uint32 file);
+extern my_bool translog_purge(TRANSLOG_ADDRESS low);
+extern my_bool translog_is_file(uint file_no);
+extern my_bool translog_lock();
+extern my_bool translog_unlock();
+extern void translog_lock_assert_owner();
+extern TRANSLOG_ADDRESS translog_get_horizon();
+extern TRANSLOG_ADDRESS translog_get_horizon_no_lock();
+extern int translog_assign_id_to_share(struct st_maria_handler *tbl_info,
+ TRN *trn);
+extern void translog_deassign_id_from_share(struct st_maria_share *share);
+extern void
+translog_assign_id_to_share_from_recovery(struct st_maria_share *share,
+ uint16 id);
+enum enum_translog_status
+{
+ TRANSLOG_UNINITED, /* no initialization done or error during initialization */
+ TRANSLOG_OK, /* transaction log is functioning */
+ TRANSLOG_READONLY, /* read only mode due to write errors */
+ TRANSLOG_SHUTDOWN /* going to shutdown the loghandler */
+};
+extern enum enum_translog_status translog_status;
+
+/*
+ all the rest added because of recovery; should we make
+ ma_loghandler_for_recovery.h ?
+*/
+
+#define SHARE_ID_MAX 65535 /* array's size */
+
+extern LSN translog_first_lsn_in_log();
+extern LSN translog_first_theoretical_lsn();
+extern LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon);
+extern my_bool translog_purge_at_flush();
+extern uint32 translog_get_first_file(TRANSLOG_ADDRESS horizon);
+extern uint32 translog_get_first_needed_file();
+extern char *translog_filename_by_fileno(uint32 file_no, char *path);
+extern void translog_set_file_size(uint32 size);
+
+/* record parts descriptor */
+struct st_translog_parts
+{
+ /* full record length */
+ translog_size_t record_length;
+ /* full record length with chunk headers */
+ translog_size_t total_record_length;
+ /* current part index */
+ uint current;
+ /* total number of elements in parts */
+ uint elements;
+ /* array of parts (LEX_STRING) */
+ LEX_STRING *parts;
+};
+
+typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type,
+ TRN *trn,
+ struct st_maria_handler *tbl_info,
+ void *hook_arg);
+
+typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type,
+ TRN *trn,
+ struct st_maria_handler *tbl_info,
+ LSN *lsn, void *hook_arg);
+
+typedef uint16(*read_rec_hook) (enum translog_record_type type,
+ uint16 read_length, uchar *read_buff,
+ uchar *decoded_buff);
+
+
+/* record classes */
+enum record_class
+{
+ LOGRECTYPE_NOT_ALLOWED,
+ LOGRECTYPE_VARIABLE_LENGTH,
+ LOGRECTYPE_PSEUDOFIXEDLENGTH,
+ LOGRECTYPE_FIXEDLENGTH
+};
+
+enum enum_record_in_group {
+ LOGREC_NOT_LAST_IN_GROUP= 0, LOGREC_LAST_IN_GROUP, LOGREC_IS_GROUP_ITSELF
+};
+
+/*
+ Descriptor of log record type
+*/
+typedef struct st_log_record_type_descriptor
+{
+ /* internal class of the record */
+ enum record_class rclass;
+ /*
+ length for fixed-size record, pseudo-fixed record
+ length with uncompressed LSNs
+ */
+ uint16 fixed_length;
+ /* how much record body (belonged to headers too) read with headers */
+ uint16 read_header_len;
+ /* HOOK for writing the record called before lock */
+ prewrite_rec_hook prewrite_hook;
+ /* HOOK for writing the record called when LSN is known, inside lock */
+ inwrite_rec_hook inwrite_hook;
+ /* HOOK for reading headers */
+ read_rec_hook read_hook;
+ /*
+ For pseudo fixed records number of compressed LSNs followed by
+ system header
+ */
+ int16 compressed_LSN;
+ /* the rest is for maria_read_log & Recovery */
+ /** @brief for debug error messages or "maria_read_log" command-line tool */
+ const char *name;
+ enum enum_record_in_group record_in_group;
+ /* a function to execute when we see the record during the REDO phase */
+ int (*record_execute_in_redo_phase)(const TRANSLOG_HEADER_BUFFER *);
+ /* a function to execute when we see the record during the UNDO phase */
+ int (*record_execute_in_undo_phase)(const TRANSLOG_HEADER_BUFFER *, TRN *);
+} LOG_DESC;
+
+extern LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES];
+
+typedef enum
+{
+ TRANSLOG_PURGE_IMMIDIATE,
+ TRANSLOG_PURGE_EXTERNAL,
+ TRANSLOG_PURGE_ONDEMAND
+} enum_maria_translog_purge_type;
+extern ulong log_purge_type;
+extern ulong log_file_size;
+
+typedef enum
+{
+ TRANSLOG_SYNC_DIR_NEVER,
+ TRANSLOG_SYNC_DIR_NEWFILE,
+ TRANSLOG_SYNC_DIR_ALWAYS
+} enum_maria_sync_log_dir;
+extern ulong sync_log_dir;
+
+C_MODE_END
+#endif
diff --git a/storage/maria/ma_loghandler_lsn.h b/storage/maria/ma_loghandler_lsn.h
new file mode 100644
index 00000000000..7d4b5338836
--- /dev/null
+++ b/storage/maria/ma_loghandler_lsn.h
@@ -0,0 +1,105 @@
+/* Copyright (C) 2007 MySQL AB & Sanja Belkin
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _ma_loghandler_lsn_h
+#define _ma_loghandler_lsn_h
+
+/*
+ Transaction log record address:
+ file_no << 32 | offset
+ file_no is only 3 bytes so we can use signed integer to make
+ comparison simpler.
+*/
+typedef int64 TRANSLOG_ADDRESS;
+
+/*
+ Compare addresses
+ A1 > A2 -> result > 0
+ A1 == A2 -> 0
+ A1 < A2 -> result < 0
+*/
+#define cmp_translog_addr(A1,A2) ((A1) - (A2))
+
+/*
+ TRANSLOG_ADDRESS is just address of some byte in the log (usually some
+ chunk)
+ LSN used where address of some record in the log needed (not just any
+ address)
+*/
+typedef TRANSLOG_ADDRESS LSN;
+
+/* Gets file number part of a LSN/log address */
+#define LSN_FILE_NO(L) ((L) >> 32)
+
+/* Gets raw file number part of a LSN/log address */
+#define LSN_FILE_NO_PART(L) ((L) & ((int64)0xFFFFFF00000000LL))
+
+/* Parts of LSN for printing */
+#define LSN_IN_PARTS(L) (ulong)LSN_FILE_NO(L),(ulong)LSN_OFFSET(L)
+
+/* Gets record offset of a LSN/log address */
+#define LSN_OFFSET(L) ((L) & 0xFFFFFFFFL)
+
+/* Makes lsn/log address from file number and record offset */
+#define MAKE_LSN(F,S) ((LSN) ((((uint64)(F)) << 32) | (S)))
+
+/* checks LSN */
+#define LSN_VALID(L) \
+ ((LSN_FILE_NO_PART(L) != FILENO_IMPOSSIBLE) && \
+ (LSN_OFFSET(L) != LOG_OFFSET_IMPOSSIBLE))
+
+/* size of stored LSN on a disk, don't change it! */
+#define LSN_STORE_SIZE 7
+
+/* Puts LSN into buffer (dst) */
+#define lsn_store(dst, lsn) \
+ do { \
+ int3store((dst), LSN_FILE_NO(lsn)); \
+ int4store((char*)(dst) + 3, LSN_OFFSET(lsn)); \
+ } while (0)
+
+/* Unpacks LSN from the buffer (P) */
+#define lsn_korr(P) MAKE_LSN(uint3korr(P), uint4korr((char*)(P) + 3))
+
+/* what we need to add to LSN to increase it on one file */
+#define LSN_ONE_FILE ((int64)0x100000000LL)
+
+#define LSN_REPLACE_OFFSET(L, S) (LSN_FILE_NO_PART(L) | (S))
+
+/*
+ an 8-byte type whose most significant uchar is used for "flags"; 7
+ other bytes are a LSN.
+*/
+typedef LSN LSN_WITH_FLAGS;
+#define LSN_WITH_FLAGS_TO_LSN(x) (x & ULL(0x00FFFFFFFFFFFFFF))
+#define LSN_WITH_FLAGS_TO_FLAGS(x) (x & ULL(0xFF00000000000000))
+
+#define FILENO_IMPOSSIBLE 0 /**< log file's numbering starts at 1 */
+#define LOG_OFFSET_IMPOSSIBLE 0 /**< log always has a header */
+#define LSN_IMPOSSIBLE ((LSN)0)
+/* following LSN also is impossible */
+#define LSN_ERROR ((LSN)1)
+
+/** @brief some impossible LSN serve as markers */
+#define LSN_REPAIRED_BY_MARIA_CHK ((LSN)2)
+
+/**
+ @brief the maximum valid LSN.
+ Unlike ULONGLONG_MAX, it can be safely used in comparison with valid LSNs
+ (ULONGLONG_MAX is too big for correctness of cmp_translog_addr()).
+*/
+#define LSN_MAX (LSN)ULL(0x00FFFFFFFFFFFFFF)
+
+#endif
diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c
new file mode 100644
index 00000000000..2415a556a65
--- /dev/null
+++ b/storage/maria/ma_open.c
@@ -0,0 +1,1731 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* open a isam-database */
+
+#include "ma_fulltext.h"
+#include "ma_sp_defs.h"
+#include "ma_rt_index.h"
+#include "ma_blockrec.h"
+#include <m_ctype.h>
+
+#if defined(MSDOS) || defined(__WIN__)
+#ifdef __WIN__
+#include <fcntl.h>
+#else
+#include <process.h> /* Prototype for getpid */
+#endif
+#endif
+
+static void setup_key_functions(MARIA_KEYDEF *keyinfo);
+static my_bool maria_scan_init_dummy(MARIA_HA *info);
+static void maria_scan_end_dummy(MARIA_HA *info);
+static my_bool maria_once_init_dummy(MARIA_SHARE *, File);
+static my_bool maria_once_end_dummy(MARIA_SHARE *);
+static uchar *_ma_base_info_read(uchar *ptr, MARIA_BASE_INFO *base);
+static uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state);
+static void set_data_pagecache_callbacks(PAGECACHE_FILE *file,
+ MARIA_SHARE *share);
+static void set_index_pagecache_callbacks(PAGECACHE_FILE *file,
+ MARIA_SHARE *share);
+
+
+#define get_next_element(to,pos,size) { memcpy((char*) to,pos,(size_t) size); \
+ pos+=size;}
+
+
+#define disk_pos_assert(pos, end_pos) \
+if (pos > end_pos) \
+{ \
+ my_errno=HA_ERR_CRASHED; \
+ goto err; \
+}
+
+
+/******************************************************************************
+** Return the shared struct if the table is already open.
+** In MySQL the server will handle version issues.
+******************************************************************************/
+
+MARIA_HA *_ma_test_if_reopen(const char *filename)
+{
+ LIST *pos;
+
+ for (pos=maria_open_list ; pos ; pos=pos->next)
+ {
+ MARIA_HA *info=(MARIA_HA*) pos->data;
+ MARIA_SHARE *share= info->s;
+ if (!strcmp(share->unique_file_name,filename) && share->last_version)
+ return info;
+ }
+ return 0;
+}
+
+
+/*
+ Open a new instance of an already opened Maria table
+
+ SYNOPSIS
+ maria_clone_internal()
+ share Share of already open table
+ mode Mode of table (O_RDONLY | O_RDWR)
+ data_file Filedescriptor of data file to use < 0 if one should open
+ open it.
+
+ RETURN
+ # Maria handler
+ 0 Error
+*/
+
+
+static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, int mode,
+ File data_file)
+{
+ int save_errno;
+ uint errpos;
+ MARIA_HA info,*m_info;
+ my_bitmap_map *changed_fields_bitmap;
+ DBUG_ENTER("maria_clone_internal");
+
+ errpos= 0;
+ bzero((uchar*) &info,sizeof(info));
+
+ if (mode == O_RDWR && share->mode == O_RDONLY)
+ {
+ my_errno=EACCES; /* Can't open in write mode */
+ goto err;
+ }
+ if (data_file >= 0)
+ info.dfile.file= data_file;
+ else if (_ma_open_datafile(&info, share, -1))
+ goto err;
+ errpos= 5;
+
+ /* alloc and set up private structure parts */
+ if (!my_multi_malloc(MY_WME,
+ &m_info,sizeof(MARIA_HA),
+ &info.blobs,sizeof(MARIA_BLOB)*share->base.blobs,
+ &info.buff,(share->base.max_key_block_length*2+
+ share->base.max_key_length),
+ &info.lastkey,share->base.max_key_length*2+1,
+ &info.first_mbr_key, share->base.max_key_length,
+ &info.maria_rtree_recursion_state,
+ share->have_rtree ? 1024 : 0,
+ &info.key_write_undo_lsn,
+ (uint) (sizeof(LSN) * share->base.keys),
+ &info.key_delete_undo_lsn,
+ (uint) (sizeof(LSN) * share->base.keys),
+ &changed_fields_bitmap,
+ bitmap_buffer_size(share->base.fields),
+ NullS))
+ goto err;
+ errpos= 6;
+
+ memcpy(info.blobs,share->blobs,sizeof(MARIA_BLOB)*share->base.blobs);
+ info.lastkey2=info.lastkey+share->base.max_key_length;
+
+ info.s=share;
+ info.cur_row.lastpos= HA_OFFSET_ERROR;
+ info.update= (short) (HA_STATE_NEXT_FOUND+HA_STATE_PREV_FOUND);
+ info.opt_flag=READ_CHECK_USED;
+ info.this_unique= (ulong) info.dfile.file; /* Uniq number in process */
+ if (share->data_file_type == COMPRESSED_RECORD)
+ info.this_unique= share->state.unique;
+ info.this_loop=0; /* Update counter */
+ info.last_unique= share->state.unique;
+ info.last_loop= share->state.update_count;
+ info.quick_mode=0;
+ info.bulk_insert=0;
+ info.ft1_to_ft2=0;
+ info.errkey= -1;
+ info.page_changed=1;
+ info.keyread_buff= info.buff + share->base.max_key_block_length;
+
+ info.lock_type= F_UNLCK;
+ if (share->options & HA_OPTION_TMP_TABLE)
+ info.lock_type= F_WRLCK;
+
+ set_data_pagecache_callbacks(&info.dfile, share);
+ bitmap_init(&info.changed_fields, changed_fields_bitmap,
+ share->base.fields, 0);
+ if ((*share->init)(&info))
+ goto err;
+
+ /* The following should be big enough for all pinning purposes */
+ if (my_init_dynamic_array(&info.pinned_pages,
+ sizeof(MARIA_PINNED_PAGE),
+ max(share->base.blobs*2 + 4,
+ MARIA_MAX_TREE_LEVELS*3), 16))
+ goto err;
+
+
+ pthread_mutex_lock(&share->intern_lock);
+ info.read_record= share->read_record;
+ share->reopen++;
+ share->write_flag=MYF(MY_NABP | MY_WAIT_IF_FULL);
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ {
+ info.lock_type=F_RDLCK;
+ share->r_locks++;
+ share->tot_locks++;
+ }
+ if ((share->options & HA_OPTION_DELAY_KEY_WRITE) &&
+ maria_delay_key_write)
+ share->delay_key_write=1;
+
+ info.state= &share->state.state; /* Change global values by default */
+ if (!share->base.born_transactional) /* For transactional ones ... */
+ info.trn= &dummy_transaction_object; /* ... force crash if no trn given */
+ pthread_mutex_unlock(&share->intern_lock);
+
+ /* Allocate buffer for one record */
+ /* prerequisites: info->rec_buffer == 0 && info->rec_buff_size == 0 */
+ if (_ma_alloc_buffer(&info.rec_buff, &info.rec_buff_size,
+ share->base.default_rec_buff_size))
+ goto err;
+
+ bzero(info.rec_buff, share->base.default_rec_buff_size);
+
+ *m_info=info;
+#ifdef THREAD
+ thr_lock_data_init(&share->lock,&m_info->lock,(void*) m_info);
+#endif
+ m_info->open_list.data=(void*) m_info;
+ maria_open_list=list_add(maria_open_list,&m_info->open_list);
+
+ DBUG_RETURN(m_info);
+
+err:
+ save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE;
+ if ((save_errno == HA_ERR_CRASHED) ||
+ (save_errno == HA_ERR_CRASHED_ON_USAGE) ||
+ (save_errno == HA_ERR_CRASHED_ON_REPAIR))
+ _ma_report_error(save_errno, share->open_file_name);
+ switch (errpos) {
+ case 6:
+ (*share->end)(&info);
+ delete_dynamic(&info.pinned_pages);
+ my_free(m_info, MYF(0));
+ /* fall through */
+ case 5:
+ if (data_file < 0)
+ VOID(my_close(info.dfile.file, MYF(0)));
+ break;
+ }
+ my_errno=save_errno;
+ DBUG_RETURN (NULL);
+} /* maria_clone_internal */
+
+
+/* Make a clone of a maria table */
+
+MARIA_HA *maria_clone(MARIA_SHARE *share, int mode)
+{
+ MARIA_HA *new_info;
+ pthread_mutex_lock(&THR_LOCK_maria);
+ new_info= maria_clone_internal(share, mode,
+ share->data_file_type == BLOCK_RECORD ?
+ share->bitmap.file.file : -1);
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ return new_info;
+}
+
+
+/******************************************************************************
+ open a MARIA table
+
+ See my_base.h for the handle_locking argument
+ if handle_locking and HA_OPEN_ABORT_IF_CRASHED then abort if the table
+ is marked crashed or if we are not using locking and the table doesn't
+ have an open count of 0.
+******************************************************************************/
+
+MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
+{
+ int kfile,open_mode,save_errno;
+ uint i,j,len,errpos,head_length,base_pos,info_length,keys,
+ key_parts,unique_key_parts,fulltext_keys,uniques;
+ char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN],
+ data_name[FN_REFLEN];
+ uchar *disk_cache, *disk_pos, *end_pos;
+ MARIA_HA info,*m_info,*old_info;
+ MARIA_SHARE share_buff,*share;
+ double rec_per_key_part[HA_MAX_POSSIBLE_KEY*HA_MAX_KEY_SEG];
+ long nulls_per_key_part[HA_MAX_POSSIBLE_KEY*HA_MAX_KEY_SEG];
+ my_off_t key_root[HA_MAX_POSSIBLE_KEY];
+ ulonglong max_key_file_length, max_data_file_length;
+ File data_file= -1;
+ DBUG_ENTER("maria_open");
+
+ LINT_INIT(m_info);
+ kfile= -1;
+ errpos= 0;
+ head_length=sizeof(share_buff.state.header);
+ bzero((uchar*) &info,sizeof(info));
+
+ my_realpath(name_buff, fn_format(org_name,name,"",MARIA_NAME_IEXT,
+ MY_UNPACK_FILENAME),MYF(0));
+ pthread_mutex_lock(&THR_LOCK_maria);
+ old_info= 0;
+ if ((open_flags & HA_OPEN_COPY) ||
+ !(old_info=_ma_test_if_reopen(name_buff)))
+ {
+ share= &share_buff;
+ bzero((uchar*) &share_buff,sizeof(share_buff));
+ share_buff.state.rec_per_key_part= rec_per_key_part;
+ share_buff.state.nulls_per_key_part= nulls_per_key_part;
+ share_buff.state.key_root=key_root;
+ share_buff.pagecache= multi_pagecache_search((uchar*) name_buff,
+ strlen(name_buff),
+ maria_pagecache);
+
+ DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_open",
+ if (strstr(name, "/t1"))
+ {
+ my_errno= HA_ERR_CRASHED;
+ goto err;
+ });
+ if ((kfile=my_open(name_buff,(open_mode=O_RDWR) | O_SHARE,MYF(0))) < 0)
+ {
+ if ((errno != EROFS && errno != EACCES) ||
+ mode != O_RDONLY ||
+ (kfile=my_open(name_buff,(open_mode=O_RDONLY) | O_SHARE,MYF(0))) < 0)
+ goto err;
+ }
+ share->mode=open_mode;
+ errpos= 1;
+ if (my_pread(kfile,share->state.header.file_version, head_length, 0,
+ MYF(MY_NABP)))
+ {
+ my_errno= HA_ERR_NOT_A_TABLE;
+ goto err;
+ }
+ if (memcmp((uchar*) share->state.header.file_version,
+ (uchar*) maria_file_magic, 4))
+ {
+ DBUG_PRINT("error",("Wrong header in %s",name_buff));
+ DBUG_DUMP("error_dump",(char*) share->state.header.file_version,
+ head_length);
+ my_errno=HA_ERR_NOT_A_TABLE;
+ goto err;
+ }
+ share->options= mi_uint2korr(share->state.header.options);
+ if (share->options &
+ ~(HA_OPTION_PACK_RECORD | HA_OPTION_PACK_KEYS |
+ HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA |
+ HA_OPTION_TEMP_COMPRESS_RECORD | HA_OPTION_CHECKSUM |
+ HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE |
+ HA_OPTION_RELIES_ON_SQL_LAYER | HA_OPTION_NULL_FIELDS |
+ HA_OPTION_PAGE_CHECKSUM))
+ {
+ DBUG_PRINT("error",("wrong options: 0x%lx", share->options));
+ my_errno=HA_ERR_NEW_FILE;
+ goto err;
+ }
+ if ((share->options & HA_OPTION_RELIES_ON_SQL_LAYER) &&
+ ! (open_flags & HA_OPEN_FROM_SQL_LAYER))
+ {
+ DBUG_PRINT("error", ("table cannot be openned from non-sql layer"));
+ my_errno= HA_ERR_UNSUPPORTED;
+ goto err;
+ }
+ /* Don't call realpath() if the name can't be a link */
+ if (!strcmp(name_buff, org_name) ||
+ my_readlink(index_name, org_name, MYF(0)) == -1)
+ (void) strmov(index_name, org_name);
+ *strrchr(org_name, '.')= '\0';
+ (void) fn_format(data_name,org_name,"",MARIA_NAME_DEXT,
+ MY_APPEND_EXT|MY_UNPACK_FILENAME|MY_RESOLVE_SYMLINKS);
+
+ info_length=mi_uint2korr(share->state.header.header_length);
+ base_pos= mi_uint2korr(share->state.header.base_pos);
+ if (!(disk_cache= (uchar*) my_alloca(info_length+128)))
+ {
+ my_errno=ENOMEM;
+ goto err;
+ }
+ end_pos=disk_cache+info_length;
+ errpos= 3;
+ if (my_pread(kfile, disk_cache, info_length, 0L, MYF(MY_NABP)))
+ {
+ my_errno=HA_ERR_CRASHED;
+ goto err;
+ }
+ len=mi_uint2korr(share->state.header.state_info_length);
+ keys= (uint) share->state.header.keys;
+ uniques= (uint) share->state.header.uniques;
+ fulltext_keys= (uint) share->state.header.fulltext_keys;
+ key_parts= mi_uint2korr(share->state.header.key_parts);
+ unique_key_parts= mi_uint2korr(share->state.header.unique_key_parts);
+ if (len != MARIA_STATE_INFO_SIZE)
+ {
+ DBUG_PRINT("warning",
+ ("saved_state_info_length: %d state_info_length: %d",
+ len,MARIA_STATE_INFO_SIZE));
+ }
+ share->state_diff_length=len-MARIA_STATE_INFO_SIZE;
+
+ _ma_state_info_read(disk_cache, &share->state);
+ len= mi_uint2korr(share->state.header.base_info_length);
+ if (len != MARIA_BASE_INFO_SIZE)
+ {
+ DBUG_PRINT("warning",("saved_base_info_length: %d base_info_length: %d",
+ len,MARIA_BASE_INFO_SIZE));
+ }
+ disk_pos= _ma_base_info_read(disk_cache + base_pos, &share->base);
+ share->state.state_length=base_pos;
+
+ if (!(open_flags & HA_OPEN_FOR_REPAIR) &&
+ ((share->state.changed & STATE_CRASHED) ||
+ ((open_flags & HA_OPEN_ABORT_IF_CRASHED) &&
+ (my_disable_locking && share->state.open_count))))
+ {
+ DBUG_PRINT("error",("Table is marked as crashed. open_flags: %u "
+ "changed: %u open_count: %u !locking: %d",
+ open_flags, share->state.changed,
+ share->state.open_count, my_disable_locking));
+ my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ?
+ HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE);
+ goto err;
+ }
+
+ /* sanity check */
+ if (share->base.keystart > 65535 || share->base.rec_reflength > 8)
+ {
+ my_errno=HA_ERR_CRASHED;
+ goto err;
+ }
+
+ key_parts+=fulltext_keys*FT_SEGS;
+ if (share->base.max_key_length > maria_max_key_length() ||
+ keys > MARIA_MAX_KEY || key_parts > MARIA_MAX_KEY * HA_MAX_KEY_SEG)
+ {
+ DBUG_PRINT("error",("Wrong key info: Max_key_length: %d keys: %d key_parts: %d", share->base.max_key_length, keys, key_parts));
+ my_errno=HA_ERR_UNSUPPORTED;
+ goto err;
+ }
+ /*
+ If page cache is not initialized, then assume we will create the
+ page_cache after the table is opened!
+ This is only used by maria_check to allow it to check/repair tables
+ with different block sizes.
+ */
+ if (share->base.block_size != maria_block_size &&
+ share_buff.pagecache->inited != 0)
+ {
+ DBUG_PRINT("error", ("Wrong block size %u; Expected %u",
+ (uint) share->base.block_size,
+ (uint) maria_block_size));
+ my_errno=HA_ERR_UNSUPPORTED;
+ goto err;
+ }
+
+ /* Correct max_file_length based on length of sizeof(off_t) */
+ max_data_file_length=
+ (share->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ?
+ (((ulonglong) 1 << (share->base.rec_reflength*8))-1) :
+ (_ma_safe_mul(share->base.pack_reclength,
+ (ulonglong) 1 << (share->base.rec_reflength*8))-1);
+
+ max_key_file_length=
+ _ma_safe_mul(MARIA_MIN_KEY_BLOCK_LENGTH,
+ ((ulonglong) 1 << (share->base.key_reflength*8))-1);
+#if SIZEOF_OFF_T == 4
+ set_if_smaller(max_data_file_length, INT_MAX32);
+ set_if_smaller(max_key_file_length, INT_MAX32);
+#endif
+ share->base.max_data_file_length=(my_off_t) max_data_file_length;
+ share->base.max_key_file_length=(my_off_t) max_key_file_length;
+
+ if (share->options & HA_OPTION_COMPRESS_RECORD)
+ share->base.max_key_length+=2; /* For safety */
+
+ if (!my_multi_malloc(MY_WME,
+ &share,sizeof(*share),
+ &share->state.rec_per_key_part,
+ sizeof(double) * key_parts,
+ &share->state.nulls_per_key_part,
+ sizeof(long)* key_parts,
+ &share->keyinfo,keys*sizeof(MARIA_KEYDEF),
+ &share->uniqueinfo,uniques*sizeof(MARIA_UNIQUEDEF),
+ &share->keyparts,
+ (key_parts+unique_key_parts+keys+uniques) *
+ sizeof(HA_KEYSEG),
+ &share->columndef,
+ (share->base.fields+1)*sizeof(MARIA_COLUMNDEF),
+ &share->column_nr, share->base.fields*sizeof(uint16),
+ &share->blobs,sizeof(MARIA_BLOB)*share->base.blobs,
+ &share->unique_file_name,strlen(name_buff)+1,
+ &share->index_file_name,strlen(index_name)+1,
+ &share->data_file_name,strlen(data_name)+1,
+ &share->open_file_name,strlen(name)+1,
+ &share->state.key_root,keys*sizeof(my_off_t),
+#ifdef THREAD
+ &share->key_root_lock,sizeof(rw_lock_t)*keys,
+#endif
+ &share->mmap_lock,sizeof(rw_lock_t),
+ NullS))
+ goto err;
+ errpos= 4;
+
+ *share=share_buff;
+ memcpy((char*) share->state.rec_per_key_part,
+ (char*) rec_per_key_part, sizeof(double)*key_parts);
+ memcpy((char*) share->state.nulls_per_key_part,
+ (char*) nulls_per_key_part, sizeof(long)*key_parts);
+ memcpy((char*) share->state.key_root,
+ (char*) key_root, sizeof(my_off_t)*keys);
+ strmov(share->unique_file_name, name_buff);
+ share->unique_name_length= strlen(name_buff);
+ strmov(share->index_file_name, index_name);
+ strmov(share->data_file_name, data_name);
+ strmov(share->open_file_name, name);
+
+ share->block_size= share->base.block_size; /* Convenience */
+ {
+ HA_KEYSEG *pos=share->keyparts;
+ for (i=0 ; i < keys ; i++)
+ {
+ share->keyinfo[i].share= share;
+ disk_pos=_ma_keydef_read(disk_pos, &share->keyinfo[i]);
+ share->keyinfo[i].key_nr= i;
+ disk_pos_assert(disk_pos + share->keyinfo[i].keysegs * HA_KEYSEG_SIZE,
+ end_pos);
+ if (share->keyinfo[i].key_alg == HA_KEY_ALG_RTREE)
+ share->have_rtree= 1;
+ share->keyinfo[i].seg=pos;
+ for (j=0 ; j < share->keyinfo[i].keysegs; j++,pos++)
+ {
+ disk_pos=_ma_keyseg_read(disk_pos, pos);
+ if (pos->type == HA_KEYTYPE_TEXT ||
+ pos->type == HA_KEYTYPE_VARTEXT1 ||
+ pos->type == HA_KEYTYPE_VARTEXT2)
+ {
+ if (!pos->language)
+ pos->charset=default_charset_info;
+ else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME))))
+ {
+ my_errno=HA_ERR_UNKNOWN_CHARSET;
+ goto err;
+ }
+ }
+ else if (pos->type == HA_KEYTYPE_BINARY)
+ pos->charset= &my_charset_bin;
+ }
+ if (share->keyinfo[i].flag & HA_SPATIAL)
+ {
+#ifdef HAVE_SPATIAL
+ uint sp_segs=SPDIMS*2;
+ share->keyinfo[i].seg=pos-sp_segs;
+ share->keyinfo[i].keysegs--;
+#else
+ my_errno=HA_ERR_UNSUPPORTED;
+ goto err;
+#endif
+ }
+ else if (share->keyinfo[i].flag & HA_FULLTEXT)
+ {
+ if (!fulltext_keys)
+ { /* 4.0 compatibility code, to be removed in 5.0 */
+ share->keyinfo[i].seg=pos-FT_SEGS;
+ share->keyinfo[i].keysegs-=FT_SEGS;
+ }
+ else
+ {
+ uint k;
+ share->keyinfo[i].seg=pos;
+ for (k=0; k < FT_SEGS; k++)
+ {
+ *pos= ft_keysegs[k];
+ pos[0].language= pos[-1].language;
+ if (!(pos[0].charset= pos[-1].charset))
+ {
+ my_errno=HA_ERR_CRASHED;
+ goto err;
+ }
+ pos++;
+ }
+ }
+ if (!share->ft2_keyinfo.seg)
+ {
+ memcpy(& share->ft2_keyinfo, & share->keyinfo[i], sizeof(MARIA_KEYDEF));
+ share->ft2_keyinfo.keysegs=1;
+ share->ft2_keyinfo.flag=0;
+ share->ft2_keyinfo.keylength=
+ share->ft2_keyinfo.minlength=
+ share->ft2_keyinfo.maxlength=HA_FT_WLEN+share->base.rec_reflength;
+ share->ft2_keyinfo.seg=pos-1;
+ share->ft2_keyinfo.end=pos;
+ setup_key_functions(& share->ft2_keyinfo);
+ }
+ }
+ setup_key_functions(share->keyinfo+i);
+ share->keyinfo[i].end=pos;
+ pos->type=HA_KEYTYPE_END; /* End */
+ pos->length=share->base.rec_reflength;
+ pos->null_bit=0;
+ pos->flag=0; /* For purify */
+ pos++;
+ }
+ for (i=0 ; i < uniques ; i++)
+ {
+ disk_pos=_ma_uniquedef_read(disk_pos, &share->uniqueinfo[i]);
+ disk_pos_assert(disk_pos + share->uniqueinfo[i].keysegs *
+ HA_KEYSEG_SIZE, end_pos);
+ share->uniqueinfo[i].seg=pos;
+ for (j=0 ; j < share->uniqueinfo[i].keysegs; j++,pos++)
+ {
+ disk_pos=_ma_keyseg_read(disk_pos, pos);
+ if (pos->type == HA_KEYTYPE_TEXT ||
+ pos->type == HA_KEYTYPE_VARTEXT1 ||
+ pos->type == HA_KEYTYPE_VARTEXT2)
+ {
+ if (!pos->language)
+ pos->charset=default_charset_info;
+ else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME))))
+ {
+ my_errno=HA_ERR_UNKNOWN_CHARSET;
+ goto err;
+ }
+ }
+ }
+ share->uniqueinfo[i].end=pos;
+ pos->type=HA_KEYTYPE_END; /* End */
+ pos->null_bit=0;
+ pos->flag=0;
+ pos++;
+ }
+ share->ftparsers= 0;
+ }
+ share->data_file_type= share->state.header.data_file_type;
+ share->base_length= (BASE_ROW_HEADER_SIZE +
+ share->base.is_nulls_extended +
+ share->base.null_bytes +
+ share->base.pack_bytes +
+ test(share->options & HA_OPTION_CHECKSUM));
+ share->keypage_header= ((share->base.born_transactional ?
+ LSN_STORE_SIZE + TRANSID_SIZE :
+ 0) + KEYPAGE_KEYID_SIZE + KEYPAGE_FLAG_SIZE +
+ KEYPAGE_USED_SIZE);
+
+ if (open_flags & HA_OPEN_COPY)
+ {
+ /*
+ this instance will be a temporary one used just to create a data
+ file for REPAIR. Don't do logging. This base information will not go
+ to disk.
+ */
+ share->base.born_transactional= FALSE;
+ }
+ if (share->base.born_transactional)
+ {
+ share->page_type= PAGECACHE_LSN_PAGE;
+#ifdef ENABLE_WHEN_WE_HAVE_TRANS_ROW_ID /* QQ */
+ share->base_length+= TRANS_ROW_EXTRA_HEADER_SIZE;
+#endif
+ if (share->state.create_rename_lsn == LSN_REPAIRED_BY_MARIA_CHK)
+ {
+ /*
+ Was repaired with maria_chk, maybe later maria_pack-ed. Some sort of
+ import into the server. It starts its existence (from the point of
+ view of the server, including server's recovery) now.
+ */
+ if ((open_flags & HA_OPEN_FROM_SQL_LAYER) || maria_in_recovery)
+ _ma_update_create_rename_lsn_sub(share, translog_get_horizon(),
+ TRUE);
+ }
+ else if ((!LSN_VALID(share->state.create_rename_lsn) ||
+ !LSN_VALID(share->state.is_of_horizon) ||
+ (cmp_translog_addr(share->state.create_rename_lsn,
+ share->state.is_of_horizon) > 0)) &&
+ !(open_flags & HA_OPEN_FOR_REPAIR))
+ {
+ /*
+ If in Recovery, it will not work. If LSN is invalid and not
+ LSN_REPAIRED_BY_MARIA_CHK, header must be corrupted.
+ In both cases, must repair.
+ */
+ my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ?
+ HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE);
+ goto err;
+ }
+ }
+ else
+ share->page_type= PAGECACHE_PLAIN_PAGE;
+ share->now_transactional= share->base.born_transactional;
+
+ if (share->data_file_type == DYNAMIC_RECORD)
+ {
+ /* add bits used to pack data to pack_reclength for faster allocation */
+ share->base.pack_reclength+= share->base.pack_bytes;
+ share->base.extra_rec_buff_size=
+ (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER) + MARIA_SPLIT_LENGTH +
+ MARIA_REC_BUFF_OFFSET);
+ }
+ share->base.default_rec_buff_size= (max(share->base.pack_reclength,
+ share->base.max_key_length) +
+ share->base.extra_rec_buff_size);
+
+ if (share->data_file_type == COMPRESSED_RECORD)
+ {
+ /* Need some extra bytes for decode_bytes */
+ share->base.extra_rec_buff_size= 7;
+ }
+ disk_pos_assert(disk_pos + share->base.fields *MARIA_COLUMNDEF_SIZE,
+ end_pos);
+ for (i= j= 0 ; i < share->base.fields ; i++)
+ {
+ disk_pos=_ma_columndef_read(disk_pos,&share->columndef[i]);
+ share->columndef[i].pack_type=0;
+ share->columndef[i].huff_tree=0;
+ if (share->columndef[i].type == (int) FIELD_BLOB)
+ {
+ share->blobs[j].pack_length=
+ share->columndef[i].length-portable_sizeof_char_ptr;;
+ share->blobs[j].offset= share->columndef[i].offset;
+ j++;
+ }
+ }
+ share->columndef[i].type=(int) FIELD_LAST; /* End marker */
+ disk_pos= _ma_column_nr_read(disk_pos, share->column_nr,
+ share->base.fields);
+
+ if ((share->data_file_type == BLOCK_RECORD ||
+ share->data_file_type == COMPRESSED_RECORD))
+ {
+ if (_ma_open_datafile(&info, share, -1))
+ goto err;
+ data_file= info.dfile.file;
+ }
+ errpos= 5;
+
+ if (open_flags & HA_OPEN_DELAY_KEY_WRITE)
+ share->options|= HA_OPTION_DELAY_KEY_WRITE;
+ if (mode == O_RDONLY)
+ share->options|= HA_OPTION_READ_ONLY_DATA;
+ share->is_log_table= FALSE;
+
+ if (open_flags & HA_OPEN_TMP_TABLE)
+ {
+ share->options|= HA_OPTION_TMP_TABLE;
+ share->temporary= share->delay_key_write= 1;
+ share->write_flag=MYF(MY_NABP);
+ share->w_locks++; /* We don't have to update status */
+ share->tot_locks++;
+ }
+
+ share->kfile.file= kfile;
+ set_index_pagecache_callbacks(&share->kfile, share);
+ share->this_process=(ulong) getpid();
+ share->last_process= share->state.process;
+ share->base.key_parts=key_parts;
+ share->base.all_key_parts=key_parts+unique_key_parts;
+ if (!(share->last_version=share->state.version))
+ share->last_version=1; /* Safety */
+ share->rec_reflength=share->base.rec_reflength; /* May be changed */
+ share->base.margin_key_file_length=(share->base.max_key_file_length -
+ (keys ? MARIA_INDEX_BLOCK_MARGIN *
+ share->block_size * keys : 0));
+ share->block_size= share->base.block_size;
+ my_afree(disk_cache);
+ _ma_setup_functions(share);
+ if ((*share->once_init)(share, info.dfile.file))
+ goto err;
+
+#ifdef THREAD
+ thr_lock_init(&share->lock);
+ VOID(pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST));
+ VOID(pthread_cond_init(&share->intern_cond, 0));
+ for (i=0; i<keys; i++)
+ VOID(my_rwlock_init(&share->key_root_lock[i], NULL));
+ VOID(my_rwlock_init(&share->mmap_lock, NULL));
+ if (!thr_lock_inited)
+ {
+ /* Probably a single threaded program; Don't use concurrent inserts */
+ maria_concurrent_insert=0;
+ }
+ else if (maria_concurrent_insert)
+ {
+ share->concurrent_insert=
+ ((share->options & (HA_OPTION_READ_ONLY_DATA | HA_OPTION_TMP_TABLE |
+ HA_OPTION_COMPRESS_RECORD |
+ HA_OPTION_TEMP_COMPRESS_RECORD)) ||
+ (open_flags & HA_OPEN_TMP_TABLE) ||
+ share->data_file_type == BLOCK_RECORD ||
+ share->have_rtree) ? 0 : 1;
+ if (share->concurrent_insert)
+ {
+ share->lock.get_status=_ma_get_status;
+ share->lock.copy_status=_ma_copy_status;
+ /**
+ @todo RECOVERY
+ INSERT DELAYED and concurrent inserts are currently disabled for
+ transactional tables; when enabled again, we should re-evaluate
+ what problems the call to _ma_update_status() by
+ thr_reschedule_write_lock() can do (it may hurt Checkpoint as it
+ would be without intern_lock, and it modifies the state).
+ */
+ share->lock.update_status=_ma_update_status;
+ share->lock.restore_status=_ma_restore_status;
+ share->lock.check_status=_ma_check_status;
+ }
+ }
+#endif
+ /*
+ Memory mapping can only be requested after initializing intern_lock.
+ */
+ if (open_flags & HA_OPEN_MMAP)
+ {
+ info.s= share;
+ maria_extra(&info, HA_EXTRA_MMAP, 0);
+ }
+ }
+ else
+ {
+ share= old_info->s;
+ if (share->data_file_type == BLOCK_RECORD)
+ data_file= share->bitmap.file.file; /* Only opened once */
+ }
+
+ if (!(m_info= maria_clone_internal(share, mode, data_file)))
+ goto err;
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ DBUG_RETURN(m_info);
+
+err:
+ save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE;
+ if ((save_errno == HA_ERR_CRASHED) ||
+ (save_errno == HA_ERR_CRASHED_ON_USAGE) ||
+ (save_errno == HA_ERR_CRASHED_ON_REPAIR))
+ _ma_report_error(save_errno, name);
+ switch (errpos) {
+ case 5:
+ if (data_file >= 0)
+ VOID(my_close(data_file, MYF(0)));
+ if (old_info)
+ break; /* Don't remove open table */
+ (*share->once_end)(share);
+ /* fall through */
+ case 4:
+ my_free((uchar*) share,MYF(0));
+ /* fall through */
+ case 3:
+ /* fall through */
+ case 2:
+ my_afree((uchar*) disk_cache);
+ /* fall through */
+ case 1:
+ VOID(my_close(kfile,MYF(0)));
+ /* fall through */
+ case 0:
+ default:
+ break;
+ }
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ my_errno= save_errno;
+ DBUG_RETURN (NULL);
+} /* maria_open */
+
+
+/*
+ Reallocate a buffer, if the current buffer is not large enough
+*/
+
+my_bool _ma_alloc_buffer(uchar **old_addr, size_t *old_size,
+ size_t new_size)
+{
+ if (*old_size < new_size)
+ {
+ uchar *addr;
+ if (!(addr= (uchar*) my_realloc((uchar*) *old_addr, new_size,
+ MYF(MY_ALLOW_ZERO_PTR))))
+ return 1;
+ *old_addr= addr;
+ *old_size= new_size;
+ }
+ return 0;
+}
+
+
+ulonglong _ma_safe_mul(ulonglong a, ulonglong b)
+{
+ ulonglong max_val= ~ (ulonglong) 0; /* my_off_t is unsigned */
+
+ if (!a || max_val / a < b)
+ return max_val;
+ return a*b;
+}
+
+ /* Set up functions in structs */
+
+void _ma_setup_functions(register MARIA_SHARE *share)
+{
+ share->once_init= maria_once_init_dummy;
+ share->once_end= maria_once_end_dummy;
+ share->init= maria_scan_init_dummy;
+ share->end= maria_scan_end_dummy;
+ share->scan_init= maria_scan_init_dummy;/* Compat. dummy function */
+ share->scan_end= maria_scan_end_dummy;/* Compat. dummy function */
+ share->scan_remember_pos= _ma_def_scan_remember_pos;
+ share->scan_restore_pos= _ma_def_scan_restore_pos;
+
+ share->write_record_init= _ma_write_init_default;
+ share->write_record_abort= _ma_write_abort_default;
+ share->keypos_to_recpos= _ma_transparent_recpos;
+ share->recpos_to_keypos= _ma_transparent_recpos;
+
+ switch (share->data_file_type) {
+ case COMPRESSED_RECORD:
+ share->read_record= _ma_read_pack_record;
+ share->scan= _ma_read_rnd_pack_record;
+ share->once_init= _ma_once_init_pack_row;
+ share->once_end= _ma_once_end_pack_row;
+ /*
+ Calculate checksum according to data in the original, not compressed,
+ row.
+ */
+ if (share->state.header.org_data_file_type == STATIC_RECORD &&
+ ! (share->options & HA_OPTION_NULL_FIELDS))
+ share->calc_checksum= _ma_static_checksum;
+ else
+ share->calc_checksum= _ma_checksum;
+ share->calc_write_checksum= share->calc_checksum;
+ break;
+ case DYNAMIC_RECORD:
+ share->read_record= _ma_read_dynamic_record;
+ share->scan= _ma_read_rnd_dynamic_record;
+ share->delete_record= _ma_delete_dynamic_record;
+ share->compare_record= _ma_cmp_dynamic_record;
+ share->compare_unique= _ma_cmp_dynamic_unique;
+ share->calc_checksum= share->calc_write_checksum= _ma_checksum;
+ if (share->base.blobs)
+ {
+ share->update_record= _ma_update_blob_record;
+ share->write_record= _ma_write_blob_record;
+ }
+ else
+ {
+ share->write_record= _ma_write_dynamic_record;
+ share->update_record= _ma_update_dynamic_record;
+ }
+ break;
+ case STATIC_RECORD:
+ share->read_record= _ma_read_static_record;
+ share->scan= _ma_read_rnd_static_record;
+ share->delete_record= _ma_delete_static_record;
+ share->compare_record= _ma_cmp_static_record;
+ share->update_record= _ma_update_static_record;
+ share->write_record= _ma_write_static_record;
+ share->compare_unique= _ma_cmp_static_unique;
+ share->keypos_to_recpos= _ma_static_keypos_to_recpos;
+ share->recpos_to_keypos= _ma_static_recpos_to_keypos;
+ if (share->state.header.org_data_file_type == STATIC_RECORD &&
+ ! (share->options & HA_OPTION_NULL_FIELDS))
+ share->calc_checksum= _ma_static_checksum;
+ else
+ share->calc_checksum= _ma_checksum;
+ break;
+ case BLOCK_RECORD:
+ share->once_init= _ma_once_init_block_record;
+ share->once_end= _ma_once_end_block_record;
+ share->init= _ma_init_block_record;
+ share->end= _ma_end_block_record;
+ share->write_record_init= _ma_write_init_block_record;
+ share->write_record_abort= _ma_write_abort_block_record;
+ share->scan_init= _ma_scan_init_block_record;
+ share->scan_end= _ma_scan_end_block_record;
+ share->scan= _ma_scan_block_record;
+ share->scan_remember_pos= _ma_scan_remember_block_record;
+ share->scan_restore_pos= _ma_scan_restore_block_record;
+ share->read_record= _ma_read_block_record;
+ share->delete_record= _ma_delete_block_record;
+ share->compare_record= _ma_compare_block_record;
+ share->update_record= _ma_update_block_record;
+ share->write_record= _ma_write_block_record;
+ share->compare_unique= _ma_cmp_block_unique;
+ share->calc_checksum= _ma_checksum;
+ share->keypos_to_recpos= _ma_transaction_keypos_to_recpos;
+ share->recpos_to_keypos= _ma_transaction_recpos_to_keypos;
+
+ /*
+ write_block_record() will calculate the checksum; Tell maria_write()
+ that it doesn't have to do this.
+ */
+ share->calc_write_checksum= 0;
+ break;
+ }
+ share->file_read= _ma_nommap_pread;
+ share->file_write= _ma_nommap_pwrite;
+ share->calc_check_checksum= share->calc_checksum;
+
+ if (!(share->options & HA_OPTION_CHECKSUM) &&
+ share->data_file_type != COMPRESSED_RECORD)
+ share->calc_checksum= share->calc_write_checksum= 0;
+ return;
+}
+
+
+static void setup_key_functions(register MARIA_KEYDEF *keyinfo)
+{
+ if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
+ {
+#ifdef HAVE_RTREE_KEYS
+ keyinfo->ck_insert = maria_rtree_insert;
+ keyinfo->ck_delete = maria_rtree_delete;
+#else
+ DBUG_ASSERT(0); /* maria_open should check it never happens */
+#endif
+ }
+ else
+ {
+ keyinfo->ck_insert = _ma_ck_write;
+ keyinfo->ck_delete = _ma_ck_delete;
+ }
+ if (keyinfo->flag & HA_BINARY_PACK_KEY)
+ { /* Simple prefix compression */
+ keyinfo->bin_search= _ma_seq_search;
+ keyinfo->get_key= _ma_get_binary_pack_key;
+ keyinfo->pack_key= _ma_calc_bin_pack_key_length;
+ keyinfo->store_key= _ma_store_bin_pack_key;
+ }
+ else if (keyinfo->flag & HA_VAR_LENGTH_KEY)
+ {
+ keyinfo->get_key= _ma_get_pack_key;
+ if (keyinfo->seg[0].flag & HA_PACK_KEY)
+ { /* Prefix compression */
+ if (!keyinfo->seg->charset || use_strnxfrm(keyinfo->seg->charset) ||
+ (keyinfo->seg->flag & HA_NULL_PART) ||
+ keyinfo->seg->charset->mbminlen > 1)
+ keyinfo->bin_search= _ma_seq_search;
+ else
+ keyinfo->bin_search= _ma_prefix_search;
+ keyinfo->pack_key= _ma_calc_var_pack_key_length;
+ keyinfo->store_key= _ma_store_var_pack_key;
+ }
+ else
+ {
+ keyinfo->bin_search= _ma_seq_search;
+ keyinfo->pack_key= _ma_calc_var_key_length; /* Variable length key */
+ keyinfo->store_key= _ma_store_static_key;
+ }
+ }
+ else
+ {
+ keyinfo->bin_search= _ma_bin_search;
+ keyinfo->get_key= _ma_get_static_key;
+ keyinfo->pack_key= _ma_calc_static_key_length;
+ keyinfo->store_key= _ma_store_static_key;
+ }
+
+ /* set keyinfo->write_comp_flag */
+ if (keyinfo->flag & HA_SORT_ALLOWS_SAME)
+ keyinfo->write_comp_flag=SEARCH_BIGGER; /* Put after same key */
+ else if (keyinfo->flag & ( HA_NOSAME | HA_FULLTEXT))
+ {
+ keyinfo->write_comp_flag= SEARCH_FIND | SEARCH_UPDATE; /* No duplicates */
+ if (keyinfo->flag & HA_NULL_ARE_EQUAL)
+ keyinfo->write_comp_flag|= SEARCH_NULL_ARE_EQUAL;
+ }
+ else
+ keyinfo->write_comp_flag= SEARCH_SAME; /* Keys in rec-pos order */
+ return;
+}
+
+
+/**
+ @brief Function to save and store the header in the index file (.MYI)
+
+ Operates under MARIA_SHARE::intern_lock if requested.
+ Sets MARIA_SHARE::MARIA_STATE_INFO::is_of_horizon if transactional table.
+ Then calls _ma_state_info_write_sub().
+
+ @param share table
+ @param pWrite bitmap: if 1 is set my_pwrite() is used otherwise
+ my_write(); if 2 is set, info about keys is written
+ (should only be needed after ALTER TABLE
+ ENABLE/DISABLE KEYS, and REPAIR/OPTIMIZE); if 4 is
+ set, MARIA_SHARE::intern_lock is taken.
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite)
+{
+ uint res;
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ return 0;
+
+ if (pWrite & 4)
+ pthread_mutex_lock(&share->intern_lock);
+ else if (maria_multi_threaded)
+ {
+ safe_mutex_assert_owner(&share->intern_lock);
+ }
+ if (share->base.born_transactional && translog_status == TRANSLOG_OK &&
+ !maria_in_recovery)
+ {
+ /*
+ In a recovery, we want to set is_of_horizon to the LSN of the last
+ record executed by Recovery, not the current EOF of the log (which
+ is too new). Recovery does it by itself.
+ */
+ share->state.is_of_horizon= translog_get_horizon();
+ DBUG_PRINT("info", ("is_of_horizon set to LSN (%lu,0x%lx)",
+ LSN_IN_PARTS(share->state.is_of_horizon)));
+ }
+ res= _ma_state_info_write_sub(share->kfile.file, &share->state, pWrite);
+ if (pWrite & 4)
+ pthread_mutex_unlock(&share->intern_lock);
+ return res;
+}
+
+
+/**
+ @brief Function to save and store the header in the index file (.MYI).
+
+ Shortcut to use instead of _ma_state_info_write() when appropriate.
+
+ @param file descriptor of the index file to write
+ @param state state information to write to the file
+ @param pWrite bitmap: if 1 is set my_pwrite() is used otherwise
+ my_write(); if 2 is set, info about keys is written
+ (should only be needed after ALTER TABLE
+ ENABLE/DISABLE KEYS, and REPAIR/OPTIMIZE).
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+uint _ma_state_info_write_sub(File file, MARIA_STATE_INFO *state, uint pWrite)
+{
+ uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE];
+ uchar *ptr=buff;
+ uint i, keys= (uint) state->header.keys;
+ size_t res;
+ DBUG_ENTER("_ma_state_info_write_sub");
+
+ memcpy_fixed(ptr,&state->header,sizeof(state->header));
+ ptr+=sizeof(state->header);
+
+ /* open_count must be first because of _ma_mark_file_changed ! */
+ mi_int2store(ptr,state->open_count); ptr+= 2;
+ /*
+ if you change the offset of create_rename_lsn/is_of_horizon inside the
+ index file's header, fix ma_create + ma_rename + ma_delete_all +
+ backward-compatibility.
+ */
+ lsn_store(ptr, state->create_rename_lsn); ptr+= LSN_STORE_SIZE;
+ lsn_store(ptr, state->is_of_horizon); ptr+= LSN_STORE_SIZE;
+ *ptr++= (uchar)state->changed;
+ *ptr++= state->sortkey;
+ mi_rowstore(ptr,state->state.records); ptr+= 8;
+ mi_rowstore(ptr,state->state.del); ptr+= 8;
+ mi_rowstore(ptr,state->split); ptr+= 8;
+ mi_sizestore(ptr,state->dellink); ptr+= 8;
+ mi_sizestore(ptr,state->first_bitmap_with_space); ptr+= 8;
+ mi_sizestore(ptr,state->state.key_file_length); ptr+= 8;
+ mi_sizestore(ptr,state->state.data_file_length); ptr+= 8;
+ mi_sizestore(ptr,state->state.empty); ptr+= 8;
+ mi_sizestore(ptr,state->state.key_empty); ptr+= 8;
+ mi_int8store(ptr,state->auto_increment); ptr+= 8;
+ mi_int8store(ptr,(ulonglong) state->state.checksum); ptr+= 8;
+ mi_int4store(ptr,state->process); ptr+= 4;
+ mi_int4store(ptr,state->unique); ptr+= 4;
+ mi_int4store(ptr,state->status); ptr+= 4;
+ mi_int4store(ptr,state->update_count); ptr+= 4;
+
+ ptr+= state->state_diff_length;
+
+ for (i=0; i < keys; i++)
+ {
+ mi_sizestore(ptr,state->key_root[i]); ptr+= 8;
+ }
+ mi_sizestore(ptr,state->key_del); ptr+= 8;
+ if (pWrite & 2) /* From maria_chk */
+ {
+ uint key_parts= mi_uint2korr(state->header.key_parts);
+ mi_int4store(ptr,state->sec_index_changed); ptr+= 4;
+ mi_int4store(ptr,state->sec_index_used); ptr+= 4;
+ mi_int4store(ptr,state->version); ptr+= 4;
+ mi_int8store(ptr,state->key_map); ptr+= 8;
+ mi_int8store(ptr,(ulonglong) state->create_time); ptr+= 8;
+ mi_int8store(ptr,(ulonglong) state->recover_time); ptr+= 8;
+ mi_int8store(ptr,(ulonglong) state->check_time); ptr+= 8;
+ mi_sizestore(ptr, state->records_at_analyze); ptr+= 8;
+ /* reserve place for some information per key */
+ bzero(ptr, keys*4); ptr+= keys*4;
+ for (i=0 ; i < key_parts ; i++)
+ {
+ float8store(ptr, state->rec_per_key_part[i]); ptr+= 8;
+ mi_int4store(ptr, state->nulls_per_key_part[i]); ptr+= 4;
+ }
+ }
+
+ res= (pWrite & 1) ?
+ my_pwrite(file, buff, (size_t) (ptr-buff), 0L,
+ MYF(MY_NABP | MY_THREADSAFE)) :
+ my_write(file, buff, (size_t) (ptr-buff),
+ MYF(MY_NABP));
+ DBUG_RETURN(res != 0);
+}
+
+
+static uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state)
+{
+ uint i,keys,key_parts;
+ memcpy_fixed(&state->header,ptr, sizeof(state->header));
+ ptr+= sizeof(state->header);
+ keys= (uint) state->header.keys;
+ key_parts= mi_uint2korr(state->header.key_parts);
+
+ state->open_count = mi_uint2korr(ptr); ptr+= 2;
+ state->create_rename_lsn= lsn_korr(ptr); ptr+= LSN_STORE_SIZE;
+ state->is_of_horizon= lsn_korr(ptr); ptr+= LSN_STORE_SIZE;
+ state->changed= (my_bool) *ptr++;
+ state->sortkey= (uint) *ptr++;
+ state->state.records= mi_rowkorr(ptr); ptr+= 8;
+ state->state.del = mi_rowkorr(ptr); ptr+= 8;
+ state->split = mi_rowkorr(ptr); ptr+= 8;
+ state->dellink= mi_sizekorr(ptr); ptr+= 8;
+ state->first_bitmap_with_space= mi_sizekorr(ptr); ptr+= 8;
+ state->state.key_file_length = mi_sizekorr(ptr); ptr+= 8;
+ state->state.data_file_length= mi_sizekorr(ptr); ptr+= 8;
+ state->state.empty = mi_sizekorr(ptr); ptr+= 8;
+ state->state.key_empty= mi_sizekorr(ptr); ptr+= 8;
+ state->auto_increment=mi_uint8korr(ptr); ptr+= 8;
+ state->state.checksum=(ha_checksum) mi_uint8korr(ptr);ptr+= 8;
+ /* Not used (legacy from MyISAM) */
+ state->process= mi_uint4korr(ptr); ptr+= 4;
+ /* Not used (legacy from MyISAM) */
+ state->unique = mi_uint4korr(ptr); ptr+= 4;
+ state->status = mi_uint4korr(ptr); ptr+= 4;
+ state->update_count=mi_uint4korr(ptr); ptr+= 4;
+
+ ptr+= state->state_diff_length;
+
+ for (i=0; i < keys; i++)
+ {
+ state->key_root[i]= mi_sizekorr(ptr); ptr+= 8;
+ }
+ state->key_del= mi_sizekorr(ptr); ptr+= 8;
+ state->sec_index_changed = mi_uint4korr(ptr); ptr+= 4;
+ state->sec_index_used = mi_uint4korr(ptr); ptr+= 4;
+ state->version = mi_uint4korr(ptr); ptr+= 4;
+ state->key_map = mi_uint8korr(ptr); ptr+= 8;
+ state->create_time = (time_t) mi_sizekorr(ptr); ptr+= 8;
+ state->recover_time =(time_t) mi_sizekorr(ptr); ptr+= 8;
+ state->check_time = (time_t) mi_sizekorr(ptr); ptr+= 8;
+ state->records_at_analyze= mi_sizekorr(ptr); ptr+= 8;
+ ptr+= keys * 4; /* Skip reserved bytes */
+ for (i=0 ; i < key_parts ; i++)
+ {
+ float8get(state->rec_per_key_part[i], ptr); ptr+= 8;
+ state->nulls_per_key_part[i]= mi_uint4korr(ptr); ptr+= 4;
+ }
+ return ptr;
+}
+
+
+/**
+ @brief Fills the state by reading its copy on disk.
+
+ Should not be called for transactional tables, as their state on disk is
+ rarely current and so is often misleading for a reader.
+ Does nothing in single user mode.
+
+ @param file file to read from
+ @param state state which will be filled
+*/
+
+uint _ma_state_info_read_dsk(File file, MARIA_STATE_INFO *state)
+{
+ uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE];
+
+ /* trick to detect transactional tables */
+ DBUG_ASSERT(state->create_rename_lsn == LSN_IMPOSSIBLE);
+ if (!maria_single_user)
+ {
+ if (my_pread(file, buff, state->state_length, 0L, MYF(MY_NABP)))
+ return 1;
+ _ma_state_info_read(buff, state);
+ }
+ return 0;
+}
+
+
+/****************************************************************************
+** store and read of MARIA_BASE_INFO
+****************************************************************************/
+
+uint _ma_base_info_write(File file, MARIA_BASE_INFO *base)
+{
+ uchar buff[MARIA_BASE_INFO_SIZE], *ptr=buff;
+
+ bmove(ptr, maria_uuid, MY_UUID_SIZE);
+ ptr+= MY_UUID_SIZE;
+ mi_sizestore(ptr,base->keystart); ptr+= 8;
+ mi_sizestore(ptr,base->max_data_file_length); ptr+= 8;
+ mi_sizestore(ptr,base->max_key_file_length); ptr+= 8;
+ mi_rowstore(ptr,base->records); ptr+= 8;
+ mi_rowstore(ptr,base->reloc); ptr+= 8;
+ mi_int4store(ptr,base->mean_row_length); ptr+= 4;
+ mi_int4store(ptr,base->reclength); ptr+= 4;
+ mi_int4store(ptr,base->pack_reclength); ptr+= 4;
+ mi_int4store(ptr,base->min_pack_length); ptr+= 4;
+ mi_int4store(ptr,base->max_pack_length); ptr+= 4;
+ mi_int4store(ptr,base->min_block_length); ptr+= 4;
+ mi_int2store(ptr,base->fields); ptr+= 2;
+ mi_int2store(ptr,base->fixed_not_null_fields); ptr+= 2;
+ mi_int2store(ptr,base->fixed_not_null_fields_length); ptr+= 2;
+ mi_int2store(ptr,base->max_field_lengths); ptr+= 2;
+ mi_int2store(ptr,base->pack_fields); ptr+= 2;
+ mi_int2store(ptr,base->extra_options) ptr+= 2;
+ mi_int2store(ptr,base->null_bytes); ptr+= 2;
+ mi_int2store(ptr,base->original_null_bytes); ptr+= 2;
+ mi_int2store(ptr,base->field_offsets); ptr+= 2;
+ mi_int2store(ptr,0); ptr+= 2; /* reserved */
+ mi_int2store(ptr,base->block_size); ptr+= 2;
+ *ptr++= base->rec_reflength;
+ *ptr++= base->key_reflength;
+ *ptr++= base->keys;
+ *ptr++= base->auto_key;
+ *ptr++= base->born_transactional;
+ *ptr++= 0; /* Reserved */
+ mi_int2store(ptr,base->pack_bytes); ptr+= 2;
+ mi_int2store(ptr,base->blobs); ptr+= 2;
+ mi_int2store(ptr,base->max_key_block_length); ptr+= 2;
+ mi_int2store(ptr,base->max_key_length); ptr+= 2;
+ mi_int2store(ptr,base->extra_alloc_bytes); ptr+= 2;
+ *ptr++= base->extra_alloc_procent;
+ bzero(ptr,16); ptr+= 16; /* extra */
+ DBUG_ASSERT((ptr - buff) == MARIA_BASE_INFO_SIZE);
+ return my_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
+}
+
+
+static uchar *_ma_base_info_read(uchar *ptr, MARIA_BASE_INFO *base)
+{
+ bmove(base->uuid, ptr, MY_UUID_SIZE); ptr+= MY_UUID_SIZE;
+ base->keystart= mi_sizekorr(ptr); ptr+= 8;
+ base->max_data_file_length= mi_sizekorr(ptr); ptr+= 8;
+ base->max_key_file_length= mi_sizekorr(ptr); ptr+= 8;
+ base->records= (ha_rows) mi_sizekorr(ptr); ptr+= 8;
+ base->reloc= (ha_rows) mi_sizekorr(ptr); ptr+= 8;
+ base->mean_row_length= mi_uint4korr(ptr); ptr+= 4;
+ base->reclength= mi_uint4korr(ptr); ptr+= 4;
+ base->pack_reclength= mi_uint4korr(ptr); ptr+= 4;
+ base->min_pack_length= mi_uint4korr(ptr); ptr+= 4;
+ base->max_pack_length= mi_uint4korr(ptr); ptr+= 4;
+ base->min_block_length= mi_uint4korr(ptr); ptr+= 4;
+ base->fields= mi_uint2korr(ptr); ptr+= 2;
+ base->fixed_not_null_fields= mi_uint2korr(ptr); ptr+= 2;
+ base->fixed_not_null_fields_length= mi_uint2korr(ptr);ptr+= 2;
+ base->max_field_lengths= mi_uint2korr(ptr); ptr+= 2;
+ base->pack_fields= mi_uint2korr(ptr); ptr+= 2;
+ base->extra_options= mi_uint2korr(ptr); ptr+= 2;
+ base->null_bytes= mi_uint2korr(ptr); ptr+= 2;
+ base->original_null_bytes= mi_uint2korr(ptr); ptr+= 2;
+ base->field_offsets= mi_uint2korr(ptr); ptr+= 2;
+ ptr+= 2;
+ base->block_size= mi_uint2korr(ptr); ptr+= 2;
+
+ base->rec_reflength= *ptr++;
+ base->key_reflength= *ptr++;
+ base->keys= *ptr++;
+ base->auto_key= *ptr++;
+ base->born_transactional= *ptr++;
+ ptr++;
+ base->pack_bytes= mi_uint2korr(ptr); ptr+= 2;
+ base->blobs= mi_uint2korr(ptr); ptr+= 2;
+ base->max_key_block_length= mi_uint2korr(ptr); ptr+= 2;
+ base->max_key_length= mi_uint2korr(ptr); ptr+= 2;
+ base->extra_alloc_bytes= mi_uint2korr(ptr); ptr+= 2;
+ base->extra_alloc_procent= *ptr++;
+ ptr+= 16;
+ return ptr;
+}
+
+/*--------------------------------------------------------------------------
+ maria_keydef
+---------------------------------------------------------------------------*/
+
+my_bool _ma_keydef_write(File file, MARIA_KEYDEF *keydef)
+{
+ uchar buff[MARIA_KEYDEF_SIZE];
+ uchar *ptr=buff;
+
+ *ptr++= (uchar) keydef->keysegs;
+ *ptr++= keydef->key_alg; /* Rtree or Btree */
+ mi_int2store(ptr,keydef->flag); ptr+= 2;
+ mi_int2store(ptr,keydef->block_length); ptr+= 2;
+ mi_int2store(ptr,keydef->keylength); ptr+= 2;
+ mi_int2store(ptr,keydef->minlength); ptr+= 2;
+ mi_int2store(ptr,keydef->maxlength); ptr+= 2;
+ return my_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
+}
+
+uchar *_ma_keydef_read(uchar *ptr, MARIA_KEYDEF *keydef)
+{
+ keydef->keysegs = (uint) *ptr++;
+ keydef->key_alg = *ptr++; /* Rtree or Btree */
+
+ keydef->flag = mi_uint2korr(ptr); ptr+= 2;
+ keydef->block_length = mi_uint2korr(ptr); ptr+= 2;
+ keydef->keylength = mi_uint2korr(ptr); ptr+= 2;
+ keydef->minlength = mi_uint2korr(ptr); ptr+= 2;
+ keydef->maxlength = mi_uint2korr(ptr); ptr+= 2;
+ keydef->underflow_block_length=keydef->block_length/3;
+ keydef->version = 0; /* Not saved */
+ keydef->parser = &ft_default_parser;
+ keydef->ftparser_nr = 0;
+ return ptr;
+}
+
+/***************************************************************************
+** maria_keyseg
+***************************************************************************/
+
+my_bool _ma_keyseg_write(File file, const HA_KEYSEG *keyseg)
+{
+ uchar buff[HA_KEYSEG_SIZE];
+ uchar *ptr=buff;
+ ulong pos;
+
+ *ptr++= keyseg->type;
+ *ptr++= keyseg->language;
+ *ptr++= keyseg->null_bit;
+ *ptr++= keyseg->bit_start;
+ *ptr++= keyseg->bit_end;
+ *ptr++= keyseg->bit_length;
+ mi_int2store(ptr,keyseg->flag); ptr+= 2;
+ mi_int2store(ptr,keyseg->length); ptr+= 2;
+ mi_int4store(ptr,keyseg->start); ptr+= 4;
+ pos= keyseg->null_bit ? keyseg->null_pos : keyseg->bit_pos;
+ mi_int4store(ptr, pos);
+ ptr+=4;
+
+ return my_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
+}
+
+
+uchar *_ma_keyseg_read(uchar *ptr, HA_KEYSEG *keyseg)
+{
+ keyseg->type = *ptr++;
+ keyseg->language = *ptr++;
+ keyseg->null_bit = *ptr++;
+ keyseg->bit_start = *ptr++;
+ keyseg->bit_end = *ptr++;
+ keyseg->bit_length = *ptr++;
+ keyseg->flag = mi_uint2korr(ptr); ptr+= 2;
+ keyseg->length = mi_uint2korr(ptr); ptr+= 2;
+ keyseg->start = mi_uint4korr(ptr); ptr+= 4;
+ keyseg->null_pos = mi_uint4korr(ptr); ptr+= 4;
+ keyseg->charset=0; /* Will be filled in later */
+ if (keyseg->null_bit)
+ keyseg->bit_pos= (uint16)(keyseg->null_pos + (keyseg->null_bit == 7));
+ else
+ {
+ keyseg->bit_pos= (uint16)keyseg->null_pos;
+ keyseg->null_pos= 0;
+ }
+ return ptr;
+}
+
+/*--------------------------------------------------------------------------
+ maria_uniquedef
+---------------------------------------------------------------------------*/
+
+my_bool _ma_uniquedef_write(File file, MARIA_UNIQUEDEF *def)
+{
+ uchar buff[MARIA_UNIQUEDEF_SIZE];
+ uchar *ptr=buff;
+
+ mi_int2store(ptr,def->keysegs); ptr+=2;
+ *ptr++= (uchar) def->key;
+ *ptr++ = (uchar) def->null_are_equal;
+
+ return my_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
+}
+
+uchar *_ma_uniquedef_read(uchar *ptr, MARIA_UNIQUEDEF *def)
+{
+ def->keysegs = mi_uint2korr(ptr);
+ def->key = ptr[2];
+ def->null_are_equal=ptr[3];
+ return ptr+4; /* 1 extra uchar */
+}
+
+/***************************************************************************
+** MARIA_COLUMNDEF
+***************************************************************************/
+
+my_bool _ma_columndef_write(File file, MARIA_COLUMNDEF *columndef)
+{
+ uchar buff[MARIA_COLUMNDEF_SIZE];
+ uchar *ptr=buff;
+
+ mi_int2store(ptr,(ulong) columndef->column_nr); ptr+= 2;
+ mi_int2store(ptr,(ulong) columndef->offset); ptr+= 2;
+ mi_int2store(ptr,columndef->type); ptr+= 2;
+ mi_int2store(ptr,columndef->length); ptr+= 2;
+ mi_int2store(ptr,columndef->fill_length); ptr+= 2;
+ mi_int2store(ptr,columndef->null_pos); ptr+= 2;
+ mi_int2store(ptr,columndef->empty_pos); ptr+= 2;
+
+ (*ptr++)= columndef->null_bit;
+ (*ptr++)= columndef->empty_bit;
+ ptr[0]= ptr[1]= ptr[2]= ptr[3]= 0; ptr+= 4; /* For future */
+ return my_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
+}
+
+uchar *_ma_columndef_read(uchar *ptr, MARIA_COLUMNDEF *columndef)
+{
+ columndef->column_nr= mi_uint2korr(ptr); ptr+= 2;
+ columndef->offset= mi_uint2korr(ptr); ptr+= 2;
+ columndef->type= mi_sint2korr(ptr); ptr+= 2;
+ columndef->length= mi_uint2korr(ptr); ptr+= 2;
+ columndef->fill_length= mi_uint2korr(ptr); ptr+= 2;
+ columndef->null_pos= mi_uint2korr(ptr); ptr+= 2;
+ columndef->empty_pos= mi_uint2korr(ptr); ptr+= 2;
+ columndef->null_bit= (uint8) *ptr++;
+ columndef->empty_bit= (uint8) *ptr++;
+ ptr+= 4;
+ return ptr;
+}
+
+my_bool _ma_column_nr_write(File file, uint16 *offsets, uint columns)
+{
+ uchar *buff, *ptr, *end;
+ size_t size= columns*2;
+ my_bool res;
+
+ if (!(buff= (uchar*) my_alloca(size)))
+ return 1;
+ for (ptr= buff, end= ptr + size; ptr < end ; ptr+= 2, offsets++)
+ int2store(ptr, *offsets);
+ res= my_write(file, buff, size, MYF(MY_NABP)) != 0;
+ my_afree(buff);
+ return res;
+}
+
+
+uchar *_ma_column_nr_read(uchar *ptr, uint16 *offsets, uint columns)
+{
+ uchar *end;
+ size_t size= columns*2;
+ for (end= ptr + size; ptr < end ; ptr+=2, offsets++)
+ *offsets= uint2korr(ptr);
+ return ptr;
+}
+
+
+static void set_data_pagecache_callbacks(PAGECACHE_FILE *file,
+ MARIA_SHARE *share)
+{
+ file->callback_data= (uchar*) share;
+ file->write_fail= &maria_page_write_failure;
+ if (share->temporary)
+ {
+ file->read_callback= &maria_page_crc_check_none;
+ file->write_callback= &maria_page_filler_set_none;
+ }
+ else
+ {
+ file->read_callback= &maria_page_crc_check_data;
+ if (share->options & HA_OPTION_PAGE_CHECKSUM)
+ file->write_callback= &maria_page_crc_set_normal;
+ else
+ file->write_callback= &maria_page_filler_set_normal;
+ }
+}
+
+
+static void set_index_pagecache_callbacks(PAGECACHE_FILE *file,
+ MARIA_SHARE *share)
+{
+ file->callback_data= (uchar*) share;
+ if (share->temporary)
+ {
+ file->read_callback= &maria_page_crc_check_none;
+ file->write_callback= &maria_page_filler_set_none;
+ }
+ else
+ {
+ file->read_callback= &maria_page_crc_check_index;
+ if (share->options & HA_OPTION_PAGE_CHECKSUM)
+ file->write_callback= &maria_page_crc_set_index;
+ else
+ file->write_callback= &maria_page_filler_set_normal;
+ }
+}
+
+
+/**************************************************************************
+ Open data file
+ We can't use dup() here as the data file descriptors need to have different
+ active seek-positions.
+
+ The argument file_to_dup is here for the future if there would on some OS
+ exist a dup()-like call that would give us two different file descriptors.
+*************************************************************************/
+
+int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share,
+ File file_to_dup __attribute__((unused)))
+{
+ info->dfile.file= share->bitmap.file.file=
+ my_open(share->data_file_name, share->mode | O_SHARE,
+ MYF(MY_WME));
+ return info->dfile.file >= 0 ? 0 : 1;
+}
+
+
+int _ma_open_keyfile(MARIA_SHARE *share)
+{
+ /*
+ Modifications to share->kfile should be under intern_lock to protect
+ against a concurrent checkpoint.
+ */
+ pthread_mutex_lock(&share->intern_lock);
+ share->kfile.file= my_open(share->unique_file_name,
+ share->mode | O_SHARE,
+ MYF(MY_WME));
+ pthread_mutex_unlock(&share->intern_lock);
+ return (share->kfile.file < 0);
+}
+
+
+/*
+ Disable all indexes.
+
+ SYNOPSIS
+ maria_disable_indexes()
+ info A pointer to the MARIA storage engine MARIA_HA struct.
+
+ DESCRIPTION
+ Disable all indexes.
+
+ RETURN
+ 0 ok
+*/
+
+int maria_disable_indexes(MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+
+ maria_clear_all_keys_active(share->state.key_map);
+ return 0;
+}
+
+
+/*
+ Enable all indexes
+
+ SYNOPSIS
+ maria_enable_indexes()
+ info A pointer to the MARIA storage engine MARIA_HA struct.
+
+ DESCRIPTION
+ Enable all indexes. The indexes might have been disabled
+ by maria_disable_index() before.
+ The function works only if both data and indexes are empty,
+ otherwise a repair is required.
+ To be sure, call handler::delete_all_rows() before.
+
+ RETURN
+ 0 ok
+ HA_ERR_CRASHED data or index is non-empty.
+*/
+
+int maria_enable_indexes(MARIA_HA *info)
+{
+ int error= 0;
+ MARIA_SHARE *share= info->s;
+
+ if (share->state.state.data_file_length ||
+ (share->state.state.key_file_length != share->base.keystart))
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ error= HA_ERR_CRASHED;
+ }
+ else
+ maria_set_all_keys_active(share->state.key_map, share->base.keys);
+ return error;
+}
+
+
+/*
+ Test if indexes are disabled.
+
+ SYNOPSIS
+ maria_indexes_are_disabled()
+ info A pointer to the MARIA storage engine MARIA_HA struct.
+
+ DESCRIPTION
+ Test if indexes are disabled.
+
+ RETURN
+ 0 indexes are not disabled
+ 1 all indexes are disabled
+ 2 non-unique indexes are disabled
+*/
+
+int maria_indexes_are_disabled(MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+
+ /*
+ No keys or all are enabled. keys is the number of keys. Left shifted
+ gives us only one bit set. When decreased by one, gives us all all bits
+ up to this one set and it gets unset.
+ */
+ if (!share->base.keys ||
+ (maria_is_all_keys_active(share->state.key_map, share->base.keys)))
+ return 0;
+
+ /* All are disabled */
+ if (maria_is_any_key_active(share->state.key_map))
+ return 1;
+
+ /*
+ We have keys. Some enabled, some disabled.
+ Don't check for any non-unique disabled but return directly 2
+ */
+ return 2;
+}
+
+
+static my_bool maria_scan_init_dummy(MARIA_HA *info __attribute__((unused)))
+{
+ return 0;
+}
+
+static void maria_scan_end_dummy(MARIA_HA *info __attribute__((unused)))
+{
+}
+
+static my_bool maria_once_init_dummy(MARIA_SHARE *share
+ __attribute__((unused)),
+ File dfile __attribute__((unused)))
+{
+ return 0;
+}
+
+static my_bool maria_once_end_dummy(MARIA_SHARE *share __attribute__((unused)))
+{
+ return 0;
+}
diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c
new file mode 100644
index 00000000000..8cca98e9bed
--- /dev/null
+++ b/storage/maria/ma_packrec.c
@@ -0,0 +1,1722 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+ /* Functions to compressed records */
+
+#include "maria_def.h"
+
+#define IS_CHAR ((uint) 32768) /* Bit if char (not offset) in tree */
+
+/* Some definitions to keep in sync with maria_pack.c */
+#define HEAD_LENGTH 32 /* Length of fixed header */
+
+#if INT_MAX > 32767
+#define BITS_SAVED 32
+#define MAX_QUICK_TABLE_BITS 9 /* Because we may shift in 24 bits */
+#else
+#define BITS_SAVED 16
+#define MAX_QUICK_TABLE_BITS 6
+#endif
+
+#define get_bit(BU) ((BU)->bits ? \
+ (BU)->current_byte & ((maria_bit_type) 1 << --(BU)->bits) :\
+ (fill_buffer(BU), (BU)->bits= BITS_SAVED-1,\
+ (BU)->current_byte & ((maria_bit_type) 1 << (BITS_SAVED-1))))
+#define skip_to_next_byte(BU) ((BU)->bits&=~7)
+#define get_bits(BU,count) (((BU)->bits >= count) ? (((BU)->current_byte >> ((BU)->bits-=count)) & mask[count]) : fill_and_get_bits(BU,count))
+
+#define decode_bytes_test_bit(bit) \
+ if (low_byte & (1 << (7-bit))) \
+ pos++; \
+ if (*pos & IS_CHAR) \
+ { bits-=(bit+1); break; } \
+ pos+= *pos
+
+/*
+ Size in uint16 of a Huffman tree for uchar compression of 256 uchar values
+*/
+#define OFFSET_TABLE_SIZE 512
+
+static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file,
+ pbool fix_keys);
+static uint read_huff_table(MARIA_BIT_BUFF *bit_buff,
+ MARIA_DECODE_TREE *decode_tree,
+ uint16 **decode_table,uchar **intervall_buff,
+ uint16 *tmp_buff);
+static void make_quick_table(uint16 *to_table,uint16 *decode_table,
+ uint *next_free,uint value,uint bits,
+ uint max_bits);
+static void fill_quick_table(uint16 *table,uint bits, uint max_bits,
+ uint value);
+static uint copy_decode_table(uint16 *to_pos,uint offset,
+ uint16 *decode_table);
+static uint find_longest_bitstream(uint16 *table, uint16 *end);
+static void (*get_unpack_function(MARIA_COLUMNDEF *rec))(MARIA_COLUMNDEF *field,
+ MARIA_BIT_BUFF *buff,
+ uchar *to,
+ uchar *end);
+static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ uchar *to,uchar *end);
+static void uf_skip_zero(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ uchar *to,uchar *end);
+static void uf_space_normal(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ uchar *to,uchar *end);
+static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end);
+static void uf_endspace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ uchar *to,uchar *end);
+static void uf_space_endspace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ uchar *to,uchar *end);
+static void uf_endspace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ uchar *to,uchar *end);
+static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end);
+static void uf_prespace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ uchar *to,uchar *end);
+static void uf_space_prespace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ uchar *to,uchar *end);
+static void uf_prespace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ uchar *to,uchar *end);
+static void uf_zerofill_normal(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ uchar *to,uchar *end);
+static void uf_constant(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ uchar *to,uchar *end);
+static void uf_intervall(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ uchar *to,uchar *end);
+static void uf_zero(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ uchar *to,uchar *end);
+static void uf_blob(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end);
+static void uf_varchar1(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end);
+static void uf_varchar2(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end);
+static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ uchar *to,uchar *end);
+static uint decode_pos(MARIA_BIT_BUFF *bit_buff,
+ MARIA_DECODE_TREE *decode_tree);
+static void init_bit_buffer(MARIA_BIT_BUFF *bit_buff,uchar *buffer,
+ uint length);
+static uint fill_and_get_bits(MARIA_BIT_BUFF *bit_buff,uint count);
+static void fill_buffer(MARIA_BIT_BUFF *bit_buff);
+static uint max_bit(uint value);
+static uint read_pack_length(uint version, const uchar *buf, ulong *length);
+#ifdef HAVE_MMAP
+static uchar *_ma_mempack_get_block_info(MARIA_HA *maria,
+ MARIA_BIT_BUFF *bit_buff,
+ MARIA_BLOCK_INFO *info,
+ uchar **rec_buff_p,
+ size_t *rec_buff_size_p,
+ uchar *header);
+#endif
+
+static maria_bit_type mask[]=
+{
+ 0x00000000,
+ 0x00000001, 0x00000003, 0x00000007, 0x0000000f,
+ 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
+ 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
+ 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
+#if BITS_SAVED > 16
+ 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff,
+ 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
+ 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff,
+ 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff,
+#endif
+};
+
+
+my_bool _ma_once_init_pack_row(MARIA_SHARE *share, File dfile)
+{
+ share->options|= HA_OPTION_READ_ONLY_DATA;
+ return (_ma_read_pack_info(share, dfile,
+ (pbool)
+ test(!(share->options &
+ (HA_OPTION_PACK_RECORD |
+ HA_OPTION_TEMP_COMPRESS_RECORD)))));
+}
+
+
+my_bool _ma_once_end_pack_row(MARIA_SHARE *share)
+{
+ if (share->decode_trees)
+ {
+ my_free((uchar*) share->decode_trees,MYF(0));
+ my_free((uchar*) share->decode_tables,MYF(0));
+ }
+ return 0;
+}
+
+
+/* Read all packed info, allocate memory and fix field structs */
+
+static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file,
+ pbool fix_keys)
+{
+ int diff_length;
+ uint i,trees,huff_tree_bits,rec_reflength,length;
+ uint16 *decode_table,*tmp_buff;
+ ulong elements,intervall_length;
+ uchar *disk_cache;
+ uchar *intervall_buff;
+ uchar header[HEAD_LENGTH];
+ MARIA_BIT_BUFF bit_buff;
+ DBUG_ENTER("_ma_read_pack_info");
+
+ if (maria_quick_table_bits < 4)
+ maria_quick_table_bits=4;
+ else if (maria_quick_table_bits > MAX_QUICK_TABLE_BITS)
+ maria_quick_table_bits=MAX_QUICK_TABLE_BITS;
+
+ my_errno=0;
+ if (my_read(file,(uchar*) header,sizeof(header),MYF(MY_NABP)))
+ {
+ if (!my_errno)
+ my_errno=HA_ERR_END_OF_FILE;
+ goto err0;
+ }
+ /* Only the first three bytes of magic number are independent of version. */
+ if (memcmp((uchar*) header, (uchar*) maria_pack_file_magic, 3))
+ {
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ goto err0;
+ }
+ share->pack.version= header[3]; /* fourth uchar of magic number */
+ share->pack.header_length= uint4korr(header+4);
+ share->min_pack_length=(uint) uint4korr(header+8);
+ share->max_pack_length=(uint) uint4korr(header+12);
+ set_if_bigger(share->base.pack_reclength,share->max_pack_length);
+ elements=uint4korr(header+16);
+ intervall_length=uint4korr(header+20);
+ trees=uint2korr(header+24);
+ share->pack.ref_length=header[26];
+ rec_reflength=header[27];
+ diff_length=(int) rec_reflength - (int) share->base.rec_reflength;
+ if (fix_keys)
+ share->rec_reflength=rec_reflength;
+ DBUG_PRINT("info", ("fixed header length: %u", HEAD_LENGTH));
+ DBUG_PRINT("info", ("total header length: %lu", share->pack.header_length));
+ DBUG_PRINT("info", ("pack file version: %u", share->pack.version));
+ DBUG_PRINT("info", ("min pack length: %lu", share->min_pack_length));
+ DBUG_PRINT("info", ("max pack length: %lu", share->max_pack_length));
+ DBUG_PRINT("info", ("elements of all trees: %lu", elements));
+ DBUG_PRINT("info", ("distinct values bytes: %lu", intervall_length));
+ DBUG_PRINT("info", ("number of code trees: %u", trees));
+ DBUG_PRINT("info", ("bytes for record lgt: %u", share->pack.ref_length));
+ DBUG_PRINT("info", ("record pointer length: %u", rec_reflength));
+
+
+ /*
+ Memory segment #1:
+ - Decode tree heads
+ - Distinct column values
+ */
+ if (!(share->decode_trees=(MARIA_DECODE_TREE*)
+ my_malloc((uint) (trees*sizeof(MARIA_DECODE_TREE)+
+ intervall_length*sizeof(uchar)),
+ MYF(MY_WME))))
+ goto err0;
+ intervall_buff=(uchar*) (share->decode_trees+trees);
+
+ /*
+ Memory segment #2:
+ - Decode tables
+ - Quick decode tables
+ - Temporary decode table
+ - Compressed data file header cache
+ This segment will be reallocated after construction of the tables.
+ */
+ length=(uint) (elements*2+trees*(1 << maria_quick_table_bits));
+ if (!(share->decode_tables=(uint16*)
+ my_malloc((length+OFFSET_TABLE_SIZE)*sizeof(uint16)+
+ (uint) (share->pack.header_length - sizeof(header)) +
+ share->base.extra_rec_buff_size,
+ MYF(MY_WME | MY_ZEROFILL))))
+ goto err1;
+ tmp_buff=share->decode_tables+length;
+ disk_cache=(uchar*) (tmp_buff+OFFSET_TABLE_SIZE);
+
+ if (my_read(file,disk_cache,
+ (uint) (share->pack.header_length-sizeof(header)),
+ MYF(MY_NABP)))
+ goto err2;
+#ifdef HAVE_purify
+ /* Zero bytes accessed by fill_buffer */
+ bzero(disk_cache + (share->pack.header_length-sizeof(header)),
+ share->base.extra_rec_buff_size);
+#endif
+
+ huff_tree_bits=max_bit(trees ? trees-1 : 0);
+ init_bit_buffer(&bit_buff, disk_cache,
+ (uint) (share->pack.header_length-sizeof(header)));
+ /* Read new info for each field */
+ for (i=0 ; i < share->base.fields ; i++)
+ {
+ share->columndef[i].base_type=(enum en_fieldtype) get_bits(&bit_buff,5);
+ share->columndef[i].pack_type=(uint) get_bits(&bit_buff,6);
+ share->columndef[i].space_length_bits=get_bits(&bit_buff,5);
+ share->columndef[i].huff_tree=share->decode_trees+(uint) get_bits(&bit_buff,
+ huff_tree_bits);
+ share->columndef[i].unpack= get_unpack_function(share->columndef + i);
+ DBUG_PRINT("info", ("col: %2u type: %2u pack: %u slbits: %2u",
+ i, share->columndef[i].base_type,
+ share->columndef[i].pack_type,
+ share->columndef[i].space_length_bits));
+ }
+ skip_to_next_byte(&bit_buff);
+ /*
+ Construct the decoding tables from the file header. Keep track of
+ the used memory.
+ */
+ decode_table=share->decode_tables;
+ for (i=0 ; i < trees ; i++)
+ if (read_huff_table(&bit_buff,share->decode_trees+i,&decode_table,
+ &intervall_buff,tmp_buff))
+ goto err3;
+ /* Reallocate the decoding tables to the used size. */
+ decode_table=(uint16*)
+ my_realloc((uchar*) share->decode_tables,
+ (uint) ((uchar*) decode_table - (uchar*) share->decode_tables),
+ MYF(MY_HOLD_ON_ERROR));
+ /* Fix the table addresses in the tree heads. */
+ {
+ long diff=PTR_BYTE_DIFF(decode_table,share->decode_tables);
+ share->decode_tables=decode_table;
+ for (i=0 ; i < trees ; i++)
+ share->decode_trees[i].table=ADD_TO_PTR(share->decode_trees[i].table,
+ diff, uint16*);
+ }
+
+ /* Fix record-ref-length for keys */
+ if (fix_keys)
+ {
+ for (i=0 ; i < share->base.keys ; i++)
+ {
+ MARIA_KEYDEF *keyinfo= &share->keyinfo[i];
+ keyinfo->keylength+= (uint16) diff_length;
+ keyinfo->minlength+= (uint16) diff_length;
+ keyinfo->maxlength+= (uint16) diff_length;
+ keyinfo->seg[keyinfo->flag & HA_FULLTEXT ?
+ FT_SEGS : keyinfo->keysegs].length= (uint16) rec_reflength;
+ }
+ if (share->ft2_keyinfo.seg)
+ {
+ MARIA_KEYDEF *ft2_keyinfo= &share->ft2_keyinfo;
+ ft2_keyinfo->keylength+= (uint16) diff_length;
+ ft2_keyinfo->minlength+= (uint16) diff_length;
+ ft2_keyinfo->maxlength+= (uint16) diff_length;
+ }
+ }
+
+ if (bit_buff.error || bit_buff.pos < bit_buff.end)
+ goto err3;
+
+ DBUG_RETURN(0);
+
+err3:
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+err2:
+ my_free((uchar*) share->decode_tables,MYF(0));
+err1:
+ my_free((uchar*) share->decode_trees,MYF(0));
+err0:
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Read a huff-code-table from datafile.
+
+ SYNOPSIS
+ read_huff_table()
+ bit_buff Bit buffer pointing at start of the
+ decoding table in the file header cache.
+ decode_tree Pointer to the decode tree head.
+ decode_table IN/OUT Address of a pointer to the next free space.
+ intervall_buff IN/OUT Address of a pointer to the next unused values.
+ tmp_buff Buffer for temporary extraction of a full
+ decoding table as read from bit_buff.
+
+ RETURN
+ 0 OK.
+ 1 Error.
+*/
+static uint read_huff_table(MARIA_BIT_BUFF *bit_buff,
+ MARIA_DECODE_TREE *decode_tree,
+ uint16 **decode_table, uchar **intervall_buff,
+ uint16 *tmp_buff)
+{
+ uint min_chr,elements,char_bits,offset_bits,size,intervall_length,table_bits,
+ next_free_offset;
+ uint16 *ptr,*end;
+ DBUG_ENTER("read_huff_table");
+
+ if (!get_bits(bit_buff,1))
+ {
+ /* Byte value compression. */
+ min_chr=get_bits(bit_buff,8);
+ elements=get_bits(bit_buff,9);
+ char_bits=get_bits(bit_buff,5);
+ offset_bits=get_bits(bit_buff,5);
+ intervall_length=0;
+ ptr=tmp_buff;
+ ptr=tmp_buff;
+ DBUG_PRINT("info", ("byte value compression"));
+ DBUG_PRINT("info", ("minimum uchar value: %u", min_chr));
+ DBUG_PRINT("info", ("number of tree nodes: %u", elements));
+ DBUG_PRINT("info", ("bits for values: %u", char_bits));
+ DBUG_PRINT("info", ("bits for tree offsets: %u", offset_bits));
+ if (elements > 256)
+ {
+ DBUG_PRINT("error", ("ERROR: illegal number of tree elements: %u",
+ elements));
+ DBUG_RETURN(1);
+ }
+ }
+ else
+ {
+ /* Distinct column value compression. */
+ min_chr=0;
+ elements=get_bits(bit_buff,15);
+ intervall_length=get_bits(bit_buff,16);
+ char_bits=get_bits(bit_buff,5);
+ offset_bits=get_bits(bit_buff,5);
+ decode_tree->quick_table_bits=0;
+ ptr= *decode_table;
+ DBUG_PRINT("info", ("distinct column value compression"));
+ DBUG_PRINT("info", ("number of tree nodes: %u", elements));
+ DBUG_PRINT("info", ("value buffer length: %u", intervall_length));
+ DBUG_PRINT("info", ("bits for value index: %u", char_bits));
+ DBUG_PRINT("info", ("bits for tree offsets: %u", offset_bits));
+ }
+ size=elements*2-2;
+ DBUG_PRINT("info", ("tree size in uint16: %u", size));
+ DBUG_PRINT("info", ("tree size in bytes: %u",
+ size * (uint) sizeof(uint16)));
+
+ for (end=ptr+size ; ptr < end ; ptr++)
+ {
+ if (get_bit(bit_buff))
+ {
+ *ptr= (uint16) get_bits(bit_buff,offset_bits);
+ if ((ptr + *ptr >= end) || !*ptr)
+ {
+ DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree"));
+ DBUG_RETURN(1);
+ }
+ }
+ else
+ *ptr= (uint16) (IS_CHAR + (get_bits(bit_buff,char_bits) + min_chr));
+ }
+ skip_to_next_byte(bit_buff);
+
+ decode_tree->table= *decode_table;
+ decode_tree->intervalls= *intervall_buff;
+ if (! intervall_length)
+ {
+ /* Byte value compression. ptr started from tmp_buff. */
+ /* Find longest Huffman code from begin to end of tree in bits. */
+ table_bits= find_longest_bitstream(tmp_buff, ptr);
+ if (table_bits >= OFFSET_TABLE_SIZE)
+ DBUG_RETURN(1);
+ if (table_bits > maria_quick_table_bits)
+ table_bits=maria_quick_table_bits;
+ DBUG_PRINT("info", ("table bits: %u", table_bits));
+
+ next_free_offset= (1 << table_bits);
+ make_quick_table(*decode_table,tmp_buff,&next_free_offset,0,table_bits,
+ table_bits);
+ (*decode_table)+= next_free_offset;
+ decode_tree->quick_table_bits=table_bits;
+ }
+ else
+ {
+ /* Distinct column value compression. ptr started from *decode_table */
+ (*decode_table)=end;
+ /*
+ get_bits() moves some bytes to a cache buffer in advance. May need
+ to step back.
+ */
+ bit_buff->pos-= bit_buff->bits/8;
+ /* Copy the distinct column values from the buffer. */
+ memcpy(*intervall_buff,bit_buff->pos,(size_t) intervall_length);
+ (*intervall_buff)+=intervall_length;
+ bit_buff->pos+=intervall_length;
+ bit_buff->bits=0;
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Make a quick_table for faster decoding.
+
+ SYNOPSIS
+ make_quick_table()
+ to_table Target quick_table and remaining decode table.
+ decode_table Source Huffman (sub-)tree within tmp_buff.
+ next_free_offset IN/OUT Next free offset from to_table.
+ Starts behind quick_table on the top-level.
+ value Huffman bits found so far.
+ bits Remaining bits to be collected.
+ max_bits Total number of bits to collect (table_bits).
+
+ DESCRIPTION
+
+ The quick table is an array of 16-bit values. There exists one value
+ for each possible code representable by max_bits (table_bits) bits.
+ In most cases table_bits is 9. So there are 512 16-bit values.
+
+ If the high-order bit (16) is set (IS_CHAR) then the array slot for
+ this value is a valid Huffman code for a resulting uchar value.
+
+ The low-order 8 bits (1..8) are the resulting uchar value.
+
+ Bits 9..14 are the length of the Huffman code for this uchar value.
+ This means so many bits from the input stream were needed to
+ represent this uchar value. The remaining bits belong to later
+ Huffman codes. This also means that for every Huffman code shorter
+ than table_bits there are multiple entires in the array, which
+ differ just in the unused bits.
+
+ If the high-order bit (16) is clear (0) then the remaining bits are
+ the position of the remaining Huffman decode tree segment behind the
+ quick table.
+
+ RETURN
+ void
+*/
+
+static void make_quick_table(uint16 *to_table, uint16 *decode_table,
+ uint *next_free_offset, uint value, uint bits,
+ uint max_bits)
+{
+ DBUG_ENTER("make_quick_table");
+
+ /*
+ When down the table to the requested maximum, copy the rest of the
+ Huffman table.
+ */
+ if (!bits--)
+ {
+ /*
+ Remaining left Huffman tree segment starts behind quick table.
+ Remaining right Huffman tree segment starts behind left segment.
+ */
+ to_table[value]= (uint16) *next_free_offset;
+ /*
+ Re-construct the remaining Huffman tree segment at
+ next_free_offset in to_table.
+ */
+ *next_free_offset=copy_decode_table(to_table, *next_free_offset,
+ decode_table);
+ DBUG_VOID_RETURN;
+ }
+
+ /* Descent on the left side. Left side bits are clear (0). */
+ if (!(*decode_table & IS_CHAR))
+ {
+ /* Not a leaf. Follow the pointer. */
+ make_quick_table(to_table,decode_table+ *decode_table,
+ next_free_offset,value,bits,max_bits);
+ }
+ else
+ {
+ /*
+ A leaf. A Huffman code is complete. Fill the quick_table
+ array for all possible bit strings starting with this Huffman
+ code.
+ */
+ fill_quick_table(to_table+value,bits,max_bits,(uint) *decode_table);
+ }
+
+ /* Descent on the right side. Right side bits are set (1). */
+ decode_table++;
+ value|= (1 << bits);
+ if (!(*decode_table & IS_CHAR))
+ {
+ /* Not a leaf. Follow the pointer. */
+ make_quick_table(to_table,decode_table+ *decode_table,
+ next_free_offset,value,bits,max_bits);
+ }
+ else
+ {
+ /*
+ A leaf. A Huffman code is complete. Fill the quick_table
+ array for all possible bit strings starting with this Huffman
+ code.
+ */
+ fill_quick_table(to_table+value,bits,max_bits,(uint) *decode_table);
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Fill quick_table for all possible values starting with this Huffman code.
+
+ SYNOPSIS
+ fill_quick_table()
+ table Target quick_table position.
+ bits Unused bits from max_bits.
+ max_bits Total number of bits to collect (table_bits).
+ value The uchar encoded by the found Huffman code.
+
+ DESCRIPTION
+
+ Fill the segment (all slots) of the quick_table array with the
+ resulting value for the found Huffman code. There are as many slots
+ as there are combinations representable by the unused bits.
+
+ In most cases we use 9 table bits. Assume a 3-bit Huffman code. Then
+ there are 6 unused bits. Hence we fill 2**6 = 64 slots with the
+ value.
+
+ RETURN
+ void
+*/
+
+static void fill_quick_table(uint16 *table, uint bits, uint max_bits,
+ uint value)
+{
+ uint16 *end;
+ DBUG_ENTER("fill_quick_table");
+
+ /*
+ Bits 1..8 of value represent the decoded uchar value.
+ Bits 9..14 become the length of the Huffman code for this uchar value.
+ Bit 16 flags a valid code (IS_CHAR).
+ */
+ value|= (max_bits - bits) << 8 | IS_CHAR;
+
+ for (end= table + (uint) (((uint) 1 << bits)); table < end; table++)
+ {
+ *table= (uint16) value;
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Reconstruct a decode subtree at the target position.
+
+ SYNOPSIS
+ copy_decode_table()
+ to_pos Target quick_table and remaining decode table.
+ offset Next free offset from to_pos.
+ decode_table Source Huffman subtree within tmp_buff.
+
+ NOTE
+ Pointers in the decode tree are relative to the pointers position.
+
+ RETURN
+ next free offset from to_pos.
+*/
+
+static uint copy_decode_table(uint16 *to_pos, uint offset,
+ uint16 *decode_table)
+{
+ uint prev_offset= offset;
+ DBUG_ENTER("copy_decode_table");
+
+ /* Descent on the left side. */
+ if (!(*decode_table & IS_CHAR))
+ {
+ /* Set a pointer to the next target node. */
+ to_pos[offset]=2;
+ /* Copy the left hand subtree there. */
+ offset=copy_decode_table(to_pos,offset+2,decode_table+ *decode_table);
+ }
+ else
+ {
+ /* Copy the uchar value. */
+ to_pos[offset]= *decode_table;
+ /* Step behind this node. */
+ offset+=2;
+ }
+
+ /* Descent on the right side. */
+ decode_table++;
+ if (!(*decode_table & IS_CHAR))
+ {
+ /* Set a pointer to the next free target node. */
+ to_pos[prev_offset+1]=(uint16) (offset-prev_offset-1);
+ /* Copy the right hand subtree to the entry of that node. */
+ offset=copy_decode_table(to_pos,offset,decode_table+ *decode_table);
+ }
+ else
+ {
+ /* Copy the uchar value. */
+ to_pos[prev_offset+1]= *decode_table;
+ }
+ DBUG_RETURN(offset);
+}
+
+
+/*
+ Find the length of the longest Huffman code in this table in bits.
+
+ SYNOPSIS
+ find_longest_bitstream()
+ table Code (sub-)table start.
+ end End of code table.
+
+ IMPLEMENTATION
+
+ Recursively follow the branch(es) of the code pair on every level of
+ the tree until two uchar values (and no branch) are found. Add one to
+ each level when returning back from each recursion stage.
+
+ 'end' is used for error checking only. A clean tree terminates
+ before reaching 'end'. Hence the exact value of 'end' is not too
+ important. However having it higher than necessary could lead to
+ misbehaviour should 'next' jump into the dirty area.
+
+ RETURN
+ length Length of longest Huffman code in bits.
+ >= OFFSET_TABLE_SIZE Error, broken tree. It does not end before 'end'.
+*/
+
+static uint find_longest_bitstream(uint16 *table, uint16 *end)
+{
+ uint length=1;
+ uint length2;
+ if (!(*table & IS_CHAR))
+ {
+ uint16 *next= table + *table;
+ if (next > end || next == table)
+ {
+ DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree"));
+ return OFFSET_TABLE_SIZE;
+ }
+ length=find_longest_bitstream(next, end)+1;
+ }
+ table++;
+ if (!(*table & IS_CHAR))
+ {
+ uint16 *next= table + *table;
+ if (next > end || next == table)
+ {
+ DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree"));
+ return OFFSET_TABLE_SIZE;
+ }
+ length2= find_longest_bitstream(next, end) + 1;
+ length=max(length,length2);
+ }
+ return length;
+}
+
+
+/*
+ Read record from datafile.
+
+ SYNOPSIS
+ _ma_read_pack_record()
+ info A pointer to MARIA_HA.
+ filepos File offset of the record.
+ buf RETURN The buffer to receive the record.
+
+ RETURN
+ 0 On success
+ # Error number
+*/
+
+int _ma_read_pack_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos)
+{
+ MARIA_BLOCK_INFO block_info;
+ File file;
+ DBUG_ENTER("maria_read_pack_record");
+
+ if (filepos == HA_OFFSET_ERROR)
+ DBUG_RETURN(my_errno); /* _search() didn't find record */
+
+ file= info->dfile.file;
+ if (_ma_pack_get_block_info(info, &info->bit_buff, &block_info,
+ &info->rec_buff, &info->rec_buff_size, file,
+ filepos))
+ goto err;
+ if (my_read(file,(uchar*) info->rec_buff + block_info.offset ,
+ block_info.rec_len - block_info.offset, MYF(MY_NABP)))
+ goto panic;
+ info->update|= HA_STATE_AKTIV;
+ DBUG_RETURN(_ma_pack_rec_unpack(info,&info->bit_buff, buf,
+ info->rec_buff, block_info.rec_len));
+panic:
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+err:
+ DBUG_RETURN(my_errno);
+}
+
+
+
+int _ma_pack_rec_unpack(register MARIA_HA *info, MARIA_BIT_BUFF *bit_buff,
+ register uchar *to, uchar *from, ulong reclength)
+{
+ uchar *end_field;
+ reg3 MARIA_COLUMNDEF *end;
+ MARIA_COLUMNDEF *current_field;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_pack_rec_unpack");
+
+ if (info->s->base.null_bytes)
+ {
+ memcpy(to, from, info->s->base.null_bytes);
+ to+= info->s->base.null_bytes;
+ from+= info->s->base.null_bytes;
+ reclength-= info->s->base.null_bytes;
+ }
+ init_bit_buffer(bit_buff, (uchar*) from, reclength);
+ for (current_field=share->columndef, end=current_field+share->base.fields ;
+ current_field < end ;
+ current_field++,to=end_field)
+ {
+ end_field=to+current_field->length;
+ (*current_field->unpack)(current_field, bit_buff, to, end_field);
+ }
+ if (!bit_buff->error &&
+ bit_buff->pos - bit_buff->bits / 8 == bit_buff->end)
+ DBUG_RETURN(0);
+ info->update&= ~HA_STATE_AKTIV;
+ DBUG_RETURN(my_errno=HA_ERR_WRONG_IN_RECORD);
+} /* _ma_pack_rec_unpack */
+
+
+ /* Return function to unpack field */
+
+static void (*get_unpack_function(MARIA_COLUMNDEF *rec))
+ (MARIA_COLUMNDEF *, MARIA_BIT_BUFF *, uchar *, uchar *)
+{
+ switch (rec->base_type) {
+ case FIELD_SKIP_ZERO:
+ if (rec->pack_type & PACK_TYPE_ZERO_FILL)
+ return &uf_zerofill_skip_zero;
+ return &uf_skip_zero;
+ case FIELD_NORMAL:
+ if (rec->pack_type & PACK_TYPE_SPACE_FIELDS)
+ return &uf_space_normal;
+ if (rec->pack_type & PACK_TYPE_ZERO_FILL)
+ return &uf_zerofill_normal;
+ return &decode_bytes;
+ case FIELD_SKIP_ENDSPACE:
+ if (rec->pack_type & PACK_TYPE_SPACE_FIELDS)
+ {
+ if (rec->pack_type & PACK_TYPE_SELECTED)
+ return &uf_space_endspace_selected;
+ return &uf_space_endspace;
+ }
+ if (rec->pack_type & PACK_TYPE_SELECTED)
+ return &uf_endspace_selected;
+ return &uf_endspace;
+ case FIELD_SKIP_PRESPACE:
+ if (rec->pack_type & PACK_TYPE_SPACE_FIELDS)
+ {
+ if (rec->pack_type & PACK_TYPE_SELECTED)
+ return &uf_space_prespace_selected;
+ return &uf_space_prespace;
+ }
+ if (rec->pack_type & PACK_TYPE_SELECTED)
+ return &uf_prespace_selected;
+ return &uf_prespace;
+ case FIELD_CONSTANT:
+ return &uf_constant;
+ case FIELD_INTERVALL:
+ return &uf_intervall;
+ case FIELD_ZERO:
+ case FIELD_CHECK:
+ return &uf_zero;
+ case FIELD_BLOB:
+ return &uf_blob;
+ case FIELD_VARCHAR:
+ if (rec->length <= 256) /* 255 + 1 uchar length */
+ return &uf_varchar1;
+ return &uf_varchar2;
+ case FIELD_LAST:
+ default:
+ return 0; /* This should never happend */
+ }
+}
+
+ /* The different functions to unpack a field */
+
+static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ if (get_bit(bit_buff))
+ bzero((char*) to,(uint) (end-to));
+ else
+ {
+ end-=rec->space_length_bits;
+ decode_bytes(rec,bit_buff,to,end);
+ bzero((char*) end,rec->space_length_bits);
+ }
+}
+
+static void uf_skip_zero(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ if (get_bit(bit_buff))
+ bzero((char*) to,(uint) (end-to));
+ else
+ decode_bytes(rec,bit_buff,to,end);
+}
+
+static void uf_space_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ if (get_bit(bit_buff))
+ bfill((uchar*) to,(end-to),' ');
+ else
+ decode_bytes(rec,bit_buff,to,end);
+}
+
+static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ uint spaces;
+ if (get_bit(bit_buff))
+ bfill((uchar*) to,(end-to),' ');
+ else
+ {
+ if (get_bit(bit_buff))
+ {
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to,end-spaces);
+ bfill((uchar*) end-spaces,spaces,' ');
+ }
+ else
+ decode_bytes(rec,bit_buff,to,end);
+ }
+}
+
+static void uf_endspace_selected(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ uint spaces;
+ if (get_bit(bit_buff))
+ {
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to,end-spaces);
+ bfill((uchar*) end-spaces,spaces,' ');
+ }
+ else
+ decode_bytes(rec,bit_buff,to,end);
+}
+
+static void uf_space_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ uint spaces;
+ if (get_bit(bit_buff))
+ bfill((uchar*) to,(end-to),' ');
+ else
+ {
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to,end-spaces);
+ bfill((uchar*) end-spaces,spaces,' ');
+ }
+}
+
+static void uf_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ uint spaces;
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to,end-spaces);
+ bfill((uchar*) end-spaces,spaces,' ');
+}
+
+static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ uint spaces;
+ if (get_bit(bit_buff))
+ bfill((uchar*) to,(end-to),' ');
+ else
+ {
+ if (get_bit(bit_buff))
+ {
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ bfill((uchar*) to,spaces,' ');
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to+spaces,end);
+ }
+ else
+ decode_bytes(rec,bit_buff,to,end);
+ }
+}
+
+
+static void uf_prespace_selected(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ uint spaces;
+ if (get_bit(bit_buff))
+ {
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ bfill((uchar*) to,spaces,' ');
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to+spaces,end);
+ }
+ else
+ decode_bytes(rec,bit_buff,to,end);
+}
+
+
+static void uf_space_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ uint spaces;
+ if (get_bit(bit_buff))
+ bfill((uchar*) to,(end-to),' ');
+ else
+ {
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ bfill((uchar*) to,spaces,' ');
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to+spaces,end);
+ }
+}
+
+static void uf_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ uint spaces;
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ bfill((uchar*) to,spaces,' ');
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to+spaces,end);
+}
+
+static void uf_zerofill_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ end-=rec->space_length_bits;
+ decode_bytes(rec,bit_buff, to, end);
+ bzero((char*) end,rec->space_length_bits);
+}
+
+static void uf_constant(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff __attribute__((unused)),
+ uchar *to, uchar *end)
+{
+ memcpy(to,rec->huff_tree->intervalls,(size_t) (end-to));
+}
+
+static void uf_intervall(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to,
+ uchar *end)
+{
+ reg1 uint field_length=(uint) (end-to);
+ memcpy(to,rec->huff_tree->intervalls+field_length*decode_pos(bit_buff,
+ rec->huff_tree),
+ (size_t) field_length);
+}
+
+
+/*ARGSUSED*/
+static void uf_zero(MARIA_COLUMNDEF *rec __attribute__((unused)),
+ MARIA_BIT_BUFF *bit_buff __attribute__((unused)),
+ uchar *to, uchar *end)
+{
+ bzero(to, (uint) (end-to));
+}
+
+static void uf_blob(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ if (get_bit(bit_buff))
+ bzero(to, (uint) (end-to));
+ else
+ {
+ ulong length=get_bits(bit_buff,rec->space_length_bits);
+ uint pack_length=(uint) (end-to)-portable_sizeof_char_ptr;
+ if (bit_buff->blob_pos+length > bit_buff->blob_end)
+ {
+ bit_buff->error=1;
+ bzero((uchar*) to,(end-to));
+ return;
+ }
+ decode_bytes(rec,bit_buff,(uchar*) bit_buff->blob_pos,
+ (uchar*) bit_buff->blob_pos+length);
+ _ma_store_blob_length((uchar*) to,pack_length,length);
+ memcpy_fixed((char*) to+pack_length,(char*) &bit_buff->blob_pos,
+ sizeof(char*));
+ bit_buff->blob_pos+=length;
+ }
+}
+
+
+static void uf_varchar1(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end __attribute__((unused)))
+{
+ if (get_bit(bit_buff))
+ to[0]= 0; /* Zero lengths */
+ else
+ {
+ ulong length=get_bits(bit_buff,rec->space_length_bits);
+ *to= (char) length;
+ decode_bytes(rec,bit_buff,to+1,to+1+length);
+ }
+}
+
+
+static void uf_varchar2(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end __attribute__((unused)))
+{
+ if (get_bit(bit_buff))
+ to[0]=to[1]=0; /* Zero lengths */
+ else
+ {
+ ulong length=get_bits(bit_buff,rec->space_length_bits);
+ int2store(to,length);
+ decode_bytes(rec,bit_buff,to+2,to+2+length);
+ }
+}
+
+ /* Functions to decode of buffer of bits */
+
+#if BITS_SAVED == 64
+
+static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ reg1 uint bits,low_byte;
+ reg3 uint16 *pos;
+ reg4 uint table_bits,table_and;
+ MARIA_DECODE_TREE *decode_tree;
+
+ decode_tree=rec->decode_tree;
+ bits=bit_buff->bits; /* Save in reg for quicker access */
+ table_bits=decode_tree->quick_table_bits;
+ table_and= (1 << table_bits)-1;
+
+ do
+ {
+ if (bits <= 32)
+ {
+ if (bit_buff->pos > bit_buff->end+4)
+ {
+ bit_buff->error=1;
+ return; /* Can't be right */
+ }
+ bit_buff->current_byte= (bit_buff->current_byte << 32) +
+ ((((uint) bit_buff->pos[3])) +
+ (((uint) bit_buff->pos[2]) << 8) +
+ (((uint) bit_buff->pos[1]) << 16) +
+ (((uint) bit_buff->pos[0]) << 24));
+ bit_buff->pos+=4;
+ bits+=32;
+ }
+ /*
+ First use info in quick_table.
+
+ The quick table is an array of 16-bit values. There exists one
+ value for each possible code representable by table_bits bits.
+ In most cases table_bits is 9. So there are 512 16-bit values.
+
+ If the high-order bit (16) is set (IS_CHAR) then the array slot
+ for this value is a valid Huffman code for a resulting uchar value.
+
+ The low-order 8 bits (1..8) are the resulting uchar value.
+
+ Bits 9..14 are the length of the Huffman code for this uchar value.
+ This means so many bits from the input stream were needed to
+ represent this uchar value. The remaining bits belong to later
+ Huffman codes. This also means that for every Huffman code shorter
+ than table_bits there are multiple entires in the array, which
+ differ just in the unused bits.
+
+ If the high-order bit (16) is clear (0) then the remaining bits are
+ the position of the remaining Huffman decode tree segment behind the
+ quick table.
+ */
+ low_byte=(uint) (bit_buff->current_byte >> (bits - table_bits)) & table_and;
+ low_byte=decode_tree->table[low_byte];
+ if (low_byte & IS_CHAR)
+ {
+ /*
+ All Huffman codes of less or equal table_bits length are in the
+ quick table. This is one of them.
+ */
+ *to++ = (char) (low_byte & 255); /* Found char in quick table */
+ bits-= ((low_byte >> 8) & 31); /* Remove bits used */
+ }
+ else
+ { /* Map through rest of decode-table */
+ /* This means that the Huffman code must be longer than table_bits. */
+ pos=decode_tree->table+low_byte;
+ bits-=table_bits;
+ /* NOTE: decode_bytes_test_bit() is a macro wich contains a break !!! */
+ for (;;)
+ {
+ low_byte=(uint) (bit_buff->current_byte >> (bits-8));
+ decode_bytes_test_bit(0);
+ decode_bytes_test_bit(1);
+ decode_bytes_test_bit(2);
+ decode_bytes_test_bit(3);
+ decode_bytes_test_bit(4);
+ decode_bytes_test_bit(5);
+ decode_bytes_test_bit(6);
+ decode_bytes_test_bit(7);
+ bits-=8;
+ }
+ *to++ = (char) *pos;
+ }
+ } while (to != end);
+
+ bit_buff->bits=bits;
+ return;
+}
+
+#else
+
+static void decode_bytes(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *end)
+{
+ reg1 uint bits,low_byte;
+ reg3 uint16 *pos;
+ reg4 uint table_bits,table_and;
+ MARIA_DECODE_TREE *decode_tree;
+
+ decode_tree=rec->huff_tree;
+ bits=bit_buff->bits; /* Save in reg for quicker access */
+ table_bits=decode_tree->quick_table_bits;
+ table_and= (1 << table_bits)-1;
+
+ do
+ {
+ if (bits < table_bits)
+ {
+ if (bit_buff->pos > bit_buff->end+1)
+ {
+ bit_buff->error=1;
+ return; /* Can't be right */
+ }
+#if BITS_SAVED == 32
+ bit_buff->current_byte= (bit_buff->current_byte << 24) +
+ (((uint) ((uchar) bit_buff->pos[2]))) +
+ (((uint) ((uchar) bit_buff->pos[1])) << 8) +
+ (((uint) ((uchar) bit_buff->pos[0])) << 16);
+ bit_buff->pos+=3;
+ bits+=24;
+#else
+ if (bits) /* We must have at leasts 9 bits */
+ {
+ bit_buff->current_byte= (bit_buff->current_byte << 8) +
+ (uint) ((uchar) bit_buff->pos[0]);
+ bit_buff->pos++;
+ bits+=8;
+ }
+ else
+ {
+ bit_buff->current_byte= ((uint) ((uchar) bit_buff->pos[0]) << 8) +
+ ((uint) ((uchar) bit_buff->pos[1]));
+ bit_buff->pos+=2;
+ bits+=16;
+ }
+#endif
+ }
+ /* First use info in quick_table */
+ low_byte=(bit_buff->current_byte >> (bits - table_bits)) & table_and;
+ low_byte=decode_tree->table[low_byte];
+ if (low_byte & IS_CHAR)
+ {
+ *to++ = (low_byte & 255); /* Found char in quick table */
+ bits-= ((low_byte >> 8) & 31); /* Remove bits used */
+ }
+ else
+ { /* Map through rest of decode-table */
+ pos=decode_tree->table+low_byte;
+ bits-=table_bits;
+ for (;;)
+ {
+ if (bits < 8)
+ { /* We don't need to check end */
+#if BITS_SAVED == 32
+ bit_buff->current_byte= (bit_buff->current_byte << 24) +
+ (((uint) ((uchar) bit_buff->pos[2]))) +
+ (((uint) ((uchar) bit_buff->pos[1])) << 8) +
+ (((uint) ((uchar) bit_buff->pos[0])) << 16);
+ bit_buff->pos+=3;
+ bits+=24;
+#else
+ bit_buff->current_byte= (bit_buff->current_byte << 8) +
+ (uint) ((uchar) bit_buff->pos[0]);
+ bit_buff->pos+=1;
+ bits+=8;
+#endif
+ }
+ low_byte=(uint) (bit_buff->current_byte >> (bits-8));
+ decode_bytes_test_bit(0);
+ decode_bytes_test_bit(1);
+ decode_bytes_test_bit(2);
+ decode_bytes_test_bit(3);
+ decode_bytes_test_bit(4);
+ decode_bytes_test_bit(5);
+ decode_bytes_test_bit(6);
+ decode_bytes_test_bit(7);
+ bits-=8;
+ }
+ *to++ = (char) *pos;
+ }
+ } while (to != end);
+
+ bit_buff->bits=bits;
+ return;
+}
+#endif /* BIT_SAVED == 64 */
+
+
+static uint decode_pos(MARIA_BIT_BUFF *bit_buff,
+ MARIA_DECODE_TREE *decode_tree)
+{
+ uint16 *pos=decode_tree->table;
+ for (;;)
+ {
+ if (get_bit(bit_buff))
+ pos++;
+ if (*pos & IS_CHAR)
+ return (uint) (*pos & ~IS_CHAR);
+ pos+= *pos;
+ }
+}
+
+
+int _ma_read_rnd_pack_record(MARIA_HA *info,
+ uchar *buf,
+ register MARIA_RECORD_POS filepos,
+ my_bool skip_deleted_blocks)
+{
+ File file;
+ MARIA_BLOCK_INFO block_info;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_read_rnd_pack_record");
+
+ if (filepos >= info->state->data_file_length)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ goto err;
+ }
+
+ file= info->dfile.file;
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ if (_ma_read_cache(&info->rec_cache, (uchar*) block_info.header,
+ filepos, share->pack.ref_length,
+ skip_deleted_blocks ? READING_NEXT : 0))
+ goto err;
+ file= -1;
+ }
+ if (_ma_pack_get_block_info(info, &info->bit_buff, &block_info,
+ &info->rec_buff, &info->rec_buff_size,
+ file, filepos))
+ goto err; /* Error code is already set */
+#ifndef DBUG_OFF
+ if (block_info.rec_len > share->max_pack_length)
+ {
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ goto err;
+ }
+#endif
+
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ if (_ma_read_cache(&info->rec_cache, (uchar*) info->rec_buff,
+ block_info.filepos, block_info.rec_len,
+ skip_deleted_blocks ? READING_NEXT : 0))
+ goto err;
+ }
+ else
+ {
+ if (my_read(info->dfile.file, (uchar*)info->rec_buff + block_info.offset,
+ block_info.rec_len-block_info.offset,
+ MYF(MY_NABP)))
+ goto err;
+ }
+ info->packed_length= block_info.rec_len;
+ info->cur_row.lastpos= filepos;
+ info->cur_row.nextpos= block_info.filepos+block_info.rec_len;
+ info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
+
+ DBUG_RETURN (_ma_pack_rec_unpack(info, &info->bit_buff, buf,
+ info->rec_buff, block_info.rec_len));
+ err:
+ DBUG_RETURN(my_errno);
+}
+
+
+ /* Read and process header from a huff-record-file */
+
+uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff,
+ MARIA_BLOCK_INFO *info,
+ uchar **rec_buff_p, size_t *rec_buff_size_p,
+ File file, my_off_t filepos)
+{
+ uchar *header= info->header;
+ uint head_length,ref_length;
+ LINT_INIT(ref_length);
+
+ if (file >= 0)
+ {
+ ref_length=maria->s->pack.ref_length;
+ /*
+ We can't use my_pread() here because _ma_read_rnd_pack_record assumes
+ position is ok
+ */
+ VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0)));
+ if (my_read(file, header,ref_length,MYF(MY_NABP)))
+ return BLOCK_FATAL_ERROR;
+ DBUG_DUMP("header",(uchar*) header,ref_length);
+ }
+ head_length= read_pack_length((uint) maria->s->pack.version, header,
+ &info->rec_len);
+ if (maria->s->base.blobs)
+ {
+ head_length+= read_pack_length((uint) maria->s->pack.version,
+ header + head_length, &info->blob_len);
+ /*
+ Ensure that the record buffer is big enough for the compressed
+ record plus all expanded blobs. [We do not have an extra buffer
+ for the resulting blobs. Sigh.]
+ */
+ if (_ma_alloc_buffer(rec_buff_p, rec_buff_size_p,
+ info->rec_len + info->blob_len +
+ maria->s->base.extra_rec_buff_size))
+ return BLOCK_FATAL_ERROR; /* not enough memory */
+ bit_buff->blob_pos= (uchar*) *rec_buff_p + info->rec_len;
+ bit_buff->blob_end= bit_buff->blob_pos + info->blob_len;
+ maria->blob_length=info->blob_len;
+ }
+ info->filepos=filepos+head_length;
+ if (file > 0)
+ {
+ info->offset=min(info->rec_len, ref_length - head_length);
+ memcpy(*rec_buff_p, header + head_length, info->offset);
+ }
+ return 0;
+}
+
+
+ /* rutines for bit buffer */
+ /* Note buffer must be 6 uchar bigger than longest row */
+
+static void init_bit_buffer(MARIA_BIT_BUFF *bit_buff, uchar *buffer,
+ uint length)
+{
+ bit_buff->pos=buffer;
+ bit_buff->end=buffer+length;
+ bit_buff->bits=bit_buff->error=0;
+ bit_buff->current_byte=0; /* Avoid purify errors */
+}
+
+static uint fill_and_get_bits(MARIA_BIT_BUFF *bit_buff, uint count)
+{
+ uint tmp;
+ count-=bit_buff->bits;
+ tmp=(bit_buff->current_byte & mask[bit_buff->bits]) << count;
+ fill_buffer(bit_buff);
+ bit_buff->bits=BITS_SAVED - count;
+ return tmp+(bit_buff->current_byte >> (BITS_SAVED - count));
+}
+
+ /* Fill in empty bit_buff->current_byte from buffer */
+ /* Sets bit_buff->error if buffer is exhausted */
+
+static void fill_buffer(MARIA_BIT_BUFF *bit_buff)
+{
+ if (bit_buff->pos >= bit_buff->end)
+ {
+ bit_buff->error= 1;
+ bit_buff->current_byte=0;
+ return;
+ }
+#if BITS_SAVED == 64
+ bit_buff->current_byte= ((((uint) ((uchar) bit_buff->pos[7]))) +
+ (((uint) ((uchar) bit_buff->pos[6])) << 8) +
+ (((uint) ((uchar) bit_buff->pos[5])) << 16) +
+ (((uint) ((uchar) bit_buff->pos[4])) << 24) +
+ ((ulonglong)
+ ((((uint) ((uchar) bit_buff->pos[3]))) +
+ (((uint) ((uchar) bit_buff->pos[2])) << 8) +
+ (((uint) ((uchar) bit_buff->pos[1])) << 16) +
+ (((uint) ((uchar) bit_buff->pos[0])) << 24)) << 32));
+ bit_buff->pos+=8;
+#else
+#if BITS_SAVED == 32
+ bit_buff->current_byte= (((uint) ((uchar) bit_buff->pos[3])) +
+ (((uint) ((uchar) bit_buff->pos[2])) << 8) +
+ (((uint) ((uchar) bit_buff->pos[1])) << 16) +
+ (((uint) ((uchar) bit_buff->pos[0])) << 24));
+ bit_buff->pos+=4;
+#else
+ bit_buff->current_byte= (uint) (((uint) ((uchar) bit_buff->pos[1]))+
+ (((uint) ((uchar) bit_buff->pos[0])) << 8));
+ bit_buff->pos+=2;
+#endif
+#endif
+}
+
+ /* Get number of bits neaded to represent value */
+
+static uint max_bit(register uint value)
+{
+ reg2 uint power=1;
+
+ while ((value>>=1))
+ power++;
+ return (power);
+}
+
+
+/*****************************************************************************
+ Some redefined functions to handle files when we are using memmap
+*****************************************************************************/
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#ifdef HAVE_MMAP
+
+static int _ma_read_mempack_record(MARIA_HA *info, uchar *buf,
+ MARIA_RECORD_POS filepos);
+static int _ma_read_rnd_mempack_record(MARIA_HA*, uchar *, MARIA_RECORD_POS,
+ my_bool);
+
+my_bool _ma_memmap_file(MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("maria_memmap_file");
+
+ if (!info->s->file_map)
+ {
+ if (my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0)) <
+ share->state.state.data_file_length+MEMMAP_EXTRA_MARGIN)
+ {
+ DBUG_PRINT("warning",("File isn't extended for memmap"));
+ DBUG_RETURN(0);
+ }
+ if (_ma_dynmap_file(info, share->state.state.data_file_length))
+ DBUG_RETURN(0);
+ }
+ info->opt_flag|= MEMMAP_USED;
+ info->read_record= share->read_record= _ma_read_mempack_record;
+ share->scan= _ma_read_rnd_mempack_record;
+ DBUG_RETURN(1);
+}
+
+
+void _ma_unmap_file(MARIA_HA *info)
+{
+ VOID(my_munmap((char*) info->s->file_map,
+ (size_t) info->s->mmaped_length + MEMMAP_EXTRA_MARGIN));
+}
+
+
+static uchar *
+_ma_mempack_get_block_info(MARIA_HA *maria,
+ MARIA_BIT_BUFF *bit_buff,
+ MARIA_BLOCK_INFO *info,
+ uchar **rec_buff_p,
+ size_t *rec_buff_size_p,
+ uchar *header)
+{
+ header+= read_pack_length((uint) maria->s->pack.version, header,
+ &info->rec_len);
+ if (maria->s->base.blobs)
+ {
+ header+= read_pack_length((uint) maria->s->pack.version, header,
+ &info->blob_len);
+ /* _ma_alloc_rec_buff sets my_errno on error */
+ if (_ma_alloc_buffer(rec_buff_p, rec_buff_size_p,
+ info->blob_len + maria->s->base.extra_rec_buff_size))
+ return 0; /* not enough memory */
+ bit_buff->blob_pos= (uchar*) *rec_buff_p;
+ bit_buff->blob_end= (uchar*) *rec_buff_p + info->blob_len;
+ }
+ return header;
+}
+
+
+static int _ma_read_mempack_record(MARIA_HA *info, uchar *buf,
+ MARIA_RECORD_POS filepos)
+{
+ MARIA_BLOCK_INFO block_info;
+ MARIA_SHARE *share= info->s;
+ uchar *pos;
+ DBUG_ENTER("maria_read_mempack_record");
+
+ if (filepos == HA_OFFSET_ERROR)
+ DBUG_RETURN(my_errno); /* _search() didn't find record */
+
+ if (!(pos= (uchar*) _ma_mempack_get_block_info(info, &info->bit_buff,
+ &block_info, &info->rec_buff,
+ &info->rec_buff_size,
+ (uchar*) share->file_map+
+ filepos)))
+ DBUG_RETURN(my_errno);
+ DBUG_RETURN(_ma_pack_rec_unpack(info, &info->bit_buff, buf,
+ pos, block_info.rec_len));
+}
+
+
+/*ARGSUSED*/
+static int _ma_read_rnd_mempack_record(MARIA_HA *info,
+ uchar *buf,
+ register MARIA_RECORD_POS filepos,
+ my_bool skip_deleted_blocks
+ __attribute__((unused)))
+{
+ MARIA_BLOCK_INFO block_info;
+ MARIA_SHARE *share= info->s;
+ uchar *pos,*start;
+ DBUG_ENTER("_ma_read_rnd_mempack_record");
+
+ if (filepos >= share->state.state.data_file_length)
+ {
+ my_errno=HA_ERR_END_OF_FILE;
+ goto err;
+ }
+ if (!(pos= (uchar*) _ma_mempack_get_block_info(info, &info->bit_buff,
+ &block_info,
+ &info->rec_buff,
+ &info->rec_buff_size,
+ (uchar*)
+ (start= share->file_map +
+ filepos))))
+ goto err;
+#ifndef DBUG_OFF
+ if (block_info.rec_len > info->s->max_pack_length)
+ {
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ goto err;
+ }
+#endif
+ info->packed_length=block_info.rec_len;
+ info->cur_row.lastpos= filepos;
+ info->cur_row.nextpos= filepos+(uint) (pos-start)+block_info.rec_len;
+ info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
+
+ DBUG_RETURN (_ma_pack_rec_unpack(info, &info->bit_buff, buf,
+ pos, block_info.rec_len));
+ err:
+ DBUG_RETURN(my_errno);
+}
+
+#endif /* HAVE_MMAP */
+
+ /* Save length of row */
+
+uint _ma_save_pack_length(uint version, uchar *block_buff, ulong length)
+{
+ if (length < 254)
+ {
+ *(uchar*) block_buff= (uchar) length;
+ return 1;
+ }
+ if (length <= 65535)
+ {
+ *(uchar*) block_buff=254;
+ int2store(block_buff+1,(uint) length);
+ return 3;
+ }
+ *(uchar*) block_buff=255;
+ if (version == 1) /* old format */
+ {
+ DBUG_ASSERT(length <= 0xFFFFFF);
+ int3store(block_buff + 1, (ulong) length);
+ return 4;
+ }
+ else
+ {
+ int4store(block_buff + 1, (ulong) length);
+ return 5;
+ }
+}
+
+
+static uint read_pack_length(uint version, const uchar *buf, ulong *length)
+{
+ if (buf[0] < 254)
+ {
+ *length= buf[0];
+ return 1;
+ }
+ else if (buf[0] == 254)
+ {
+ *length= uint2korr(buf + 1);
+ return 3;
+ }
+ if (version == 1) /* old format */
+ {
+ *length= uint3korr(buf + 1);
+ return 4;
+ }
+ else
+ {
+ *length= uint4korr(buf + 1);
+ return 5;
+ }
+}
+
+
+uint _ma_calc_pack_length(uint version, ulong length)
+{
+ return (length < 254) ? 1 : (length < 65536) ? 3 : (version == 1) ? 4 : 5;
+}
diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c
new file mode 100644
index 00000000000..863f3eede3f
--- /dev/null
+++ b/storage/maria/ma_page.c
@@ -0,0 +1,341 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Read and write key blocks */
+
+#include "maria_def.h"
+#include "trnman.h"
+#include "ma_key_recover.h"
+
+/* Fetch a key-page in memory */
+
+uchar *_ma_fetch_keypage(register MARIA_HA *info,
+ MARIA_KEYDEF *keyinfo __attribute__ ((unused)),
+ my_off_t page, enum pagecache_page_lock lock,
+ int level, uchar *buff,
+ int return_buffer __attribute__ ((unused)),
+ MARIA_PINNED_PAGE **page_link_res)
+{
+ uchar *tmp;
+ uint page_size __attribute__((unused));
+ MARIA_PINNED_PAGE page_link;
+ MARIA_SHARE *share= info->s;
+ uint block_size= share->block_size;
+ DBUG_ENTER("_ma_fetch_keypage");
+ DBUG_PRINT("enter",("page: %ld", (long) page));
+
+ tmp= pagecache_read(share->pagecache, &share->kfile,
+ page / block_size, level, buff,
+ share->page_type, lock, &page_link.link);
+
+ if (lock != PAGECACHE_LOCK_LEFT_UNLOCKED)
+ {
+ DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE);
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 0;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+ *page_link_res= dynamic_element(&info->pinned_pages,
+ info->pinned_pages.elements-1,
+ MARIA_PINNED_PAGE *);
+ }
+
+ if (tmp == info->buff)
+ info->keyread_buff_used=1;
+ else if (!tmp)
+ {
+ DBUG_PRINT("error",("Got errno: %d from pagecache_read",my_errno));
+ info->last_keypage=HA_OFFSET_ERROR;
+ maria_print_error(share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(0);
+ }
+ info->last_keypage=page;
+#ifdef EXTRA_DEBUG
+ page_size= _ma_get_page_used(share, tmp);
+ if (page_size < 4 || page_size > block_size ||
+ _ma_get_keynr(share, tmp) != keyinfo->key_nr)
+ {
+ DBUG_PRINT("error",("page %lu had wrong page length: %u keynr: %u",
+ (ulong) page, page_size,
+ _ma_get_keynr(share, tmp)));
+ DBUG_DUMP("page", (char*) tmp, page_size);
+ info->last_keypage = HA_OFFSET_ERROR;
+ maria_print_error(share, HA_ERR_CRASHED);
+ my_errno= HA_ERR_CRASHED;
+ tmp= 0;
+ }
+#endif
+ DBUG_RETURN(tmp);
+} /* _ma_fetch_keypage */
+
+
+/* Write a key-page on disk */
+
+int _ma_write_keypage(register MARIA_HA *info,
+ register MARIA_KEYDEF *keyinfo __attribute__((unused)),
+ my_off_t page, enum pagecache_page_lock lock,
+ int level, uchar *buff)
+{
+ MARIA_SHARE *share= info->s;
+ MARIA_PINNED_PAGE page_link;
+ uint block_size= share->block_size;
+ int res;
+ DBUG_ENTER("_ma_write_keypage");
+
+#ifdef EXTRA_DEBUG /* Safety check */
+ {
+ uint page_length, nod;
+ _ma_get_used_and_nod(share, buff, page_length, nod);
+ if (page < share->base.keystart ||
+ page+block_size > info->state->key_file_length ||
+ (page & (MARIA_MIN_KEY_BLOCK_LENGTH-1)))
+ {
+ DBUG_PRINT("error",("Trying to write inside key status region: "
+ "key_start: %lu length: %lu page: %lu",
+ (long) share->base.keystart,
+ (long) info->state->key_file_length,
+ (long) page));
+ my_errno=EINVAL;
+ DBUG_ASSERT(0);
+ DBUG_RETURN((-1));
+ }
+ DBUG_PRINT("page",("write page at: %lu",(long) page));
+ DBUG_DUMP("buff", buff, page_length);
+ DBUG_ASSERT(page_length >= share->keypage_header + nod +
+ keyinfo->minlength || maria_in_recovery);
+ }
+#endif
+
+ /* Verify that keynr is correct */
+ DBUG_ASSERT(_ma_get_keynr(share, buff) == keyinfo->key_nr);
+
+#if defined(EXTRA_DEBUG) && defined(HAVE_purify)
+ {
+ /* This is here to catch uninitialized bytes */
+ ulong crc= my_checksum(0, buff, block_size - KEYPAGE_CHECKSUM_SIZE);
+ int4store(buff + block_size - KEYPAGE_CHECKSUM_SIZE, crc);
+ }
+#endif
+
+#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
+ {
+ uint length= _ma_get_page_used(share, buff);
+ DBUG_ASSERT(length <= block_size - KEYPAGE_CHECKSUM_SIZE);
+ bzero(buff + length, block_size - length);
+ }
+#endif
+ DBUG_ASSERT(share->pagecache->block_size == block_size);
+
+ res= pagecache_write(share->pagecache,
+ &share->kfile, page / block_size,
+ level, buff, share->page_type,
+ lock,
+ lock == PAGECACHE_LOCK_LEFT_WRITELOCKED ?
+ PAGECACHE_PIN_LEFT_PINNED :
+ PAGECACHE_PIN,
+ PAGECACHE_WRITE_DELAY, &page_link.link,
+ LSN_IMPOSSIBLE);
+
+ if (lock == PAGECACHE_LOCK_WRITE)
+ {
+ /* It was not locked before, we have to unlock it when we unpin pages */
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+ }
+ DBUG_RETURN(res);
+
+} /* maria_write_keypage */
+
+
+/*
+ @brief Put page in free list
+
+ @fn _ma_dispose()
+ @param info Maria handle
+ @param pos Address to page
+ @param page_not_read 1 if page has not yet been read
+
+ @note
+ The page at 'pos' must have been read with a write lock
+
+ @return
+ @retval 0 ok
+ £retval 1 error
+
+*/
+
+int _ma_dispose(register MARIA_HA *info, my_off_t pos, my_bool page_not_read)
+{
+ my_off_t old_link;
+ uchar buff[MAX_KEYPAGE_HEADER_SIZE+8];
+ ulonglong page_no;
+ MARIA_SHARE *share= info->s;
+ MARIA_PINNED_PAGE page_link;
+ uint block_size= share->block_size;
+ int result= 0;
+ enum pagecache_page_lock lock_method;
+ enum pagecache_page_pin pin_method;
+ DBUG_ENTER("_ma_dispose");
+ DBUG_PRINT("enter",("pos: %ld", (long) pos));
+ DBUG_ASSERT(pos % block_size == 0);
+
+ (void) _ma_lock_key_del(info, 0);
+
+ old_link= share->current_key_del;
+ share->current_key_del= pos;
+ page_no= pos / block_size;
+ bzero(buff, share->keypage_header);
+ _ma_store_keynr(share, buff, (uchar) MARIA_DELETE_KEY_NR);
+ _ma_store_page_used(share, buff, share->keypage_header + 8);
+ mi_sizestore(buff + share->keypage_header, old_link);
+ share->state.changed|= STATE_NOT_SORTED_PAGES;
+
+ if (share->now_transactional)
+ {
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2];
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
+ my_off_t page;
+
+ /* Store address of deleted page */
+ page_store(log_data + FILEID_STORE_SIZE, page_no);
+
+ /* Store link to next unused page (the link that is written to page) */
+ page= (old_link == HA_OFFSET_ERROR ? IMPOSSIBLE_PAGE_NO :
+ old_link / block_size);
+ page_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, page);
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+
+ if (translog_write_record(&lsn, LOGREC_REDO_INDEX_FREE_PAGE,
+ info->trn, info, sizeof(log_data),
+ TRANSLOG_INTERNAL_PARTS + 1, log_array,
+ log_data, NULL))
+ result= 1;
+ }
+
+ if (page_not_read)
+ {
+ lock_method= PAGECACHE_LOCK_WRITE;
+ pin_method= PAGECACHE_PIN;
+ }
+ else
+ {
+ lock_method= PAGECACHE_LOCK_LEFT_WRITELOCKED;
+ pin_method= PAGECACHE_PIN_LEFT_PINNED;
+ }
+
+ if (pagecache_write_part(share->pagecache,
+ &share->kfile, (pgcache_page_no_t) page_no,
+ PAGECACHE_PRIORITY_LOW, buff,
+ share->page_type,
+ lock_method, pin_method,
+ PAGECACHE_WRITE_DELAY, &page_link.link,
+ LSN_IMPOSSIBLE,
+ 0, share->keypage_header + 8))
+ result= 1;
+
+#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
+ {
+ uchar *page_buff= pagecache_block_link_to_buffer(page_link.link);
+ bzero(page_buff + share->keypage_header + 8,
+ block_size - share->keypage_header - 8 - KEYPAGE_CHECKSUM_SIZE);
+ }
+#endif
+
+ if (page_not_read)
+ {
+ /* It was not locked before, we have to unlock it when we unpin pages */
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+ }
+
+ DBUG_RETURN(result);
+} /* _ma_dispose */
+
+
+/**
+ @brief Get address for free page to use
+
+ @fn _ma_new()
+ @param info Maria handle
+ @param level Type of key block (caching priority for pagecache)
+ @param page_link Pointer to page in page cache if read. One can
+ check if this is used by checking if
+ page_link->changed != 0
+
+ @return
+ HA_OFFSET_ERROR File is full or page read error
+ # Page address to use
+*/
+
+my_off_t _ma_new(register MARIA_HA *info, int level,
+ MARIA_PINNED_PAGE **page_link)
+
+{
+ my_off_t pos;
+ MARIA_SHARE *share= info->s;
+ uint block_size= share->block_size;
+ DBUG_ENTER("_ma_new");
+
+ if (_ma_lock_key_del(info, 1))
+ {
+ if (info->state->key_file_length >=
+ share->base.max_key_file_length - block_size)
+ {
+ my_errno=HA_ERR_INDEX_FILE_FULL;
+ DBUG_RETURN(HA_OFFSET_ERROR);
+ }
+ pos= info->state->key_file_length;
+ info->state->key_file_length+= block_size;
+ (*page_link)->changed= 0;
+ (*page_link)->write_lock= PAGECACHE_LOCK_WRITE;
+ }
+ else
+ {
+ uchar *buff;
+ /*
+ TODO: replace PAGECACHE_PLAIN_PAGE with PAGECACHE_LSN_PAGE when
+ LSN on the pages will be implemented
+ */
+ pos= share->current_key_del; /* Protected */
+ DBUG_ASSERT(share->pagecache->block_size == block_size);
+ if (!(buff= pagecache_read(share->pagecache,
+ &share->kfile, pos / block_size, level,
+ 0, share->page_type,
+ PAGECACHE_LOCK_WRITE, &(*page_link)->link)))
+ pos= HA_OFFSET_ERROR;
+ else
+ {
+ share->current_key_del= mi_sizekorr(buff+share->keypage_header);
+ DBUG_ASSERT(share->current_key_del != share->state.key_del &&
+ share->current_key_del);
+ }
+
+ (*page_link)->unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ (*page_link)->write_lock= PAGECACHE_LOCK_WRITE;
+ (*page_link)->changed= 0;
+ push_dynamic(&info->pinned_pages, (void*) *page_link);
+ *page_link= dynamic_element(&info->pinned_pages,
+ info->pinned_pages.elements-1,
+ MARIA_PINNED_PAGE *);
+ }
+ share->state.changed|= STATE_NOT_SORTED_PAGES;
+ DBUG_PRINT("exit",("Pos: %ld",(long) pos));
+ DBUG_RETURN(pos);
+} /* _ma_new */
diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c
new file mode 100755
index 00000000000..962018256d4
--- /dev/null
+++ b/storage/maria/ma_pagecache.c
@@ -0,0 +1,4517 @@
+/* Copyright (C) 2000-2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ These functions handle page caching for Maria tables.
+
+ One cache can handle many files.
+ It must contain buffers of the same blocksize.
+ init_pagecache() should be used to init cache handler.
+
+ The free list (free_block_list) is a stack like structure.
+ When a block is freed by free_block(), it is pushed onto the stack.
+ When a new block is required it is first tried to pop one from the stack.
+ If the stack is empty, it is tried to get a never-used block from the pool.
+ If this is empty too, then a block is taken from the LRU ring, flushing it
+ to disk, if necessary. This is handled in find_block().
+ With the new free list, the blocks can have three temperatures:
+ hot, warm and cold (which is free). This is remembered in the block header
+ by the enum PCBLOCK_TEMPERATURE temperature variable. Remembering the
+ temperature is necessary to correctly count the number of warm blocks,
+ which is required to decide when blocks are allowed to become hot. Whenever
+ a block is inserted to another (sub-)chain, we take the old and new
+ temperature into account to decide if we got one more or less warm block.
+ blocks_unused is the sum of never used blocks in the pool and of currently
+ free blocks. blocks_used is the number of blocks fetched from the pool and
+ as such gives the maximum number of in-use blocks at any time.
+*/
+
+#include "maria_def.h"
+#include <m_string.h>
+#include "ma_pagecache.h"
+#include <my_bit.h>
+#include <errno.h>
+
+/*
+ Some compilation flags have been added specifically for this module
+ to control the following:
+ - not to let a thread to yield the control when reading directly
+ from page cache, which might improve performance in many cases;
+ to enable this add:
+ #define SERIALIZED_READ_FROM_CACHE
+ - to set an upper bound for number of threads simultaneously
+ using the page cache; this setting helps to determine an optimal
+ size for hash table and improve performance when the number of
+ blocks in the page cache much less than the number of threads
+ accessing it;
+ to set this number equal to <N> add
+ #define MAX_THREADS <N>
+ - to substitute calls of pthread_cond_wait for calls of
+ pthread_cond_timedwait (wait with timeout set up);
+ this setting should be used only when you want to trap a deadlock
+ situation, which theoretically should not happen;
+ to set timeout equal to <T> seconds add
+ #define PAGECACHE_TIMEOUT <T>
+ - to enable the module traps and to send debug information from
+ page cache module to a special debug log add:
+ #define PAGECACHE_DEBUG
+ the name of this debug log file <LOG NAME> can be set through:
+ #define PAGECACHE_DEBUG_LOG <LOG NAME>
+ if the name is not defined, it's set by default;
+ if the PAGECACHE_DEBUG flag is not set up and we are in a debug
+ mode, i.e. when ! defined(DBUG_OFF), the debug information from the
+ module is sent to the regular debug log.
+
+ Example of the settings:
+ #define SERIALIZED_READ_FROM_CACHE
+ #define MAX_THREADS 100
+ #define PAGECACHE_TIMEOUT 1
+ #define PAGECACHE_DEBUG
+ #define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log"
+*/
+
+/*
+ In key cache we have external raw locking here we use
+ SERIALIZED_READ_FROM_CACHE to avoid problem of reading
+ not consistent data from the page.
+ (keycache functions (key_cache_read(), key_cache_insert() and
+ key_cache_write()) rely on external MyISAM lock, we don't)
+*/
+#define SERIALIZED_READ_FROM_CACHE yes
+
+#define PCBLOCK_INFO(B) \
+ DBUG_PRINT("info", \
+ ("block: 0x%lx fd: %lu page: %lu s: %0x hshL: 0x%lx req: %u/%u " \
+ "wrlocks: %u pins: %u", \
+ (ulong)(B), \
+ (ulong)((B)->hash_link ? \
+ (B)->hash_link->file.file : \
+ 0), \
+ (ulong)((B)->hash_link ? \
+ (B)->hash_link->pageno : \
+ 0), \
+ (B)->status, \
+ (ulong)(B)->hash_link, \
+ (uint) (B)->requests, \
+ (uint)((B)->hash_link ? \
+ (B)->hash_link->requests : \
+ 0), \
+ block->wlocks, \
+ (uint)(B)->pins))
+
+/* TODO: put it to my_static.c */
+my_bool my_disable_flush_pagecache_blocks= 0;
+
+#define STRUCT_PTR(TYPE, MEMBER, a) \
+ (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))
+
+/* types of condition variables */
+#define COND_FOR_REQUESTED 0 /* queue of thread waiting for read operation */
+#define COND_FOR_SAVED 1 /* queue of thread waiting for flush */
+#define COND_FOR_WRLOCK 2 /* queue of write lock */
+#define COND_SIZE 3 /* number of COND_* queues */
+
+/* offset of LSN on the page */
+#define PAGE_LSN_OFFSET 0
+
+typedef pthread_cond_t KEYCACHE_CONDVAR;
+
+/* descriptor of the page in the page cache block buffer */
+struct st_pagecache_page
+{
+ PAGECACHE_FILE file; /* file to which the page belongs to */
+ pgcache_page_no_t pageno; /* number of the page in the file */
+};
+
+/* element in the chain of a hash table bucket */
+struct st_pagecache_hash_link
+{
+ struct st_pagecache_hash_link
+ *next, **prev; /* to connect links in the same bucket */
+ struct st_pagecache_block_link
+ *block; /* reference to the block for the page: */
+ PAGECACHE_FILE file; /* from such a file */
+ pgcache_page_no_t pageno; /* this page */
+ uint requests; /* number of requests for the page */
+};
+
+/* simple states of a block */
+#define PCBLOCK_ERROR 1 /* an error occurred when performing disk i/o */
+#define PCBLOCK_READ 2 /* the is page in the block buffer */
+#define PCBLOCK_IN_SWITCH 4 /* block is preparing to read new page */
+#define PCBLOCK_REASSIGNED 8 /* block does not accept requests for old page */
+#define PCBLOCK_IN_FLUSH 16 /* block is in flush operation */
+#define PCBLOCK_CHANGED 32 /* block buffer contains a dirty page */
+#define PCBLOCK_DIRECT_W 64 /* possible direct write to the block */
+
+/* page status, returned by find_block */
+#define PAGE_READ 0
+#define PAGE_TO_BE_READ 1
+#define PAGE_WAIT_TO_BE_READ 2
+
+/* block temperature determines in which (sub-)chain the block currently is */
+enum PCBLOCK_TEMPERATURE { PCBLOCK_COLD /*free*/ , PCBLOCK_WARM , PCBLOCK_HOT };
+
+/* debug info */
+#ifndef DBUG_OFF
+static const char *page_cache_page_type_str[]=
+{
+ /* used only for control page type changing during debugging */
+ "EMPTY",
+ "PLAIN",
+ "LSN",
+ "READ_UNKNOWN"
+};
+
+static const char *page_cache_page_write_mode_str[]=
+{
+ "DELAY",
+ "DONE"
+};
+
+static const char *page_cache_page_lock_str[]=
+{
+ "free -> free",
+ "read -> read",
+ "write -> write",
+ "free -> read",
+ "free -> write",
+ "read -> free",
+ "write -> free",
+ "write -> read"
+};
+
+static const char *page_cache_page_pin_str[]=
+{
+ "pinned -> pinned",
+ "unpinned -> unpinned",
+ "unpinned -> pinned",
+ "pinned -> unpinned"
+};
+
+
+typedef struct st_pagecache_pin_info
+{
+ struct st_pagecache_pin_info *next, **prev;
+ struct st_my_thread_var *thread;
+} PAGECACHE_PIN_INFO;
+
+/*
+ st_pagecache_lock_info structure should be kept in next, prev, thread part
+ compatible with st_pagecache_pin_info to be compatible in functions.
+*/
+
+typedef struct st_pagecache_lock_info
+{
+ struct st_pagecache_lock_info *next, **prev;
+ struct st_my_thread_var *thread;
+ my_bool write_lock;
+} PAGECACHE_LOCK_INFO;
+
+
+/* service functions maintain debugging info about pin & lock */
+
+
+/*
+ Links information about thread pinned/locked the block to the list
+
+ SYNOPSIS
+ info_link()
+ list the list to link in
+ node the node which should be linked
+*/
+
+static void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node)
+{
+ if ((node->next= *list))
+ node->next->prev= &(node->next);
+ *list= node;
+ node->prev= list;
+}
+
+
+/*
+ Unlinks information about thread pinned/locked the block from the list
+
+ SYNOPSIS
+ info_unlink()
+ node the node which should be unlinked
+*/
+
+static void info_unlink(PAGECACHE_PIN_INFO *node)
+{
+ if ((*node->prev= node->next))
+ node->next->prev= node->prev;
+}
+
+
+/*
+ Finds information about given thread in the list of threads which
+ pinned/locked this block.
+
+ SYNOPSIS
+ info_find()
+ list the list where to find the thread
+ thread thread ID (reference to the st_my_thread_var
+ of the thread)
+
+ RETURN
+ 0 - the thread was not found
+ pointer to the information node of the thread in the list
+*/
+
+static PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list,
+ struct st_my_thread_var *thread)
+{
+ register PAGECACHE_PIN_INFO *i= list;
+ for(; i != 0; i= i->next)
+ if (i->thread == thread)
+ return i;
+ return 0;
+}
+
+#endif /* !DBUG_OFF */
+
+/* page cache block */
+struct st_pagecache_block_link
+{
+ struct st_pagecache_block_link
+ *next_used, **prev_used; /* to connect links in the LRU chain (ring) */
+ struct st_pagecache_block_link
+ *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */
+ struct st_pagecache_hash_link
+ *hash_link; /* backward ptr to referring hash_link */
+#ifndef DBUG_OFF
+ PAGECACHE_PIN_INFO *pin_list;
+ PAGECACHE_LOCK_INFO *lock_list;
+#endif
+ KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */
+ uchar *buffer; /* buffer for the block page */
+ PAGECACHE_FILE *write_locker;
+ ulonglong last_hit_time; /* timestamp of the last hit */
+ WQUEUE
+ wqueue[COND_SIZE]; /* queues on waiting requests for new/old pages */
+ uint requests; /* number of requests for the block */
+ uint status; /* state of the block */
+ uint pins; /* pin counter */
+ uint wlocks; /* write locks counter */
+ enum PCBLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */
+ enum pagecache_page_type type; /* type of the block */
+ uint hits_left; /* number of hits left until promotion */
+ /** @brief LSN when first became dirty; LSN_MAX means "not yet set" */
+ LSN rec_lsn;
+};
+
+/** @brief information describing a run of flush_pagecache_blocks_int() */
+struct st_file_in_flush
+{
+ File file;
+ /**
+ @brief threads waiting for the thread currently flushing this file to be
+ done
+ */
+ WQUEUE flush_queue;
+ /**
+ @brief if the thread currently flushing the file has a non-empty
+ first_in_switch list.
+ */
+ my_bool first_in_switch;
+};
+
+#ifndef DBUG_OFF
+/* debug checks */
+
+#ifdef NOT_USED
+static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block,
+ enum pagecache_page_pin mode
+ __attribute__((unused)))
+{
+ struct st_my_thread_var *thread= my_thread_var;
+ PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread);
+ DBUG_ENTER("info_check_pin");
+ DBUG_PRINT("enter", ("thread: 0x%lx pin: %s",
+ (ulong) thread, page_cache_page_pin_str[mode]));
+ if (info)
+ {
+ if (mode == PAGECACHE_PIN_LEFT_UNPINNED)
+ {
+ DBUG_PRINT("info",
+ ("info_check_pin: thread: 0x%lx block: 0x%lx ; LEFT_UNPINNED!!!",
+ (ulong)thread, (ulong)block));
+ DBUG_RETURN(1);
+ }
+ else if (mode == PAGECACHE_PIN)
+ {
+ DBUG_PRINT("info",
+ ("info_check_pin: thread: 0x%lx block: 0x%lx ; PIN!!!",
+ (ulong)thread, (ulong)block));
+ DBUG_RETURN(1);
+ }
+ }
+ else
+ {
+ if (mode == PAGECACHE_PIN_LEFT_PINNED)
+ {
+ DBUG_PRINT("info",
+ ("info_check_pin: thread: 0x%lx block: 0x%lx ; LEFT_PINNED!!!",
+ (ulong)thread, (ulong)block));
+ DBUG_RETURN(1);
+ }
+ else if (mode == PAGECACHE_UNPIN)
+ {
+ DBUG_PRINT("info",
+ ("info_check_pin: thread: 0x%lx block: 0x%lx ; UNPIN!!!",
+ (ulong)thread, (ulong)block));
+ DBUG_RETURN(1);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Debug function which checks current lock/pin state and requested changes
+
+ SYNOPSIS
+ info_check_lock()
+ lock requested lock changes
+ pin requested pin changes
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin)
+{
+ struct st_my_thread_var *thread= my_thread_var;
+ PAGECACHE_LOCK_INFO *info=
+ (PAGECACHE_LOCK_INFO *) info_find((PAGECACHE_PIN_INFO *) block->lock_list,
+ thread);
+ DBUG_ENTER("info_check_lock");
+ switch(lock) {
+ case PAGECACHE_LOCK_LEFT_UNLOCKED:
+ if (pin != PAGECACHE_PIN_LEFT_UNPINNED ||
+ info)
+ goto error;
+ break;
+ case PAGECACHE_LOCK_LEFT_READLOCKED:
+ if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
+ pin != PAGECACHE_PIN_LEFT_PINNED) ||
+ info == 0 || info->write_lock)
+ goto error;
+ break;
+ case PAGECACHE_LOCK_LEFT_WRITELOCKED:
+ if (pin != PAGECACHE_PIN_LEFT_PINNED ||
+ info == 0 || !info->write_lock)
+ goto error;
+ break;
+ case PAGECACHE_LOCK_READ:
+ if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
+ pin != PAGECACHE_PIN) ||
+ info != 0)
+ goto error;
+ break;
+ case PAGECACHE_LOCK_WRITE:
+ if (pin != PAGECACHE_PIN ||
+ info != 0)
+ goto error;
+ break;
+ case PAGECACHE_LOCK_READ_UNLOCK:
+ if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
+ pin != PAGECACHE_UNPIN) ||
+ info == 0 || info->write_lock)
+ goto error;
+ break;
+ case PAGECACHE_LOCK_WRITE_UNLOCK:
+ if (pin != PAGECACHE_UNPIN ||
+ info == 0 || !info->write_lock)
+ goto error;
+ break;
+ case PAGECACHE_LOCK_WRITE_TO_READ:
+ if ((pin != PAGECACHE_PIN_LEFT_PINNED &&
+ pin != PAGECACHE_UNPIN) ||
+ info == 0 || !info->write_lock)
+ goto error;
+ break;
+ }
+ DBUG_RETURN(0);
+error:
+ DBUG_PRINT("info",
+ ("info_check_lock: thread: 0x%lx block 0x%lx: info: %d wrt: %d,"
+ "to lock: %s, to pin: %s",
+ (ulong)thread, (ulong)block, test(info),
+ (info ? info->write_lock : 0),
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin]));
+ DBUG_RETURN(1);
+}
+#endif /* NOT_USED */
+#endif /* !DBUG_OFF */
+
+#define FLUSH_CACHE 2000 /* sort this many blocks at once */
+
+static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block);
+#ifndef DBUG_OFF
+static void test_key_cache(PAGECACHE *pagecache,
+ const char *where, my_bool lock);
+#endif
+
+#define PAGECACHE_HASH(p, f, pos) (((ulong) (pos) + \
+ (ulong) (f).file) & (p->hash_entries-1))
+#define FILE_HASH(f) ((uint) (f).file & (PAGECACHE_CHANGED_BLOCKS_HASH - 1))
+
+#define DEFAULT_PAGECACHE_DEBUG_LOG "pagecache_debug.log"
+
+#if defined(PAGECACHE_DEBUG) && ! defined(PAGECACHE_DEBUG_LOG)
+#define PAGECACHE_DEBUG_LOG DEFAULT_PAGECACHE_DEBUG_LOG
+#endif
+
+#if defined(PAGECACHE_DEBUG_LOG)
+static FILE *pagecache_debug_log= NULL;
+static void pagecache_debug_print _VARARGS((const char *fmt, ...));
+#define PAGECACHE_DEBUG_OPEN \
+ if (!pagecache_debug_log) \
+ { \
+ pagecache_debug_log= fopen(PAGECACHE_DEBUG_LOG, "w"); \
+ (void) setvbuf(pagecache_debug_log, NULL, _IOLBF, BUFSIZ); \
+ }
+
+#define PAGECACHE_DEBUG_CLOSE \
+ if (pagecache_debug_log) \
+ { \
+ fclose(pagecache_debug_log); \
+ pagecache_debug_log= 0; \
+ }
+#else
+#define PAGECACHE_DEBUG_OPEN
+#define PAGECACHE_DEBUG_CLOSE
+#endif /* defined(PAGECACHE_DEBUG_LOG) */
+
+#if defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG)
+#define KEYCACHE_DBUG_PRINT(l, m) \
+ { if (pagecache_debug_log) \
+ fprintf(pagecache_debug_log, "%s: ", l); \
+ pagecache_debug_print m; }
+
+#define KEYCACHE_DBUG_ASSERT(a) \
+ { if (! (a) && pagecache_debug_log) \
+ fclose(pagecache_debug_log); \
+ assert(a); }
+#else
+#define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m)
+#define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a)
+#endif /* defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) */
+
+#if defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF)
+#ifdef THREAD
+static long pagecache_thread_id;
+#define KEYCACHE_THREAD_TRACE(l) \
+ KEYCACHE_DBUG_PRINT(l,("|thread %ld",pagecache_thread_id))
+
+#define KEYCACHE_THREAD_TRACE_BEGIN(l) \
+ { struct st_my_thread_var *thread_var= my_thread_var; \
+ pagecache_thread_id= thread_var->id; \
+ KEYCACHE_DBUG_PRINT(l,("[thread %ld",pagecache_thread_id)) }
+
+#define KEYCACHE_THREAD_TRACE_END(l) \
+ KEYCACHE_DBUG_PRINT(l,("]thread %ld",pagecache_thread_id))
+#else /* THREAD */
+#define KEYCACHE_THREAD_TRACE(l) KEYCACHE_DBUG_PRINT(l,(""))
+#define KEYCACHE_THREAD_TRACE_BEGIN(l) KEYCACHE_DBUG_PRINT(l,(""))
+#define KEYCACHE_THREAD_TRACE_END(l) KEYCACHE_DBUG_PRINT(l,(""))
+#endif /* THREAD */
+#else
+#define KEYCACHE_THREAD_TRACE_BEGIN(l)
+#define KEYCACHE_THREAD_TRACE_END(l)
+#define KEYCACHE_THREAD_TRACE(l)
+#endif /* defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) */
+
+#define PCBLOCK_NUMBER(p, b) \
+ ((uint) (((char*)(b)-(char *) p->block_root)/sizeof(PAGECACHE_BLOCK_LINK)))
+#define PAGECACHE_HASH_LINK_NUMBER(p, h) \
+ ((uint) (((char*)(h)-(char *) p->hash_link_root)/ \
+ sizeof(PAGECACHE_HASH_LINK)))
+
+#if (defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)) || defined(PAGECACHE_DEBUG)
+static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
+ pthread_mutex_t *mutex);
+#else
+#define pagecache_pthread_cond_wait pthread_cond_wait
+#endif
+
+#if defined(PAGECACHE_DEBUG)
+static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex);
+static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex);
+static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond);
+#define pagecache_pthread_mutex_lock(M) \
+{ DBUG_PRINT("lock", ("mutex lock 0x%lx %u", (ulong)(M), __LINE__)); \
+ ___pagecache_pthread_mutex_lock(M);}
+#define pagecache_pthread_mutex_unlock(M) \
+{ DBUG_PRINT("lock", ("mutex unlock 0x%lx %u", (ulong)(M), __LINE__)); \
+ ___pagecache_pthread_mutex_unlock(M);}
+#define pagecache_pthread_cond_signal(M) \
+{ DBUG_PRINT("lock", ("signal 0x%lx %u", (ulong)(M), __LINE__)); \
+ ___pagecache_pthread_cond_signal(M);}
+#else
+#define pagecache_pthread_mutex_lock pthread_mutex_lock
+#define pagecache_pthread_mutex_unlock pthread_mutex_unlock
+#define pagecache_pthread_cond_signal pthread_cond_signal
+#endif /* defined(PAGECACHE_DEBUG) */
+
+extern my_bool translog_flush(LSN lsn);
+
+/*
+ Write page to the disk
+
+ SYNOPSIS
+ pagecache_fwrite()
+ pagecache - page cache pointer
+ filedesc - pagecache file descriptor structure
+ buffer - buffer which we will write
+ type - page type (plain or with LSN)
+ flags - MYF() flags
+
+ RETURN
+ 0 - OK
+ !=0 - Error
+*/
+
+static uint pagecache_fwrite(PAGECACHE *pagecache,
+ PAGECACHE_FILE *filedesc,
+ uchar *buffer,
+ pgcache_page_no_t pageno,
+ enum pagecache_page_type type,
+ myf flags)
+{
+ DBUG_ENTER("pagecache_fwrite");
+ DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE);
+ /**
+ @todo RECOVERY BUG Here, we should call a callback get_lsn(): it will use
+ lsn_korr() for LSN pages, and translog_get_horizon() for bitmap pages.
+ */
+ if (type == PAGECACHE_LSN_PAGE)
+ {
+ LSN lsn;
+ DBUG_PRINT("info", ("Log handler call"));
+ /* TODO: integrate with page format */
+ lsn= lsn_korr(buffer + PAGE_LSN_OFFSET);
+ DBUG_ASSERT(LSN_VALID(lsn));
+ if (translog_flush(lsn))
+ {
+ (*filedesc->write_fail)(filedesc->callback_data);
+ DBUG_RETURN(1);
+ }
+ }
+ DBUG_PRINT("info", ("write_callback: 0x%lx data: 0x%lx",
+ (ulong) filedesc->write_callback,
+ (ulong) filedesc->callback_data));
+ if ((filedesc->write_callback)(buffer, pageno, filedesc->callback_data))
+ {
+ DBUG_PRINT("error", ("write callback problem"));
+ DBUG_RETURN(1);
+ }
+
+ if (my_pwrite(filedesc->file, buffer, pagecache->block_size,
+ (pageno)<<(pagecache->shift), flags))
+ {
+ (*filedesc->write_fail)(filedesc->callback_data);
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Read page from the disk
+
+ SYNOPSIS
+ pagecache_fread()
+ pagecache - page cache pointer
+ filedesc - pagecache file descriptor structure
+ buffer - buffer in which we will read
+ pageno - page number
+ flags - MYF() flags
+*/
+#define pagecache_fread(pagecache, filedesc, buffer, pageno, flags) \
+ my_pread((filedesc)->file, buffer, pagecache->block_size, \
+ (pageno)<<(pagecache->shift), flags)
+
+
+/**
+ @brief set rec_lsn of pagecache block (if it is needed)
+
+ @param block block where to set rec_lsn
+ @param first_REDO_LSN_for_page the LSN to set
+*/
+
+static inline void pagecache_set_block_rec_lsn(PAGECACHE_BLOCK_LINK *block,
+ LSN first_REDO_LSN_for_page)
+{
+ if (block->rec_lsn == LSN_MAX)
+ block->rec_lsn= first_REDO_LSN_for_page;
+ else
+ DBUG_ASSERT(cmp_translog_addr(block->rec_lsn,
+ first_REDO_LSN_for_page) <= 0);
+}
+
+
+/*
+ next_power(value) is 2 at the power of (1+floor(log2(value)));
+ e.g. next_power(2)=4, next_power(3)=4.
+*/
+static inline uint next_power(uint value)
+{
+ return (uint) my_round_up_to_next_power((uint32) value) << 1;
+}
+
+
+/*
+ Initialize a page cache
+
+ SYNOPSIS
+ init_pagecache()
+ pagecache pointer to a page cache data structure
+ key_cache_block_size size of blocks to keep cached data
+ use_mem total memory to use for the key cache
+ division_limit division limit (may be zero)
+ age_threshold age threshold (may be zero)
+ block_size size of block (should be power of 2)
+ my_read_flags Flags used for all pread/pwrite calls
+ Usually MY_WME in case of recovery
+
+ RETURN VALUE
+ number of blocks in the key cache, if successful,
+ 0 - otherwise.
+
+ NOTES.
+ if pagecache->inited != 0 we assume that the key cache
+ is already initialized. This is for now used by myisamchk, but shouldn't
+ be something that a program should rely on!
+
+ It's assumed that no two threads call this function simultaneously
+ referring to the same key cache handle.
+
+*/
+
+ulong init_pagecache(PAGECACHE *pagecache, size_t use_mem,
+ uint division_limit, uint age_threshold,
+ uint block_size, myf my_readwrite_flags)
+{
+ ulong blocks, hash_links, length;
+ int error;
+ DBUG_ENTER("init_pagecache");
+ DBUG_ASSERT(block_size >= 512);
+
+ PAGECACHE_DEBUG_OPEN;
+ if (pagecache->inited && pagecache->disk_blocks > 0)
+ {
+ DBUG_PRINT("warning",("key cache already in use"));
+ DBUG_RETURN(0);
+ }
+
+ pagecache->global_cache_w_requests= pagecache->global_cache_r_requests= 0;
+ pagecache->global_cache_read= pagecache->global_cache_write= 0;
+ pagecache->disk_blocks= -1;
+ if (! pagecache->inited)
+ {
+ if (pthread_mutex_init(&pagecache->cache_lock, MY_MUTEX_INIT_FAST) ||
+ hash_init(&pagecache->files_in_flush, &my_charset_bin, 32,
+ offsetof(struct st_file_in_flush, file),
+ sizeof(((struct st_file_in_flush *)NULL)->file),
+ NULL, NULL, 0))
+ goto err;
+ pagecache->inited= 1;
+ pagecache->in_init= 0;
+ pagecache->resize_queue.last_thread= NULL;
+ }
+
+ pagecache->mem_size= use_mem;
+ pagecache->block_size= block_size;
+ pagecache->shift= my_bit_log2(block_size);
+ pagecache->readwrite_flags= my_readwrite_flags | MY_NABP | MY_WAIT_IF_FULL;
+ pagecache->org_readwrite_flags= pagecache->readwrite_flags;
+ DBUG_PRINT("info", ("block_size: %u", block_size));
+ DBUG_ASSERT(((uint)(1 << pagecache->shift)) == block_size);
+
+ blocks= (ulong) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) +
+ 2 * sizeof(PAGECACHE_HASH_LINK) +
+ sizeof(PAGECACHE_HASH_LINK*) *
+ 5/4 + block_size));
+ /*
+ We need to support page cache with just one block to be able to do
+ scanning of rows-in-block files
+ */
+ for ( ; ; )
+ {
+ if (blocks < 8)
+ {
+ my_errno= ENOMEM;
+ goto err;
+ }
+ /* Set my_hash_entries to the next bigger 2 power */
+ if ((pagecache->hash_entries= next_power(blocks)) <
+ (blocks) * 5/4)
+ pagecache->hash_entries<<= 1;
+ hash_links= 2 * blocks;
+#if defined(MAX_THREADS)
+ if (hash_links < MAX_THREADS + blocks - 1)
+ hash_links= MAX_THREADS + blocks - 1;
+#endif
+ while ((length= (ALIGN_SIZE(blocks * sizeof(PAGECACHE_BLOCK_LINK)) +
+ ALIGN_SIZE(hash_links * sizeof(PAGECACHE_HASH_LINK)) +
+ ALIGN_SIZE(sizeof(PAGECACHE_HASH_LINK*) *
+ pagecache->hash_entries))) +
+ (blocks << pagecache->shift) > use_mem)
+ blocks--;
+ /* Allocate memory for cache page buffers */
+ if ((pagecache->block_mem=
+ my_large_malloc((ulong) blocks * pagecache->block_size,
+ MYF(MY_WME))))
+ {
+ /*
+ Allocate memory for blocks, hash_links and hash entries;
+ For each block 2 hash links are allocated
+ */
+ if ((pagecache->block_root=
+ (PAGECACHE_BLOCK_LINK*) my_malloc((size_t) length, MYF(0))))
+ break;
+ my_large_free(pagecache->block_mem, MYF(0));
+ pagecache->block_mem= 0;
+ }
+ blocks= blocks / 4*3;
+ }
+ pagecache->blocks_unused= blocks;
+ pagecache->disk_blocks= (long) blocks;
+ pagecache->hash_links= hash_links;
+ pagecache->hash_root=
+ (PAGECACHE_HASH_LINK**) ((char*) pagecache->block_root +
+ ALIGN_SIZE(blocks*sizeof(PAGECACHE_BLOCK_LINK)));
+ pagecache->hash_link_root=
+ (PAGECACHE_HASH_LINK*) ((char*) pagecache->hash_root +
+ ALIGN_SIZE((sizeof(PAGECACHE_HASH_LINK*) *
+ pagecache->hash_entries)));
+ bzero((uchar*) pagecache->block_root,
+ pagecache->disk_blocks * sizeof(PAGECACHE_BLOCK_LINK));
+ bzero((uchar*) pagecache->hash_root,
+ pagecache->hash_entries * sizeof(PAGECACHE_HASH_LINK*));
+ bzero((uchar*) pagecache->hash_link_root,
+ pagecache->hash_links * sizeof(PAGECACHE_HASH_LINK));
+ pagecache->hash_links_used= 0;
+ pagecache->free_hash_list= NULL;
+ pagecache->blocks_used= pagecache->blocks_changed= 0;
+
+ pagecache->global_blocks_changed= 0;
+ pagecache->blocks_available=0; /* For debugging */
+
+ /* The LRU chain is empty after initialization */
+ pagecache->used_last= NULL;
+ pagecache->used_ins= NULL;
+ pagecache->free_block_list= NULL;
+ pagecache->time= 0;
+ pagecache->warm_blocks= 0;
+ pagecache->min_warm_blocks= (division_limit ?
+ blocks * division_limit / 100 + 1 :
+ blocks);
+ pagecache->age_threshold= (age_threshold ?
+ blocks * age_threshold / 100 :
+ blocks);
+
+ pagecache->cnt_for_resize_op= 0;
+ pagecache->resize_in_flush= 0;
+ pagecache->can_be_used= 1;
+
+ pagecache->waiting_for_hash_link.last_thread= NULL;
+ pagecache->waiting_for_block.last_thread= NULL;
+ DBUG_PRINT("exit",
+ ("disk_blocks: %ld block_root: 0x%lx hash_entries: %ld\
+ hash_root: 0x%lx hash_links: %ld hash_link_root: 0x%lx",
+ pagecache->disk_blocks, (long) pagecache->block_root,
+ pagecache->hash_entries, (long) pagecache->hash_root,
+ pagecache->hash_links, (long) pagecache->hash_link_root));
+ bzero((uchar*) pagecache->changed_blocks,
+ sizeof(pagecache->changed_blocks[0]) *
+ PAGECACHE_CHANGED_BLOCKS_HASH);
+ bzero((uchar*) pagecache->file_blocks,
+ sizeof(pagecache->file_blocks[0]) *
+ PAGECACHE_CHANGED_BLOCKS_HASH);
+
+ pagecache->blocks= pagecache->disk_blocks > 0 ? pagecache->disk_blocks : 0;
+ DBUG_RETURN((ulong) pagecache->disk_blocks);
+
+err:
+ error= my_errno;
+ pagecache->disk_blocks= 0;
+ pagecache->blocks= 0;
+ if (pagecache->block_mem)
+ {
+ my_large_free((uchar*) pagecache->block_mem, MYF(0));
+ pagecache->block_mem= NULL;
+ }
+ if (pagecache->block_root)
+ {
+ my_free((uchar*) pagecache->block_root, MYF(0));
+ pagecache->block_root= NULL;
+ }
+ my_errno= error;
+ pagecache->can_be_used= 0;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Flush all blocks in the key cache to disk
+*/
+
+#ifdef NOT_USED
+static int flush_all_key_blocks(PAGECACHE *pagecache)
+{
+#if defined(PAGECACHE_DEBUG)
+ uint cnt=0;
+#endif
+ while (pagecache->blocks_changed > 0)
+ {
+ PAGECACHE_BLOCK_LINK *block;
+ for (block= pagecache->used_last->next_used ; ; block=block->next_used)
+ {
+ if (block->hash_link)
+ {
+#if defined(PAGECACHE_DEBUG)
+ cnt++;
+ KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
+#endif
+ if (flush_pagecache_blocks_int(pagecache, &block->hash_link->file,
+ FLUSH_RELEASE, NULL, NULL))
+ return 1;
+ break;
+ }
+ if (block == pagecache->used_last)
+ break;
+ }
+ }
+ return 0;
+}
+#endif /* NOT_USED */
+
+/*
+ Resize a key cache
+
+ SYNOPSIS
+ resize_pagecache()
+ pagecache pointer to a page cache data structure
+ use_mem total memory to use for the new key cache
+ division_limit new division limit (if not zero)
+ age_threshold new age threshold (if not zero)
+
+ RETURN VALUE
+ number of blocks in the key cache, if successful,
+ 0 - otherwise.
+
+ NOTES.
+ The function first compares the memory size parameter
+ with the key cache value.
+
+ If they differ the function free the the memory allocated for the
+ old key cache blocks by calling the end_pagecache function and
+ then rebuilds the key cache with new blocks by calling
+ init_key_cache.
+
+ The function starts the operation only when all other threads
+ performing operations with the key cache let her to proceed
+ (when cnt_for_resize=0).
+
+ Before being usable, this function needs:
+ - to receive fixes for BUG#17332 "changing key_buffer_size on a running
+ server can crash under load" similar to those done to the key cache
+ - to have us (Sanja) look at the additional constraints placed on
+ resizing, due to the page locking specific to this page cache.
+ So we disable it for now.
+*/
+#if NOT_USED /* keep disabled until code is fixed see above !! */
+ulong resize_pagecache(PAGECACHE *pagecache,
+ size_t use_mem, uint division_limit,
+ uint age_threshold)
+{
+ ulong blocks;
+#ifdef THREAD
+ struct st_my_thread_var *thread;
+ WQUEUE *wqueue;
+
+#endif
+ DBUG_ENTER("resize_pagecache");
+
+ if (!pagecache->inited)
+ DBUG_RETURN(pagecache->disk_blocks);
+
+ if(use_mem == pagecache->mem_size)
+ {
+ change_pagecache_param(pagecache, division_limit, age_threshold);
+ DBUG_RETURN(pagecache->disk_blocks);
+ }
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+
+#ifdef THREAD
+ wqueue= &pagecache->resize_queue;
+ thread= my_thread_var;
+ wqueue_link_into_queue(wqueue, thread);
+
+ while (wqueue->last_thread->next != thread)
+ {
+ pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
+ }
+#endif
+
+ pagecache->resize_in_flush= 1;
+ if (flush_all_key_blocks(pagecache))
+ {
+ /* TODO: if this happens, we should write a warning in the log file ! */
+ pagecache->resize_in_flush= 0;
+ blocks= 0;
+ pagecache->can_be_used= 0;
+ goto finish;
+ }
+ pagecache->resize_in_flush= 0;
+ pagecache->can_be_used= 0;
+#ifdef THREAD
+ while (pagecache->cnt_for_resize_op)
+ {
+ KEYCACHE_DBUG_PRINT("resize_pagecache: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
+ }
+#else
+ KEYCACHE_DBUG_ASSERT(pagecache->cnt_for_resize_op == 0);
+#endif
+
+ end_pagecache(pagecache, 0); /* Don't free mutex */
+ /* The following will work even if use_mem is 0 */
+ blocks= init_pagecache(pagecache, pagecache->block_size, use_mem,
+ division_limit, age_threshold,
+ pagecache->readwrite_flags);
+
+finish:
+#ifdef THREAD
+ wqueue_unlink_from_queue(wqueue, thread);
+ /* Signal for the next resize request to proceeed if any */
+ if (wqueue->last_thread)
+ {
+ KEYCACHE_DBUG_PRINT("resize_pagecache: signal",
+ ("thread %ld", wqueue->last_thread->next->id));
+ pagecache_pthread_cond_signal(&wqueue->last_thread->next->suspend);
+ }
+#endif
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_RETURN(blocks);
+}
+#endif /* 0 */
+
+
+/*
+ Increment counter blocking resize key cache operation
+*/
+static inline void inc_counter_for_resize_op(PAGECACHE *pagecache)
+{
+ pagecache->cnt_for_resize_op++;
+}
+
+
+/*
+ Decrement counter blocking resize key cache operation;
+ Signal the operation to proceed when counter becomes equal zero
+*/
+static inline void dec_counter_for_resize_op(PAGECACHE *pagecache)
+{
+#ifdef THREAD
+ struct st_my_thread_var *last_thread;
+ if (!--pagecache->cnt_for_resize_op &&
+ (last_thread= pagecache->resize_queue.last_thread))
+ {
+ KEYCACHE_DBUG_PRINT("dec_counter_for_resize_op: signal",
+ ("thread %ld", last_thread->next->id));
+ pagecache_pthread_cond_signal(&last_thread->next->suspend);
+ }
+#else
+ pagecache->cnt_for_resize_op--;
+#endif
+}
+
+/*
+ Change the page cache parameters
+
+ SYNOPSIS
+ change_pagecache_param()
+ pagecache pointer to a page cache data structure
+ division_limit new division limit (if not zero)
+ age_threshold new age threshold (if not zero)
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ Presently the function resets the key cache parameters
+ concerning midpoint insertion strategy - division_limit and
+ age_threshold.
+*/
+
+void change_pagecache_param(PAGECACHE *pagecache, uint division_limit,
+ uint age_threshold)
+{
+ DBUG_ENTER("change_pagecache_param");
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ if (division_limit)
+ pagecache->min_warm_blocks= (pagecache->disk_blocks *
+ division_limit / 100 + 1);
+ if (age_threshold)
+ pagecache->age_threshold= (pagecache->disk_blocks *
+ age_threshold / 100);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Removes page cache from memory. Does NOT flush pages to disk.
+
+ SYNOPSIS
+ end_pagecache()
+ pagecache page cache handle
+ cleanup Complete free (Free also mutex for key cache)
+
+ RETURN VALUE
+ none
+*/
+
+void end_pagecache(PAGECACHE *pagecache, my_bool cleanup)
+{
+ DBUG_ENTER("end_pagecache");
+ DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) pagecache));
+
+ if (!pagecache->inited)
+ DBUG_VOID_RETURN;
+
+ if (pagecache->disk_blocks > 0)
+ {
+ if (pagecache->block_mem)
+ {
+ my_large_free((uchar*) pagecache->block_mem, MYF(0));
+ pagecache->block_mem= NULL;
+ my_free((uchar*) pagecache->block_root, MYF(0));
+ pagecache->block_root= NULL;
+ }
+ pagecache->disk_blocks= -1;
+ /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
+ pagecache->blocks_changed= 0;
+ }
+
+ DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu "
+ "writes: %lu r_requests: %lu reads: %lu",
+ pagecache->blocks_used,
+ pagecache->global_blocks_changed,
+ (ulong) pagecache->global_cache_w_requests,
+ (ulong) pagecache->global_cache_write,
+ (ulong) pagecache->global_cache_r_requests,
+ (ulong) pagecache->global_cache_read));
+
+ if (cleanup)
+ {
+ hash_free(&pagecache->files_in_flush);
+ pthread_mutex_destroy(&pagecache->cache_lock);
+ pagecache->inited= pagecache->can_be_used= 0;
+ PAGECACHE_DEBUG_CLOSE;
+ }
+ DBUG_VOID_RETURN;
+} /* end_pagecache */
+
+
+/*
+ Unlink a block from the chain of dirty/clean blocks
+*/
+
+static inline void unlink_changed(PAGECACHE_BLOCK_LINK *block)
+{
+ if (block->next_changed)
+ block->next_changed->prev_changed= block->prev_changed;
+ *block->prev_changed= block->next_changed;
+}
+
+
+/*
+ Link a block into the chain of dirty/clean blocks
+*/
+
+static inline void link_changed(PAGECACHE_BLOCK_LINK *block,
+ PAGECACHE_BLOCK_LINK **phead)
+{
+ block->prev_changed= phead;
+ if ((block->next_changed= *phead))
+ (*phead)->prev_changed= &block->next_changed;
+ *phead= block;
+}
+
+
+/*
+ Unlink a block from the chain of dirty/clean blocks, if it's asked for,
+ and link it to the chain of clean blocks for the specified file
+*/
+
+static void link_to_file_list(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block,
+ PAGECACHE_FILE *file, my_bool unlink_flag)
+{
+ if (unlink_flag)
+ unlink_changed(block);
+ link_changed(block, &pagecache->file_blocks[FILE_HASH(*file)]);
+ if (block->status & PCBLOCK_CHANGED)
+ {
+ block->status&= ~PCBLOCK_CHANGED;
+ block->rec_lsn= LSN_MAX;
+ pagecache->blocks_changed--;
+ pagecache->global_blocks_changed--;
+ }
+}
+
+
+/*
+ Unlink a block from the chain of clean blocks for the specified
+ file and link it to the chain of dirty blocks for this file
+*/
+
+static inline void link_to_changed_list(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block)
+{
+ unlink_changed(block);
+ link_changed(block,
+ &pagecache->changed_blocks[FILE_HASH(block->hash_link->file)]);
+ block->status|=PCBLOCK_CHANGED;
+ pagecache->blocks_changed++;
+ pagecache->global_blocks_changed++;
+}
+
+
+/*
+ Link a block to the LRU chain at the beginning or at the end of
+ one of two parts.
+
+ SYNOPSIS
+ link_block()
+ pagecache pointer to a page cache data structure
+ block pointer to the block to link to the LRU chain
+ hot <-> to link the block into the hot subchain
+ at_end <-> to link the block at the end of the subchain
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ The LRU chain is represented by a circular list of block structures.
+ The list is double-linked of the type (**prev,*next) type.
+ The LRU chain is divided into two parts - hot and warm.
+ There are two pointers to access the last blocks of these two
+ parts. The beginning of the warm part follows right after the
+ end of the hot part.
+ Only blocks of the warm part can be used for replacement.
+ The first block from the beginning of this subchain is always
+ taken for eviction (pagecache->last_used->next)
+
+ LRU chain: +------+ H O T +------+
+ +----| end |----...<----| beg |----+
+ | +------+last +------+ |
+ v<-link in latest hot (new end) |
+ | link in latest warm (new end)->^
+ | +------+ W A R M +------+ |
+ +----| beg |---->...----| end |----+
+ +------+ +------+ins
+ first for eviction
+*/
+
+static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
+ my_bool hot, my_bool at_end)
+{
+ PAGECACHE_BLOCK_LINK *ins;
+ PAGECACHE_BLOCK_LINK **ptr_ins;
+
+ PCBLOCK_INFO(block);
+ KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests));
+#ifdef THREAD
+ if (!hot && pagecache->waiting_for_block.last_thread)
+ {
+ /* Signal that in the LRU warm sub-chain an available block has appeared */
+ struct st_my_thread_var *last_thread=
+ pagecache->waiting_for_block.last_thread;
+ struct st_my_thread_var *first_thread= last_thread->next;
+ struct st_my_thread_var *next_thread= first_thread;
+ PAGECACHE_HASH_LINK *hash_link=
+ (PAGECACHE_HASH_LINK *) first_thread->opt_info;
+ struct st_my_thread_var *thread;
+ do
+ {
+ thread= next_thread;
+ next_thread= thread->next;
+ /*
+ We notify about the event all threads that ask
+ for the same page as the first thread in the queue
+ */
+ if ((PAGECACHE_HASH_LINK *) thread->opt_info == hash_link)
+ {
+ KEYCACHE_DBUG_PRINT("link_block: signal", ("thread: %ld", thread->id));
+ pagecache_pthread_cond_signal(&thread->suspend);
+ wqueue_unlink_from_queue(&pagecache->waiting_for_block, thread);
+ block->requests++;
+ }
+ }
+ while (thread != last_thread);
+ hash_link->block= block;
+ KEYCACHE_THREAD_TRACE("link_block: after signaling");
+#if defined(PAGECACHE_DEBUG)
+ KEYCACHE_DBUG_PRINT("link_block",
+ ("linked,unlinked block: %u status: %x #requests: %u #available: %u",
+ PCBLOCK_NUMBER(pagecache, block), block->status,
+ block->requests, pagecache->blocks_available));
+#endif
+ return;
+ }
+#else /* THREAD */
+ KEYCACHE_DBUG_ASSERT(! (!hot && pagecache->waiting_for_block.last_thread));
+ /* Condition not transformed using DeMorgan, to keep the text identical */
+#endif /* THREAD */
+ ptr_ins= hot ? &pagecache->used_ins : &pagecache->used_last;
+ ins= *ptr_ins;
+ if (ins)
+ {
+ ins->next_used->prev_used= &block->next_used;
+ block->next_used= ins->next_used;
+ block->prev_used= &ins->next_used;
+ ins->next_used= block;
+ if (at_end)
+ *ptr_ins= block;
+ }
+ else
+ {
+ /* The LRU chain is empty */
+ pagecache->used_last= pagecache->used_ins= block->next_used= block;
+ block->prev_used= &block->next_used;
+ }
+ KEYCACHE_THREAD_TRACE("link_block");
+#if defined(PAGECACHE_DEBUG)
+ pagecache->blocks_available++;
+ KEYCACHE_DBUG_PRINT("link_block",
+ ("linked block: %u:%1u status: %x #requests: %u #available: %u",
+ PCBLOCK_NUMBER(pagecache, block), at_end, block->status,
+ block->requests, pagecache->blocks_available));
+ KEYCACHE_DBUG_ASSERT((ulong) pagecache->blocks_available <=
+ pagecache->blocks_used);
+#endif
+}
+
+
+/*
+ Unlink a block from the LRU chain
+
+ SYNOPSIS
+ unlink_block()
+ pagecache pointer to a page cache data structure
+ block pointer to the block to unlink from the LRU chain
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ See NOTES for link_block
+*/
+
+static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
+{
+ DBUG_ENTER("unlink_block");
+ DBUG_PRINT("unlink_block", ("unlink 0x%lx", (ulong)block));
+ if (block->next_used == block)
+ {
+ /* The list contains only one member */
+ pagecache->used_last= pagecache->used_ins= NULL;
+ }
+ else
+ {
+ block->next_used->prev_used= block->prev_used;
+ *block->prev_used= block->next_used;
+ if (pagecache->used_last == block)
+ pagecache->used_last= STRUCT_PTR(PAGECACHE_BLOCK_LINK,
+ next_used, block->prev_used);
+ if (pagecache->used_ins == block)
+ pagecache->used_ins= STRUCT_PTR(PAGECACHE_BLOCK_LINK,
+ next_used, block->prev_used);
+ }
+ block->next_used= NULL;
+
+ KEYCACHE_THREAD_TRACE("unlink_block");
+#if defined(PAGECACHE_DEBUG)
+ KEYCACHE_DBUG_ASSERT(pagecache->blocks_available != 0);
+ pagecache->blocks_available--;
+ KEYCACHE_DBUG_PRINT("unlink_block",
+ ("unlinked block: 0x%lx (%u) status: %x #requests: %u #available: %u",
+ (ulong)block, PCBLOCK_NUMBER(pagecache, block),
+ block->status,
+ block->requests, pagecache->blocks_available));
+ PCBLOCK_INFO(block);
+#endif
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Register requests for a block
+
+ SYNOPSIS
+ reg_requests()
+ pagecache this page cache reference
+ block the block we request reference
+ count how many requests we register (it is 1 everywhere)
+
+ NOTE
+ Registration of request means we are going to use this block so we exclude
+ it from the LRU if it is first request
+*/
+static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
+ int count)
+{
+ DBUG_ENTER("reg_requests");
+ DBUG_PRINT("enter", ("block: 0x%lx (%u) status: %x reqs: %u",
+ (ulong)block, PCBLOCK_NUMBER(pagecache, block),
+ block->status, block->requests));
+ PCBLOCK_INFO(block);
+ if (! block->requests)
+ /* First request for the block unlinks it */
+ unlink_block(pagecache, block);
+ block->requests+= count;
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Unregister request for a block
+ linking it to the LRU chain if it's the last request
+
+ SYNOPSIS
+ unreg_request()
+ pagecache pointer to a page cache data structure
+ block pointer to the block to link to the LRU chain
+ at_end <-> to link the block at the end of the LRU chain
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ Every linking to the LRU chain decrements by one a special block
+ counter (if it's positive). If the at_end parameter is TRUE the block is
+ added either at the end of warm sub-chain or at the end of hot sub-chain.
+ It is added to the hot subchain if its counter is zero and number of
+ blocks in warm sub-chain is not less than some low limit (determined by
+ the division_limit parameter). Otherwise the block is added to the warm
+ sub-chain. If the at_end parameter is FALSE the block is always added
+ at beginning of the warm sub-chain.
+ Thus a warm block can be promoted to the hot sub-chain when its counter
+ becomes zero for the first time.
+ At the same time the block at the very beginning of the hot subchain
+ might be moved to the beginning of the warm subchain if it stays untouched
+ for a too long time (this time is determined by parameter age_threshold).
+*/
+
+static void unreg_request(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block, int at_end)
+{
+ DBUG_ENTER("unreg_request");
+ DBUG_PRINT("enter", ("block 0x%lx (%u) status: %x reqs: %u",
+ (ulong)block, PCBLOCK_NUMBER(pagecache, block),
+ block->status, block->requests));
+ PCBLOCK_INFO(block);
+ DBUG_ASSERT(block->requests > 0);
+ if (! --block->requests)
+ {
+ my_bool hot;
+ if (block->hits_left)
+ block->hits_left--;
+ hot= !block->hits_left && at_end &&
+ pagecache->warm_blocks > pagecache->min_warm_blocks;
+ if (hot)
+ {
+ if (block->temperature == PCBLOCK_WARM)
+ pagecache->warm_blocks--;
+ block->temperature= PCBLOCK_HOT;
+ KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
+ pagecache->warm_blocks));
+ }
+ link_block(pagecache, block, hot, (my_bool)at_end);
+ block->last_hit_time= pagecache->time;
+ pagecache->time++;
+
+ block= pagecache->used_ins;
+ /* Check if we should link a hot block to the warm block */
+ if (block && pagecache->time - block->last_hit_time >
+ pagecache->age_threshold)
+ {
+ unlink_block(pagecache, block);
+ link_block(pagecache, block, 0, 0);
+ if (block->temperature != PCBLOCK_WARM)
+ {
+ pagecache->warm_blocks++;
+ block->temperature= PCBLOCK_WARM;
+ }
+ KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
+ pagecache->warm_blocks));
+ }
+ }
+ DBUG_VOID_RETURN;
+}
+
+/*
+ Remove a reader of the page in block
+*/
+
+static inline void remove_reader(PAGECACHE_BLOCK_LINK *block)
+{
+ DBUG_ENTER("remove_reader");
+ PCBLOCK_INFO(block);
+ DBUG_ASSERT(block->hash_link->requests > 0);
+#ifdef THREAD
+ if (! --block->hash_link->requests && block->condvar)
+ pagecache_pthread_cond_signal(block->condvar);
+#else
+ --block->hash_link->requests;
+#endif
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Wait until the last reader of the page in block
+ signals on its termination
+*/
+
+static inline void wait_for_readers(PAGECACHE *pagecache
+ __attribute__((unused)),
+ PAGECACHE_BLOCK_LINK *block)
+{
+#ifdef THREAD
+ struct st_my_thread_var *thread= my_thread_var;
+ while (block->hash_link->requests)
+ {
+ KEYCACHE_DBUG_PRINT("wait_for_readers: wait",
+ ("suspend thread: %ld block: %u",
+ thread->id, PCBLOCK_NUMBER(pagecache, block)));
+ block->condvar= &thread->suspend;
+ pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
+ block->condvar= NULL;
+ }
+#else
+ KEYCACHE_DBUG_ASSERT(block->hash_link->requests == 0);
+#endif
+}
+
+
+/*
+ Add a hash link to a bucket in the hash_table
+*/
+
+static inline void link_hash(PAGECACHE_HASH_LINK **start,
+ PAGECACHE_HASH_LINK *hash_link)
+{
+ if (*start)
+ (*start)->prev= &hash_link->next;
+ hash_link->next= *start;
+ hash_link->prev= start;
+ *start= hash_link;
+}
+
+
+/*
+ Remove a hash link from the hash table
+*/
+
+static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link)
+{
+ KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u",
+ (uint) hash_link->file.file, (ulong) hash_link->pageno,
+ hash_link->requests));
+ KEYCACHE_DBUG_ASSERT(hash_link->requests == 0);
+ if ((*hash_link->prev= hash_link->next))
+ hash_link->next->prev= hash_link->prev;
+ hash_link->block= NULL;
+#ifdef THREAD
+ if (pagecache->waiting_for_hash_link.last_thread)
+ {
+ /* Signal that a free hash link has appeared */
+ struct st_my_thread_var *last_thread=
+ pagecache->waiting_for_hash_link.last_thread;
+ struct st_my_thread_var *first_thread= last_thread->next;
+ struct st_my_thread_var *next_thread= first_thread;
+ PAGECACHE_PAGE *first_page= (PAGECACHE_PAGE *) (first_thread->opt_info);
+ struct st_my_thread_var *thread;
+
+ hash_link->file= first_page->file;
+ hash_link->pageno= first_page->pageno;
+ do
+ {
+ PAGECACHE_PAGE *page;
+ thread= next_thread;
+ page= (PAGECACHE_PAGE *) thread->opt_info;
+ next_thread= thread->next;
+ /*
+ We notify about the event all threads that ask
+ for the same page as the first thread in the queue
+ */
+ if (page->file.file == hash_link->file.file &&
+ page->pageno == hash_link->pageno)
+ {
+ KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id));
+ pagecache_pthread_cond_signal(&thread->suspend);
+ wqueue_unlink_from_queue(&pagecache->waiting_for_hash_link, thread);
+ }
+ }
+ while (thread != last_thread);
+ link_hash(&pagecache->hash_root[PAGECACHE_HASH(pagecache,
+ hash_link->file,
+ hash_link->pageno)],
+ hash_link);
+ return;
+ }
+#else /* THREAD */
+ KEYCACHE_DBUG_ASSERT(! (pagecache->waiting_for_hash_link.last_thread));
+#endif /* THREAD */
+ hash_link->next= pagecache->free_hash_list;
+ pagecache->free_hash_list= hash_link;
+}
+
+
+/*
+ Get the hash link for the page if it is in the cache (do not put the
+ page in the cache if it is absent there)
+
+ SYNOPSIS
+ get_present_hash_link()
+ pagecache Pagecache reference
+ file file ID
+ pageno page number in the file
+ start where to put pointer to found hash bucket (for
+ direct referring it)
+
+ RETURN
+ found hashlink pointer
+*/
+
+static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ PAGECACHE_HASH_LINK ***start)
+{
+ reg1 PAGECACHE_HASH_LINK *hash_link;
+#if defined(PAGECACHE_DEBUG)
+ int cnt;
+#endif
+ DBUG_ENTER("get_present_hash_link");
+
+ KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu",
+ (uint) file->file, (ulong) pageno));
+
+ /*
+ Find the bucket in the hash table for the pair (file, pageno);
+ start contains the head of the bucket list,
+ hash_link points to the first member of the list
+ */
+ hash_link= *(*start= &pagecache->hash_root[PAGECACHE_HASH(pagecache,
+ *file, pageno)]);
+#if defined(PAGECACHE_DEBUG)
+ cnt= 0;
+#endif
+ /* Look for an element for the pair (file, pageno) in the bucket chain */
+ while (hash_link &&
+ (hash_link->pageno != pageno ||
+ hash_link->file.file != file->file))
+ {
+ hash_link= hash_link->next;
+#if defined(PAGECACHE_DEBUG)
+ cnt++;
+ if (! (cnt <= pagecache->hash_links_used))
+ {
+ int i;
+ for (i=0, hash_link= **start ;
+ i < cnt ; i++, hash_link= hash_link->next)
+ {
+ KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu",
+ (uint) hash_link->file.file, (ulong) hash_link->pageno));
+ }
+ }
+ KEYCACHE_DBUG_ASSERT(cnt <= pagecache->hash_links_used);
+#endif
+ }
+ if (hash_link)
+ {
+ /* Register the request for the page */
+ hash_link->requests++;
+ }
+ /*
+ As soon as the caller will release the page cache's lock, "hash_link"
+ will be potentially obsolete (unusable) information.
+ */
+ DBUG_RETURN(hash_link);
+}
+
+
+/*
+ Get the hash link for a page
+*/
+
+static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno)
+{
+ reg1 PAGECACHE_HASH_LINK *hash_link;
+ PAGECACHE_HASH_LINK **start;
+
+ KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
+ (uint) file->file, (ulong) pageno));
+
+restart:
+ /* try to find the page in the cache */
+ hash_link= get_present_hash_link(pagecache, file, pageno,
+ &start);
+ if (!hash_link)
+ {
+ /* There is no hash link in the hash table for the pair (file, pageno) */
+ if (pagecache->free_hash_list)
+ {
+ hash_link= pagecache->free_hash_list;
+ pagecache->free_hash_list= hash_link->next;
+ }
+ else if (pagecache->hash_links_used < pagecache->hash_links)
+ {
+ hash_link= &pagecache->hash_link_root[pagecache->hash_links_used++];
+ }
+ else
+ {
+#ifdef THREAD
+ /* Wait for a free hash link */
+ struct st_my_thread_var *thread= my_thread_var;
+ PAGECACHE_PAGE page;
+ KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting"));
+ page.file= *file;
+ page.pageno= pageno;
+ thread->opt_info= (void *) &page;
+ wqueue_link_into_queue(&pagecache->waiting_for_hash_link, thread);
+ KEYCACHE_DBUG_PRINT("get_hash_link: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ thread->opt_info= NULL;
+#else
+ KEYCACHE_DBUG_ASSERT(0);
+#endif
+ DBUG_PRINT("info", ("restarting..."));
+ goto restart;
+ }
+ hash_link->file= *file;
+ hash_link->pageno= pageno;
+ link_hash(start, hash_link);
+ /* Register the request for the page */
+ hash_link->requests++;
+ }
+
+ return hash_link;
+}
+
+
+/*
+ Get a block for the file page requested by a pagecache read/write operation;
+ If the page is not in the cache return a free block, if there is none
+ return the lru block after saving its buffer if the page is dirty.
+
+ SYNOPSIS
+
+ find_block()
+ pagecache pointer to a page cache data structure
+ file handler for the file to read page from
+ pageno number of the page in the file
+ init_hits_left how initialize the block counter for the page
+ wrmode <-> get for writing
+ reg_req Register request to thye page
+ page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
+
+ RETURN VALUE
+ Pointer to the found block if successful, 0 - otherwise
+
+ NOTES.
+ For the page from file positioned at pageno the function checks whether
+ the page is in the key cache specified by the first parameter.
+ If this is the case it immediately returns the block.
+ If not, the function first chooses a block for this page. If there is
+ no not used blocks in the key cache yet, the function takes the block
+ at the very beginning of the warm sub-chain. It saves the page in that
+ block if it's dirty before returning the pointer to it.
+ The function returns in the page_st parameter the following values:
+ PAGE_READ - if page already in the block,
+ PAGE_TO_BE_READ - if it is to be read yet by the current thread
+ WAIT_TO_BE_READ - if it is to be read by another thread
+ If an error occurs THE PCBLOCK_ERROR bit is set in the block status.
+ It might happen that there are no blocks in LRU chain (in warm part) -
+ all blocks are unlinked for some read/write operations. Then the function
+ waits until first of this operations links any block back.
+*/
+
+static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ int init_hits_left,
+ my_bool wrmode,
+ my_bool reg_req,
+ int *page_st)
+{
+ PAGECACHE_HASH_LINK *hash_link;
+ PAGECACHE_BLOCK_LINK *block;
+ int error= 0;
+ int page_status;
+
+ DBUG_ENTER("find_block");
+ KEYCACHE_THREAD_TRACE("find_block:begin");
+ DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d",
+ file->file, (ulong) pageno, wrmode));
+ KEYCACHE_DBUG_PRINT("find_block", ("fd: %d pos: %lu wrmode: %d",
+ file->file, (ulong) pageno,
+ wrmode));
+#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
+ DBUG_EXECUTE("check_pagecache",
+ test_key_cache(pagecache, "start of find_block", 0););
+#endif
+
+restart:
+ /* Find the hash link for the requested page (file, pageno) */
+ hash_link= get_hash_link(pagecache, file, pageno);
+
+ page_status= -1;
+ if ((block= hash_link->block) &&
+ block->hash_link == hash_link && (block->status & PCBLOCK_READ))
+ page_status= PAGE_READ;
+
+ if (wrmode && pagecache->resize_in_flush)
+ {
+ /* This is a write request during the flush phase of a resize operation */
+
+ if (page_status != PAGE_READ)
+ {
+ /* We don't need the page in the cache: we are going to write on disk */
+ DBUG_ASSERT(hash_link->requests > 0);
+ hash_link->requests--;
+ unlink_hash(pagecache, hash_link);
+ return 0;
+ }
+ if (!(block->status & PCBLOCK_IN_FLUSH))
+ {
+ DBUG_ASSERT(hash_link->requests > 0);
+ hash_link->requests--;
+ /*
+ Remove block to invalidate the page in the block buffer
+ as we are going to write directly on disk.
+ Although we have an exclusive lock for the updated key part
+ the control can be yielded by the current thread as we might
+ have unfinished readers of other key parts in the block
+ buffer. Still we are guaranteed not to have any readers
+ of the key part we are writing into until the block is
+ removed from the cache as we set the PCBLOCK_REASSIGNED
+ flag (see the code below that handles reading requests).
+ */
+ free_block(pagecache, block);
+ return 0;
+ }
+ /* Wait until the page is flushed on disk */
+ DBUG_ASSERT(hash_link->requests > 0);
+ hash_link->requests--;
+ {
+#ifdef THREAD
+ struct st_my_thread_var *thread= my_thread_var;
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
+ do
+ {
+ KEYCACHE_DBUG_PRINT("find_block: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while(thread->next);
+#else
+ KEYCACHE_DBUG_ASSERT(0);
+ /*
+ Given the use of "resize_in_flush", it seems impossible
+ that this whole branch is ever entered in single-threaded case
+ because "(wrmode && pagecache->resize_in_flush)" cannot be true.
+ TODO: Check this, and then put the whole branch into the
+ "#ifdef THREAD" guard.
+ */
+#endif
+ }
+ /* Invalidate page in the block if it has not been done yet */
+ if (block->status)
+ free_block(pagecache, block);
+ return 0;
+ }
+
+ if (page_status == PAGE_READ &&
+ (block->status & (PCBLOCK_IN_SWITCH | PCBLOCK_REASSIGNED)))
+ {
+ /* This is a request for a page to be removed from cache */
+
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("request for old page in block: %u "
+ "wrmode: %d block->status: %d",
+ PCBLOCK_NUMBER(pagecache, block), wrmode,
+ block->status));
+ /*
+ Only reading requests can proceed until the old dirty page is flushed,
+ all others are to be suspended, then resubmitted
+ */
+ if (!wrmode && !(block->status & PCBLOCK_REASSIGNED))
+ {
+ if (reg_req)
+ reg_requests(pagecache, block, 1);
+ }
+ else
+ {
+ DBUG_ASSERT(hash_link->requests > 0);
+ hash_link->requests--;
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("request waiting for old page to be saved"));
+ {
+#ifdef THREAD
+ struct st_my_thread_var *thread= my_thread_var;
+ /* Put the request into the queue of those waiting for the old page */
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
+ /* Wait until the request can be resubmitted */
+ do
+ {
+ KEYCACHE_DBUG_PRINT("find_block: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while(thread->next);
+#else
+ KEYCACHE_DBUG_ASSERT(0);
+ /* No parallel requests in single-threaded case */
+#endif
+ }
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("request for old page resubmitted"));
+ DBUG_PRINT("info", ("restarting..."));
+ /* Resubmit the request */
+ goto restart;
+ }
+ block->status&= ~PCBLOCK_IN_SWITCH;
+ }
+ else
+ {
+ /* This is a request for a new page or for a page not to be removed */
+ if (! block)
+ {
+ /* No block is assigned for the page yet */
+ if (pagecache->blocks_unused)
+ {
+ if (pagecache->free_block_list)
+ {
+ /* There is a block in the free list. */
+ block= pagecache->free_block_list;
+ pagecache->free_block_list= block->next_used;
+ block->next_used= NULL;
+ }
+ else
+ {
+ /* There are some never used blocks, take first of them */
+ block= &pagecache->block_root[pagecache->blocks_used];
+ block->buffer= ADD_TO_PTR(pagecache->block_mem,
+ ((ulong) pagecache->blocks_used*
+ pagecache->block_size),
+ uchar*);
+ pagecache->blocks_used++;
+ }
+ pagecache->blocks_unused--;
+ DBUG_ASSERT(block->wlocks == 0);
+ DBUG_ASSERT(block->pins == 0);
+ block->status= 0;
+#ifndef DBUG_OFF
+ block->type= PAGECACHE_EMPTY_PAGE;
+#endif
+ block->requests= 1;
+ block->temperature= PCBLOCK_COLD;
+ block->hits_left= init_hits_left;
+ block->last_hit_time= 0;
+ block->rec_lsn= LSN_MAX;
+ link_to_file_list(pagecache, block, file, 0);
+ block->hash_link= hash_link;
+ hash_link->block= block;
+ page_status= PAGE_TO_BE_READ;
+ DBUG_PRINT("info", ("page to be read set for page 0x%lx",
+ (ulong)block));
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("got free or never used block %u",
+ PCBLOCK_NUMBER(pagecache, block)));
+ }
+ else
+ {
+ /* There are no never used blocks, use a block from the LRU chain */
+
+ /*
+ Wait until a new block is added to the LRU chain;
+ several threads might wait here for the same page,
+ all of them must get the same block
+ */
+
+#ifdef THREAD
+ if (! pagecache->used_last)
+ {
+ struct st_my_thread_var *thread= my_thread_var;
+ thread->opt_info= (void *) hash_link;
+ wqueue_link_into_queue(&pagecache->waiting_for_block, thread);
+ do
+ {
+ KEYCACHE_DBUG_PRINT("find_block: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while (thread->next);
+ thread->opt_info= NULL;
+ }
+#else
+ KEYCACHE_DBUG_ASSERT(pagecache->used_last);
+#endif
+ block= hash_link->block;
+ if (! block)
+ {
+ /*
+ Take the first block from the LRU chain
+ unlinking it from the chain
+ */
+ block= pagecache->used_last->next_used;
+ block->hits_left= init_hits_left;
+ block->last_hit_time= 0;
+ if (reg_req)
+ reg_requests(pagecache, block, 1);
+ hash_link->block= block;
+ }
+ PCBLOCK_INFO(block);
+ DBUG_ASSERT(block->wlocks == 0);
+ DBUG_ASSERT(block->pins == 0);
+
+ if (block->hash_link != hash_link &&
+ ! (block->status & PCBLOCK_IN_SWITCH) )
+ {
+ /* this is a primary request for a new page */
+ DBUG_ASSERT(block->wlocks == 0);
+ DBUG_ASSERT(block->pins == 0);
+ block->status|= PCBLOCK_IN_SWITCH;
+
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("got block %u for new page",
+ PCBLOCK_NUMBER(pagecache, block)));
+
+ if (block->status & PCBLOCK_CHANGED)
+ {
+ /* The block contains a dirty page - push it out of the cache */
+
+ KEYCACHE_DBUG_PRINT("find_block", ("block is dirty"));
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ /*
+ The call is thread safe because only the current
+ thread might change the block->hash_link value
+ */
+ DBUG_ASSERT(block->pins == 0);
+ error= pagecache_fwrite(pagecache,
+ &block->hash_link->file,
+ block->buffer,
+ block->hash_link->pageno,
+ block->type,
+ pagecache->readwrite_flags);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ pagecache->global_cache_write++;
+ }
+
+ block->status|= PCBLOCK_REASSIGNED;
+ if (block->hash_link)
+ {
+ /*
+ Wait until all pending read requests
+ for this page are executed
+ (we could have avoided this waiting, if we had read
+ a page in the cache in a sweep, without yielding control)
+ */
+ wait_for_readers(pagecache, block);
+
+ /* Remove the hash link for this page from the hash table */
+ unlink_hash(pagecache, block->hash_link);
+ /* All pending requests for this page must be resubmitted */
+#ifdef THREAD
+ if (block->wqueue[COND_FOR_SAVED].last_thread)
+ wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
+#endif
+ }
+ link_to_file_list(pagecache, block, file,
+ (my_bool)(block->hash_link ? 1 : 0));
+ PCBLOCK_INFO(block);
+ block->status= error? PCBLOCK_ERROR : 0;
+#ifndef DBUG_OFF
+ block->type= PAGECACHE_EMPTY_PAGE;
+#endif
+ block->hash_link= hash_link;
+ page_status= PAGE_TO_BE_READ;
+ DBUG_PRINT("info", ("page to be read set for page 0x%lx",
+ (ulong)block));
+
+ KEYCACHE_DBUG_ASSERT(block->hash_link->block == block);
+ KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link);
+ }
+ else
+ {
+ /* This is for secondary requests for a new page only */
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("block->hash_link: %p hash_link: %p "
+ "block->status: %u", block->hash_link,
+ hash_link, block->status ));
+ page_status= (((block->hash_link == hash_link) &&
+ (block->status & PCBLOCK_READ)) ?
+ PAGE_READ : PAGE_WAIT_TO_BE_READ);
+ }
+ }
+ pagecache->global_cache_read++;
+ }
+ else
+ {
+ if (reg_req)
+ reg_requests(pagecache, block, 1);
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("block->hash_link: %p hash_link: %p "
+ "block->status: %u", block->hash_link,
+ hash_link, block->status ));
+ page_status= (((block->hash_link == hash_link) &&
+ (block->status & PCBLOCK_READ)) ?
+ PAGE_READ : PAGE_WAIT_TO_BE_READ);
+ }
+ }
+
+ KEYCACHE_DBUG_ASSERT(page_status != -1);
+ *page_st= page_status;
+ DBUG_PRINT("info",
+ ("block: 0x%lx fd: %u pos: %lu block->status: %u page_status: %u",
+ (ulong) block, (uint) file->file,
+ (ulong) pageno, block->status, (uint) page_status));
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("block: 0x%lx fd: %d pos: %lu block->status: %u page_status: %d",
+ (ulong) block,
+ file->file, (ulong) pageno, block->status,
+ page_status));
+
+#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
+ DBUG_EXECUTE("check_pagecache",
+ test_key_cache(pagecache, "end of find_block",0););
+#endif
+ KEYCACHE_THREAD_TRACE("find_block:end");
+ DBUG_RETURN(block);
+}
+
+
+static void add_pin(PAGECACHE_BLOCK_LINK *block)
+{
+ DBUG_ENTER("add_pin");
+ DBUG_PRINT("enter", ("block: 0x%lx pins: %u",
+ (ulong) block,
+ block->pins));
+ PCBLOCK_INFO(block);
+ block->pins++;
+#ifndef DBUG_OFF
+ {
+ PAGECACHE_PIN_INFO *info=
+ (PAGECACHE_PIN_INFO *)my_malloc(sizeof(PAGECACHE_PIN_INFO), MYF(0));
+ info->thread= my_thread_var;
+ info_link(&block->pin_list, info);
+ }
+#endif
+ DBUG_VOID_RETURN;
+}
+
+static void remove_pin(PAGECACHE_BLOCK_LINK *block)
+{
+ DBUG_ENTER("remove_pin");
+ DBUG_PRINT("enter", ("block: 0x%lx pins: %u",
+ (ulong) block,
+ block->pins));
+ PCBLOCK_INFO(block);
+ DBUG_ASSERT(block->pins > 0);
+ block->pins--;
+#ifndef DBUG_OFF
+ {
+ PAGECACHE_PIN_INFO *info= info_find(block->pin_list, my_thread_var);
+ DBUG_ASSERT(info != 0);
+ info_unlink(info);
+ my_free((uchar*) info, MYF(0));
+ }
+#endif
+ DBUG_VOID_RETURN;
+}
+#ifndef DBUG_OFF
+static void info_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
+{
+ PAGECACHE_LOCK_INFO *info=
+ (PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0));
+ info->thread= my_thread_var;
+ info->write_lock= wl;
+ info_link((PAGECACHE_PIN_INFO **)&block->lock_list,
+ (PAGECACHE_PIN_INFO *)info);
+}
+static void info_remove_lock(PAGECACHE_BLOCK_LINK *block)
+{
+ PAGECACHE_LOCK_INFO *info=
+ (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
+ my_thread_var);
+ DBUG_ASSERT(info != 0);
+ info_unlink((PAGECACHE_PIN_INFO *)info);
+ my_free((uchar*)info, MYF(0));
+}
+static void info_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
+{
+ PAGECACHE_LOCK_INFO *info=
+ (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
+ my_thread_var);
+ DBUG_ASSERT(info != 0);
+ DBUG_ASSERT(info->write_lock != wl);
+ info->write_lock= wl;
+}
+#else
+#define info_add_lock(B,W)
+#define info_remove_lock(B)
+#define info_change_lock(B,W)
+#endif
+
+/*
+ Put on the block write lock
+
+ SYNOPSIS
+ get_wrlock()
+ pagecache pointer to a page cache data structure
+ block the block to work with
+ user_file Unique handler per handler file. Used to check if
+ we request many write locks withing the same
+ statement
+
+ RETURN
+ 0 - OK
+ 1 - Can't lock this block, need retry
+*/
+
+static my_bool get_wrlock(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block,
+ PAGECACHE_FILE *user_file)
+{
+ PAGECACHE_FILE file= block->hash_link->file;
+ pgcache_page_no_t pageno= block->hash_link->pageno;
+ DBUG_ENTER("get_wrlock");
+ DBUG_PRINT("info", ("the block 0x%lx "
+ "files %d(%d) pages %d(%d)",
+ (ulong)block,
+ file.file, block->hash_link->file.file,
+ pageno, block->hash_link->pageno));
+ PCBLOCK_INFO(block);
+ while (block->wlocks && block->write_locker != user_file)
+ {
+ /* Lock failed we will wait */
+#ifdef THREAD
+ struct st_my_thread_var *thread= my_thread_var;
+ DBUG_PRINT("info", ("fail to lock, waiting... 0x%lx", (ulong)block));
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread);
+ dec_counter_for_resize_op(pagecache);
+ do
+ {
+ KEYCACHE_DBUG_PRINT("get_wrlock: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while(thread->next);
+#else
+ DBUG_ASSERT(0);
+#endif
+ PCBLOCK_INFO(block);
+ if ((block->status & (PCBLOCK_REASSIGNED | PCBLOCK_IN_SWITCH)) ||
+ file.file != block->hash_link->file.file ||
+ pageno != block->hash_link->pageno)
+ {
+ DBUG_PRINT("info", ("the block 0x%lx changed => need retry"
+ "status %x files %d != %d or pages %d !=%d",
+ (ulong)block, block->status,
+ file.file, block->hash_link->file.file,
+ pageno, block->hash_link->pageno));
+ DBUG_RETURN(1);
+ }
+ }
+ /* we are doing it by global cache mutex protection, so it is OK */
+ block->wlocks++;
+ block->write_locker= user_file;
+ DBUG_PRINT("info", ("WR lock set, block 0x%lx", (ulong)block));
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Remove write lock from the block
+
+ SYNOPSIS
+ release_wrlock()
+ pagecache pointer to a page cache data structure
+ block the block to work with
+
+ RETURN
+ 0 - OK
+*/
+
+static void release_wrlock(PAGECACHE_BLOCK_LINK *block)
+{
+ DBUG_ENTER("release_wrlock");
+ PCBLOCK_INFO(block);
+ DBUG_ASSERT(block->wlocks > 0);
+ DBUG_ASSERT(block->pins > 0);
+ block->wlocks--;
+ if (block->wlocks > 0)
+ DBUG_VOID_RETURN; /* Multiple write locked */
+ DBUG_PRINT("info", ("WR lock reset, block 0x%lx", (ulong)block));
+#ifdef THREAD
+ /* release all threads waiting for write lock */
+ if (block->wqueue[COND_FOR_WRLOCK].last_thread)
+ wqueue_release_queue(&block->wqueue[COND_FOR_WRLOCK]);
+#endif
+ PCBLOCK_INFO(block);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Try to lock/unlock and pin/unpin the block
+
+ SYNOPSIS
+ make_lock_and_pin()
+ pagecache pointer to a page cache data structure
+ block the block to work with
+ lock lock change mode
+ pin pinchange mode
+ file File handler requesting pin
+
+ RETURN
+ 0 - OK
+ 1 - Try to lock the block failed
+*/
+
+static my_bool make_lock_and_pin(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin,
+ PAGECACHE_FILE *file)
+{
+ DBUG_ENTER("make_lock_and_pin");
+
+ DBUG_PRINT("enter", ("block: 0x%lx", (ulong)block));
+#ifndef DBUG_OFF
+ if (block)
+ {
+ DBUG_PRINT("enter", ("block: 0x%lx (%u) wrlocks: %u pins: %u lock: %s pin: %s",
+ (ulong)block, PCBLOCK_NUMBER(pagecache, block),
+ block->wlocks,
+ block->pins,
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin]));
+ PCBLOCK_INFO(block);
+ }
+#endif
+
+ switch (lock) {
+ case PAGECACHE_LOCK_WRITE: /* free -> write */
+ /* Writelock and pin the buffer */
+ if (get_wrlock(pagecache, block, file))
+ {
+ /* can't lock => need retry */
+ goto retry;
+ }
+
+ /* The cache is locked so nothing afraid of */
+ add_pin(block);
+ info_add_lock(block, 1);
+ break;
+ case PAGECACHE_LOCK_WRITE_TO_READ: /* write -> read */
+ case PAGECACHE_LOCK_WRITE_UNLOCK: /* write -> free */
+ /*
+ Removes write lock and puts read lock (which is nothing in our
+ implementation)
+ */
+ release_wrlock(block);
+ /* fall through */
+ case PAGECACHE_LOCK_READ_UNLOCK: /* read -> free */
+ case PAGECACHE_LOCK_LEFT_READLOCKED: /* read -> read */
+ if (pin == PAGECACHE_UNPIN)
+ {
+ remove_pin(block);
+ }
+ if (lock == PAGECACHE_LOCK_WRITE_TO_READ)
+ {
+ info_change_lock(block, 0);
+ }
+ else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
+ lock == PAGECACHE_LOCK_READ_UNLOCK)
+ {
+ info_remove_lock(block);
+ }
+ break;
+ case PAGECACHE_LOCK_READ: /* free -> read */
+ if (pin == PAGECACHE_PIN)
+ {
+ /* The cache is locked so nothing afraid off */
+ add_pin(block);
+ }
+ info_add_lock(block, 0);
+ break;
+ case PAGECACHE_LOCK_LEFT_UNLOCKED: /* free -> free */
+ case PAGECACHE_LOCK_LEFT_WRITELOCKED: /* write -> write */
+ break; /* do nothing */
+ default:
+ DBUG_ASSERT(0); /* Never should happened */
+ }
+
+#ifndef DBUG_OFF
+ if (block)
+ PCBLOCK_INFO(block);
+#endif
+ DBUG_RETURN(0);
+retry:
+ DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block));
+ PCBLOCK_INFO(block);
+ DBUG_ASSERT(block->hash_link->requests > 0);
+ block->hash_link->requests--;
+ DBUG_ASSERT(block->requests > 0);
+ unreg_request(pagecache, block, 1);
+ PCBLOCK_INFO(block);
+ DBUG_RETURN(1);
+
+}
+
+
+/*
+ Read into a key cache block buffer from disk.
+
+ SYNOPSIS
+
+ read_block()
+ pagecache pointer to a page cache data structure
+ block block to which buffer the data is to be read
+ primary <-> the current thread will read the data
+
+ RETURN VALUE
+ None
+
+ NOTES.
+ The function either reads a page data from file to the block buffer,
+ or waits until another thread reads it. What page to read is determined
+ by a block parameter - reference to a hash link for this page.
+ If an error occurs THE PCBLOCK_ERROR bit is set in the block status.
+
+ On entry cache_lock is locked
+*/
+
+static void read_block(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block,
+ my_bool primary)
+{
+
+ DBUG_ENTER("read_block");
+ if (primary)
+ {
+ size_t error;
+ /*
+ This code is executed only by threads
+ that submitted primary requests
+ */
+
+ DBUG_PRINT("read_block",
+ ("page to be read by primary request"));
+
+ /* Page is not in buffer yet, is to be read from disk */
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ /*
+ Here other threads may step in and register as secondary readers.
+ They will register in block->wqueue[COND_FOR_REQUESTED].
+ */
+ error= pagecache_fread(pagecache, &block->hash_link->file,
+ block->buffer,
+ block->hash_link->pageno,
+ pagecache->readwrite_flags);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ if (error)
+ block->status|= PCBLOCK_ERROR;
+ else
+ {
+ block->status|= PCBLOCK_READ;
+ if ((*block->hash_link->file.read_callback)(block->buffer,
+ block->hash_link->pageno,
+ block->hash_link->
+ file.callback_data))
+ {
+ DBUG_PRINT("error", ("read callback problem"));
+ block->status|= PCBLOCK_ERROR;
+ }
+ }
+ DBUG_PRINT("read_block",
+ ("primary request: new page in cache"));
+ /* Signal that all pending requests for this page now can be processed */
+#ifdef THREAD
+ if (block->wqueue[COND_FOR_REQUESTED].last_thread)
+ wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
+#endif
+ }
+ else
+ {
+ /*
+ This code is executed only by threads
+ that submitted secondary requests
+ */
+ DBUG_PRINT("read_block",
+ ("secondary request waiting for new page to be read"));
+ {
+#ifdef THREAD
+ struct st_my_thread_var *thread= my_thread_var;
+ /* Put the request into a queue and wait until it can be processed */
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread);
+ do
+ {
+ DBUG_PRINT("read_block: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while (thread->next);
+#else
+ KEYCACHE_DBUG_ASSERT(0);
+ /* No parallel requests in single-threaded case */
+#endif
+ }
+ DBUG_PRINT("read_block",
+ ("secondary request: new page in cache"));
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ @brief Set LSN on the page to the given one if the given LSN is bigger
+
+ @param pagecache pointer to a page cache data structure
+ @param lsn LSN to set
+ @param block block to check and set
+*/
+
+static void check_and_set_lsn(PAGECACHE *pagecache,
+ LSN lsn, PAGECACHE_BLOCK_LINK *block)
+{
+ LSN old;
+ DBUG_ENTER("check_and_set_lsn");
+ /*
+ In recovery, we can _ma_unpin_all_pages() to put a LSN on page, though
+ page would be PAGECACHE_PLAIN_PAGE (transactionality temporarily disabled
+ to not log REDOs).
+ */
+ DBUG_ASSERT((block->type == PAGECACHE_LSN_PAGE) || maria_in_recovery);
+ old= lsn_korr(block->buffer + PAGE_LSN_OFFSET);
+ DBUG_PRINT("info", ("old lsn: (%lu, 0x%lx) new lsn: (%lu, 0x%lx)",
+ LSN_IN_PARTS(old), LSN_IN_PARTS(lsn)));
+ if (cmp_translog_addr(lsn, old) > 0)
+ {
+
+ DBUG_ASSERT(block->type != PAGECACHE_READ_UNKNOWN_PAGE);
+ lsn_store(block->buffer + PAGE_LSN_OFFSET, lsn);
+ /* we stored LSN in page so we dirtied it */
+ if (!(block->status & PCBLOCK_CHANGED))
+ link_to_changed_list(pagecache, block);
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ @brief Unlock/unpin page and put LSN stamp if it need
+
+ @param pagecache pointer to a page cache data structure
+ @pagam file handler for the file for the block of data to be read
+ @param pageno number of the block of data in the file
+ @param lock lock change
+ @param pin pin page
+ @param first_REDO_LSN_for_page do not set it if it is zero
+ @param lsn if it is not LSN_IMPOSSIBLE (0) and it
+ is bigger then LSN on the page it will be written on
+ the page
+ @param was_changed should be true if the page was write locked with
+ direct link giving and the page was changed
+
+ @note
+ Pininig uses requests registration mechanism it works following way:
+ | beginnig | ending |
+ | of func. | of func. |
+ ----------------------------+-------------+---------------+
+ PAGECACHE_PIN_LEFT_PINNED | - | - |
+ PAGECACHE_PIN_LEFT_UNPINNED | reg request | unreg request |
+ PAGECACHE_PIN | reg request | - |
+ PAGECACHE_UNPIN | - | unreg request |
+
+
+*/
+
+void pagecache_unlock(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin,
+ LSN first_REDO_LSN_for_page,
+ LSN lsn, my_bool was_changed)
+{
+ PAGECACHE_BLOCK_LINK *block;
+ int page_st;
+ DBUG_ENTER("pagecache_unlock");
+ DBUG_PRINT("enter", ("fd: %u page: %lu %s %s",
+ (uint) file->file, (ulong) pageno,
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin]));
+ /* we do not allow any lock/pin increasing here */
+ DBUG_ASSERT(pin != PAGECACHE_PIN);
+ DBUG_ASSERT(lock != PAGECACHE_LOCK_READ);
+ DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE);
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ /*
+ As soon as we keep lock cache can be used, and we have lock because want
+ to unlock.
+ */
+ DBUG_ASSERT(pagecache->can_be_used);
+
+ inc_counter_for_resize_op(pagecache);
+ /* See NOTE for pagecache_unlock about registering requests */
+ block= find_block(pagecache, file, pageno, 0, 0,
+ test(pin == PAGECACHE_PIN_LEFT_UNPINNED), &page_st);
+ PCBLOCK_INFO(block);
+ DBUG_ASSERT(block != 0 && page_st == PAGE_READ);
+ if (first_REDO_LSN_for_page)
+ {
+ DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK);
+ DBUG_ASSERT(pin == PAGECACHE_UNPIN);
+ pagecache_set_block_rec_lsn(block, first_REDO_LSN_for_page);
+ }
+ if (lsn != LSN_IMPOSSIBLE)
+ check_and_set_lsn(pagecache, lsn, block);
+
+ /* if we lock for write we must link the block to changed blocks */
+ DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0 ||
+ (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
+ lock == PAGECACHE_LOCK_LEFT_WRITELOCKED));
+ /*
+ if was_changed then status should be PCBLOCK_DIRECT_W or marked
+ as dirty
+ */
+ DBUG_ASSERT(!was_changed || (block->status & PCBLOCK_DIRECT_W) ||
+ (block->status & PCBLOCK_CHANGED));
+ if ((block->status & PCBLOCK_DIRECT_W) &&
+ (lock == PAGECACHE_LOCK_WRITE_UNLOCK))
+ {
+ if (!(block->status & PCBLOCK_CHANGED) && was_changed)
+ link_to_changed_list(pagecache, block);
+ block->status&= ~PCBLOCK_DIRECT_W;
+ DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
+ (ulong) block));
+ }
+
+ if (make_lock_and_pin(pagecache, block, lock, pin, file))
+ {
+ DBUG_ASSERT(0); /* should not happend */
+ }
+
+ remove_reader(block);
+ /*
+ Link the block into the LRU chain if it's the last submitted request
+ for the block and block will not be pinned.
+ See NOTE for pagecache_unlock about registering requests.
+ */
+ if (pin != PAGECACHE_PIN_LEFT_PINNED)
+ unreg_request(pagecache, block, 1);
+
+ dec_counter_for_resize_op(pagecache);
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Unpin page
+
+ SYNOPSIS
+ pagecache_unpin()
+ pagecache pointer to a page cache data structure
+ file handler for the file for the block of data to be read
+ pageno number of the block of data in the file
+ lsn if it is not LSN_IMPOSSIBLE (0) and it
+ is bigger then LSN on the page it will be written on
+ the page
+*/
+
+void pagecache_unpin(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ LSN lsn)
+{
+ PAGECACHE_BLOCK_LINK *block;
+ int page_st;
+ DBUG_ENTER("pagecache_unpin");
+ DBUG_PRINT("enter", ("fd: %u page: %lu",
+ (uint) file->file, (ulong) pageno));
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ /*
+ As soon as we keep lock cache can be used, and we have lock bacause want
+ aunlock.
+ */
+ DBUG_ASSERT(pagecache->can_be_used);
+
+ inc_counter_for_resize_op(pagecache);
+ /* See NOTE for pagecache_unlock about registering requests */
+ block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st);
+ DBUG_ASSERT(block != 0);
+ DBUG_ASSERT(page_st == PAGE_READ);
+ /* we can't unpin such page without unlock */
+ DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);
+
+ if (lsn != LSN_IMPOSSIBLE)
+ check_and_set_lsn(pagecache, lsn, block);
+
+ /*
+ we can just unpin only with keeping read lock because:
+ a) we can't pin without any lock
+ b) we can't unpin keeping write lock
+ */
+ if (make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_LEFT_READLOCKED,
+ PAGECACHE_UNPIN, file))
+ DBUG_ASSERT(0); /* should not happend */
+
+ remove_reader(block);
+ /*
+ Link the block into the LRU chain if it's the last submitted request
+ for the block and block will not be pinned.
+ See NOTE for pagecache_unlock about registering requests
+ */
+ unreg_request(pagecache, block, 1);
+
+ dec_counter_for_resize_op(pagecache);
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ @brief Unlock/unpin page and put LSN stamp if it need
+ (uses direct block/page pointer)
+
+ @param pagecache pointer to a page cache data structure
+ @param link direct link to page (returned by read or write)
+ @param lock lock change
+ @param pin pin page
+ @param first_REDO_LSN_for_page do not set it if it is LSN_IMPOSSIBLE (0)
+ @param lsn if it is not LSN_IMPOSSIBLE and it is bigger then
+ LSN on the page it will be written on the page
+ @param was_changed should be true if the page was write locked with
+ direct link giving and the page was changed
+*/
+
+void pagecache_unlock_by_link(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin,
+ LSN first_REDO_LSN_for_page,
+ LSN lsn, my_bool was_changed)
+{
+ DBUG_ENTER("pagecache_unlock_by_link");
+ DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu changed: %d %s %s",
+ (ulong) block,
+ (uint) block->hash_link->file.file,
+ (ulong) block->hash_link->pageno, was_changed,
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin]));
+ /*
+ We do not allow any lock/pin increasing here and page can't be
+ unpinned because we use direct link.
+ */
+ DBUG_ASSERT(pin != PAGECACHE_PIN);
+ DBUG_ASSERT(pin != PAGECACHE_PIN_LEFT_UNPINNED);
+ DBUG_ASSERT(lock != PAGECACHE_LOCK_READ);
+ DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE);
+ if (pin == PAGECACHE_PIN_LEFT_UNPINNED &&
+ lock == PAGECACHE_LOCK_READ_UNLOCK)
+ {
+ /* block do not need here so we do not provide it */
+ if (make_lock_and_pin(pagecache, 0, lock, pin, 0))
+ DBUG_ASSERT(0); /* should not happend */
+ DBUG_VOID_RETURN;
+ }
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ /*
+ As soon as we keep lock cache can be used, and we have lock because want
+ unlock.
+ */
+ DBUG_ASSERT(pagecache->can_be_used);
+
+ inc_counter_for_resize_op(pagecache);
+ if (was_changed)
+ {
+ if (first_REDO_LSN_for_page != LSN_IMPOSSIBLE)
+ {
+ /*
+ LOCK_READ_UNLOCK is ok here as the page may have first locked
+ with WRITE lock that was temporarly converted to READ lock before
+ it's unpinned
+ */
+ DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
+ lock == PAGECACHE_LOCK_READ_UNLOCK);
+ DBUG_ASSERT(pin == PAGECACHE_UNPIN);
+ pagecache_set_block_rec_lsn(block, first_REDO_LSN_for_page);
+ }
+ if (lsn != LSN_IMPOSSIBLE)
+ check_and_set_lsn(pagecache, lsn, block);
+ block->status&= ~PCBLOCK_ERROR;
+ }
+
+ /* if we lock for write we must link the block to changed blocks */
+ DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0 ||
+ (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
+ lock == PAGECACHE_LOCK_LEFT_WRITELOCKED));
+ /*
+ If was_changed then status should be PCBLOCK_DIRECT_W or marked
+ as dirty
+ */
+ DBUG_ASSERT(!was_changed || (block->status & PCBLOCK_DIRECT_W) ||
+ (block->status & PCBLOCK_CHANGED));
+ if ((block->status & PCBLOCK_DIRECT_W) &&
+ (lock == PAGECACHE_LOCK_WRITE_UNLOCK))
+ {
+ if (!(block->status & PCBLOCK_CHANGED) && was_changed)
+ link_to_changed_list(pagecache, block);
+ block->status&= ~PCBLOCK_DIRECT_W;
+ DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
+ (ulong) block));
+ }
+
+ if (make_lock_and_pin(pagecache, block, lock, pin, 0))
+ DBUG_ASSERT(0); /* should not happend */
+
+ /*
+ Link the block into the LRU chain if it's the last submitted request
+ for the block and block will not be pinned.
+ See NOTE for pagecache_unlock about registering requests.
+ */
+ if (pin != PAGECACHE_PIN_LEFT_PINNED)
+ unreg_request(pagecache, block, 1);
+
+ dec_counter_for_resize_op(pagecache);
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Unpin page
+ (uses direct block/page pointer)
+
+ SYNOPSIS
+ pagecache_unpin_by_link()
+ pagecache pointer to a page cache data structure
+ link direct link to page (returned by read or write)
+ lsn if it is not LSN_IMPOSSIBLE (0) and it
+ is bigger then LSN on the page it will be written on
+ the page
+*/
+
+void pagecache_unpin_by_link(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block,
+ LSN lsn)
+{
+ DBUG_ENTER("pagecache_unpin_by_link");
+ DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu",
+ (ulong) block,
+ (uint) block->hash_link->file.file,
+ (ulong) block->hash_link->pageno));
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ /*
+ As soon as we keep lock cache can be used, and we have lock because want
+ unlock.
+ */
+ DBUG_ASSERT(pagecache->can_be_used);
+ /* we can't unpin such page without unlock */
+ DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);
+
+ inc_counter_for_resize_op(pagecache);
+
+ if (lsn != LSN_IMPOSSIBLE)
+ check_and_set_lsn(pagecache, lsn, block);
+
+ /*
+ We can just unpin only with keeping read lock because:
+ a) we can't pin without any lock
+ b) we can't unpin keeping write lock
+ */
+ if (make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_LEFT_READLOCKED,
+ PAGECACHE_UNPIN, 0))
+ DBUG_ASSERT(0); /* should not happend */
+
+ /*
+ Link the block into the LRU chain if it's the last submitted request
+ for the block and block will not be pinned.
+ See NOTE for pagecache_unlock about registering requests.
+ */
+ unreg_request(pagecache, block, 1);
+
+ dec_counter_for_resize_op(pagecache);
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ @brief Read a block of data from a cached file into a buffer;
+
+ @param pagecache pointer to a page cache data structure
+ @param file handler for the file for the block of data to be read
+ @param pageno number of the block of data in the file
+ @param level determines the weight of the data
+ @param buff buffer to where the data must be placed
+ @param type type of the page
+ @param lock lock change
+ @param link link to the page if we pin it
+
+ @return address from where the data is placed if successful, 0 - otherwise.
+
+ @note Pin will be chosen according to lock parameter (see lock_to_pin)
+*/
+static enum pagecache_page_pin lock_to_pin[2][8]=
+{
+ {
+ PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
+ PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
+ PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
+ PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/,
+ PAGECACHE_PIN /*PAGECACHE_LOCK_WRITE*/,
+ PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/,
+ PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
+ PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_TO_READ*/
+ },
+ {
+ PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
+ PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
+ PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
+ PAGECACHE_PIN /*PAGECACHE_LOCK_READ*/,
+ PAGECACHE_PIN /*PAGECACHE_LOCK_WRITE*/,
+ PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/,
+ PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
+ PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_WRITE_TO_READ*/
+ }
+};
+
+uchar *pagecache_read(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ uint level,
+ uchar *buff,
+ enum pagecache_page_type type,
+ enum pagecache_page_lock lock,
+ PAGECACHE_BLOCK_LINK **page_link)
+{
+ int error= 0;
+ enum pagecache_page_pin pin= lock_to_pin[test(buff==0)][lock];
+ PAGECACHE_BLOCK_LINK *fake_link;
+ DBUG_ENTER("pagecache_valid_read");
+ DBUG_PRINT("enter", ("fd: %u page: %lu buffer: 0x%lx level: %u "
+ "t:%s %s %s",
+ (uint) file->file, (ulong) pageno,
+ (ulong) buff, level,
+ page_cache_page_type_str[type],
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin]));
+ DBUG_ASSERT(buff != 0 || (buff == 0 && (pin == PAGECACHE_PIN ||
+ pin == PAGECACHE_PIN_LEFT_PINNED)));
+
+ if (!page_link)
+ page_link= &fake_link;
+ *page_link= 0; /* Catch errors */
+
+restart:
+
+ if (pagecache->can_be_used)
+ {
+ /* Key cache is used */
+ PAGECACHE_BLOCK_LINK *block;
+ uint status;
+ int page_st;
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ if (!pagecache->can_be_used)
+ {
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ goto no_key_cache;
+ }
+
+ inc_counter_for_resize_op(pagecache);
+ pagecache->global_cache_r_requests++;
+ /* See NOTE for pagecache_unlock about registering requests. */
+ block= find_block(pagecache, file, pageno, level,
+ test(lock == PAGECACHE_LOCK_WRITE),
+ test((pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
+ (pin == PAGECACHE_PIN)),
+ &page_st);
+ DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE ||
+ block->type == type ||
+ type == PAGECACHE_LSN_PAGE ||
+ type == PAGECACHE_READ_UNKNOWN_PAGE ||
+ block->type == PAGECACHE_READ_UNKNOWN_PAGE);
+ if (type != PAGECACHE_READ_UNKNOWN_PAGE ||
+ block->type == PAGECACHE_EMPTY_PAGE)
+ block->type= type;
+ if (((block->status & PCBLOCK_ERROR) == 0) && (page_st != PAGE_READ))
+ {
+ DBUG_PRINT("info", ("read block 0x%lx", (ulong)block));
+ /* The requested page is to be read into the block buffer */
+ read_block(pagecache, block,
+ (my_bool)(page_st == PAGE_TO_BE_READ));
+ DBUG_PRINT("info", ("read is done"));
+ }
+
+ if (make_lock_and_pin(pagecache, block, lock, pin, file))
+ {
+ /*
+ We failed to write lock the block, cache is unlocked,
+ we will try to get the block again.
+ */
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_PRINT("info", ("restarting..."));
+ goto restart;
+ }
+
+ status= block->status;
+ if (!buff)
+ {
+ buff= block->buffer;
+ /* possibly we will write here (resolved on unlock) */
+ if ((lock == PAGECACHE_LOCK_WRITE ||
+ lock == PAGECACHE_LOCK_LEFT_WRITELOCKED) &&
+ !(block->status & PCBLOCK_CHANGED))
+ {
+ block->status|= PCBLOCK_DIRECT_W;
+ DBUG_PRINT("info", ("Set PCBLOCK_DIRECT_W for block: 0x%lx",
+ (ulong) block));
+ }
+ }
+ else
+ {
+ if (!(status & PCBLOCK_ERROR))
+ {
+#if !defined(SERIALIZED_READ_FROM_CACHE)
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+#endif
+
+ DBUG_ASSERT((pagecache->block_size & 511) == 0);
+ /* Copy data from the cache buffer */
+ bmove512(buff, block->buffer, pagecache->block_size);
+
+#if !defined(SERIALIZED_READ_FROM_CACHE)
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+#endif
+ }
+ }
+
+ remove_reader(block);
+ /*
+ Link the block into the LRU chain if it's the last submitted request
+ for the block and block will not be pinned.
+ See NOTE for pagecache_unlock about registering requests.
+ */
+ if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN)
+ unreg_request(pagecache, block, 1);
+ else
+ *page_link= block;
+
+ dec_counter_for_resize_op(pagecache);
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+
+ if (status & PCBLOCK_ERROR)
+ {
+ DBUG_ASSERT(my_errno != 0);
+ DBUG_PRINT("error", ("Got error %d when doing page read", my_errno));
+ DBUG_RETURN((uchar *) 0);
+ }
+
+ DBUG_RETURN(buff);
+ }
+
+no_key_cache: /* Key cache is not used */
+
+ /* We can't use mutex here as the key cache may not be initialized */
+ pagecache->global_cache_r_requests++;
+ pagecache->global_cache_read++;
+ if (pagecache_fread(pagecache, file, (uchar*) buff, pageno,
+ pagecache->readwrite_flags))
+ error= 1;
+ DBUG_RETURN(error ? (uchar*) 0 : buff);
+}
+
+
+/*
+ Delete page from the buffer
+
+ SYNOPSIS
+ pagecache_delete()
+ pagecache pointer to a page cache data structure
+ file handler for the file for the block of data to be read
+ pageno number of the block of data in the file
+ lock lock change
+ flush flush page if it is dirty
+
+ RETURN VALUE
+ 0 - deleted or was not present at all
+ 1 - error
+
+ NOTES.
+ lock can be only PAGECACHE_LOCK_LEFT_WRITELOCKED (page was write locked
+ before) or PAGECACHE_LOCK_WRITE (delete will write lock page before delete)
+*/
+my_bool pagecache_delete(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ enum pagecache_page_lock lock,
+ my_bool flush)
+{
+ int error= 0;
+ enum pagecache_page_pin pin= lock_to_pin[0][lock];
+ DBUG_ENTER("pagecache_delete");
+ DBUG_PRINT("enter", ("fd: %u page: %lu %s %s",
+ (uint) file->file, (ulong) pageno,
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin]));
+ DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE ||
+ lock == PAGECACHE_LOCK_LEFT_WRITELOCKED);
+ DBUG_ASSERT(pin == PAGECACHE_PIN ||
+ pin == PAGECACHE_PIN_LEFT_PINNED);
+restart:
+
+ if (pagecache->can_be_used)
+ {
+ /* Key cache is used */
+ reg1 PAGECACHE_BLOCK_LINK *block;
+ PAGECACHE_HASH_LINK **unused_start, *page_link;
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ if (!pagecache->can_be_used)
+ goto end;
+
+ inc_counter_for_resize_op(pagecache);
+ page_link= get_present_hash_link(pagecache, file, pageno, &unused_start);
+ if (!page_link)
+ {
+ DBUG_PRINT("info", ("There is no such page in the cache"));
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_RETURN(0);
+ }
+ block= page_link->block;
+ /* See NOTE for pagecache_unlock about registering requests. */
+ if (pin == PAGECACHE_PIN)
+ reg_requests(pagecache, block, 1);
+ DBUG_ASSERT(block != 0);
+ if (make_lock_and_pin(pagecache, block, lock, pin, file))
+ {
+ /*
+ We failed to writelock the block, cache is unlocked, and last write
+ lock is released, we will try to get the block again.
+ */
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_PRINT("info", ("restarting..."));
+ goto restart;
+ }
+
+ /* we can't delete with opened direct link for write */
+ DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);
+
+ if (block->status & PCBLOCK_CHANGED)
+ {
+ if (flush)
+ {
+ /* The block contains a dirty page - push it out of the cache */
+
+ KEYCACHE_DBUG_PRINT("find_block", ("block is dirty"));
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ /*
+ The call is thread safe because only the current
+ thread might change the block->hash_link value
+ */
+ DBUG_ASSERT(block->pins == 1);
+ error= pagecache_fwrite(pagecache,
+ &block->hash_link->file,
+ block->buffer,
+ block->hash_link->pageno,
+ block->type,
+ pagecache->readwrite_flags);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ pagecache->global_cache_write++;
+
+ if (error)
+ {
+ block->status|= PCBLOCK_ERROR;
+ goto err;
+ }
+ }
+ pagecache->blocks_changed--;
+ pagecache->global_blocks_changed--;
+ /*
+ free_block() will change the status and rec_lsn of the block so no
+ need to change them here.
+ */
+ }
+ /* Cache is locked, so we can relese page before freeing it */
+ make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, file);
+ DBUG_ASSERT(page_link->requests > 0);
+ page_link->requests--;
+ /* See NOTE for pagecache_unlock about registering requests. */
+ free_block(pagecache, block);
+
+err:
+ dec_counter_for_resize_op(pagecache);
+end:
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ }
+
+ DBUG_RETURN(error);
+}
+
+
+my_bool pagecache_delete_pages(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ uint page_count,
+ enum pagecache_page_lock lock,
+ my_bool flush)
+{
+ ulong page_end;
+ DBUG_ENTER("pagecache_delete_pages");
+ DBUG_ASSERT(page_count > 0);
+
+ page_end= pageno + page_count;
+ do
+ {
+ if (pagecache_delete(pagecache, file, pageno,
+ lock, flush))
+ DBUG_RETURN(1);
+ } while (++pageno != page_end);
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Writes a buffer into a cached file.
+
+ @param pagecache pointer to a page cache data structure
+ @param file handler for the file to write data to
+ @param pageno number of the block of data in the file
+ @param level determines the weight of the data
+ @param buff buffer with the data
+ @param type type of the page
+ @param lock lock change
+ @param pin pin page
+ @param write_mode how to write page
+ @param link link to the page if we pin it
+ @param first_REDO_LSN_for_page the lsn to set rec_lsn
+ @param offset offset in the page
+ @param size size of data
+ @param validator read page validator
+ @param validator_data the validator data
+
+ @retval 0 if a success.
+ @retval 1 Error.
+*/
+
+/* description of how to change lock before and after write */
+struct write_lock_change
+{
+ int need_lock_change; /* need changing of lock at the end of write */
+ enum pagecache_page_lock new_lock; /* lock at the beginning */
+ enum pagecache_page_lock unlock_lock; /* lock at the end */
+};
+
+static struct write_lock_change write_lock_change_table[]=
+{
+ {1,
+ PAGECACHE_LOCK_WRITE,
+ PAGECACHE_LOCK_WRITE_UNLOCK} /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
+ {0, /*unsupported (we can't write having the block read locked) */
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
+ {0, PAGECACHE_LOCK_LEFT_WRITELOCKED, 0} /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
+ {1,
+ PAGECACHE_LOCK_WRITE,
+ PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_READ*/,
+ {0, PAGECACHE_LOCK_WRITE, 0} /*PAGECACHE_LOCK_WRITE*/,
+ {0, /*unsupported (we can't write having the block read locked) */
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_READ_UNLOCK*/,
+ {1,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ PAGECACHE_LOCK_WRITE_UNLOCK } /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
+ {1,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_WRITE_TO_READ*/
+};
+
+/* description of how to change pin before and after write */
+struct write_pin_change
+{
+ enum pagecache_page_pin new_pin; /* pin status at the beginning */
+ enum pagecache_page_pin unlock_pin; /* pin status at the end */
+};
+
+static struct write_pin_change write_pin_change_table[]=
+{
+ {PAGECACHE_PIN_LEFT_PINNED,
+ PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN_LEFT_PINNED*/,
+ {PAGECACHE_PIN,
+ PAGECACHE_UNPIN} /*PAGECACHE_PIN_LEFT_UNPINNED*/,
+ {PAGECACHE_PIN,
+ PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN*/,
+ {PAGECACHE_PIN_LEFT_PINNED,
+ PAGECACHE_UNPIN} /*PAGECACHE_UNPIN*/
+};
+
+my_bool pagecache_write_part(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ uint level,
+ uchar *buff,
+ enum pagecache_page_type type,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin,
+ enum pagecache_write_mode write_mode,
+ PAGECACHE_BLOCK_LINK **page_link,
+ LSN first_REDO_LSN_for_page,
+ uint offset, uint size)
+{
+ PAGECACHE_BLOCK_LINK *block= NULL;
+ PAGECACHE_BLOCK_LINK *fake_link;
+ int error= 0;
+ int need_lock_change= write_lock_change_table[lock].need_lock_change;
+ DBUG_ENTER("pagecache_write_part");
+ DBUG_PRINT("enter", ("fd: %u page: %lu level: %u type: %s lock: %s "
+ "pin: %s mode: %s offset: %u size %u",
+ (uint) file->file, (ulong) pageno, level,
+ page_cache_page_type_str[type],
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin],
+ page_cache_page_write_mode_str[write_mode],
+ offset, size));
+ DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE);
+ DBUG_ASSERT(lock != PAGECACHE_LOCK_LEFT_READLOCKED);
+ DBUG_ASSERT(lock != PAGECACHE_LOCK_READ_UNLOCK);
+ DBUG_ASSERT(offset + size <= pagecache->block_size);
+
+ if (!page_link)
+ page_link= &fake_link;
+ *page_link= 0;
+
+restart:
+
+#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
+ DBUG_EXECUTE("check_pagecache",
+ test_key_cache(pagecache, "start of key_cache_write", 1););
+#endif
+
+ if (pagecache->can_be_used)
+ {
+ /* Key cache is used */
+ int page_st;
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ if (!pagecache->can_be_used)
+ {
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ goto no_key_cache;
+ }
+
+ inc_counter_for_resize_op(pagecache);
+ pagecache->global_cache_w_requests++;
+ /* See NOTE for pagecache_unlock about registering requests. */
+ block= find_block(pagecache, file, pageno, level,
+ test(write_mode != PAGECACHE_WRITE_DONE &&
+ lock != PAGECACHE_LOCK_LEFT_WRITELOCKED &&
+ lock != PAGECACHE_LOCK_WRITE_UNLOCK &&
+ lock != PAGECACHE_LOCK_WRITE_TO_READ),
+ test((pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
+ (pin == PAGECACHE_PIN)),
+ &page_st);
+ if (!block)
+ {
+ DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE);
+ /* It happens only for requests submitted during resize operation */
+ dec_counter_for_resize_op(pagecache);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ /* Write to the disk key cache is in resize at the moment*/
+ goto no_key_cache;
+ }
+
+ DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE ||
+ block->type == PAGECACHE_READ_UNKNOWN_PAGE ||
+ block->type == type ||
+ (block->type == PAGECACHE_PLAIN_PAGE &&
+ type == PAGECACHE_LSN_PAGE));
+ block->type= type;
+ /* we write to the page so it has no sense to keep the flag */
+ block->status&= ~PCBLOCK_DIRECT_W;
+ DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
+ (ulong) block));
+
+ if (make_lock_and_pin(pagecache, block,
+ write_lock_change_table[lock].new_lock,
+ (need_lock_change ?
+ write_pin_change_table[pin].new_pin :
+ pin), file))
+ {
+ /*
+ We failed to writelock the block, cache is unlocked, and last write
+ lock is released, we will try to get the block again.
+ */
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_PRINT("info", ("restarting..."));
+ goto restart;
+ }
+
+ if (write_mode == PAGECACHE_WRITE_DONE)
+ {
+ if (block->status & PCBLOCK_ERROR)
+ {
+ DBUG_PRINT("warning", ("Writing on page with error"));
+ }
+ else
+ {
+ /* Copy data from buff */
+ if (!(size & 511))
+ bmove512(block->buffer + offset, buff, size);
+ else
+ memcpy(block->buffer + offset, buff, size);
+ block->status= PCBLOCK_READ;
+ /*
+ The read_callback can change the page content (removing page
+ protection) so it have to be called
+ */
+ DBUG_PRINT("info", ("read_callback: 0x%lx data: 0x%lx",
+ (ulong) block->hash_link->file.read_callback,
+ (ulong) block->hash_link->file.callback_data));
+ if ((*block->hash_link->file.read_callback)(block->buffer,
+ block->hash_link->pageno,
+ block->hash_link->
+ file.callback_data))
+ {
+ DBUG_PRINT("error", ("read callback problem"));
+ block->status|= PCBLOCK_ERROR;
+ }
+ KEYCACHE_DBUG_PRINT("key_cache_insert",
+ ("Page injection"));
+#ifdef THREAD
+ /* Signal that all pending requests for this now can be processed. */
+ if (block->wqueue[COND_FOR_REQUESTED].last_thread)
+ wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
+#endif
+ }
+ }
+ else
+ {
+ if (! (block->status & PCBLOCK_CHANGED))
+ link_to_changed_list(pagecache, block);
+
+ if (!(size & 511))
+ bmove512(block->buffer + offset, buff, size);
+ else
+ memcpy(block->buffer + offset, buff, size);
+ block->status|= PCBLOCK_READ;
+ /* Page is correct again if we made a full write in it */
+ if (size == pagecache->block_size)
+ block->status&= ~PCBLOCK_ERROR;
+ }
+
+ if (first_REDO_LSN_for_page)
+ {
+ /* single write action of the last write action */
+ DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
+ lock == PAGECACHE_LOCK_LEFT_UNLOCKED);
+ DBUG_ASSERT(pin == PAGECACHE_UNPIN ||
+ pin == PAGECACHE_PIN_LEFT_UNPINNED);
+ pagecache_set_block_rec_lsn(block, first_REDO_LSN_for_page);
+ }
+
+ if (need_lock_change)
+ {
+ /*
+ We don't set rec_lsn of the block; this is ok as for the
+ Maria-block-record's pages, we always keep pages pinned here.
+ */
+ if (make_lock_and_pin(pagecache, block,
+ write_lock_change_table[lock].unlock_lock,
+ write_pin_change_table[pin].unlock_pin, file))
+ DBUG_ASSERT(0);
+ }
+
+ /* Unregister the request */
+ DBUG_ASSERT(block->hash_link->requests > 0);
+ block->hash_link->requests--;
+ /* See NOTE for pagecache_unlock about registering requests. */
+ if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN)
+ unreg_request(pagecache, block, 1);
+ else
+ *page_link= block;
+
+ if (block->status & PCBLOCK_ERROR)
+ error= 1;
+
+ dec_counter_for_resize_op(pagecache);
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+
+ goto end;
+ }
+
+no_key_cache:
+ /* Key cache is not used */
+ if (write_mode == PAGECACHE_WRITE_DELAY)
+ {
+ pagecache->global_cache_w_requests++;
+ pagecache->global_cache_write++;
+ if (pagecache_fwrite(pagecache, file, (uchar*) buff, pageno, type,
+ pagecache->readwrite_flags))
+ error=1;
+ }
+
+end:
+#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
+ DBUG_EXECUTE("exec",
+ test_key_cache(pagecache, "end of key_cache_write", 1););
+#endif
+ if (block)
+ PCBLOCK_INFO(block);
+ else
+ DBUG_PRINT("info", ("No block"));
+ DBUG_RETURN(error);
+}
+
+
+/*
+ Free block: remove reference to it from hash table,
+ remove it from the chain file of dirty/clean blocks
+ and add it to the free list.
+*/
+
+static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
+{
+ KEYCACHE_THREAD_TRACE("free block");
+ KEYCACHE_DBUG_PRINT("free_block",
+ ("block: %u hash_link 0x%lx",
+ PCBLOCK_NUMBER(pagecache, block),
+ (long) block->hash_link));
+ if (block->hash_link)
+ {
+ /*
+ While waiting for readers to finish, new readers might request the
+ block. But since we set block->status|= PCBLOCK_REASSIGNED, they
+ will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
+ later.
+ */
+ block->status|= PCBLOCK_REASSIGNED;
+ wait_for_readers(pagecache, block);
+ unlink_hash(pagecache, block->hash_link);
+ }
+
+ unlink_changed(block);
+ DBUG_ASSERT(block->wlocks == 0);
+ DBUG_ASSERT(block->pins == 0);
+ block->status= 0;
+#ifndef DBUG_OFF
+ block->type= PAGECACHE_EMPTY_PAGE;
+#endif
+ block->rec_lsn= LSN_MAX;
+ KEYCACHE_THREAD_TRACE("free block");
+ KEYCACHE_DBUG_PRINT("free_block",
+ ("block is freed"));
+ unreg_request(pagecache, block, 0);
+ block->hash_link= NULL;
+
+ /* Remove the free block from the LRU ring. */
+ unlink_block(pagecache, block);
+ if (block->temperature == PCBLOCK_WARM)
+ pagecache->warm_blocks--;
+ block->temperature= PCBLOCK_COLD;
+ /* Insert the free block in the free list. */
+ block->next_used= pagecache->free_block_list;
+ pagecache->free_block_list= block;
+ /* Keep track of the number of currently unused blocks. */
+ pagecache->blocks_unused++;
+
+#ifdef THREAD
+ /* All pending requests for this page must be resubmitted. */
+ if (block->wqueue[COND_FOR_SAVED].last_thread)
+ wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
+#endif
+}
+
+
+static int cmp_sec_link(PAGECACHE_BLOCK_LINK **a, PAGECACHE_BLOCK_LINK **b)
+{
+ return (((*a)->hash_link->pageno < (*b)->hash_link->pageno) ? -1 :
+ ((*a)->hash_link->pageno > (*b)->hash_link->pageno) ? 1 : 0);
+}
+
+
+/**
+ @brief Flush a portion of changed blocks to disk, free used blocks
+ if requested
+
+ @param pagecache This page cache reference.
+ @param file File which should be flushed
+ @param cache Beginning of array of the block.
+ @param end Reference to the block after last in the array.
+ @param flush_type Type of the flush.
+ @param first_errno Where to store first errno of the flush.
+
+
+ @return Operation status
+ @retval PCFLUSH_OK OK
+ @retval PCFLUSH_ERROR There was errors during the flush process.
+ @retval PCFLUSH_PINNED Pinned blocks was met and skipped.
+ @retval PCFLUSH_PINNED_AND_ERROR PCFLUSH_ERROR and PCFLUSH_PINNED.
+*/
+
+static int flush_cached_blocks(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ PAGECACHE_BLOCK_LINK **cache,
+ PAGECACHE_BLOCK_LINK **end,
+ enum flush_type type,
+ int *first_errno)
+{
+ int rc= PCFLUSH_OK;
+ int error;
+ uint count= (uint) (end-cache);
+ DBUG_ENTER("flush_cached_blocks");
+ *first_errno= 0;
+
+ /* Don't lock the cache during the flush */
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ /*
+ As all blocks referred in 'cache' are marked by PCBLOCK_IN_FLUSH
+ we are guaranteed that no thread will change them
+ */
+ qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ for (; cache != end; cache++)
+ {
+ PAGECACHE_BLOCK_LINK *block= *cache;
+
+ if (block->pins)
+ {
+ KEYCACHE_DBUG_PRINT("flush_cached_blocks",
+ ("block: %u (0x%lx) pinned",
+ PCBLOCK_NUMBER(pagecache, block), (ulong)block));
+ DBUG_PRINT("info", ("block: %u (0x%lx) pinned",
+ PCBLOCK_NUMBER(pagecache, block), (ulong)block));
+ PCBLOCK_INFO(block);
+ /* undo the mark put by flush_pagecache_blocks_int(): */
+ block->status&= ~PCBLOCK_IN_FLUSH;
+ rc|= PCFLUSH_PINNED;
+ DBUG_PRINT("warning", ("Page pinned"));
+ unreg_request(pagecache, block, 1);
+ if (!*first_errno)
+ *first_errno= HA_ERR_INTERNAL_ERROR;
+ continue;
+ }
+ /* if the block is not pinned then it is not write locked */
+ DBUG_ASSERT(block->wlocks == 0);
+ DBUG_ASSERT(block->pins == 0);
+ if (make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_WRITE, PAGECACHE_PIN, 0))
+ DBUG_ASSERT(0);
+
+ KEYCACHE_DBUG_PRINT("flush_cached_blocks",
+ ("block: %u (0x%lx) to be flushed",
+ PCBLOCK_NUMBER(pagecache, block), (ulong)block));
+ DBUG_PRINT("info", ("block: %u (0x%lx) to be flushed",
+ PCBLOCK_NUMBER(pagecache, block), (ulong)block));
+ PCBLOCK_INFO(block);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_PRINT("info", ("block: %u (0x%lx) pins: %u",
+ PCBLOCK_NUMBER(pagecache, block), (ulong)block,
+ block->pins));
+ DBUG_ASSERT(block->pins == 1);
+ /**
+ @todo If page is contiguous with next page to flush, group flushes in
+ one single my_pwrite().
+ */
+ error= pagecache_fwrite(pagecache, &block->hash_link->file,
+ block->buffer,
+ block->hash_link->pageno,
+ block->type,
+ pagecache->readwrite_flags);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+
+ make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, 0);
+
+ pagecache->global_cache_write++;
+ if (error)
+ {
+ block->status|= PCBLOCK_ERROR;
+ if (!*first_errno)
+ *first_errno= my_errno ? my_errno : -1;
+ rc|= PCFLUSH_ERROR;
+ }
+#ifdef THREAD
+ /*
+ Let to proceed for possible waiting requests to write to the block page.
+ It might happen only during an operation to resize the key cache.
+ */
+ if (block->wqueue[COND_FOR_SAVED].last_thread)
+ wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
+#endif
+ /* type will never be FLUSH_IGNORE_CHANGED here */
+ if (! (type == FLUSH_KEEP || type == FLUSH_KEEP_LAZY ||
+ type == FLUSH_FORCE_WRITE))
+ {
+ pagecache->blocks_changed--;
+ pagecache->global_blocks_changed--;
+ free_block(pagecache, block);
+ }
+ else
+ {
+ block->status&= ~PCBLOCK_IN_FLUSH;
+ link_to_file_list(pagecache, block, file, 1);
+ unreg_request(pagecache, block, 1);
+ }
+ }
+ DBUG_RETURN(rc);
+}
+
+
+/**
+ @brief flush all blocks for a file to disk but don't do any mutex locks
+
+ @param pagecache pointer to a pagecache data structure
+ @param file handler for the file to flush to
+ @param flush_type type of the flush
+ @param filter optional function which tells what blocks to flush;
+ can be non-NULL only if FLUSH_KEEP, FLUSH_KEEP_LAZY
+ or FLUSH_FORCE_WRITE.
+ @param filter_arg an argument to pass to 'filter'. Information about
+ the block will be passed too.
+
+ @note
+ This function doesn't do any mutex locks because it needs to be called
+ both from flush_pagecache_blocks and flush_all_key_blocks (the later one
+ does the mutex lock in the resize_pagecache() function).
+
+ @note
+ This function can cause problems if two threads call it
+ concurrently on the same file (look for "PageCacheFlushConcurrencyBugs"
+ in ma_checkpoint.c); to avoid them, it has internal logic to serialize in
+ this situation.
+
+ @return Operation status
+ @retval PCFLUSH_OK OK
+ @retval PCFLUSH_ERROR There was errors during the flush process.
+ @retval PCFLUSH_PINNED Pinned blocks was met and skipped.
+ @retval PCFLUSH_PINNED_AND_ERROR PCFLUSH_ERROR and PCFLUSH_PINNED.
+*/
+
+static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ enum flush_type type,
+ PAGECACHE_FLUSH_FILTER filter,
+ void *filter_arg)
+{
+ PAGECACHE_BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
+ int last_errno= 0;
+ int rc= PCFLUSH_OK;
+ DBUG_ENTER("flush_pagecache_blocks_int");
+ DBUG_PRINT("enter",
+ ("fd: %d blocks_used: %lu blocks_changed: %lu type: %d",
+ file->file, pagecache->blocks_used, pagecache->blocks_changed,
+ type));
+
+#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
+ DBUG_EXECUTE("check_pagecache",
+ test_key_cache(pagecache,
+ "start of flush_pagecache_blocks", 0););
+#endif
+
+ cache= cache_buff;
+ if (pagecache->disk_blocks > 0 &&
+ (!my_disable_flush_pagecache_blocks ||
+ (type != FLUSH_KEEP && type != FLUSH_KEEP_LAZY)))
+ {
+ /*
+ Key cache exists. If my_disable_flush_pagecache_blocks is true it
+ disables the operation but only FLUSH_KEEP[_LAZY]: other flushes still
+ need to be allowed: FLUSH_RELEASE has to free blocks, and
+ FLUSH_FORCE_WRITE is to overrule my_disable_flush_pagecache_blocks.
+ */
+ int error= 0;
+ uint count= 0;
+ PAGECACHE_BLOCK_LINK **pos, **end;
+ PAGECACHE_BLOCK_LINK *first_in_switch= NULL;
+ PAGECACHE_BLOCK_LINK *block, *next;
+#if defined(PAGECACHE_DEBUG)
+ uint cnt= 0;
+#endif
+
+#ifdef THREAD
+ struct st_file_in_flush us_flusher, *other_flusher;
+ us_flusher.file= file->file;
+ us_flusher.flush_queue.last_thread= NULL;
+ us_flusher.first_in_switch= FALSE;
+ while ((other_flusher= (struct st_file_in_flush *)
+ hash_search(&pagecache->files_in_flush, (uchar *)&file->file,
+ sizeof(file->file))))
+ {
+ /*
+ File is in flush already: wait, unless FLUSH_KEEP_LAZY. "Flusher"
+ means "who can mark PCBLOCK_IN_FLUSH", i.e. caller of
+ flush_pagecache_blocks_int().
+ */
+ struct st_my_thread_var *thread;
+ if (type == FLUSH_KEEP_LAZY)
+ {
+ DBUG_PRINT("info",("FLUSH_KEEP_LAZY skips"));
+ DBUG_RETURN(0);
+ }
+ thread= my_thread_var;
+ wqueue_add_to_queue(&other_flusher->flush_queue, thread);
+ do
+ {
+ KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait1",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while (thread->next);
+ }
+ /* we are the only flusher of this file now */
+ while (my_hash_insert(&pagecache->files_in_flush, (uchar *)&us_flusher))
+ {
+ /*
+ Out of memory, wait for flushers to empty the hash and retry; should
+ rarely happen. Other threads are flushing the file; when done, they
+ are going to remove themselves from the hash, and thus memory will
+ appear again. However, this memory may be stolen by yet another thread
+ (for a purpose unrelated to page cache), before we retry
+ hash_insert(). So the loop may run for long. Only if the thread was
+ killed do we abort the loop, returning 1 (error) which can cause the
+ table to be marked as corrupted (cf maria_chk_size(), maria_close())
+ and thus require a table check.
+ */
+ DBUG_ASSERT(0);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ if (my_thread_var->abort)
+ DBUG_RETURN(1); /* End if aborted by user */
+ sleep(10);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ }
+#endif
+
+ if (type != FLUSH_IGNORE_CHANGED)
+ {
+ /*
+ Count how many key blocks we have to cache to be able
+ to flush all dirty pages with minimum seek moves.
+ */
+ for (block= pagecache->changed_blocks[FILE_HASH(*file)] ;
+ block;
+ block= block->next_changed)
+ {
+ if (block->hash_link->file.file == file->file)
+ {
+ count++;
+ KEYCACHE_DBUG_ASSERT(count<= pagecache->blocks_used);
+ }
+ }
+ /* Allocate a new buffer only if its bigger than the one we have */
+ if (count > FLUSH_CACHE &&
+ !(cache=
+ (PAGECACHE_BLOCK_LINK**)
+ my_malloc(sizeof(PAGECACHE_BLOCK_LINK*)*count, MYF(0))))
+ {
+ cache= cache_buff;
+ count= FLUSH_CACHE;
+ }
+ }
+
+ /* Retrieve the blocks and write them to a buffer to be flushed */
+restart:
+ end= (pos= cache)+count;
+ for (block= pagecache->changed_blocks[FILE_HASH(*file)] ;
+ block;
+ block= next)
+ {
+#if defined(PAGECACHE_DEBUG)
+ cnt++;
+ KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
+#endif
+ next= block->next_changed;
+ if (block->hash_link->file.file != file->file)
+ continue;
+ if (filter != NULL)
+ {
+ int filter_res= (*filter)(block->type, block->hash_link->pageno,
+ block->rec_lsn, filter_arg);
+ DBUG_PRINT("info",("filter returned %d", filter_res));
+ if (filter_res == FLUSH_FILTER_SKIP_TRY_NEXT)
+ continue;
+ if (filter_res == FLUSH_FILTER_SKIP_ALL)
+ break;
+ DBUG_ASSERT(filter_res == FLUSH_FILTER_OK);
+ }
+ {
+ /*
+ Mark the block with BLOCK_IN_FLUSH in order not to let
+ other threads to use it for new pages and interfere with
+ our sequence of flushing dirty file pages
+ */
+ block->status|= PCBLOCK_IN_FLUSH;
+
+ if (! (block->status & PCBLOCK_IN_SWITCH))
+ {
+ /*
+ We care only for the blocks for which flushing was not
+ initiated by other threads as a result of page swapping
+ */
+ reg_requests(pagecache, block, 1);
+ if (type != FLUSH_IGNORE_CHANGED)
+ {
+ /* It's not a temporary file */
+ if (pos == end)
+ {
+ /*
+ This happens only if there is not enough
+ memory for the big block
+ */
+ if ((rc|= flush_cached_blocks(pagecache, file, cache,
+ end, type, &error)) &
+ PCFLUSH_ERROR)
+ last_errno=error;
+ DBUG_PRINT("info", ("restarting..."));
+ /*
+ Restart the scan as some other thread might have changed
+ the changed blocks chain: the blocks that were in switch
+ state before the flush started have to be excluded
+ */
+ goto restart;
+ }
+ *pos++= block;
+ }
+ else
+ {
+ /* It's a temporary file */
+ pagecache->blocks_changed--;
+ pagecache->global_blocks_changed--;
+ free_block(pagecache, block);
+ }
+ }
+ else if (type != FLUSH_KEEP_LAZY)
+ {
+ /*
+ Link the block into a list of blocks 'in switch', and then we will
+ wait for this list to be empty, which means they have been flushed
+ */
+ unlink_changed(block);
+ link_changed(block, &first_in_switch);
+ us_flusher.first_in_switch= TRUE;
+ }
+ }
+ }
+ if (pos != cache)
+ {
+ if ((rc|= flush_cached_blocks(pagecache, file, cache, pos, type,
+ &error)) & PCFLUSH_ERROR)
+ last_errno= error;
+ }
+ /* Wait until list of blocks in switch is empty */
+ while (first_in_switch)
+ {
+#if defined(PAGECACHE_DEBUG)
+ cnt= 0;
+#endif
+ block= first_in_switch;
+ {
+#ifdef THREAD
+ struct st_my_thread_var *thread= my_thread_var;
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
+ do
+ {
+ KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait2",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while (thread->next);
+#else
+ KEYCACHE_DBUG_ASSERT(0);
+ /* No parallel requests in single-threaded case */
+#endif
+ }
+#if defined(PAGECACHE_DEBUG)
+ cnt++;
+ KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
+#endif
+ }
+ us_flusher.first_in_switch= FALSE;
+ /* The following happens very seldom */
+ if (! (type == FLUSH_KEEP || type == FLUSH_KEEP_LAZY ||
+ type == FLUSH_FORCE_WRITE))
+ {
+ /*
+ this code would free all blocks while filter maybe handled only a
+ few, that is not possible.
+ */
+ DBUG_ASSERT(filter == NULL);
+#if defined(PAGECACHE_DEBUG)
+ cnt=0;
+#endif
+ for (block= pagecache->file_blocks[FILE_HASH(*file)] ;
+ block;
+ block= next)
+ {
+#if defined(PAGECACHE_DEBUG)
+ cnt++;
+ KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
+#endif
+ next= block->next_changed;
+ if (block->hash_link->file.file == file->file &&
+ (! (block->status & PCBLOCK_CHANGED)
+ || type == FLUSH_IGNORE_CHANGED))
+ {
+ reg_requests(pagecache, block, 1);
+ free_block(pagecache, block);
+ }
+ }
+ }
+#ifdef THREAD
+ /* wake up others waiting to flush this file */
+ hash_delete(&pagecache->files_in_flush, (uchar *)&us_flusher);
+ if (us_flusher.flush_queue.last_thread)
+ wqueue_release_queue(&us_flusher.flush_queue);
+#endif
+ }
+
+#ifndef DBUG_OFF
+ DBUG_EXECUTE("check_pagecache",
+ test_key_cache(pagecache, "end of flush_pagecache_blocks", 0););
+#endif
+ if (cache != cache_buff)
+ my_free((uchar*) cache, MYF(0));
+ if (rc != 0)
+ {
+ if (last_errno)
+ my_errno= last_errno; /* Return first error */
+ DBUG_PRINT("error", ("Got error: %d", my_errno));
+ }
+ DBUG_RETURN(rc);
+}
+
+
+/**
+ @brief flush all blocks for a file to disk
+
+ @param pagecache pointer to a pagecache data structure
+ @param file handler for the file to flush to
+ @param flush_type type of the flush
+ @param filter optional function which tells what blocks to flush;
+ can be non-NULL only if FLUSH_KEEP, FLUSH_KEEP_LAZY
+ or FLUSH_FORCE_WRITE.
+ @param filter_arg an argument to pass to 'filter'. Information about
+ the block will be passed too.
+
+ @return Operation status
+ @retval PCFLUSH_OK OK
+ @retval PCFLUSH_ERROR There was errors during the flush process.
+ @retval PCFLUSH_PINNED Pinned blocks was met and skipped.
+ @retval PCFLUSH_PINNED_AND_ERROR PCFLUSH_ERROR and PCFLUSH_PINNED.
+*/
+
+int flush_pagecache_blocks_with_filter(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ enum flush_type type,
+ PAGECACHE_FLUSH_FILTER filter,
+ void *filter_arg)
+{
+ int res;
+ DBUG_ENTER("flush_pagecache_blocks");
+ DBUG_PRINT("enter", ("pagecache: 0x%lx", (long) pagecache));
+
+ if (pagecache->disk_blocks <= 0)
+ DBUG_RETURN(0);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ inc_counter_for_resize_op(pagecache);
+ res= flush_pagecache_blocks_int(pagecache, file, type, filter, filter_arg);
+ dec_counter_for_resize_op(pagecache);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Reset the counters of a key cache.
+
+ SYNOPSIS
+ reset_pagecache_counters()
+ name the name of a key cache
+ pagecache pointer to the pagecache to be reset
+
+ DESCRIPTION
+ This procedure is used to reset the counters of all currently used key
+ caches, both the default one and the named ones.
+
+ RETURN
+ 0 on success (always because it can't fail)
+*/
+
+int reset_pagecache_counters(const char *name __attribute__((unused)),
+ PAGECACHE *pagecache)
+{
+ DBUG_ENTER("reset_pagecache_counters");
+ if (!pagecache->inited)
+ {
+ DBUG_PRINT("info", ("Key cache %s not initialized.", name));
+ DBUG_RETURN(0);
+ }
+ DBUG_PRINT("info", ("Resetting counters for key cache %s.", name));
+
+ pagecache->global_blocks_changed= 0; /* Key_blocks_not_flushed */
+ pagecache->global_cache_r_requests= 0; /* Key_read_requests */
+ pagecache->global_cache_read= 0; /* Key_reads */
+ pagecache->global_cache_w_requests= 0; /* Key_write_requests */
+ pagecache->global_cache_write= 0; /* Key_writes */
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Allocates a buffer and stores in it some info about all dirty pages
+
+ Does the allocation because the caller cannot know the size itself.
+ Memory freeing is to be done by the caller (if the "str" member of the
+ LEX_STRING is not NULL).
+ Ignores all pages of another type than PAGECACHE_LSN_PAGE, because they
+ are not interesting for a checkpoint record.
+ The caller has the intention of doing checkpoints.
+
+ @param pagecache pointer to the page cache
+ @param[out] str pointer to where the allocated buffer, and
+ its size, will be put
+ @param[out] min_rec_lsn pointer to where the minimum rec_lsn of all
+ relevant dirty pages will be put
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
+ LEX_STRING *str,
+ LSN *min_rec_lsn)
+{
+ my_bool error= 0;
+ ulong stored_list_size= 0;
+ uint file_hash;
+ char *ptr;
+ LSN minimum_rec_lsn= LSN_MAX;
+ DBUG_ENTER("pagecache_collect_changed_blocks_with_LSN");
+
+ DBUG_ASSERT(NULL == str->str);
+ /*
+ We lock the entire cache but will be quick, just reading/writing a few MBs
+ of memory at most.
+ */
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+#ifdef THREAD
+ for (;;)
+ {
+ struct st_file_in_flush *other_flusher;
+ for (file_hash= 0;
+ (other_flusher= (struct st_file_in_flush *)
+ hash_element(&pagecache->files_in_flush, file_hash)) != NULL &&
+ !other_flusher->first_in_switch;
+ file_hash++)
+ {}
+ if (other_flusher == NULL)
+ break;
+ /*
+ other_flusher.first_in_switch is true: some thread is flushing a file
+ and has removed dirty blocks from changed_blocks[] while they were still
+ dirty (they were being evicted (=>flushed) by yet another thread, which
+ may not have flushed the block yet so it may still be dirty).
+ If Checkpoint proceeds now, it will not see the page. If there is a
+ crash right after writing the checkpoint record, before the page is
+ flushed, at recovery the page will be wrongly ignored because it won't
+ be in the dirty pages list in the checkpoint record. So wait.
+ */
+ {
+ struct st_my_thread_var *thread= my_thread_var;
+ wqueue_add_to_queue(&other_flusher->flush_queue, thread);
+ do
+ {
+ KEYCACHE_DBUG_PRINT("pagecache_collect_çhanged_blocks_with_lsn: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while (thread->next);
+ }
+ }
+#endif
+
+ /* Count how many dirty pages are interesting */
+ for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++)
+ {
+ PAGECACHE_BLOCK_LINK *block;
+ for (block= pagecache->changed_blocks[file_hash] ;
+ block;
+ block= block->next_changed)
+ {
+ /*
+ Q: is there something subtle with block->hash_link: can it be NULL?
+ does it have to be == hash_link->block... ?
+ */
+ DBUG_ASSERT(block->hash_link != NULL);
+ DBUG_ASSERT(block->status & PCBLOCK_CHANGED);
+ if (block->type != PAGECACHE_LSN_PAGE)
+ continue; /* no need to store it */
+ stored_list_size++;
+ }
+ }
+
+ compile_time_assert(sizeof(pagecache->blocks) <= 8);
+ str->length= 8 + /* number of dirty pages */
+ (4 + /* file */
+ 4 + /* pageno */
+ LSN_STORE_SIZE /* rec_lsn */
+ ) * stored_list_size;
+ if (NULL == (str->str= my_malloc(str->length, MYF(MY_WME))))
+ goto err;
+ ptr= str->str;
+ int8store(ptr, (ulonglong)stored_list_size);
+ ptr+= 8;
+ if (!stored_list_size)
+ goto end;
+ for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++)
+ {
+ PAGECACHE_BLOCK_LINK *block;
+ for (block= pagecache->changed_blocks[file_hash] ;
+ block;
+ block= block->next_changed)
+ {
+ if (block->type != PAGECACHE_LSN_PAGE)
+ continue; /* no need to store it in the checkpoint record */
+ compile_time_assert(sizeof(block->hash_link->file.file) <= 4);
+ compile_time_assert(sizeof(block->hash_link->pageno) <= 4);
+ /**
+ @todo RECOVERY when we have a pointer to MARIA_SHARE, store share->id
+ instead of this file.
+ */
+ int4store(ptr, block->hash_link->file.file);
+ ptr+= 4;
+ int4store(ptr, block->hash_link->pageno);
+ ptr+= 4;
+ lsn_store(ptr, block->rec_lsn);
+ ptr+= LSN_STORE_SIZE;
+ if (block->rec_lsn != LSN_MAX)
+ {
+ DBUG_ASSERT(LSN_VALID(block->rec_lsn));
+ if (cmp_translog_addr(block->rec_lsn, minimum_rec_lsn) < 0)
+ minimum_rec_lsn= block->rec_lsn;
+ } /* otherwise, some trn->rec_lsn should hold the correct info */
+ }
+ }
+end:
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ *min_rec_lsn= minimum_rec_lsn;
+ DBUG_RETURN(error);
+
+err:
+ error= 1;
+ goto end;
+}
+
+
+#ifndef DBUG_OFF
+/*
+ Test if disk-cache is ok
+*/
+static void test_key_cache(PAGECACHE *pagecache __attribute__((unused)),
+ const char *where __attribute__((unused)),
+ my_bool lock __attribute__((unused)))
+{
+ /* TODO */
+}
+#endif
+
+uchar *pagecache_block_link_to_buffer(PAGECACHE_BLOCK_LINK *block)
+{
+ return block->buffer;
+}
+
+#if defined(PAGECACHE_TIMEOUT)
+
+#define KEYCACHE_DUMP_FILE "pagecache_dump.txt"
+#define MAX_QUEUE_LEN 100
+
+
+static void pagecache_dump(PAGECACHE *pagecache)
+{
+ FILE *pagecache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
+ struct st_my_thread_var *last;
+ struct st_my_thread_var *thread;
+ PAGECACHE_BLOCK_LINK *block;
+ PAGECACHE_HASH_LINK *hash_link;
+ PAGECACHE_PAGE *page;
+ uint i;
+
+ fprintf(pagecache_dump_file, "thread:%u\n", thread->id);
+
+ i=0;
+ thread=last=waiting_for_hash_link.last_thread;
+ fprintf(pagecache_dump_file, "queue of threads waiting for hash link\n");
+ if (thread)
+ do
+ {
+ thread= thread->next;
+ page= (PAGECACHE_PAGE *) thread->opt_info;
+ fprintf(pagecache_dump_file,
+ "thread:%u, (file,pageno)=(%u,%lu)\n",
+ thread->id,(uint) page->file.file,(ulong) page->pageno);
+ if (++i == MAX_QUEUE_LEN)
+ break;
+ }
+ while (thread != last);
+
+ i=0;
+ thread=last=waiting_for_block.last_thread;
+ fprintf(pagecache_dump_file, "queue of threads waiting for block\n");
+ if (thread)
+ do
+ {
+ thread=thread->next;
+ hash_link= (PAGECACHE_HASH_LINK *) thread->opt_info;
+ fprintf(pagecache_dump_file,
+ "thread:%u hash_link:%u (file,pageno)=(%u,%lu)\n",
+ thread->id, (uint) PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link),
+ (uint) hash_link->file.file,(ulong) hash_link->pageno);
+ if (++i == MAX_QUEUE_LEN)
+ break;
+ }
+ while (thread != last);
+
+ for (i=0 ; i < pagecache->blocks_used ; i++)
+ {
+ int j;
+ block= &pagecache->block_root[i];
+ hash_link= block->hash_link;
+ fprintf(pagecache_dump_file,
+ "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n",
+ i, (int) (hash_link ?
+ PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link) :
+ -1),
+ block->status, block->requests, block->condvar ? 1 : 0);
+ for (j=0 ; j < COND_SIZE; j++)
+ {
+ PAGECACHE_WQUEUE *wqueue=&block->wqueue[j];
+ thread= last= wqueue->last_thread;
+ fprintf(pagecache_dump_file, "queue #%d\n", j);
+ if (thread)
+ {
+ do
+ {
+ thread=thread->next;
+ fprintf(pagecache_dump_file,
+ "thread:%u\n", thread->id);
+ if (++i == MAX_QUEUE_LEN)
+ break;
+ }
+ while (thread != last);
+ }
+ }
+ }
+ fprintf(pagecache_dump_file, "LRU chain:");
+ block= pagecache= used_last;
+ if (block)
+ {
+ do
+ {
+ block= block->next_used;
+ fprintf(pagecache_dump_file,
+ "block:%u, ", PCBLOCK_NUMBER(pagecache, block));
+ }
+ while (block != pagecache->used_last);
+ }
+ fprintf(pagecache_dump_file, "\n");
+
+ fclose(pagecache_dump_file);
+}
+
+#endif /* defined(PAGECACHE_TIMEOUT) */
+
+#if defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)
+
+
+static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
+ pthread_mutex_t *mutex)
+{
+ int rc;
+ struct timeval now; /* time when we started waiting */
+ struct timespec timeout; /* timeout value for the wait function */
+ struct timezone tz;
+#if defined(PAGECACHE_DEBUG)
+ int cnt=0;
+#endif
+
+ /* Get current time */
+ gettimeofday(&now, &tz);
+ /* Prepare timeout value */
+ timeout.tv_sec= now.tv_sec + PAGECACHE_TIMEOUT;
+ /*
+ timeval uses microseconds.
+ timespec uses nanoseconds.
+ 1 nanosecond = 1000 micro seconds
+ */
+ timeout.tv_nsec= now.tv_usec * 1000;
+ KEYCACHE_THREAD_TRACE_END("started waiting");
+#if defined(PAGECACHE_DEBUG)
+ cnt++;
+ if (cnt % 100 == 0)
+ fprintf(pagecache_debug_log, "waiting...\n");
+ fflush(pagecache_debug_log);
+#endif
+ rc= pthread_cond_timedwait(cond, mutex, &timeout);
+ KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
+ if (rc == ETIMEDOUT || rc == ETIME)
+ {
+#if defined(PAGECACHE_DEBUG)
+ fprintf(pagecache_debug_log,"aborted by pagecache timeout\n");
+ fclose(pagecache_debug_log);
+ abort();
+#endif
+ pagecache_dump();
+ }
+
+#if defined(PAGECACHE_DEBUG)
+ KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT);
+#else
+ assert(rc != ETIMEDOUT);
+#endif
+ return rc;
+}
+#else
+#if defined(PAGECACHE_DEBUG)
+static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
+ pthread_mutex_t *mutex)
+{
+ int rc;
+ KEYCACHE_THREAD_TRACE_END("started waiting");
+ rc= pthread_cond_wait(cond, mutex);
+ KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
+ return rc;
+}
+#endif
+#endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */
+
+#if defined(PAGECACHE_DEBUG)
+static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex)
+{
+ int rc;
+ rc= pthread_mutex_lock(mutex);
+ KEYCACHE_THREAD_TRACE_BEGIN("");
+ return rc;
+}
+
+
+static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex)
+{
+ KEYCACHE_THREAD_TRACE_END("");
+ pthread_mutex_unlock(mutex);
+}
+
+
+static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond)
+{
+ int rc;
+ KEYCACHE_THREAD_TRACE("signal");
+ rc= pthread_cond_signal(cond);
+ return rc;
+}
+
+
+#if defined(PAGECACHE_DEBUG_LOG)
+
+
+static void pagecache_debug_print(const char * fmt, ...)
+{
+ va_list args;
+ va_start(args,fmt);
+ if (pagecache_debug_log)
+ {
+ VOID(vfprintf(pagecache_debug_log, fmt, args));
+ VOID(fputc('\n',pagecache_debug_log));
+ }
+ va_end(args);
+}
+#endif /* defined(PAGECACHE_DEBUG_LOG) */
+
+#if defined(PAGECACHE_DEBUG_LOG)
+
+
+void pagecache_debug_log_close(void)
+{
+ if (pagecache_debug_log)
+ fclose(pagecache_debug_log);
+}
+#endif /* defined(PAGECACHE_DEBUG_LOG) */
+
+#endif /* defined(PAGECACHE_DEBUG) */
diff --git a/storage/maria/ma_pagecache.h b/storage/maria/ma_pagecache.h
new file mode 100644
index 00000000000..88130bffb73
--- /dev/null
+++ b/storage/maria/ma_pagecache.h
@@ -0,0 +1,307 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Page cache variable structures */
+
+#ifndef _ma_pagecache_h
+#define _ma_pagecache_h
+C_MODE_START
+
+#include "ma_loghandler_lsn.h"
+#include <m_string.h>
+#include <hash.h>
+
+/* Type of the page */
+enum pagecache_page_type
+{
+ /*
+ Used only for control page type changing during debugging. This define
+ should only be using when using DBUG.
+ */
+ PAGECACHE_EMPTY_PAGE,
+ /* the page does not contain LSN */
+ PAGECACHE_PLAIN_PAGE,
+ /* the page contain LSN (maria tablespace page) */
+ PAGECACHE_LSN_PAGE,
+ /* Page type used when scanning file and we don't care about the type */
+ PAGECACHE_READ_UNKNOWN_PAGE
+};
+
+/*
+ This enum describe lock status changing. every type of page cache will
+ interpret WRITE/READ lock as it need.
+*/
+enum pagecache_page_lock
+{
+ PAGECACHE_LOCK_LEFT_UNLOCKED, /* free -> free */
+ PAGECACHE_LOCK_LEFT_READLOCKED, /* read -> read */
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, /* write -> write */
+ PAGECACHE_LOCK_READ, /* free -> read */
+ PAGECACHE_LOCK_WRITE, /* free -> write */
+ PAGECACHE_LOCK_READ_UNLOCK, /* read -> free */
+ PAGECACHE_LOCK_WRITE_UNLOCK, /* write -> free */
+ PAGECACHE_LOCK_WRITE_TO_READ /* write -> read */
+};
+/*
+ This enum describe pin status changing
+*/
+enum pagecache_page_pin
+{
+ PAGECACHE_PIN_LEFT_PINNED, /* pinned -> pinned */
+ PAGECACHE_PIN_LEFT_UNPINNED, /* unpinned -> unpinned */
+ PAGECACHE_PIN, /* unpinned -> pinned */
+ PAGECACHE_UNPIN /* pinned -> unpinned */
+};
+/* How to write the page */
+enum pagecache_write_mode
+{
+ /* do not write immediately, i.e. it will be dirty page */
+ PAGECACHE_WRITE_DELAY,
+ /* page already is in the file. (key cache insert analogue) */
+ PAGECACHE_WRITE_DONE
+};
+
+/* page number for maria */
+typedef uint32 pgcache_page_no_t;
+
+/* file descriptor for Maria */
+typedef struct st_pagecache_file
+{
+ File file;
+ my_bool (*read_callback)(uchar *page, pgcache_page_no_t offset,
+ uchar *data);
+ my_bool (*write_callback)(uchar *page, pgcache_page_no_t offset,
+ uchar *data);
+ void (*write_fail)(uchar *data);
+ uchar *callback_data;
+} PAGECACHE_FILE;
+
+/* declare structures that is used by st_pagecache */
+
+struct st_pagecache_block_link;
+typedef struct st_pagecache_block_link PAGECACHE_BLOCK_LINK;
+struct st_pagecache_page;
+typedef struct st_pagecache_page PAGECACHE_PAGE;
+struct st_pagecache_hash_link;
+typedef struct st_pagecache_hash_link PAGECACHE_HASH_LINK;
+
+#include <wqueue.h>
+
+#define PAGECACHE_CHANGED_BLOCKS_HASH 128 /* must be power of 2 */
+#define PAGECACHE_PRIORITY_LOW 0
+#define PAGECACHE_PRIORITY_DEFAULT 3
+#define PAGECACHE_PRIORITY_HIGH 6
+
+/*
+ The page cache structure
+ It also contains read-only statistics parameters.
+*/
+
+typedef struct st_pagecache
+{
+ size_t mem_size; /* specified size of the cache memory */
+ ulong min_warm_blocks; /* min number of warm blocks; */
+ ulong age_threshold; /* age threshold for hot blocks */
+ ulonglong time; /* total number of block link operations */
+ ulong hash_entries; /* max number of entries in the hash table */
+ long hash_links; /* max number of hash links */
+ long hash_links_used; /* number of hash links taken from free links pool */
+ long disk_blocks; /* max number of blocks in the cache */
+ ulong blocks_used; /* maximum number of concurrently used blocks */
+ ulong blocks_unused; /* number of currently unused blocks */
+ ulong blocks_changed; /* number of currently dirty blocks */
+ ulong warm_blocks; /* number of blocks in warm sub-chain */
+ ulong cnt_for_resize_op; /* counter to block resize operation */
+ ulong blocks_available; /* number of blocks available in the LRU chain */
+ long blocks; /* max number of blocks in the cache */
+ uint32 block_size; /* size of the page buffer of a cache block */
+ PAGECACHE_HASH_LINK **hash_root;/* arr. of entries into hash table buckets */
+ PAGECACHE_HASH_LINK *hash_link_root;/* memory for hash table links */
+ PAGECACHE_HASH_LINK *free_hash_list;/* list of free hash links */
+ PAGECACHE_BLOCK_LINK *free_block_list;/* list of free blocks */
+ PAGECACHE_BLOCK_LINK *block_root;/* memory for block links */
+ uchar HUGE_PTR *block_mem; /* memory for block buffers */
+ PAGECACHE_BLOCK_LINK *used_last;/* ptr to the last block of the LRU chain */
+ PAGECACHE_BLOCK_LINK *used_ins;/* ptr to the insertion block in LRU chain */
+ pthread_mutex_t cache_lock; /* to lock access to the cache structure */
+ WQUEUE resize_queue; /* threads waiting during resize operation */
+ WQUEUE waiting_for_hash_link;/* waiting for a free hash link */
+ WQUEUE waiting_for_block; /* requests waiting for a free block */
+ /* hash for dirty file bl.*/
+ PAGECACHE_BLOCK_LINK *changed_blocks[PAGECACHE_CHANGED_BLOCKS_HASH];
+ /* hash for other file bl.*/
+ PAGECACHE_BLOCK_LINK *file_blocks[PAGECACHE_CHANGED_BLOCKS_HASH];
+
+ /*
+ The following variables are and variables used to hold parameters for
+ initializing the key cache.
+ */
+
+ ulonglong param_buff_size; /* size the memory allocated for the cache */
+ ulong param_block_size; /* size of the blocks in the key cache */
+ ulong param_division_limit; /* min. percentage of warm blocks */
+ ulong param_age_threshold; /* determines when hot block is downgraded */
+
+ /* Statistics variables. These are reset in reset_pagecache_counters(). */
+ ulong global_blocks_changed; /* number of currently dirty blocks */
+ ulonglong global_cache_w_requests;/* number of write requests (write hits) */
+ ulonglong global_cache_write; /* number of writes from cache to files */
+ ulonglong global_cache_r_requests;/* number of read requests (read hits) */
+ ulonglong global_cache_read; /* number of reads from files to cache */
+
+ uint shift; /* block size = 2 ^ shift */
+ myf readwrite_flags; /* Flags to pread/pwrite() */
+ myf org_readwrite_flags; /* Flags to pread/pwrite() at init */
+ my_bool inited;
+ my_bool resize_in_flush; /* true during flush of resize operation */
+ my_bool can_be_used; /* usage of cache for read/write is allowed */
+ my_bool in_init; /* Set to 1 in MySQL during init/resize */
+ HASH files_in_flush; /**< files in flush_pagecache_blocks_int() */
+} PAGECACHE;
+
+/** @brief Return values for PAGECACHE_FLUSH_FILTER */
+enum pagecache_flush_filter_result
+{
+ FLUSH_FILTER_SKIP_TRY_NEXT= 0,/**< skip page and move on to next one */
+ FLUSH_FILTER_OK, /**< flush page and move on to next one */
+ FLUSH_FILTER_SKIP_ALL /**< skip page and all next ones */
+};
+/** @brief a filter function type for flush_pagecache_blocks_with_filter() */
+typedef enum pagecache_flush_filter_result
+(*PAGECACHE_FLUSH_FILTER)(enum pagecache_page_type type, pgcache_page_no_t page,
+ LSN rec_lsn, void *arg);
+
+/* The default key cache */
+extern PAGECACHE dflt_pagecache_var, *dflt_pagecache;
+
+extern ulong init_pagecache(PAGECACHE *pagecache, size_t use_mem,
+ uint division_limit, uint age_threshold,
+ uint block_size, myf my_read_flags);
+extern ulong resize_pagecache(PAGECACHE *pagecache,
+ size_t use_mem, uint division_limit,
+ uint age_threshold);
+extern void change_pagecache_param(PAGECACHE *pagecache, uint division_limit,
+ uint age_threshold);
+
+extern uchar *pagecache_read(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ uint level,
+ uchar *buff,
+ enum pagecache_page_type type,
+ enum pagecache_page_lock lock,
+ PAGECACHE_BLOCK_LINK **link);
+
+#define pagecache_write(P,F,N,L,B,T,O,I,M,K,R) \
+ pagecache_write_part(P,F,N,L,B,T,O,I,M,K,R,0,(P)->block_size)
+
+#define pagecache_inject(P,F,N,L,B,T,O,I,K,R) \
+ pagecache_write_part(P,F,N,L,B,T,O,I,PAGECACHE_WRITE_DONE, \
+ K,R,0,(P)->block_size)
+
+extern my_bool pagecache_write_part(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ uint level,
+ uchar *buff,
+ enum pagecache_page_type type,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin,
+ enum pagecache_write_mode write_mode,
+ PAGECACHE_BLOCK_LINK **link,
+ LSN first_REDO_LSN_for_page,
+ uint offset,
+ uint size);
+extern void pagecache_unlock(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin,
+ LSN first_REDO_LSN_for_page,
+ LSN lsn, my_bool was_changed);
+extern void pagecache_unlock_by_link(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin,
+ LSN first_REDO_LSN_for_page,
+ LSN lsn, my_bool was_changed);
+extern void pagecache_unpin(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ LSN lsn);
+extern void pagecache_unpin_by_link(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *link,
+ LSN lsn);
+
+
+/* Results of flush operation (bit field in fact) */
+
+/* The flush is done. */
+#define PCFLUSH_OK 0
+/* There was errors during the flush process. */
+#define PCFLUSH_ERROR 1
+/* Pinned blocks was met and skipped. */
+#define PCFLUSH_PINNED 2
+/* PCFLUSH_ERROR and PCFLUSH_PINNED. */
+#define PCFLUSH_PINNED_AND_ERROR (PCFLUSH_ERROR|PCFLUSH_PINNED)
+
+#define pagecache_file_init(F,RC,WC,WF,D) \
+ do{ \
+ (F).read_callback= (RC); (F).write_callback= (WC); \
+ (F).write_fail= (WF); \
+ (F).callback_data= (uchar*)(D); \
+ } while(0)
+
+#define flush_pagecache_blocks(A,B,C) \
+ flush_pagecache_blocks_with_filter(A,B,C,NULL,NULL)
+extern int flush_pagecache_blocks_with_filter(PAGECACHE *keycache,
+ PAGECACHE_FILE *file,
+ enum flush_type type,
+ PAGECACHE_FLUSH_FILTER filter,
+ void *filter_arg);
+extern my_bool pagecache_delete(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ enum pagecache_page_lock lock,
+ my_bool flush);
+extern my_bool pagecache_delete_pages(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ uint page_count,
+ enum pagecache_page_lock lock,
+ my_bool flush);
+extern void end_pagecache(PAGECACHE *keycache, my_bool cleanup);
+extern my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
+ LEX_STRING *str,
+ LSN *min_lsn);
+extern int reset_pagecache_counters(const char *name, PAGECACHE *pagecache);
+extern uchar *pagecache_block_link_to_buffer(PAGECACHE_BLOCK_LINK *block);
+
+
+/* Functions to handle multiple key caches */
+extern my_bool multi_pagecache_init(void);
+extern void multi_pagecache_free(void);
+extern PAGECACHE *multi_pagecache_search(uchar *key, uint length,
+ PAGECACHE *def);
+extern my_bool multi_pagecache_set(const uchar *key, uint length,
+ PAGECACHE *pagecache);
+extern void multi_pagecache_change(PAGECACHE *old_data,
+ PAGECACHE *new_data);
+extern int reset_pagecache_counters(const char *name,
+ PAGECACHE *pagecache);
+
+C_MODE_END
+#endif /* _keycache_h */
diff --git a/storage/maria/ma_pagecaches.c b/storage/maria/ma_pagecaches.c
new file mode 100644
index 00000000000..a9460be10c5
--- /dev/null
+++ b/storage/maria/ma_pagecaches.c
@@ -0,0 +1,105 @@
+/* Copyright (C) 2003-2007 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Handling of multiple key caches
+
+ The idea is to have a thread safe hash on the table name,
+ with a default key cache value that is returned if the table name is not in
+ the cache.
+*/
+
+#include "maria_def.h"
+#include "ma_pagecache.h"
+#include <hash.h>
+#include <m_string.h>
+#include "../../mysys/my_safehash.h"
+
+/*****************************************************************************
+ Functions to handle the pagecache objects
+*****************************************************************************/
+
+/* Variable to store all key cache objects */
+static SAFE_HASH pagecache_hash;
+
+
+my_bool multi_pagecache_init(void)
+{
+ return safe_hash_init(&pagecache_hash, 16, (uchar*) maria_pagecache);
+}
+
+
+void multi_pagecache_free(void)
+{
+ safe_hash_free(&pagecache_hash);
+}
+
+/*
+ Get a key cache to be used for a specific table.
+
+ SYNOPSIS
+ multi_pagecache_search()
+ key key to find (usually table path)
+ uint length Length of key.
+ def Default value if no key cache
+
+ NOTES
+ This function is coded in such a way that we will return the
+ default key cache even if one never called multi_pagecache_init.
+ This will ensure that it works with old MyISAM clients.
+
+ RETURN
+ key cache to use
+*/
+
+PAGECACHE *multi_pagecache_search(uchar *key, uint length,
+ PAGECACHE *def)
+{
+ if (!pagecache_hash.hash.records)
+ return def;
+ return (PAGECACHE*) safe_hash_search(&pagecache_hash, key, length,
+ (void*) def);
+}
+
+
+/*
+ Assosiate a key cache with a key
+
+
+ SYONOPSIS
+ multi_pagecache_set()
+ key key (path to table etc..)
+ length Length of key
+ pagecache cache to assococite with the table
+
+ NOTES
+ This can be used both to insert a new entry and change an existing
+ entry
+*/
+
+
+my_bool multi_pagecache_set(const uchar *key, uint length,
+ PAGECACHE *pagecache)
+{
+ return safe_hash_set(&pagecache_hash, key, length, (uchar*) pagecache);
+}
+
+
+void multi_pagecache_change(PAGECACHE *old_data,
+ PAGECACHE *new_data)
+{
+ safe_hash_change(&pagecache_hash, (uchar*) old_data, (uchar*) new_data);
+}
diff --git a/storage/maria/ma_pagecrc.c b/storage/maria/ma_pagecrc.c
new file mode 100644
index 00000000000..3fb6b659686
--- /dev/null
+++ b/storage/maria/ma_pagecrc.c
@@ -0,0 +1,302 @@
+/* TODO: copyright & Co */
+
+#include "maria_def.h"
+
+
+/**
+ @brief calculate crc of the page avoiding special values
+
+ @param start The value to start CRC (we use page number here)
+ @param data data pointer
+ @param length length of the data
+
+ @return crc of the page without special values
+*/
+
+static uint32 maria_page_crc(ulong start, uchar *data, uint length)
+{
+ uint32 crc= crc32(start, data, length);
+
+ /* we need this assert to get following comparison working */
+ compile_time_assert(MARIA_NO_CRC_BITMAP_PAGE ==
+ MARIA_NO_CRC_NORMAL_PAGE - 1 &&
+ MARIA_NO_CRC_NORMAL_PAGE == 0xffffffff);
+ if (crc >= MARIA_NO_CRC_BITMAP_PAGE)
+ crc= MARIA_NO_CRC_BITMAP_PAGE - 1;
+
+ return(crc);
+}
+
+/**
+ @brief Maria pages read callback (checks the page CRC)
+
+ @param page The page data to check
+ @param page_no The page number (<offset>/<page length>)
+ @param data_ptr pointer to MARIA_SHARE
+ @param no_crc_val Value which means CRC absence
+ (MARIA_NO_CRC_NORMAL_PAGE or MARIA_NO_CRC_BITMAP_PAGE)
+ @param data_length length of data to calculate CRC
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+static my_bool maria_page_crc_check(uchar *page,
+ pgcache_page_no_t page_no,
+ MARIA_SHARE *share,
+ uint32 no_crc_val,
+ int data_length)
+{
+ uint32 crc= uint4korr(page + share->block_size - CRC_SIZE), new_crc;
+ my_bool res;
+ DBUG_ENTER("maria_page_crc_check");
+
+ DBUG_ASSERT((uint)data_length <= share->block_size - CRC_SIZE);
+
+ /* we need this assert to get following comparison working */
+ compile_time_assert(MARIA_NO_CRC_BITMAP_PAGE ==
+ MARIA_NO_CRC_NORMAL_PAGE - 1 &&
+ MARIA_NO_CRC_NORMAL_PAGE == 0xffffffff);
+ /*
+ If crc is no_crc_val then
+ the page has no crc, so there is nothing to check.
+ */
+ if (crc >= MARIA_NO_CRC_BITMAP_PAGE)
+ {
+ DBUG_PRINT("info", ("No crc: %lu crc: %lu page: %lu ",
+ (ulong) no_crc_val, (ulong) crc, (ulong) page_no));
+ if (crc != no_crc_val)
+ {
+ my_errno= HA_ERR_WRONG_CRC;
+ DBUG_PRINT("error", ("Wrong no CRC value"));
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+ }
+ new_crc= maria_page_crc(page_no, page, data_length);
+ DBUG_ASSERT(new_crc != no_crc_val);
+ res= test(new_crc != crc);
+ if (res)
+ {
+ DBUG_PRINT("error", ("Page: %lu crc: %lu calculated crc: %lu",
+ (ulong) page_no, (ulong) crc, (ulong) new_crc));
+ my_errno= HA_ERR_WRONG_CRC;
+ }
+ DBUG_RETURN(res);
+}
+
+
+/**
+ @brief Maria pages write callback (sets the page CRC for data and index
+ files)
+
+ @param page The page data to set
+ @param page_no The page number (<offset>/<page length>)
+ @param data_ptr Write callback data pointer (pointer to MARIA_SHARE)
+
+ @retval 0 OK
+*/
+
+my_bool maria_page_crc_set_normal(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar *data_ptr)
+{
+ MARIA_SHARE *share= (MARIA_SHARE *)data_ptr;
+ int data_length= share->block_size - CRC_SIZE;
+ uint32 crc= maria_page_crc(page_no, page, data_length);
+ DBUG_ENTER("maria_page_crc_set");
+ DBUG_PRINT("info", ("Page %lu crc: %lu", (ulong) page_no, (ulong)crc));
+
+ /* crc is on the stack so it is aligned, pagecache buffer is aligned, too */
+ int4store_aligned(page + data_length, crc);
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Maria pages write callback (sets the page CRC for keys)
+
+ @param page The page data to set
+ @param page_no The page number (<offset>/<page length>)
+ @param data_ptr Write callback data pointer (pointer to MARIA_SHARE)
+
+ @retval 0 OK
+*/
+
+my_bool maria_page_crc_set_index(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar *data_ptr)
+{
+ MARIA_SHARE *share= (MARIA_SHARE *)data_ptr;
+ int data_length= _ma_get_page_used(share, page);
+ uint32 crc= maria_page_crc(page_no, page, data_length);
+ DBUG_ENTER("maria_page_crc_set");
+
+ DBUG_PRINT("info", ("Page %lu crc: %lu",
+ (ulong) page_no, (ulong) crc));
+ DBUG_ASSERT((uint)data_length <= share->block_size - CRC_SIZE);
+ /* crc is on the stack so it is aligned, pagecache buffer is aligned, too */
+ int4store_aligned(page + share->block_size - CRC_SIZE, crc);
+ DBUG_RETURN(0);
+}
+
+
+/* interface functions */
+
+
+/**
+ @brief Maria pages read callback (checks the page CRC) for index/data pages
+
+ @param page The page data to check
+ @param page_no The page number (<offset>/<page length>)
+ @param data_ptr Read callback data pointer (pointer to MARIA_SHARE)
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool maria_page_crc_check_data(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar *data_ptr)
+{
+ MARIA_SHARE *share= (MARIA_SHARE *)data_ptr;
+ return (maria_page_crc_check(page, page_no, share,
+ MARIA_NO_CRC_NORMAL_PAGE,
+ share->block_size - CRC_SIZE));
+}
+
+
+/**
+ @brief Maria pages read callback (checks the page CRC) for bitmap pages
+
+ @param page The page data to check
+ @param page_no The page number (<offset>/<page length>)
+ @param data_ptr Read callback data pointer (pointer to MARIA_SHARE)
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool maria_page_crc_check_bitmap(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar *data_ptr)
+{
+ MARIA_SHARE *share= (MARIA_SHARE *)data_ptr;
+ return (maria_page_crc_check(page, page_no, share,
+ MARIA_NO_CRC_BITMAP_PAGE,
+ share->block_size - CRC_SIZE));
+}
+
+
+/**
+ @brief Maria pages read callback (checks the page CRC) for index pages
+
+ @param page The page data to check
+ @param page_no The page number (<offset>/<page length>)
+ @param data_ptr Read callback data pointer (pointer to MARIA_SHARE)
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool maria_page_crc_check_index(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar *data_ptr)
+{
+ MARIA_SHARE *share= (MARIA_SHARE *)data_ptr;
+ uint length= _ma_get_page_used(share, page);
+ if (length > share->block_size - CRC_SIZE)
+ {
+ DBUG_PRINT("error", ("Wrong page length: %u", length));
+ return (my_errno= HA_ERR_WRONG_CRC);
+ }
+ return maria_page_crc_check(page, page_no, share,
+ MARIA_NO_CRC_NORMAL_PAGE,
+ length);
+}
+
+
+/**
+ @brief Maria pages dumme read callback for temporary tables
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool maria_page_crc_check_none(uchar *page __attribute__((unused)),
+ pgcache_page_no_t page_no
+ __attribute__((unused)),
+ uchar *data_ptr __attribute__((unused)))
+{
+ return 0;
+}
+
+
+/**
+ @brief Maria pages write callback (sets the page filler for index/data)
+
+ @param page The page data to set
+ @param page_no The page number (<offset>/<page length>)
+ @param data_ptr Write callback data pointer (pointer to MARIA_SHARE)
+
+ @retval 0 OK
+*/
+
+my_bool maria_page_filler_set_normal(uchar *page,
+ pgcache_page_no_t page_no
+ __attribute__((unused)),
+ uchar *data_ptr)
+{
+ DBUG_ENTER("maria_page_filler_set_normal");
+ DBUG_ASSERT(page_no != 0); /* Catches some simple bugs */
+ int4store_aligned(page + ((MARIA_SHARE *)data_ptr)->block_size - CRC_SIZE,
+ MARIA_NO_CRC_NORMAL_PAGE);
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Maria pages write callback (sets the page filler for bitmap)
+
+ @param page The page data to set
+ @param page_no The page number (<offset>/<page length>)
+ @param data_ptr Write callback data pointer (pointer to MARIA_SHARE)
+
+ @retval 0 OK
+*/
+
+my_bool maria_page_filler_set_bitmap(uchar *page,
+ pgcache_page_no_t page_no
+ __attribute__((unused)),
+ uchar *data_ptr)
+{
+ DBUG_ENTER("maria_page_filler_set_bitmap");
+ int4store_aligned(page + ((MARIA_SHARE *)data_ptr)->block_size - CRC_SIZE,
+ MARIA_NO_CRC_BITMAP_PAGE);
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Maria pages dummy write callback for temporary tables
+
+ @retval 0 OK
+*/
+
+my_bool maria_page_filler_set_none(uchar *page __attribute__((unused)),
+ pgcache_page_no_t page_no
+ __attribute__((unused)),
+ uchar *data_ptr __attribute__((unused)))
+{
+ return 0;
+}
+
+/**
+ @brief Write failure callback (mark table as corrupted)
+
+ @param data_ptr Write callback data pointer (pointer to MARIA_SHARE)
+*/
+void maria_page_write_failure (uchar* data_ptr)
+{
+ maria_mark_crashed_share((MARIA_SHARE *)data_ptr);
+}
diff --git a/storage/maria/ma_panic.c b/storage/maria/ma_panic.c
new file mode 100644
index 00000000000..867abfd1c62
--- /dev/null
+++ b/storage/maria/ma_panic.c
@@ -0,0 +1,147 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "ma_fulltext.h"
+
+/*
+ Stop usage of Maria
+
+ SYNOPSIS
+ maria_panic()
+ flag HA_PANIC_CLOSE: All maria files (tables and log) are closed.
+ maria_end() is called.
+ HA_PANIC_WRITE: All misam files are unlocked and
+ all changed data in single user maria is
+ written to file
+ HA_PANIC_READ All maria files that was locked when
+ maria_panic(HA_PANIC_WRITE) was done is
+ locked. A maria_readinfo() is done for
+ all single user files to get changes
+ in database
+
+ RETURN
+ 0 ok
+ # error number in case of error
+*/
+
+int maria_panic(enum ha_panic_function flag)
+{
+ int error=0;
+ LIST *list_element,*next_open;
+ MARIA_HA *info;
+ DBUG_ENTER("maria_panic");
+
+ if (!maria_inited)
+ DBUG_RETURN(0);
+ pthread_mutex_lock(&THR_LOCK_maria);
+ for (list_element=maria_open_list ; list_element ; list_element=next_open)
+ {
+ next_open=list_element->next; /* Save if close */
+ info=(MARIA_HA*) list_element->data;
+ switch (flag) {
+ case HA_PANIC_CLOSE:
+ /*
+ If bad luck (if some tables would be used now, which normally does not
+ happen in MySQL), as we release the mutex, the list may change and so
+ we may crash.
+ */
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ if (maria_close(info))
+ error=my_errno;
+ pthread_mutex_lock(&THR_LOCK_maria);
+ break;
+ case HA_PANIC_WRITE: /* Do this to free databases */
+#ifdef CANT_OPEN_FILES_TWICE
+ if (info->s->options & HA_OPTION_READ_ONLY_DATA)
+ break;
+#endif
+ if (flush_pagecache_blocks(info->s->pagecache, &info->s->kfile,
+ FLUSH_RELEASE))
+ error=my_errno;
+ if (info->opt_flag & WRITE_CACHE_USED)
+ if (flush_io_cache(&info->rec_cache))
+ error=my_errno;
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ if (flush_io_cache(&info->rec_cache))
+ error=my_errno;
+ reinit_io_cache(&info->rec_cache,READ_CACHE,0,
+ (pbool) (info->lock_type != F_UNLCK),1);
+ }
+ if (info->lock_type != F_UNLCK && ! info->was_locked)
+ {
+ info->was_locked=info->lock_type;
+ if (maria_lock_database(info,F_UNLCK))
+ error=my_errno;
+ }
+#ifdef CANT_OPEN_FILES_TWICE
+ if (info->s->kfile.file >= 0 && my_close(info->s->kfile.file, MYF(0)))
+ error = my_errno;
+ if (info->dfile.file >= 0 && my_close(info->dfile.file, MYF(0)))
+ error = my_errno;
+ info->s->kfile.file= info->dfile.file= -1;/* Files aren't open anymore */
+ break;
+#endif
+ case HA_PANIC_READ: /* Restore to before WRITE */
+#ifdef CANT_OPEN_FILES_TWICE
+ { /* Open closed files */
+ char name_buff[FN_REFLEN];
+ if (info->s->kfile.file < 0)
+ {
+
+ if ((info->s->kfile.file= my_open(fn_format(name_buff,
+ info->filename, "",
+ N_NAME_IEXT,4),
+ info->mode,
+ MYF(MY_WME))) < 0)
+ error = my_errno;
+ pagecache_file_init(info->s->kfile, &maria_page_crc_check_index,
+ (info->s->options & HA_OPTION_PAGE_CHECKSUM ?
+ &maria_page_crc_set_index :
+ &maria_page_filler_set_normal),
+ &maria_page_write_failure, info->s);
+ }
+ if (info->dfile.file < 0)
+ {
+ if ((info->dfile.file= my_open(fn_format(name_buff, info->filename,
+ "", N_NAME_DEXT, 4),
+ info->mode,
+ MYF(MY_WME))) < 0)
+ error = my_errno;
+ pagecache_file_init(info->dfile, &maria_page_crc_check_data,
+ (share->options & HA_OPTION_PAGE_CHECKSUM ?
+ &maria_page_crc_set_normal:
+ &maria_page_filler_set_normal),
+ &maria_page_write_failure, share);
+ info->rec_cache.file= info->dfile.file;
+ }
+ }
+#endif
+ if (info->was_locked)
+ {
+ if (maria_lock_database(info, info->was_locked))
+ error=my_errno;
+ info->was_locked=0;
+ }
+ break;
+ }
+ }
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ if (flag == HA_PANIC_CLOSE)
+ maria_end();
+ if (!error)
+ DBUG_RETURN(0);
+ DBUG_RETURN(my_errno=error);
+} /* maria_panic */
diff --git a/storage/maria/ma_preload.c b/storage/maria/ma_preload.c
new file mode 100644
index 00000000000..c2a6d405932
--- /dev/null
+++ b/storage/maria/ma_preload.c
@@ -0,0 +1,104 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Preload indexes into key cache
+*/
+
+#include "maria_def.h"
+
+
+/*
+ Preload pages of the index file for a table into the key cache
+
+ SYNOPSIS
+ maria_preload()
+ info open table
+ map map of indexes to preload into key cache
+ ignore_leaves only non-leaves pages are to be preloaded
+
+ RETURN VALUE
+ 0 if a success. error code - otherwise.
+
+ NOTES.
+ At present pages for all indexes are preloaded.
+ In future only pages for indexes specified in the key_map parameter
+ of the table will be preloaded.
+*/
+
+int maria_preload(MARIA_HA *info, ulonglong key_map, my_bool ignore_leaves)
+{
+ ulong length, block_length= 0;
+ uchar *buff= NULL;
+ MARIA_SHARE* share= info->s;
+ uint keys= share->state.header.keys;
+ my_off_t key_file_length= share->state.state.key_file_length;
+ my_off_t pos= share->base.keystart;
+ DBUG_ENTER("maria_preload");
+
+ if (!keys || !maria_is_any_key_active(key_map) || key_file_length == pos)
+ DBUG_RETURN(0);
+
+ block_length= share->pagecache->block_size;
+ length= info->preload_buff_size/block_length * block_length;
+ set_if_bigger(length, block_length);
+
+ if (!(buff= (uchar *) my_malloc(length, MYF(MY_WME))))
+ DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM);
+
+ if (flush_pagecache_blocks(share->pagecache, &share->kfile, FLUSH_RELEASE))
+ goto err;
+
+ do
+ {
+ uchar *end;
+ /* Read the next block of index file into the preload buffer */
+ if ((my_off_t) length > (key_file_length-pos))
+ length= (ulong) (key_file_length-pos);
+ if (my_pread(share->kfile.file, (uchar*) buff, length, pos,
+ MYF(MY_FAE|MY_FNABP)))
+ goto err;
+
+ for (end= buff + length ; buff < end ; buff+= block_length)
+ {
+ uint keynr= _ma_get_keynr(share, buff);
+ if ((ignore_leaves && !_ma_test_if_nod(share, buff)) ||
+ keynr == MARIA_DELETE_KEY_NR ||
+ !(key_map & ((ulonglong) 1 << keynr)))
+ {
+ DBUG_ASSERT(share->pagecache->block_size == block_length);
+ if (pagecache_write(share->pagecache,
+ &share->kfile, pos / block_length,
+ DFLT_INIT_HITS,
+ (uchar*) buff,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DONE, 0,
+ LSN_IMPOSSIBLE))
+ goto err;
+ }
+ pos+= block_length;
+ }
+ }
+ while (pos != key_file_length);
+
+ my_free((char*) buff, MYF(0));
+ DBUG_RETURN(0);
+
+err:
+ my_free((char*) buff, MYF(MY_ALLOW_ZERO_PTR));
+ DBUG_RETURN(my_errno= errno);
+}
diff --git a/storage/maria/ma_range.c b/storage/maria/ma_range.c
new file mode 100644
index 00000000000..056629319e5
--- /dev/null
+++ b/storage/maria/ma_range.c
@@ -0,0 +1,297 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Gives a approximated number of how many records there is between two keys.
+ Used when optimizing querries.
+ */
+
+#include "maria_def.h"
+#include "ma_rt_index.h"
+
+static ha_rows _ma_record_pos(MARIA_HA *,const uchar *, key_part_map,
+ enum ha_rkey_function);
+static double _ma_search_pos(MARIA_HA *, MARIA_KEYDEF *, uchar *,
+ uint, uint, my_off_t);
+static uint _ma_keynr(MARIA_HA *, MARIA_KEYDEF *, uchar *, uchar *, uint *);
+
+
+/**
+ @brief Estimate how many records there is in a given range
+
+ @param info MARIA handler
+ @param inx Index to use
+ @param min_key Min key. Is = 0 if no min range
+ @param max_key Max key. Is = 0 if no max range
+
+ @note
+ We should ONLY return 0 if there is no rows in range
+
+ @return Estimated number of rows or error
+ @retval HA_POS_ERROR error (or we can't estimate number of rows)
+ @retval number Estimated number of rows
+*/
+
+ha_rows maria_records_in_range(MARIA_HA *info, int inx, key_range *min_key,
+ key_range *max_key)
+{
+ ha_rows start_pos,end_pos,res;
+ DBUG_ENTER("maria_records_in_range");
+
+ if ((inx = _ma_check_index(info,inx)) < 0)
+ DBUG_RETURN(HA_POS_ERROR);
+
+ if (fast_ma_readinfo(info))
+ DBUG_RETURN(HA_POS_ERROR);
+ info->update&= (HA_STATE_CHANGED+HA_STATE_ROW_CHANGED);
+ if (info->s->concurrent_insert)
+ rw_rdlock(&info->s->key_root_lock[inx]);
+
+ switch(info->s->keyinfo[inx].key_alg){
+#ifdef HAVE_RTREE_KEYS
+ case HA_KEY_ALG_RTREE:
+ {
+ uchar *key_buff;
+ uint start_key_len;
+
+ /*
+ The problem is that the optimizer doesn't support
+ RTree keys properly at the moment.
+ Hope this will be fixed some day.
+ But now NULL in the min_key means that we
+ didn't make the task for the RTree key
+ and expect BTree functionality from it.
+ As it's not able to handle such request
+ we return the error.
+ */
+ if (!min_key)
+ {
+ res= HA_POS_ERROR;
+ break;
+ }
+ key_buff= info->lastkey+info->s->base.max_key_length;
+ start_key_len= _ma_pack_key(info,inx, key_buff,
+ min_key->key, min_key->keypart_map,
+ (HA_KEYSEG**) 0);
+ res= maria_rtree_estimate(info, inx, key_buff, start_key_len,
+ maria_read_vec[min_key->flag]);
+ res= res ? res : 1; /* Don't return 0 */
+ break;
+ }
+#endif
+ case HA_KEY_ALG_BTREE:
+ default:
+ start_pos= (min_key ?
+ _ma_record_pos(info, min_key->key, min_key->keypart_map,
+ min_key->flag) :
+ (ha_rows) 0);
+ end_pos= (max_key ?
+ _ma_record_pos(info, max_key->key, max_key->keypart_map,
+ max_key->flag) :
+ info->state->records + (ha_rows) 1);
+ res= (end_pos < start_pos ? (ha_rows) 0 :
+ (end_pos == start_pos ? (ha_rows) 1 : end_pos-start_pos));
+ if (start_pos == HA_POS_ERROR || end_pos == HA_POS_ERROR)
+ res=HA_POS_ERROR;
+ }
+
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->key_root_lock[inx]);
+ fast_ma_writeinfo(info);
+
+ /**
+ @todo LOCK
+ If res==0 (no rows), if we need to guarantee repeatability of the search,
+ we will need to set a next-key lock in this statement.
+ Also SELECT COUNT(*)...
+ */
+
+ DBUG_PRINT("info",("records: %ld",(ulong) (res)));
+ DBUG_RETURN(res);
+}
+
+
+ /* Find relative position (in records) for key in index-tree */
+
+static ha_rows _ma_record_pos(MARIA_HA *info, const uchar *key,
+ key_part_map keypart_map,
+ enum ha_rkey_function search_flag)
+{
+ uint inx=(uint) info->lastinx, nextflag, key_len;
+ MARIA_KEYDEF *keyinfo=info->s->keyinfo+inx;
+ uchar *key_buff;
+ double pos;
+ DBUG_ENTER("_ma_record_pos");
+ DBUG_PRINT("enter",("search_flag: %d",search_flag));
+ DBUG_ASSERT(keypart_map);
+
+ key_buff=info->lastkey+info->s->base.max_key_length;
+ key_len= _ma_pack_key(info, inx, key_buff, key, keypart_map,
+ (HA_KEYSEG**) 0);
+ DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, keyinfo->seg,
+ key_buff, key_len););
+ nextflag=maria_read_vec[search_flag];
+ if (!(nextflag & (SEARCH_FIND | SEARCH_NO_FIND | SEARCH_LAST)))
+ key_len=USE_WHOLE_KEY;
+
+ /*
+ my_handler.c:mi_compare_text() has a flag 'skip_end_space'.
+ This is set in my_handler.c:ha_key_cmp() in dependence on the
+ compare flags 'nextflag' and the column type.
+
+ TEXT columns are of type HA_KEYTYPE_VARTEXT. In this case the
+ condition is skip_end_space= ((nextflag & (SEARCH_FIND |
+ SEARCH_UPDATE)) == SEARCH_FIND).
+
+ SEARCH_FIND is used for an exact key search. The combination
+ SEARCH_FIND | SEARCH_UPDATE is used in write/update/delete
+ operations with a comment like "Not real duplicates", whatever this
+ means. From the condition above we can see that 'skip_end_space' is
+ always false for these operations. The result is that trailing space
+ counts in key comparison and hence, emtpy strings ('', string length
+ zero, but not NULL) compare less that strings starting with control
+ characters and these in turn compare less than strings starting with
+ blanks.
+
+ When estimating the number of records in a key range, we request an
+ exact search for the minimum key. This translates into a plain
+ SEARCH_FIND flag. Using this alone would lead to a 'skip_end_space'
+ compare. Empty strings would be expected above control characters.
+ Their keys would not be found because they are located below control
+ characters.
+
+ This is the reason that we add the SEARCH_UPDATE flag here. It makes
+ the key estimation compare in the same way like key write operations
+ do. Olny so we will find the keys where they have been inserted.
+
+ Adding the flag unconditionally does not hurt as it is used in the
+ above mentioned condition only. So it can safely be used together
+ with other flags.
+ */
+ pos= _ma_search_pos(info,keyinfo, key_buff, key_len,
+ nextflag | SEARCH_SAVE_BUFF | SEARCH_UPDATE,
+ info->s->state.key_root[inx]);
+ if (pos >= 0.0)
+ {
+ DBUG_PRINT("exit",("pos: %ld",(ulong) (pos*info->state->records)));
+ DBUG_RETURN((ulong) (pos*info->state->records+0.5));
+ }
+ DBUG_RETURN(HA_POS_ERROR);
+}
+
+
+ /* This is a modified version of _ma_search */
+ /* Returns offset for key in indextable (decimal 0.0 <= x <= 1.0) */
+
+static double _ma_search_pos(register MARIA_HA *info,
+ register MARIA_KEYDEF *keyinfo,
+ uchar *key, uint key_len, uint nextflag,
+ register my_off_t pos)
+{
+ int flag;
+ uint nod_flag,keynr,max_keynr;
+ my_bool after_key;
+ uchar *keypos, *buff;
+ double offset;
+ DBUG_ENTER("_ma_search_pos");
+ LINT_INIT(max_keynr);
+
+ if (pos == HA_OFFSET_ERROR)
+ DBUG_RETURN(0.5);
+
+ if (!(buff= _ma_fetch_keypage(info,keyinfo, pos,
+ PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
+ info->buff, 1, 0)))
+ goto err;
+ flag=(*keyinfo->bin_search)(info, keyinfo, buff, key, key_len, nextflag,
+ &keypos,info->lastkey, &after_key);
+ nod_flag=_ma_test_if_nod(info->s, buff);
+ keynr= _ma_keynr(info,keyinfo,buff,keypos,&max_keynr);
+
+ if (flag)
+ {
+ if (flag == MARIA_FOUND_WRONG_KEY)
+ DBUG_RETURN(-1); /* error */
+ /*
+ Didn't found match. keypos points at next (bigger) key
+ Try to find a smaller, better matching key.
+ Matches keynr + [0-1]
+ */
+ if (flag > 0 && ! nod_flag)
+ offset= 1.0;
+ else if ((offset= _ma_search_pos(info,keyinfo,key,key_len,nextflag,
+ _ma_kpos(nod_flag,keypos))) < 0)
+ DBUG_RETURN(offset);
+ }
+ else
+ {
+ /*
+ Found match. Keypos points at the start of the found key
+ Matches keynr+1
+ */
+ offset=1.0; /* Matches keynr+1 */
+ if ((nextflag & SEARCH_FIND) && nod_flag &&
+ ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME ||
+ key_len != USE_WHOLE_KEY))
+ {
+ /*
+ There may be identical keys in the tree. Try to match on of those.
+ Matches keynr + [0-1]
+ */
+ if ((offset= _ma_search_pos(info,keyinfo,key,key_len,SEARCH_FIND,
+ _ma_kpos(nod_flag,keypos))) < 0)
+ DBUG_RETURN(offset); /* Read error */
+ }
+ }
+ DBUG_PRINT("info",("keynr: %d offset: %g max_keynr: %d nod: %d flag: %d",
+ keynr,offset,max_keynr,nod_flag,flag));
+ DBUG_RETURN((keynr+offset)/(max_keynr+1));
+err:
+ DBUG_PRINT("exit",("Error: %d",my_errno));
+ DBUG_RETURN (-1.0);
+}
+
+
+/* Get keynummer of current key and max number of keys in nod */
+
+static uint _ma_keynr(MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uchar *page, uchar *keypos, uint *ret_max_key)
+{
+ uint nod_flag, used_length, keynr, max_key;
+ uchar t_buff[HA_MAX_KEY_BUFF],*end;
+
+ _ma_get_used_and_nod(info->s, page, used_length, nod_flag);
+ end= page+ used_length;
+ page+= info->s->keypage_header + nod_flag;
+
+ if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)))
+ {
+ *ret_max_key= (uint) (end-page)/(keyinfo->keylength+nod_flag);
+ return (uint) (keypos-page)/(keyinfo->keylength+nod_flag);
+ }
+
+ max_key=keynr=0;
+ t_buff[0]=0; /* Safety */
+ while (page < end)
+ {
+ if (!(*keyinfo->get_key)(keyinfo,nod_flag,&page,t_buff))
+ return 0; /* Error */
+ max_key++;
+ if (page == keypos)
+ keynr=max_key;
+ }
+ *ret_max_key=max_key;
+ return(keynr);
+}
diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c
new file mode 100644
index 00000000000..bc594127296
--- /dev/null
+++ b/storage/maria/ma_recovery.c
@@ -0,0 +1,3159 @@
+/* Copyright (C) 2006, 2007 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ WL#3072 Maria recovery
+ First version written by Guilhem Bichot on 2006-04-27.
+*/
+
+/* Here is the implementation of this module */
+
+#include "maria_def.h"
+#include "ma_recovery.h"
+#include "ma_blockrec.h"
+#include "ma_checkpoint.h"
+#include "trnman.h"
+#include "ma_key_recover.h"
+
+struct st_trn_for_recovery /* used only in the REDO phase */
+{
+ LSN group_start_lsn, undo_lsn, first_undo_lsn;
+ TrID long_trid;
+};
+struct st_dirty_page /* used only in the REDO phase */
+{
+ uint64 file_and_page_id;
+ LSN rec_lsn;
+};
+struct st_table_for_recovery /* used in the REDO and UNDO phase */
+{
+ MARIA_HA *info;
+ File org_kfile, org_dfile; /**< OS descriptors when Checkpoint saw table */
+};
+/* Variables used by all functions of this module. Ok as single-threaded */
+static struct st_trn_for_recovery *all_active_trans;
+static struct st_table_for_recovery *all_tables;
+static HASH all_dirty_pages;
+static struct st_dirty_page *dirty_pages_pool;
+static LSN current_group_end_lsn,
+ checkpoint_start= LSN_IMPOSSIBLE;
+#ifndef DBUG_OFF
+/** Current group of REDOs is about this table and only this one */
+static MARIA_HA *current_group_table;
+#endif
+static TrID max_long_trid= 0; /**< max long trid seen by REDO phase */
+static FILE *tracef; /**< trace file for debugging */
+static my_bool skip_DDLs; /**< if REDO phase should skip DDL records */
+/** @brief to avoid writing a checkpoint if recovery did nothing. */
+static my_bool checkpoint_useful;
+static my_bool procent_printed;
+static ulonglong now; /**< for tracking execution time of phases */
+uint warnings; /**< count of warnings */
+
+#define prototype_redo_exec_hook(R) \
+ static int exec_REDO_LOGREC_ ## R(const TRANSLOG_HEADER_BUFFER *rec)
+
+#define prototype_redo_exec_hook_dummy(R) \
+ static int exec_REDO_LOGREC_ ## R(const TRANSLOG_HEADER_BUFFER *rec \
+ __attribute__ ((unused)))
+
+#define prototype_undo_exec_hook(R) \
+ static int exec_UNDO_LOGREC_ ## R(const TRANSLOG_HEADER_BUFFER *rec, TRN *trn)
+
+prototype_redo_exec_hook(LONG_TRANSACTION_ID);
+prototype_redo_exec_hook_dummy(CHECKPOINT);
+prototype_redo_exec_hook(REDO_CREATE_TABLE);
+prototype_redo_exec_hook(REDO_RENAME_TABLE);
+prototype_redo_exec_hook(REDO_REPAIR_TABLE);
+prototype_redo_exec_hook(REDO_DROP_TABLE);
+prototype_redo_exec_hook(FILE_ID);
+prototype_redo_exec_hook(INCOMPLETE_LOG);
+prototype_redo_exec_hook_dummy(INCOMPLETE_GROUP);
+prototype_redo_exec_hook(REDO_INSERT_ROW_HEAD);
+prototype_redo_exec_hook(REDO_INSERT_ROW_TAIL);
+prototype_redo_exec_hook(REDO_INSERT_ROW_BLOBS);
+prototype_redo_exec_hook(REDO_PURGE_ROW_HEAD);
+prototype_redo_exec_hook(REDO_PURGE_ROW_TAIL);
+prototype_redo_exec_hook(REDO_FREE_HEAD_OR_TAIL);
+prototype_redo_exec_hook(REDO_FREE_BLOCKS);
+prototype_redo_exec_hook(REDO_DELETE_ALL);
+prototype_redo_exec_hook(REDO_INDEX);
+prototype_redo_exec_hook(REDO_INDEX_NEW_PAGE);
+prototype_redo_exec_hook(REDO_INDEX_FREE_PAGE);
+prototype_redo_exec_hook(UNDO_ROW_INSERT);
+prototype_redo_exec_hook(UNDO_ROW_DELETE);
+prototype_redo_exec_hook(UNDO_ROW_UPDATE);
+prototype_redo_exec_hook(UNDO_KEY_INSERT);
+prototype_redo_exec_hook(UNDO_KEY_DELETE);
+prototype_redo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT);
+prototype_redo_exec_hook(COMMIT);
+prototype_redo_exec_hook(CLR_END);
+prototype_undo_exec_hook(UNDO_ROW_INSERT);
+prototype_undo_exec_hook(UNDO_ROW_DELETE);
+prototype_undo_exec_hook(UNDO_ROW_UPDATE);
+prototype_undo_exec_hook(UNDO_KEY_INSERT);
+prototype_undo_exec_hook(UNDO_KEY_DELETE);
+prototype_undo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT);
+
+static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply);
+static uint end_of_redo_phase(my_bool prepare_for_undo_phase);
+static int run_undo_phase(uint uncommitted);
+static void display_record_position(const LOG_DESC *log_desc,
+ const TRANSLOG_HEADER_BUFFER *rec,
+ uint number);
+static int display_and_apply_record(const LOG_DESC *log_desc,
+ const TRANSLOG_HEADER_BUFFER *rec);
+static MARIA_HA *get_MARIA_HA_from_REDO_record(const
+ TRANSLOG_HEADER_BUFFER *rec);
+static MARIA_HA *get_MARIA_HA_from_UNDO_record(const
+ TRANSLOG_HEADER_BUFFER *rec);
+static void prepare_table_for_close(MARIA_HA *info, TRANSLOG_ADDRESS horizon);
+static LSN parse_checkpoint_record(LSN lsn);
+static void new_transaction(uint16 sid, TrID long_id, LSN undo_lsn,
+ LSN first_undo_lsn);
+static int new_table(uint16 sid, const char *name,
+ File org_kfile, File org_dfile,
+ LSN lsn_of_file_id);
+static int new_page(File fileid, pgcache_page_no_t pageid, LSN rec_lsn,
+ struct st_dirty_page *dirty_page);
+static int close_all_tables(void);
+static my_bool close_one_table(const char *name, TRANSLOG_ADDRESS addr);
+static void print_redo_phase_progress(TRANSLOG_ADDRESS addr);
+
+/** @brief global [out] buffer for translog_read_record(); never shrinks */
+static struct
+{
+ uchar *str;
+ size_t length;
+} log_record_buffer;
+static void enlarge_buffer(const TRANSLOG_HEADER_BUFFER *rec)
+{
+ if (log_record_buffer.length < rec->record_length)
+ {
+ log_record_buffer.length= rec->record_length;
+ log_record_buffer.str= my_realloc(log_record_buffer.str,
+ rec->record_length,
+ MYF(MY_WME | MY_ALLOW_ZERO_PTR));
+ }
+}
+/** @brief Tells what kind of progress message was printed to the error log */
+static enum recovery_message_type
+{
+ REC_MSG_NONE= 0, REC_MSG_REDO, REC_MSG_UNDO, REC_MSG_FLUSH
+} recovery_message_printed;
+/** @brief Prints to a trace file if it is not NULL */
+void tprint(FILE *trace_file, const char *format, ...)
+ ATTRIBUTE_FORMAT(printf, 2, 3);
+void tprint(FILE *trace_file __attribute__ ((unused)),
+ const char *format __attribute__ ((unused)), ...)
+{
+ va_list args;
+ va_start(args, format);
+ DBUG_PRINT("info", ("%s", format));
+ if (trace_file != NULL)
+ {
+ if (procent_printed)
+ {
+ procent_printed= 0;
+ fputc('\n', trace_file ? trace_file : stderr);
+ }
+ vfprintf(trace_file, format, args);
+ }
+ va_end(args);
+}
+
+void eprint(FILE *trace_file, const char *format, ...)
+ ATTRIBUTE_FORMAT(printf, 2, 3);
+
+void eprint(FILE *trace_file __attribute__ ((unused)),
+ const char *format __attribute__ ((unused)), ...)
+{
+ va_list args;
+ va_start(args, format);
+ DBUG_PRINT("error", ("%s", format));
+ if (procent_printed)
+ {
+ /* In silent mode, print on another line than the 0% 10% 20% line */
+ procent_printed= 0;
+ fputc('\n', trace_file ? trace_file : stderr);
+ }
+ vfprintf(trace_file ? trace_file : stderr, format, args);
+ va_end(args);
+}
+
+
+#define ALERT_USER() DBUG_ASSERT(0)
+
+static void print_preamble()
+{
+ ma_message_no_user(ME_JUST_INFO, "starting recovery");
+}
+
+
+/**
+ @brief Recovers from the last checkpoint.
+
+ Runs the REDO phase using special structures, then sets up the playground
+ of runtime: recreates transactions inside trnman, open tables with their
+ two-byte-id mapping; takes a checkpoint and runs the UNDO phase. Closes all
+ tables.
+
+ @return Operation status
+ @retval 0 OK
+ @retval !=0 Error
+*/
+
+int maria_recover(void)
+{
+ int res= 1;
+ FILE *trace_file;
+ uint warnings_count;
+ DBUG_ENTER("maria_recover");
+
+ DBUG_ASSERT(!maria_in_recovery);
+ maria_in_recovery= TRUE;
+
+#ifdef EXTRA_DEBUG
+ trace_file= fopen("maria_recovery.trace", "a+");
+#else
+ trace_file= NULL; /* no trace file for being fast */
+#endif
+ tprint(trace_file, "TRACE of the last MARIA recovery from mysqld\n");
+ DBUG_ASSERT(maria_pagecache->inited);
+ res= maria_apply_log(LSN_IMPOSSIBLE, MARIA_LOG_APPLY, trace_file,
+ TRUE, TRUE, TRUE, &warnings_count);
+ if (!res)
+ {
+ if (warnings_count == 0)
+ tprint(trace_file, "SUCCESS\n");
+ else
+ {
+ tprint(trace_file, "DOUBTFUL (%u warnings, check previous output)\n",
+ warnings_count);
+ /*
+ We asked for execution of UNDOs, and skipped DDLs, so shouldn't get
+ any warnings.
+ */
+ DBUG_ASSERT(0);
+ }
+ }
+ if (trace_file)
+ fclose(trace_file);
+ maria_in_recovery= FALSE;
+ DBUG_RETURN(res);
+}
+
+
+/**
+ @brief Displays and/or applies the log
+
+ @param from_lsn LSN from which log reading/applying should start;
+ LSN_IMPOSSIBLE means "use last checkpoint"
+ @param apply how log records should be applied or not
+ @param trace_file trace file where progress/debug messages will go
+ @param skip_DDLs_arg Should DDL records (CREATE/RENAME/DROP/REPAIR)
+ be skipped by the REDO phase or not
+ @param take_checkpoints Should we take checkpoints or not.
+ @param[out] warnings_count Count of warnings will be put there
+
+ @todo This trace_file thing is primitive; soon we will make it similar to
+ ma_check_print_warning() etc, and a successful recovery does not need to
+ create a trace file. But for debugging now it is useful.
+
+ @return Operation status
+ @retval 0 OK
+ @retval !=0 Error
+*/
+
+int maria_apply_log(LSN from_lsn, enum maria_apply_log_way apply,
+ FILE *trace_file,
+ my_bool should_run_undo_phase, my_bool skip_DDLs_arg,
+ my_bool take_checkpoints, uint *warnings_count)
+{
+ int error= 0;
+ uint uncommitted_trans;
+ ulonglong old_now;
+ DBUG_ENTER("maria_apply_log");
+
+ DBUG_ASSERT(apply == MARIA_LOG_APPLY || !should_run_undo_phase);
+ DBUG_ASSERT(!maria_multi_threaded);
+ warnings= 0;
+ /* checkpoints can happen only if TRNs have been built */
+ DBUG_ASSERT(should_run_undo_phase || !take_checkpoints);
+ all_active_trans= (struct st_trn_for_recovery *)
+ my_malloc((SHORT_TRID_MAX + 1) * sizeof(struct st_trn_for_recovery),
+ MYF(MY_ZEROFILL));
+ all_tables= (struct st_table_for_recovery *)
+ my_malloc((SHARE_ID_MAX + 1) * sizeof(struct st_table_for_recovery),
+ MYF(MY_ZEROFILL));
+ if (!all_active_trans || !all_tables)
+ goto err;
+
+ if (take_checkpoints && ma_checkpoint_init(0))
+ goto err;
+
+ recovery_message_printed= REC_MSG_NONE;
+ tracef= trace_file;
+ skip_DDLs= skip_DDLs_arg;
+
+ if (from_lsn == LSN_IMPOSSIBLE)
+ {
+ if (last_checkpoint_lsn == LSN_IMPOSSIBLE)
+ {
+ from_lsn= translog_first_theoretical_lsn();
+ /*
+ as far as we have not yet any checkpoint then the very first
+ log file should be present.
+ */
+ if (unlikely((from_lsn == LSN_IMPOSSIBLE) ||
+ (from_lsn == LSN_ERROR)))
+ goto err;
+ }
+ else
+ {
+ from_lsn= parse_checkpoint_record(last_checkpoint_lsn);
+ if (from_lsn == LSN_ERROR)
+ goto err;
+ }
+ }
+
+ now= my_getsystime();
+ if (run_redo_phase(from_lsn, apply))
+ {
+ ma_message_no_user(0, "Redo phase failed");
+ goto err;
+ }
+
+ if ((uncommitted_trans=
+ end_of_redo_phase(should_run_undo_phase)) == (uint)-1)
+ {
+ ma_message_no_user(0, "End of redo phase failed");
+ goto err;
+ }
+
+ old_now= now;
+ now= my_getsystime();
+ if (recovery_message_printed == REC_MSG_REDO)
+ {
+ float phase_took= (now - old_now)/10000000.0;
+ /*
+ Detailed progress info goes to stderr, because ma_message_no_user()
+ cannot put several messages on one line.
+ */
+ procent_printed= 1;
+ fprintf(stderr, " (%.1f seconds); ", phase_took);
+ }
+
+ /**
+ REDO phase does not fill blocks' rec_lsn, so a checkpoint now would be
+ wrong: if a future recovery used it, the REDO phase would always
+ start from the checkpoint and never from before, wrongly skipping REDOs
+ (tested). Another problem is that the REDO phase uses
+ PAGECACHE_PLAIN_PAGE, while Checkpoint only collects PAGECACHE_LSN_PAGE.
+
+ @todo fix this. pagecache_write() now can have a rec_lsn argument. And we
+ could make a function which goes through pages at end of REDO phase and
+ changes their type.
+ */
+#ifdef FIX_AND_ENABLE_LATER
+ if (take_checkpoints && checkpoint_useful)
+ {
+ /*
+ We take a checkpoint as it can save future recovery work if we crash
+ during the UNDO phase. But we don't flush pages, as UNDOs will change
+ them again probably.
+ */
+ if (ma_checkpoint_execute(CHECKPOINT_INDIRECT, FALSE))
+ goto err;
+ }
+#endif
+
+ if (should_run_undo_phase)
+ {
+ if (run_undo_phase(uncommitted_trans))
+ {
+ ma_message_no_user(0, "Undo phase failed");
+ goto err;
+ }
+ }
+ else if (uncommitted_trans > 0)
+ {
+ tprint(tracef, "***WARNING: %u uncommitted transactions; some tables may"
+ " be left inconsistent!***\n", uncommitted_trans);
+ warnings++;
+ }
+
+ old_now= now;
+ now= my_getsystime();
+ if (recovery_message_printed == REC_MSG_UNDO)
+ {
+ float phase_took= (now - old_now)/10000000.0;
+ procent_printed= 1;
+ fprintf(stderr, " (%.1f seconds); ", phase_took);
+ }
+
+ /*
+ we don't use maria_panic() because it would maria_end(), and Recovery does
+ not want that (we want to keep some modules initialized for runtime).
+ */
+ if (close_all_tables())
+ {
+ ma_message_no_user(0, "closing of tables failed");
+ goto err;
+ }
+
+ old_now= now;
+ now= my_getsystime();
+ if (recovery_message_printed == REC_MSG_FLUSH)
+ {
+ float phase_took= (now - old_now)/10000000.0;
+ procent_printed= 1;
+ fprintf(stderr, " (%.1f seconds); ", phase_took);
+ }
+
+ if (take_checkpoints && checkpoint_useful)
+ {
+ /* No dirty pages, all tables are closed, no active transactions, save: */
+ if (ma_checkpoint_execute(CHECKPOINT_FULL, FALSE))
+ goto err;
+ }
+
+ goto end;
+err:
+ error= 1;
+ tprint(tracef, "\nRecovery of tables with transaction logs FAILED\n");
+end:
+ hash_free(&all_dirty_pages);
+ bzero(&all_dirty_pages, sizeof(all_dirty_pages));
+ my_free(dirty_pages_pool, MYF(MY_ALLOW_ZERO_PTR));
+ dirty_pages_pool= NULL;
+ my_free(all_tables, MYF(MY_ALLOW_ZERO_PTR));
+ all_tables= NULL;
+ my_free(all_active_trans, MYF(MY_ALLOW_ZERO_PTR));
+ all_active_trans= NULL;
+ my_free(log_record_buffer.str, MYF(MY_ALLOW_ZERO_PTR));
+ log_record_buffer.str= NULL;
+ log_record_buffer.length= 0;
+ ma_checkpoint_end();
+ *warnings_count= warnings;
+ if (recovery_message_printed != REC_MSG_NONE)
+ {
+ fprintf(stderr, "\n");
+ if (!error)
+ ma_message_no_user(ME_JUST_INFO, "recovery done");
+ }
+ if (error)
+ my_message(HA_ERR_INITIALIZATION,
+ "Maria recovery failed. Please run maria_chk -r on all maria "
+ "tables and delete all maria_log.######## files", MYF(0));
+ procent_printed= 0;
+ /* we don't cleanly close tables if we hit some error (may corrupt them) */
+ DBUG_RETURN(error);
+}
+
+
+/* very basic info about the record's header */
+static void display_record_position(const LOG_DESC *log_desc,
+ const TRANSLOG_HEADER_BUFFER *rec,
+ uint number)
+{
+ /*
+ if number==0, we're going over records which we had already seen and which
+ form a group, so we indent below the group's end record
+ */
+ tprint(tracef,
+ "%sRec#%u LSN (%lu,0x%lx) short_trid %u %s(num_type:%u) len %lu\n",
+ number ? "" : " ", number, LSN_IN_PARTS(rec->lsn),
+ rec->short_trid, log_desc->name, rec->type,
+ (ulong)rec->record_length);
+}
+
+
+static int display_and_apply_record(const LOG_DESC *log_desc,
+ const TRANSLOG_HEADER_BUFFER *rec)
+{
+ int error;
+ if (log_desc->record_execute_in_redo_phase == NULL)
+ {
+ /* die on all not-yet-handled records :) */
+ DBUG_ASSERT("one more hook" == "to write");
+ return 1;
+ }
+ if ((error= (*log_desc->record_execute_in_redo_phase)(rec)))
+ eprint(tracef, "Got error %d when executing record\n", my_errno);
+ return error;
+}
+
+
+prototype_redo_exec_hook(LONG_TRANSACTION_ID)
+{
+ uint16 sid= rec->short_trid;
+ TrID long_trid= all_active_trans[sid].long_trid;
+ /*
+ Any incomplete group should be of an old crash which already had a
+ recovery and thus has logged INCOMPLETE_GROUP which we must have seen.
+ */
+ DBUG_ASSERT(all_active_trans[sid].group_start_lsn == LSN_IMPOSSIBLE);
+ if (long_trid != 0)
+ {
+ LSN ulsn= all_active_trans[sid].undo_lsn;
+ /*
+ If the first record of that transaction is after 'rec', it's probably
+ because that transaction was found in the checkpoint record, and then
+ it's ok, we can forget about that transaction (we'll meet it later
+ again in the REDO phase) and replace it with the one in 'rec'.
+ */
+ if ((ulsn != LSN_IMPOSSIBLE) &&
+ (cmp_translog_addr(ulsn, rec->lsn) < 0))
+ {
+ char llbuf[22];
+ llstr(long_trid, llbuf);
+ eprint(tracef, "Found an old transaction long_trid %s short_trid %u"
+ " with same short id as this new transaction, and has neither"
+ " committed nor rollback (undo_lsn: (%lu,0x%lx))\n",
+ llbuf, sid, LSN_IN_PARTS(ulsn));
+ goto err;
+ }
+ }
+ long_trid= uint6korr(rec->header);
+ new_transaction(sid, long_trid, LSN_IMPOSSIBLE, LSN_IMPOSSIBLE);
+ goto end;
+err:
+ ALERT_USER();
+ return 1;
+end:
+ return 0;
+}
+
+
+static void new_transaction(uint16 sid, TrID long_id, LSN undo_lsn,
+ LSN first_undo_lsn)
+{
+ char llbuf[22];
+ all_active_trans[sid].long_trid= long_id;
+ llstr(long_id, llbuf);
+ tprint(tracef, "Transaction long_trid %s short_trid %u starts\n",
+ llbuf, sid);
+ all_active_trans[sid].undo_lsn= undo_lsn;
+ all_active_trans[sid].first_undo_lsn= first_undo_lsn;
+ set_if_bigger(max_long_trid, long_id);
+}
+
+
+prototype_redo_exec_hook_dummy(CHECKPOINT)
+{
+ /* the only checkpoint we care about was found via control file, ignore */
+ return 0;
+}
+
+
+prototype_redo_exec_hook_dummy(INCOMPLETE_GROUP)
+{
+ /* abortion was already made */
+ return 0;
+}
+
+prototype_redo_exec_hook(INCOMPLETE_LOG)
+{
+ MARIA_HA *info;
+ if (skip_DDLs)
+ {
+ tprint(tracef, "we skip DDLs\n");
+ return 0;
+ }
+ if ((info= get_MARIA_HA_from_REDO_record(rec)) == NULL)
+ {
+ /* no such table, don't need to warn */
+ return 0;
+ }
+ /*
+ Example of what can go wrong when replaying DDLs:
+ CREATE TABLE t (logged); INSERT INTO t VALUES(1) (logged);
+ ALTER TABLE t ... which does
+ CREATE a temporary table #sql... (logged)
+ INSERT data from t into #sql... (not logged)
+ RENAME #sql TO t (logged)
+ Removing tables by hand and replaying the log will leave in the
+ end an empty table "t": missing records. If after the RENAME an INSERT
+ into t was done, that row had number 1 in its page, executing the
+ REDO_INSERT_ROW_HEAD on the recreated empty t will fail (assertion
+ failure in _ma_apply_redo_insert_row_head_or_tail(): new data page is
+ created whereas rownr is not 0).
+ So when the server disables logging for ALTER TABLE or CREATE SELECT, it
+ logs LOGREC_INCOMPLETE_LOG to warn maria_read_log and then the user.
+
+ Another issue is that replaying of DDLs is not correct enough to work if
+ there was a crash during a DDL (see comment in execution of
+ REDO_RENAME_TABLE ).
+ */
+ tprint(tracef, "***WARNING: MySQL server currently logs no records"
+ " about insertion of data by ALTER TABLE and CREATE SELECT,"
+ " as they are not necessary for recovery;"
+ " present applying of log records may well not work.***\n");
+ warnings++;
+ return 0;
+}
+
+
+prototype_redo_exec_hook(REDO_CREATE_TABLE)
+{
+ File dfile= -1, kfile= -1;
+ char *linkname_ptr, filename[FN_REFLEN], *name, *ptr, *data_file_name,
+ *index_file_name;
+ uchar *kfile_header;
+ myf create_flag;
+ uint flags;
+ int error= 1, create_mode= O_RDWR | O_TRUNC;
+ MARIA_HA *info= NULL;
+ uint kfile_size_before_extension, keystart;
+
+ if (skip_DDLs)
+ {
+ tprint(tracef, "we skip DDLs\n");
+ return 0;
+ }
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ goto end;
+ }
+ name= (char *)log_record_buffer.str;
+ /*
+ TRUNCATE TABLE and REPAIR USE_FRM call maria_create(), so below we can
+ find a REDO_CREATE_TABLE for a table which we have open, that's why we
+ need to look for any open instances and close them first.
+ */
+ if (close_one_table(name, rec->lsn))
+ {
+ eprint(tracef, "Table '%s' got error %d on close\n", name, my_errno);
+ ALERT_USER();
+ goto end;
+ }
+ /* we try hard to get create_rename_lsn, to avoid mistakes if possible */
+ info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR);
+ if (info)
+ {
+ MARIA_SHARE *share= info->s;
+ /* check that we're not already using it */
+ if (share->reopen != 1)
+ {
+ eprint(tracef, "Table '%s is already open (reopen=%u)\n",
+ name, share->reopen);
+ ALERT_USER();
+ goto end;
+ }
+ DBUG_ASSERT(share->now_transactional == share->base.born_transactional);
+ if (!share->base.born_transactional)
+ {
+ /*
+ could be that transactional table was later dropped, and a non-trans
+ one was renamed to its name, thus create_rename_lsn is 0 and should
+ not be trusted.
+ */
+ tprint(tracef, "Table '%s' is not transactional, ignoring creation\n",
+ name);
+ ALERT_USER();
+ error= 0;
+ goto end;
+ }
+ if (cmp_translog_addr(share->state.create_rename_lsn, rec->lsn) >= 0)
+ {
+ tprint(tracef, "Table '%s' has create_rename_lsn (%lu,0x%lx) more "
+ "recent than record, ignoring creation",
+ name, LSN_IN_PARTS(share->state.create_rename_lsn));
+ error= 0;
+ goto end;
+ }
+ if (maria_is_crashed(info))
+ {
+ eprint(tracef, "Table '%s' is crashed, can't recreate it\n", name);
+ ALERT_USER();
+ goto end;
+ }
+ maria_close(info);
+ info= NULL;
+ }
+ else /* one or two files absent, or header corrupted... */
+ tprint(tracef, "Table '%s' can't be opened, probably does not exist\n",
+ name);
+ /* if does not exist, or is older, overwrite it */
+ ptr= name + strlen(name) + 1;
+ if ((flags= ptr[0] ? HA_DONT_TOUCH_DATA : 0))
+ tprint(tracef, ", we will only touch index file");
+ ptr++;
+ kfile_size_before_extension= uint2korr(ptr);
+ ptr+= 2;
+ keystart= uint2korr(ptr);
+ ptr+= 2;
+ kfile_header= (uchar *)ptr;
+ ptr+= kfile_size_before_extension;
+ /* set create_rename_lsn (for maria_read_log to be idempotent) */
+ lsn_store(kfile_header + sizeof(info->s->state.header) + 2, rec->lsn);
+ /* we also set is_of_horizon, like maria_create() does */
+ lsn_store(kfile_header + sizeof(info->s->state.header) + 2 + LSN_STORE_SIZE,
+ rec->lsn);
+ data_file_name= ptr;
+ ptr+= strlen(data_file_name) + 1;
+ index_file_name= ptr;
+ ptr+= strlen(index_file_name) + 1;
+ /** @todo handle symlinks */
+ if (data_file_name[0] || index_file_name[0])
+ {
+ eprint(tracef, "Table '%s' DATA|INDEX DIRECTORY clauses are not handled\n",
+ name);
+ goto end;
+ }
+ fn_format(filename, name, "", MARIA_NAME_IEXT,
+ (MY_UNPACK_FILENAME |
+ (flags & HA_DONT_TOUCH_DATA) ? MY_RETURN_REAL_PATH : 0) |
+ MY_APPEND_EXT);
+ linkname_ptr= NULL;
+ create_flag= MY_DELETE_OLD;
+ tprint(tracef, "Table '%s' creating as '%s'", name, filename);
+ if ((kfile= my_create_with_symlink(linkname_ptr, filename, 0, create_mode,
+ MYF(MY_WME|create_flag))) < 0)
+ {
+ eprint(tracef, "Failed to create index file\n");
+ goto end;
+ }
+ if (my_pwrite(kfile, kfile_header,
+ kfile_size_before_extension, 0, MYF(MY_NABP|MY_WME)) ||
+ my_chsize(kfile, keystart, 0, MYF(MY_WME)))
+ {
+ eprint(tracef, "Failed to write to index file\n");
+ goto end;
+ }
+ if (!(flags & HA_DONT_TOUCH_DATA))
+ {
+ fn_format(filename,name,"", MARIA_NAME_DEXT,
+ MY_UNPACK_FILENAME | MY_APPEND_EXT);
+ linkname_ptr= NULL;
+ create_flag=MY_DELETE_OLD;
+ if (((dfile=
+ my_create_with_symlink(linkname_ptr, filename, 0, create_mode,
+ MYF(MY_WME | create_flag))) < 0) ||
+ my_close(dfile, MYF(MY_WME)))
+ {
+ eprint(tracef, "Failed to create data file\n");
+ goto end;
+ }
+ /*
+ we now have an empty data file. To be able to
+ _ma_initialize_data_file() we need some pieces of the share to be
+ correctly filled. So we just open the table (fortunately, an empty
+ data file does not preclude this).
+ */
+ if (((info= maria_open(name, O_RDONLY, 0)) == NULL) ||
+ _ma_initialize_data_file(info->s, info->dfile.file))
+ {
+ eprint(tracef, "Failed to open new table or write to data file\n");
+ goto end;
+ }
+ }
+ error= 0;
+end:
+ tprint(tracef, "\n");
+ if (kfile >= 0)
+ error|= my_close(kfile, MYF(MY_WME));
+ if (info != NULL)
+ error|= maria_close(info);
+ return error;
+}
+
+
+prototype_redo_exec_hook(REDO_RENAME_TABLE)
+{
+ char *old_name, *new_name;
+ int error= 1;
+ MARIA_HA *info= NULL;
+ if (skip_DDLs)
+ {
+ tprint(tracef, "we skip DDLs\n");
+ return 0;
+ }
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ goto end;
+ }
+ old_name= (char *)log_record_buffer.str;
+ new_name= old_name + strlen(old_name) + 1;
+ tprint(tracef, "Table '%s' to rename to '%s'; old-name table ", old_name,
+ new_name);
+ /*
+ Here is why we skip CREATE/DROP/RENAME when doing a recovery from
+ ha_maria (whereas we do when called from maria_read_log). Consider:
+ CREATE TABLE t;
+ RENAME TABLE t to u;
+ DROP TABLE u;
+ RENAME TABLE v to u; # crash between index rename and data rename.
+ And do a Recovery (not removing tables beforehand).
+ Recovery replays CREATE, then RENAME: the maria_open("t") works,
+ maria_open("u") does not (no data file) so table "u" is considered
+ inexistent and so maria_rename() is done which overwrites u's index file,
+ which is lost. Ok, the data file (v.MAD) is still available, but only a
+ REPAIR USE_FRM can rebuild the index, which is unsafe and downtime.
+ So it is preferrable to not execute RENAME, and leave the "mess" of files,
+ rather than possibly destroy a file. DBA will manually rename files.
+ A safe recovery method would probably require checking the existence of
+ the index file and of the data file separately (not via maria_open()), and
+ maybe also to store a create_rename_lsn in the data file too
+ For now, all we risk is to leave the mess (half-renamed files) left by the
+ crash. We however sync files and directories at each file rename. The SQL
+ layer is anyway not crash-safe for DDLs (except the repartioning-related
+ ones).
+ We replay DDLs in maria_read_log to be able to recreate tables from
+ scratch. It means that "maria_read_log -a" should not be used on a
+ database which just crashed during a DDL. And also ALTER TABLE does not
+ log insertions of records into the temporary table, so replaying may
+ fail (grep for INCOMPLETE_LOG in files).
+ */
+ info= maria_open(old_name, O_RDONLY, HA_OPEN_FOR_REPAIR);
+ if (info)
+ {
+ MARIA_SHARE *share= info->s;
+ if (!share->base.born_transactional)
+ {
+ tprint(tracef, ", is not transactional, ignoring renaming\n");
+ ALERT_USER();
+ error= 0;
+ goto end;
+ }
+ if (cmp_translog_addr(share->state.create_rename_lsn, rec->lsn) >= 0)
+ {
+ tprint(tracef, ", has create_rename_lsn (%lu,0x%lx) more recent than"
+ " record, ignoring renaming",
+ LSN_IN_PARTS(share->state.create_rename_lsn));
+ error= 0;
+ goto end;
+ }
+ if (maria_is_crashed(info))
+ {
+ tprint(tracef, ", is crashed, can't rename it");
+ ALERT_USER();
+ goto end;
+ }
+ if (close_one_table(info->s->open_file_name, rec->lsn) ||
+ maria_close(info))
+ goto end;
+ info= NULL;
+ tprint(tracef, ", is ok for renaming; new-name table ");
+ }
+ else /* one or two files absent, or header corrupted... */
+ {
+ tprint(tracef, ", can't be opened, probably does not exist");
+ error= 0;
+ goto end;
+ }
+ /*
+ We must also check the create_rename_lsn of the 'new_name' table if it
+ exists: otherwise we may, with our rename which overwrites, destroy
+ another table. For example:
+ CREATE TABLE t;
+ RENAME t to u;
+ DROP TABLE u;
+ RENAME v to u; # v is an old table, its creation/insertions not in log
+ And start executing the log (without removing tables beforehand): creates
+ t, renames it to u (if not testing create_rename_lsn) thus overwriting
+ old-named v, drops u, and we are stuck, we have lost data.
+ */
+ info= maria_open(new_name, O_RDONLY, HA_OPEN_FOR_REPAIR);
+ if (info)
+ {
+ MARIA_SHARE *share= info->s;
+ /* We should not have open instances on this table. */
+ if (share->reopen != 1)
+ {
+ tprint(tracef, ", is already open (reopen=%u)\n", share->reopen);
+ ALERT_USER();
+ goto end;
+ }
+ if (!share->base.born_transactional)
+ {
+ tprint(tracef, ", is not transactional, ignoring renaming\n");
+ ALERT_USER();
+ goto drop;
+ }
+ if (cmp_translog_addr(share->state.create_rename_lsn, rec->lsn) >= 0)
+ {
+ tprint(tracef, ", has create_rename_lsn (%lu,0x%lx) more recent than"
+ " record, ignoring renaming",
+ LSN_IN_PARTS(share->state.create_rename_lsn));
+ /*
+ We have to drop the old_name table. Consider:
+ CREATE TABLE t;
+ CREATE TABLE v;
+ RENAME TABLE t to u;
+ DROP TABLE u;
+ RENAME TABLE v to u;
+ and apply the log without removing tables beforehand. t will be
+ created, v too; in REDO_RENAME u will be more recent, but we still
+ have to drop t otherwise it stays.
+ */
+ goto drop;
+ }
+ if (maria_is_crashed(info))
+ {
+ tprint(tracef, ", is crashed, can't rename it");
+ ALERT_USER();
+ goto end;
+ }
+ if (maria_close(info))
+ goto end;
+ info= NULL;
+ /* abnormal situation */
+ tprint(tracef, ", exists but is older than record, can't rename it");
+ goto end;
+ }
+ else /* one or two files absent, or header corrupted... */
+ tprint(tracef, ", can't be opened, probably does not exist");
+ tprint(tracef, ", renaming '%s'", old_name);
+ if (maria_rename(old_name, new_name))
+ {
+ eprint(tracef, "Failed to rename table\n");
+ goto end;
+ }
+ info= maria_open(new_name, O_RDONLY, 0);
+ if (info == NULL)
+ {
+ eprint(tracef, "Failed to open renamed table\n");
+ goto end;
+ }
+ if (_ma_update_create_rename_lsn(info->s, rec->lsn, TRUE))
+ goto end;
+ if (maria_close(info))
+ goto end;
+ info= NULL;
+ error= 0;
+ goto end;
+drop:
+ tprint(tracef, ", only dropping '%s'", old_name);
+ if (maria_delete_table(old_name))
+ {
+ eprint(tracef, "Failed to drop table\n");
+ goto end;
+ }
+ error= 0;
+ goto end;
+end:
+ tprint(tracef, "\n");
+ if (info != NULL)
+ error|= maria_close(info);
+ return error;
+}
+
+
+/*
+ The record may come from REPAIR, ALTER TABLE ENABLE KEYS, OPTIMIZE.
+*/
+prototype_redo_exec_hook(REDO_REPAIR_TABLE)
+{
+ int error= 1;
+ MARIA_HA *info;
+ HA_CHECK param;
+ char *name;
+ uint quick_repair;
+ DBUG_ENTER("exec_REDO_LOGREC_REDO_REPAIR_TABLE");
+
+ if (skip_DDLs)
+ {
+ /*
+ REPAIR is not exactly a DDL, but it manipulates files without logging
+ insertions into them.
+ */
+ tprint(tracef, "we skip DDLs\n");
+ DBUG_RETURN(0);
+ }
+ if ((info= get_MARIA_HA_from_REDO_record(rec)) == NULL)
+ DBUG_RETURN(0);
+
+ /*
+ Otherwise, the mapping is newer than the table, and our record is newer
+ than the mapping, so we can repair.
+ */
+ tprint(tracef, " repairing...\n");
+
+ maria_chk_init(&param);
+ param.isam_file_name= name= info->s->open_file_name;
+ param.testflag= uint4korr(rec->header + FILEID_STORE_SIZE);
+ param.tmpdir= maria_tmpdir;
+ DBUG_ASSERT(maria_tmpdir);
+
+ info->s->state.key_map= uint8korr(rec->header + FILEID_STORE_SIZE + 4);
+ quick_repair= param.testflag & T_QUICK;
+
+
+ if (param.testflag & T_REP_PARALLEL)
+ {
+ if (maria_repair_parallel(&param, info, name, quick_repair))
+ goto end;
+ }
+ else if (param.testflag & T_REP_BY_SORT)
+ {
+ if (maria_repair_by_sort(&param, info, name, quick_repair))
+ goto end;
+ }
+ else if (maria_repair(&param, info, name, quick_repair))
+ goto end;
+
+ if (_ma_update_create_rename_lsn(info->s, rec->lsn, TRUE))
+ goto end;
+ error= 0;
+
+end:
+ DBUG_RETURN(error);
+}
+
+
+prototype_redo_exec_hook(REDO_DROP_TABLE)
+{
+ char *name;
+ int error= 1;
+ MARIA_HA *info;
+ if (skip_DDLs)
+ {
+ tprint(tracef, "we skip DDLs\n");
+ return 0;
+ }
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ return 1;
+ }
+ name= (char *)log_record_buffer.str;
+ tprint(tracef, "Table '%s'", name);
+ info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR);
+ if (info)
+ {
+ MARIA_SHARE *share= info->s;
+ if (!share->base.born_transactional)
+ {
+ tprint(tracef, ", is not transactional, ignoring removal\n");
+ ALERT_USER();
+ error= 0;
+ goto end;
+ }
+ if (cmp_translog_addr(share->state.create_rename_lsn, rec->lsn) >= 0)
+ {
+ tprint(tracef, ", has create_rename_lsn (%lu,0x%lx) more recent than"
+ " record, ignoring removal",
+ LSN_IN_PARTS(share->state.create_rename_lsn));
+ error= 0;
+ goto end;
+ }
+ if (maria_is_crashed(info))
+ {
+ tprint(tracef, ", is crashed, can't drop it");
+ ALERT_USER();
+ goto end;
+ }
+ if (close_one_table(info->s->open_file_name, rec->lsn) ||
+ maria_close(info))
+ goto end;
+ info= NULL;
+ /* if it is older, or its header is corrupted, drop it */
+ tprint(tracef, ", dropping '%s'", name);
+ if (maria_delete_table(name))
+ {
+ eprint(tracef, "Failed to drop table\n");
+ goto end;
+ }
+ }
+ else /* one or two files absent, or header corrupted... */
+ tprint(tracef,", can't be opened, probably does not exist");
+ error= 0;
+end:
+ tprint(tracef, "\n");
+ if (info != NULL)
+ error|= maria_close(info);
+ return error;
+}
+
+
+prototype_redo_exec_hook(FILE_ID)
+{
+ uint16 sid;
+ int error= 1;
+ const char *name;
+ MARIA_HA *info;
+ DBUG_ENTER("exec_REDO_LOGREC_FILE_ID");
+
+ if (cmp_translog_addr(rec->lsn, checkpoint_start) < 0)
+ {
+ /*
+ If that mapping was still true at checkpoint time, it was found in
+ checkpoint record, no need to recreate it. If that mapping had ended at
+ checkpoint time (table was closed or repaired), a flush and force
+ happened and so mapping is not needed.
+ */
+ tprint(tracef, "ignoring because before checkpoint\n");
+ DBUG_RETURN(0);
+ }
+
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ goto end;
+ }
+ sid= fileid_korr(log_record_buffer.str);
+ info= all_tables[sid].info;
+ if (info != NULL)
+ {
+ tprint(tracef, " Closing table '%s'\n", info->s->open_file_name);
+ prepare_table_for_close(info, rec->lsn);
+ if (maria_close(info))
+ {
+ eprint(tracef, "Failed to close table\n");
+ goto end;
+ }
+ all_tables[sid].info= NULL;
+ }
+ name= (char *)log_record_buffer.str + FILEID_STORE_SIZE;
+ if (new_table(sid, name, -1, -1, rec->lsn))
+ goto end;
+ error= 0;
+end:
+ DBUG_RETURN(error);
+}
+
+
+static int new_table(uint16 sid, const char *name,
+ File org_kfile, File org_dfile,
+ LSN lsn_of_file_id)
+{
+ /*
+ -1 (skip table): close table and return 0;
+ 1 (error): close table and return 1;
+ 0 (success): leave table open and return 0.
+ */
+ int error= 1;
+ MARIA_HA *info;
+ MARIA_SHARE *share;
+ my_off_t dfile_len, kfile_len;
+
+ checkpoint_useful= TRUE;
+ if ((name == NULL) || (name[0] == 0))
+ {
+ /*
+ we didn't use DBUG_ASSERT() because such record corruption could
+ silently pass in the "info == NULL" test below.
+ */
+ tprint(tracef, ", record is corrupted");
+ info= NULL;
+ goto end;
+ }
+ tprint(tracef, "Table '%s', id %u", name, sid);
+ info= maria_open(name, O_RDWR, HA_OPEN_FOR_REPAIR);
+ if (info == NULL)
+ {
+ tprint(tracef, ", is absent (must have been dropped later?)"
+ " or its header is so corrupted that we cannot open it;"
+ " we skip it");
+ error= 0;
+ goto end;
+ }
+ if (maria_is_crashed(info))
+ {
+ /** @todo what should we do? how to continue recovery? */
+ tprint(tracef, "Table is crashed, can't apply log records to it\n");
+ goto end;
+ }
+ share= info->s;
+ /* check that we're not already using it */
+ if (share->reopen != 1)
+ {
+ tprint(tracef, ", is already open (reopen=%u)\n", share->reopen);
+ /*
+ It could be that we have in the log
+ FILE_ID(t1,10) ... (t1 was flushed) ... FILE_ID(t1,12);
+ */
+ if (close_one_table(share->open_file_name, lsn_of_file_id))
+ goto end;
+ }
+ DBUG_ASSERT(share->now_transactional == share->base.born_transactional);
+ if (!share->base.born_transactional)
+ {
+ tprint(tracef, ", is not transactional\n");
+ ALERT_USER();
+ error= -1;
+ goto end;
+ }
+ if (cmp_translog_addr(lsn_of_file_id, share->state.create_rename_lsn) <= 0)
+ {
+ tprint(tracef, ", has create_rename_lsn (%lu,0x%lx) more recent than"
+ " LOGREC_FILE_ID's LSN (%lu,0x%lx), ignoring open request",
+ LSN_IN_PARTS(share->state.create_rename_lsn),
+ LSN_IN_PARTS(lsn_of_file_id));
+ error= -1;
+ goto end;
+ }
+ /* don't log any records for this work */
+ _ma_tmp_disable_logging_for_table(info, FALSE);
+ /* _ma_unpin_all_pages() reads info->trn: */
+ info->trn= &dummy_transaction_object;
+ /* execution of some REDO records relies on data_file_length */
+ dfile_len= my_seek(info->dfile.file, 0, SEEK_END, MYF(MY_WME));
+ kfile_len= my_seek(info->s->kfile.file, 0, SEEK_END, MYF(MY_WME));
+ if ((dfile_len == MY_FILEPOS_ERROR) ||
+ (kfile_len == MY_FILEPOS_ERROR))
+ {
+ tprint(tracef, ", length unknown\n");
+ goto end;
+ }
+ if (share->state.state.data_file_length != dfile_len)
+ {
+ tprint(tracef, ", has wrong state.data_file_length (fixing it)");
+ share->state.state.data_file_length= dfile_len;
+ }
+ if (share->state.state.key_file_length != kfile_len)
+ {
+ tprint(tracef, ", has wrong state.key_file_length (fixing it)");
+ share->state.state.key_file_length= kfile_len;
+ }
+ if ((dfile_len % share->block_size) || (kfile_len % share->block_size))
+ {
+ tprint(tracef, ", has too short last page\n");
+ /* Recovery will fix this, no error */
+ ALERT_USER();
+ }
+ /*
+ This LSN serves in this situation; assume log is:
+ FILE_ID(6->"t2") REDO_INSERT(6) FILE_ID(6->"t1") CHECKPOINT(6->"t1")
+ then crash, checkpoint record is parsed and opens "t1" with id 6; assume
+ REDO phase starts from the REDO_INSERT above: it will wrongly try to
+ update a page of "t1". With this LSN below, REDO_INSERT can realize the
+ mapping is newer than itself, and not execute.
+ Same example is possible with UNDO_INSERT (update of the state).
+ */
+ info->s->lsn_of_file_id= lsn_of_file_id;
+ all_tables[sid].info= info;
+ all_tables[sid].org_kfile= org_kfile;
+ all_tables[sid].org_dfile= org_dfile;
+ /*
+ We don't set info->s->id, it would be useless (no logging in REDO phase);
+ if you change that, know that some records in REDO phase call
+ _ma_update_create_rename_lsn() which resets info->s->id.
+ */
+ tprint(tracef, ", opened");
+ error= 0;
+end:
+ tprint(tracef, "\n");
+ if (error)
+ {
+ if (info != NULL)
+ maria_close(info);
+ if (error == -1)
+ error= 0;
+ }
+ return error;
+}
+
+
+prototype_redo_exec_hook(REDO_INSERT_ROW_HEAD)
+{
+ int error= 1;
+ uchar *buff= NULL;
+ MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec);
+ if (info == NULL)
+ {
+ /*
+ Table was skipped at open time (because later dropped/renamed, not
+ transactional, or create_rename_lsn newer than LOGREC_FILE_ID); it is
+ not an error.
+ */
+ return 0;
+ }
+ /*
+ If REDO's LSN is > page's LSN (read from disk), we are going to modify the
+ page and change its LSN. The normal runtime code stores the UNDO's LSN
+ into the page. Here storing the REDO's LSN (rec->lsn) would work
+ (we are not writing to the log here, so don't have to "flush up to UNDO's
+ LSN"). But in a test scenario where we do updates at runtime, then remove
+ tables, apply the log and check that this results in the same table as at
+ runtime, putting the same LSN as runtime had done will decrease
+ differences. So we use the UNDO's LSN which is current_group_end_lsn.
+ */
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL)
+ {
+ eprint(tracef, "Failed to read allocate buffer for record\n");
+ goto end;
+ }
+ if (translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ goto end;
+ }
+ buff= log_record_buffer.str;
+ if (_ma_apply_redo_insert_row_head_or_tail(info, current_group_end_lsn,
+ HEAD_PAGE,
+ buff + FILEID_STORE_SIZE,
+ buff +
+ FILEID_STORE_SIZE +
+ PAGE_STORE_SIZE +
+ DIRPOS_STORE_SIZE,
+ rec->record_length -
+ (FILEID_STORE_SIZE +
+ PAGE_STORE_SIZE +
+ DIRPOS_STORE_SIZE)))
+ goto end;
+ error= 0;
+end:
+ return error;
+}
+
+
+prototype_redo_exec_hook(REDO_INSERT_ROW_TAIL)
+{
+ int error= 1;
+ uchar *buff;
+ MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec);
+ if (info == NULL)
+ return 0;
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ goto end;
+ }
+ buff= log_record_buffer.str;
+ if (_ma_apply_redo_insert_row_head_or_tail(info, current_group_end_lsn,
+ TAIL_PAGE,
+ buff + FILEID_STORE_SIZE,
+ buff +
+ FILEID_STORE_SIZE +
+ PAGE_STORE_SIZE +
+ DIRPOS_STORE_SIZE,
+ rec->record_length -
+ (FILEID_STORE_SIZE +
+ PAGE_STORE_SIZE +
+ DIRPOS_STORE_SIZE)))
+ goto end;
+ error= 0;
+
+end:
+ return error;
+}
+
+
+prototype_redo_exec_hook(REDO_INSERT_ROW_BLOBS)
+{
+ int error= 1;
+ uchar *buff;
+ MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec);
+ if (info == NULL)
+ return 0;
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ goto end;
+ }
+ buff= log_record_buffer.str;
+ if (_ma_apply_redo_insert_row_blobs(info, current_group_end_lsn,
+ buff + FILEID_STORE_SIZE))
+ goto end;
+ error= 0;
+
+end:
+ return error;
+}
+
+
+prototype_redo_exec_hook(REDO_PURGE_ROW_HEAD)
+{
+ int error= 1;
+ MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec);
+ if (info == NULL)
+ return 0;
+ if (_ma_apply_redo_purge_row_head_or_tail(info, current_group_end_lsn,
+ HEAD_PAGE,
+ rec->header + FILEID_STORE_SIZE))
+ goto end;
+ error= 0;
+end:
+ return error;
+}
+
+
+prototype_redo_exec_hook(REDO_PURGE_ROW_TAIL)
+{
+ int error= 1;
+ MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec);
+ if (info == NULL)
+ return 0;
+ if (_ma_apply_redo_purge_row_head_or_tail(info, current_group_end_lsn,
+ TAIL_PAGE,
+ rec->header + FILEID_STORE_SIZE))
+ goto end;
+ error= 0;
+end:
+ return error;
+}
+
+
+prototype_redo_exec_hook(REDO_FREE_BLOCKS)
+{
+ int error= 1;
+ uchar *buff;
+ MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec);
+ if (info == NULL)
+ return 0;
+ enlarge_buffer(rec);
+
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ goto end;
+ }
+
+ buff= log_record_buffer.str;
+ if (_ma_apply_redo_free_blocks(info, current_group_end_lsn,
+ buff + FILEID_STORE_SIZE))
+ goto end;
+ error= 0;
+end:
+ return error;
+}
+
+
+prototype_redo_exec_hook(REDO_FREE_HEAD_OR_TAIL)
+{
+ int error= 1;
+ MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec);
+ if (info == NULL)
+ return 0;
+
+ if (_ma_apply_redo_free_head_or_tail(info, current_group_end_lsn,
+ rec->header + FILEID_STORE_SIZE))
+ goto end;
+ error= 0;
+end:
+ return error;
+}
+
+
+prototype_redo_exec_hook(REDO_DELETE_ALL)
+{
+ int error= 1;
+ MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec);
+ if (info == NULL)
+ return 0;
+ tprint(tracef, " deleting all %lu rows\n",
+ (ulong)info->s->state.state.records);
+ if (maria_delete_all_rows(info))
+ goto end;
+ error= 0;
+end:
+ return error;
+}
+
+
+prototype_redo_exec_hook(REDO_INDEX)
+{
+ int error= 1;
+ MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec);
+ if (info == NULL)
+ return 0;
+ enlarge_buffer(rec);
+
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ goto end;
+ }
+
+ if (_ma_apply_redo_index(info, current_group_end_lsn,
+ log_record_buffer.str + FILEID_STORE_SIZE,
+ rec->record_length - FILEID_STORE_SIZE))
+ goto end;
+ error= 0;
+end:
+ return error;
+}
+
+prototype_redo_exec_hook(REDO_INDEX_NEW_PAGE)
+{
+ int error= 1;
+ MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec);
+ if (info == NULL)
+ return 0;
+ enlarge_buffer(rec);
+
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ goto end;
+ }
+
+ if (_ma_apply_redo_index_new_page(info, current_group_end_lsn,
+ log_record_buffer.str + FILEID_STORE_SIZE,
+ rec->record_length - FILEID_STORE_SIZE))
+ goto end;
+ error= 0;
+end:
+ return error;
+}
+
+
+prototype_redo_exec_hook(REDO_INDEX_FREE_PAGE)
+{
+ int error= 1;
+ MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec);
+ if (info == NULL)
+ return 0;
+
+ if (_ma_apply_redo_index_free_page(info, current_group_end_lsn,
+ rec->header + FILEID_STORE_SIZE))
+ goto end;
+ error= 0;
+end:
+ return error;
+}
+
+
+#define set_undo_lsn_for_active_trans(TRID, LSN) do { \
+ all_active_trans[TRID].undo_lsn= LSN; \
+ if (all_active_trans[TRID].first_undo_lsn == LSN_IMPOSSIBLE) \
+ all_active_trans[TRID].first_undo_lsn= LSN; } while (0)
+
+prototype_redo_exec_hook(UNDO_ROW_INSERT)
+{
+ MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ MARIA_SHARE *share;
+
+ if (info == NULL)
+ return 0;
+ share= info->s;
+ set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn);
+ if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0)
+ {
+ tprint(tracef, " state has LSN (%lu,0x%lx) older than record, updating"
+ " rows' count\n", LSN_IN_PARTS(share->state.is_of_horizon));
+ share->state.state.records++;
+ if (share->calc_checksum)
+ {
+ uchar buff[HA_CHECKSUM_STORE_SIZE];
+ if (translog_read_record(rec->lsn, LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
+ HA_CHECKSUM_STORE_SIZE, buff, NULL) !=
+ HA_CHECKSUM_STORE_SIZE)
+ {
+ eprint(tracef, "Failed to read record\n");
+ return 1;
+ }
+ share->state.state.checksum+= ha_checksum_korr(buff);
+ }
+ info->s->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
+ }
+ tprint(tracef, " rows' count %lu\n", (ulong)info->s->state.state.records);
+ /* Unpin all pages, stamp them with UNDO's LSN */
+ _ma_unpin_all_pages(info, rec->lsn);
+ return 0;
+}
+
+
+prototype_redo_exec_hook(UNDO_ROW_DELETE)
+{
+ MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ MARIA_SHARE *share;
+
+ if (info == NULL)
+ return 0;
+ share= info->s;
+ set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn);
+ if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0)
+ {
+ tprint(tracef, " state older than record\n");
+ share->state.state.records--;
+ if (share->calc_checksum)
+ {
+ uchar buff[HA_CHECKSUM_STORE_SIZE];
+ if (translog_read_record(rec->lsn, LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
+ HA_CHECKSUM_STORE_SIZE, buff, NULL) !=
+ HA_CHECKSUM_STORE_SIZE)
+ {
+ eprint(tracef, "Failed to read record\n");
+ return 1;
+ }
+ share->state.state.checksum+= ha_checksum_korr(buff);
+ }
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED |
+ STATE_NOT_OPTIMIZED_ROWS;
+ }
+ tprint(tracef, " rows' count %lu\n", (ulong)share->state.state.records);
+ _ma_unpin_all_pages(info, rec->lsn);
+ return 0;
+}
+
+
+prototype_redo_exec_hook(UNDO_ROW_UPDATE)
+{
+ MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ MARIA_SHARE *share;
+ if (info == NULL)
+ return 0;
+ share= info->s;
+ set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn);
+ if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0)
+ {
+ if (share->calc_checksum)
+ {
+ uchar buff[HA_CHECKSUM_STORE_SIZE];
+ if (translog_read_record(rec->lsn, LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
+ HA_CHECKSUM_STORE_SIZE, buff, NULL) !=
+ HA_CHECKSUM_STORE_SIZE)
+ {
+ eprint(tracef, "Failed to read record\n");
+ return 1;
+ }
+ share->state.state.checksum+= ha_checksum_korr(buff);
+ }
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
+ }
+ _ma_unpin_all_pages(info, rec->lsn);
+ return 0;
+}
+
+
+prototype_redo_exec_hook(UNDO_KEY_INSERT)
+{
+ MARIA_HA *info;
+ MARIA_SHARE *share;
+ if (!(info= get_MARIA_HA_from_UNDO_record(rec)))
+ return 0;
+ share= info->s;
+ set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn);
+ if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0)
+ {
+ const uchar *ptr= rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE;
+ uint keynr= key_nr_korr(ptr);
+ if (share->base.auto_key == (keynr + 1)) /* it's auto-increment */
+ {
+ const HA_KEYSEG *keyseg= info->s->keyinfo[keynr].seg;
+ ulonglong value;
+ char llbuf[22];
+ uchar *to;
+ tprint(tracef, " state older than record\n");
+ /* we read the record to find the auto_increment value */
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ return 1;
+ }
+ to= log_record_buffer.str + LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ KEY_NR_STORE_SIZE;
+ if (keyseg->flag & HA_SWAP_KEY)
+ {
+ /* We put key from log record to "data record" packing format... */
+ uchar reversed[HA_MAX_KEY_BUFF];
+ uchar *key_ptr= to;
+ uchar *key_end= key_ptr + keyseg->length;
+ to= reversed + keyseg->length;
+ do
+ {
+ *--to= *key_ptr++;
+ } while (key_ptr != key_end);
+ /* ... so that we can read it with: */
+ }
+ value= ma_retrieve_auto_increment(to, keyseg->type);
+ set_if_bigger(share->state.auto_increment, value);
+ llstr(share->state.auto_increment, llbuf);
+ tprint(tracef, " auto-inc %s\n", llbuf);
+ }
+ }
+ _ma_unpin_all_pages(info, rec->lsn);
+ return 0;
+}
+
+
+prototype_redo_exec_hook(UNDO_KEY_DELETE)
+{
+ MARIA_HA *info;
+ if (!(info= get_MARIA_HA_from_UNDO_record(rec)))
+ return 0;
+ set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn);
+ _ma_unpin_all_pages(info, rec->lsn);
+ return 0;
+}
+
+
+prototype_redo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT)
+{
+ MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ MARIA_SHARE *share;
+ if (info == NULL)
+ return 0;
+ share= info->s;
+ set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn);
+ if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0)
+ {
+ uint key_nr;
+ my_off_t page;
+ key_nr= key_nr_korr(rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE);
+ page= page_korr(rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ KEY_NR_STORE_SIZE);
+ share->state.key_root[key_nr]= (page == IMPOSSIBLE_PAGE_NO ?
+ HA_OFFSET_ERROR :
+ page * share->block_size);
+ }
+ _ma_unpin_all_pages(info, rec->lsn);
+ return 0;
+}
+
+
+prototype_redo_exec_hook(COMMIT)
+{
+ uint16 sid= rec->short_trid;
+ TrID long_trid= all_active_trans[sid].long_trid;
+ char llbuf[22];
+ if (long_trid == 0)
+ {
+ tprint(tracef, "We don't know about transaction with short_trid %u;"
+ "it probably committed long ago, forget it\n", sid);
+ bzero(&all_active_trans[sid], sizeof(all_active_trans[sid]));
+ return 0;
+ }
+ llstr(long_trid, llbuf);
+ tprint(tracef, "Transaction long_trid %s short_trid %u committed\n",
+ llbuf, sid);
+ bzero(&all_active_trans[sid], sizeof(all_active_trans[sid]));
+#ifdef MARIA_VERSIONING
+ /*
+ if real recovery:
+ transaction was committed, move it to some separate list for later
+ purging (but don't purge now! purging may have been started before, we
+ may find REDO_PURGE records soon).
+ */
+#endif
+ return 0;
+}
+
+
+/*
+ Set position for next active record that will have key inserted
+*/
+
+static void set_lastpos(MARIA_HA *info, uchar *pos)
+{
+ ulonglong page;
+ uint dir_entry;
+
+ /* If we have checksum, it's before rowid */
+ if (info->s->calc_checksum)
+ pos+= HA_CHECKSUM_STORE_SIZE;
+ page= page_korr(pos);
+ dir_entry= dirpos_korr(pos + PAGE_STORE_SIZE);
+ info->cur_row.lastpos= ma_recordpos(page, dir_entry);
+}
+
+
+prototype_redo_exec_hook(CLR_END)
+{
+ MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ MARIA_SHARE *share;
+ LSN previous_undo_lsn;
+ enum translog_record_type undone_record_type;
+ const LOG_DESC *log_desc;
+ my_bool row_entry= 0;
+ uchar *logpos;
+ DBUG_ENTER("exec_REDO_LOGREC_CLR_END");
+
+ if (info == NULL)
+ DBUG_RETURN(0);
+ share= info->s;
+ previous_undo_lsn= lsn_korr(rec->header);
+ undone_record_type=
+ clr_type_korr(rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE);
+ log_desc= &log_record_type_descriptor[undone_record_type];
+
+ set_undo_lsn_for_active_trans(rec->short_trid, previous_undo_lsn);
+ tprint(tracef, " CLR_END was about %s, undo_lsn now LSN (%lu,0x%lx)\n",
+ log_desc->name, LSN_IN_PARTS(previous_undo_lsn));
+
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ return 1;
+ }
+ logpos= (log_record_buffer.str + LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ CLR_TYPE_STORE_SIZE);
+
+ if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0)
+ {
+ tprint(tracef, " state older than record\n");
+ switch (undone_record_type) {
+ case LOGREC_UNDO_ROW_DELETE:
+ row_entry= 1;
+ share->state.state.records++;
+ set_lastpos(info, logpos);
+ break;
+ case LOGREC_UNDO_ROW_INSERT:
+ share->state.state.records--;
+ share->state.changed|= STATE_NOT_OPTIMIZED_ROWS;
+ row_entry= 1;
+ break;
+ case LOGREC_UNDO_ROW_UPDATE:
+ row_entry= 1;
+ set_lastpos(info, logpos);
+ break;
+ case LOGREC_UNDO_KEY_INSERT:
+ case LOGREC_UNDO_KEY_DELETE:
+ break;
+ case LOGREC_UNDO_KEY_INSERT_WITH_ROOT:
+ case LOGREC_UNDO_KEY_DELETE_WITH_ROOT:
+ {
+ uint key_nr;
+ my_off_t page;
+ key_nr= key_nr_korr(logpos);
+ page= page_korr(logpos + KEY_NR_STORE_SIZE);
+ share->state.key_root[key_nr]= (page == IMPOSSIBLE_PAGE_NO ?
+ HA_OFFSET_ERROR :
+ page * share->block_size);
+ break;
+ }
+ default:
+ DBUG_ASSERT(0);
+ }
+ if (row_entry && share->calc_checksum)
+ share->state.state.checksum+= ha_checksum_korr(logpos);
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
+ }
+ else
+ {
+ /* We must set lastpos for upcoming undo delete keys */
+ switch (undone_record_type) {
+ case LOGREC_UNDO_ROW_DELETE:
+ case LOGREC_UNDO_ROW_UPDATE:
+ set_lastpos(info, logpos);
+ break;
+ default:
+ break;
+ }
+ }
+ if (row_entry)
+ tprint(tracef, " rows' count %lu\n", (ulong)share->state.state.records);
+ _ma_unpin_all_pages(info, rec->lsn);
+ DBUG_RETURN(0);
+}
+
+
+prototype_undo_exec_hook(UNDO_ROW_INSERT)
+{
+ my_bool error;
+ MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ LSN previous_undo_lsn= lsn_korr(rec->header);
+ MARIA_SHARE *share;
+ const uchar *record_ptr;
+
+ if (info == NULL)
+ {
+ /*
+ Unlike for REDOs, if the table was skipped it is abnormal; we have a
+ transaction to rollback which used this table, as it is not rolled back
+ it was supposed to hold this table and so the table should still be
+ there.
+ */
+ return 1;
+ }
+ share= info->s;
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED |
+ STATE_NOT_OPTIMIZED_ROWS;
+
+ record_ptr= rec->header;
+ if (share->calc_checksum)
+ {
+ /*
+ We need to read more of the record to put the checksum into the record
+ buffer used by _ma_apply_undo_row_insert().
+ If the table has no live checksum, rec->header will be enough.
+ */
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ return 1;
+ }
+ record_ptr= log_record_buffer.str;
+ }
+
+ info->trn= trn;
+ error= _ma_apply_undo_row_insert(info, previous_undo_lsn,
+ record_ptr + LSN_STORE_SIZE +
+ FILEID_STORE_SIZE);
+ info->trn= 0;
+ /* trn->undo_lsn is updated in an inwrite_hook when writing the CLR_END */
+ tprint(tracef, " rows' count %lu\n", (ulong)info->s->state.state.records);
+ tprint(tracef, " undo_lsn now LSN (%lu,0x%lx)\n",
+ LSN_IN_PARTS(previous_undo_lsn));
+ return error;
+}
+
+
+prototype_undo_exec_hook(UNDO_ROW_DELETE)
+{
+ my_bool error;
+ MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ LSN previous_undo_lsn= lsn_korr(rec->header);
+ MARIA_SHARE *share;
+
+ if (info == NULL)
+ return 1;
+
+ share= info->s;
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ return 1;
+ }
+
+ info->trn= trn;
+ /*
+ For now we skip the page and directory entry. This is to be used
+ later when we mark rows as deleted.
+ */
+ error= _ma_apply_undo_row_delete(info, previous_undo_lsn,
+ log_record_buffer.str + LSN_STORE_SIZE +
+ FILEID_STORE_SIZE + PAGE_STORE_SIZE +
+ DIRPOS_STORE_SIZE,
+ rec->record_length -
+ (LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ PAGE_STORE_SIZE + DIRPOS_STORE_SIZE));
+ info->trn= 0;
+ tprint(tracef, " rows' count %lu\n undo_lsn now LSN (%lu,0x%lx)\n",
+ (ulong)share->state.state.records, LSN_IN_PARTS(previous_undo_lsn));
+ return error;
+}
+
+
+prototype_undo_exec_hook(UNDO_ROW_UPDATE)
+{
+ my_bool error;
+ MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ LSN previous_undo_lsn= lsn_korr(rec->header);
+ MARIA_SHARE *share;
+
+ if (info == NULL)
+ return 1;
+
+ share= info->s;
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
+
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ return 1;
+ }
+
+ info->trn= trn;
+ error= _ma_apply_undo_row_update(info, previous_undo_lsn,
+ log_record_buffer.str + LSN_STORE_SIZE +
+ FILEID_STORE_SIZE,
+ rec->record_length -
+ (LSN_STORE_SIZE + FILEID_STORE_SIZE));
+ info->trn= 0;
+ tprint(tracef, " undo_lsn now LSN (%lu,0x%lx)\n",
+ LSN_IN_PARTS(previous_undo_lsn));
+ return error;
+}
+
+
+prototype_undo_exec_hook(UNDO_KEY_INSERT)
+{
+ my_bool error;
+ MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ LSN previous_undo_lsn= lsn_korr(rec->header);
+ MARIA_SHARE *share;
+
+ if (info == NULL)
+ {
+ /*
+ Unlike for REDOs, if the table was skipped it is abnormal; we have a
+ transaction to rollback which used this table, as it is not rolled back
+ it was supposed to hold this table and so the table should still be
+ there.
+ */
+ return 1;
+ }
+
+ share= info->s;
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
+
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ return 1;
+ }
+
+ info->trn= trn;
+ error= _ma_apply_undo_key_insert(info, previous_undo_lsn,
+ log_record_buffer.str + LSN_STORE_SIZE +
+ FILEID_STORE_SIZE,
+ rec->record_length - LSN_STORE_SIZE -
+ FILEID_STORE_SIZE);
+ info->trn= 0;
+ /* trn->undo_lsn is updated in an inwrite_hook when writing the CLR_END */
+ tprint(tracef, " undo_lsn now LSN (%lu,0x%lx)\n",
+ LSN_IN_PARTS(previous_undo_lsn));
+ return error;
+}
+
+
+prototype_undo_exec_hook(UNDO_KEY_DELETE)
+{
+ my_bool error;
+ MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ LSN previous_undo_lsn= lsn_korr(rec->header);
+ MARIA_SHARE *share;
+
+ if (info == NULL)
+ {
+ /*
+ Unlike for REDOs, if the table was skipped it is abnormal; we have a
+ transaction to rollback which used this table, as it is not rolled back
+ it was supposed to hold this table and so the table should still be
+ there.
+ */
+ return 1;
+ }
+
+ share= info->s;
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
+
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ return 1;
+ }
+
+ info->trn= trn;
+ error= _ma_apply_undo_key_delete(info, previous_undo_lsn,
+ log_record_buffer.str + LSN_STORE_SIZE +
+ FILEID_STORE_SIZE,
+ rec->record_length - LSN_STORE_SIZE -
+ FILEID_STORE_SIZE);
+ info->trn= 0;
+ /* trn->undo_lsn is updated in an inwrite_hook when writing the CLR_END */
+ tprint(tracef, " undo_lsn now LSN (%lu,0x%lx)\n",
+ LSN_IN_PARTS(previous_undo_lsn));
+ return error;
+}
+
+
+prototype_undo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT)
+{
+ my_bool error;
+ MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ LSN previous_undo_lsn= lsn_korr(rec->header);
+ MARIA_SHARE *share;
+
+ if (info == NULL)
+ {
+ /*
+ Unlike for REDOs, if the table was skipped it is abnormal; we have a
+ transaction to rollback which used this table, as it is not rolled back
+ it was supposed to hold this table and so the table should still be
+ there.
+ */
+ return 1;
+ }
+
+ share= info->s;
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
+
+ enlarge_buffer(rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ return 1;
+ }
+
+ info->trn= trn;
+ error= _ma_apply_undo_key_delete(info, previous_undo_lsn,
+ log_record_buffer.str + LSN_STORE_SIZE +
+ FILEID_STORE_SIZE + PAGE_STORE_SIZE,
+ rec->record_length - LSN_STORE_SIZE -
+ FILEID_STORE_SIZE - PAGE_STORE_SIZE);
+ info->trn= 0;
+ /* trn->undo_lsn is updated in an inwrite_hook when writing the CLR_END */
+ tprint(tracef, " undo_lsn now LSN (%lu,0x%lx)\n",
+ LSN_IN_PARTS(previous_undo_lsn));
+ return error;
+}
+
+
+
+static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply)
+{
+ TRANSLOG_HEADER_BUFFER rec;
+ struct st_translog_scanner_data scanner;
+ int len;
+ uint i;
+
+ /* install hooks for execution */
+#define install_redo_exec_hook(R) \
+ log_record_type_descriptor[LOGREC_ ## R].record_execute_in_redo_phase= \
+ exec_REDO_LOGREC_ ## R;
+#define install_undo_exec_hook(R) \
+ log_record_type_descriptor[LOGREC_ ## R].record_execute_in_undo_phase= \
+ exec_UNDO_LOGREC_ ## R;
+ install_redo_exec_hook(LONG_TRANSACTION_ID);
+ install_redo_exec_hook(CHECKPOINT);
+ install_redo_exec_hook(REDO_CREATE_TABLE);
+ install_redo_exec_hook(REDO_RENAME_TABLE);
+ install_redo_exec_hook(REDO_REPAIR_TABLE);
+ install_redo_exec_hook(REDO_DROP_TABLE);
+ install_redo_exec_hook(FILE_ID);
+ install_redo_exec_hook(INCOMPLETE_LOG);
+ install_redo_exec_hook(INCOMPLETE_GROUP);
+ install_redo_exec_hook(REDO_INSERT_ROW_HEAD);
+ install_redo_exec_hook(REDO_INSERT_ROW_TAIL);
+ install_redo_exec_hook(REDO_INSERT_ROW_BLOBS);
+ install_redo_exec_hook(REDO_PURGE_ROW_HEAD);
+ install_redo_exec_hook(REDO_PURGE_ROW_TAIL);
+ install_redo_exec_hook(REDO_FREE_HEAD_OR_TAIL);
+ install_redo_exec_hook(REDO_FREE_BLOCKS);
+ install_redo_exec_hook(REDO_DELETE_ALL);
+ install_redo_exec_hook(REDO_INDEX);
+ install_redo_exec_hook(REDO_INDEX_NEW_PAGE);
+ install_redo_exec_hook(REDO_INDEX_FREE_PAGE);
+ install_redo_exec_hook(UNDO_ROW_INSERT);
+ install_redo_exec_hook(UNDO_ROW_DELETE);
+ install_redo_exec_hook(UNDO_ROW_UPDATE);
+ install_redo_exec_hook(UNDO_KEY_INSERT);
+ install_redo_exec_hook(UNDO_KEY_DELETE);
+ install_redo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT);
+ install_redo_exec_hook(COMMIT);
+ install_redo_exec_hook(CLR_END);
+ install_undo_exec_hook(UNDO_ROW_INSERT);
+ install_undo_exec_hook(UNDO_ROW_DELETE);
+ install_undo_exec_hook(UNDO_ROW_UPDATE);
+ install_undo_exec_hook(UNDO_KEY_INSERT);
+ install_undo_exec_hook(UNDO_KEY_DELETE);
+ install_undo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT);
+
+ current_group_end_lsn= LSN_IMPOSSIBLE;
+#ifndef DBUG_OFF
+ current_group_table= NULL;
+#endif
+
+ if (unlikely(lsn == LSN_IMPOSSIBLE || lsn == translog_get_horizon()))
+ {
+ tprint(tracef, "checkpoint address refers to the log end log or "
+ "log is empty, nothing to do.\n");
+ return 0;
+ }
+
+ len= translog_read_record_header(lsn, &rec);
+
+ if (len == RECHEADER_READ_ERROR)
+ {
+ eprint(tracef, "Failed to read header of the first record.\n");
+ return 1;
+ }
+ if (translog_scanner_init(lsn, 1, &scanner, 1))
+ {
+ tprint(tracef, "Scanner init failed\n");
+ return 1;
+ }
+ for (i= 1;;i++)
+ {
+ uint16 sid= rec.short_trid;
+ const LOG_DESC *log_desc= &log_record_type_descriptor[rec.type];
+ display_record_position(log_desc, &rec, i);
+ /*
+ A complete group is a set of log records with an "end mark" record
+ (e.g. a set of REDOs for an operation, terminated by an UNDO for this
+ operation); if there is no "end mark" record the group is incomplete and
+ won't be executed.
+ */
+ if ((log_desc->record_in_group == LOGREC_IS_GROUP_ITSELF) ||
+ (log_desc->record_in_group == LOGREC_LAST_IN_GROUP))
+ {
+ if (all_active_trans[sid].group_start_lsn != LSN_IMPOSSIBLE)
+ {
+ if (log_desc->record_in_group == LOGREC_IS_GROUP_ITSELF)
+ {
+ /*
+ Can happen if the transaction got a table write error, then
+ unlocked tables thus wrote a COMMIT record. Or can be an
+ INCOMPLETE_GROUP record written by a previous recovery.
+ */
+ tprint(tracef, "\nDiscarding incomplete group before this record\n");
+ all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE;
+ }
+ else
+ {
+ struct st_translog_scanner_data scanner2;
+ TRANSLOG_HEADER_BUFFER rec2;
+ /*
+ There is a complete group for this transaction, containing more
+ than this event.
+ */
+ tprint(tracef, " ends a group:\n");
+ len=
+ translog_read_record_header(all_active_trans[sid].group_start_lsn,
+ &rec2);
+ if (len < 0) /* EOF or error */
+ {
+ tprint(tracef, "Cannot find record where it should be\n");
+ goto err;
+ }
+ if (translog_scanner_init(rec2.lsn, 1, &scanner2, 1))
+ {
+ tprint(tracef, "Scanner2 init failed\n");
+ goto err;
+ }
+ current_group_end_lsn= rec.lsn;
+ do
+ {
+ if (rec2.short_trid == sid) /* it's in our group */
+ {
+ const LOG_DESC *log_desc2= &log_record_type_descriptor[rec2.type];
+ display_record_position(log_desc2, &rec2, 0);
+ if (apply == MARIA_LOG_CHECK)
+ {
+ translog_size_t read_len;
+ enlarge_buffer(&rec2);
+ read_len=
+ translog_read_record(rec2.lsn, 0, rec2.record_length,
+ log_record_buffer.str, NULL);
+ if (read_len != rec2.record_length)
+ {
+ tprint(tracef, "Cannot read record's body: read %u of"
+ " %u bytes\n", read_len, rec2.record_length);
+ goto err;
+ }
+ }
+ if (apply == MARIA_LOG_APPLY &&
+ display_and_apply_record(log_desc2, &rec2))
+ {
+ translog_destroy_scanner(&scanner2);
+ goto err;
+ }
+ }
+ len= translog_read_next_record_header(&scanner2, &rec2);
+ if (len < 0) /* EOF or error */
+ {
+ tprint(tracef, "Cannot find record where it should be\n");
+ goto err;
+ }
+ }
+ while (rec2.lsn < rec.lsn);
+ translog_free_record_header(&rec2);
+ /* group finished */
+ all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE;
+ current_group_end_lsn= LSN_IMPOSSIBLE; /* for debugging */
+ display_record_position(log_desc, &rec, 0);
+ translog_destroy_scanner(&scanner2);
+ }
+ }
+ if (apply == MARIA_LOG_APPLY &&
+ display_and_apply_record(log_desc, &rec))
+ goto err;
+#ifndef DBUG_OFF
+ current_group_table= NULL;
+#endif
+ }
+ else /* record does not end group */
+ {
+ /* just record the fact, can't know if can execute yet */
+ if (all_active_trans[sid].group_start_lsn == LSN_IMPOSSIBLE)
+ {
+ /* group not yet started */
+ all_active_trans[sid].group_start_lsn= rec.lsn;
+ }
+ }
+ len= translog_read_next_record_header(&scanner, &rec);
+ if (len < 0)
+ {
+ switch (len)
+ {
+ case RECHEADER_READ_EOF:
+ tprint(tracef, "EOF on the log\n");
+ break;
+ case RECHEADER_READ_ERROR:
+ tprint(tracef, "Error reading log\n");
+ goto err;
+ }
+ break;
+ }
+ }
+ translog_destroy_scanner(&scanner);
+ translog_free_record_header(&rec);
+ if (recovery_message_printed == REC_MSG_REDO)
+ {
+ fprintf(stderr, " 100%%");
+ procent_printed= 1;
+ }
+ return 0;
+
+err:
+ translog_destroy_scanner(&scanner);
+ return 1;
+}
+
+
+/**
+ @brief Informs about any aborted groups or uncommitted transactions,
+ prepares for the UNDO phase if needed.
+
+ @note Observe that it may init trnman.
+*/
+static uint end_of_redo_phase(my_bool prepare_for_undo_phase)
+{
+ uint sid, uncommitted= 0;
+ char llbuf[22];
+ LSN addr;
+
+ hash_free(&all_dirty_pages);
+ /*
+ hash_free() can be called multiple times probably, but be safe if that
+ changes
+ */
+ bzero(&all_dirty_pages, sizeof(all_dirty_pages));
+ my_free(dirty_pages_pool, MYF(MY_ALLOW_ZERO_PTR));
+ dirty_pages_pool= NULL;
+
+ llstr(max_long_trid, llbuf);
+ tprint(tracef, "Maximum transaction long id seen: %s\n", llbuf);
+ if (prepare_for_undo_phase && trnman_init(max_long_trid))
+ return -1;
+
+ for (sid= 0; sid <= SHORT_TRID_MAX; sid++)
+ {
+ TrID long_trid= all_active_trans[sid].long_trid;
+ LSN gslsn= all_active_trans[sid].group_start_lsn;
+ TRN *trn;
+ if (gslsn != LSN_IMPOSSIBLE)
+ {
+ tprint(tracef, "Group at LSN (%lu,0x%lx) short_trid %u incomplete\n",
+ LSN_IN_PARTS(gslsn), sid);
+ all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE;
+ }
+ if (all_active_trans[sid].undo_lsn != LSN_IMPOSSIBLE)
+ {
+ llstr(long_trid, llbuf);
+ tprint(tracef, "Transaction long_trid %s short_trid %u uncommitted\n",
+ llbuf, sid);
+ /* dummy_transaction_object serves only for DDLs */
+ DBUG_ASSERT(long_trid != 0);
+ if (prepare_for_undo_phase)
+ {
+ if ((trn= trnman_recreate_trn_from_recovery(sid, long_trid)) == NULL)
+ return -1;
+ trn->undo_lsn= all_active_trans[sid].undo_lsn;
+ trn->first_undo_lsn= all_active_trans[sid].first_undo_lsn |
+ TRANSACTION_LOGGED_LONG_ID; /* because trn is known in log */
+ if (gslsn != LSN_IMPOSSIBLE)
+ {
+ /*
+ UNDO phase will log some records. So, a future recovery may see:
+ REDO(from incomplete group) - REDO(from rollback) - CLR_END
+ and thus execute the first REDO (finding it in "a complete
+ group"). To prevent that:
+ */
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS];
+ LSN lsn;
+ if (translog_write_record(&lsn, LOGREC_INCOMPLETE_GROUP,
+ trn, NULL, 0,
+ TRANSLOG_INTERNAL_PARTS, log_array,
+ NULL, NULL))
+ return -1;
+ }
+ }
+ uncommitted++;
+ }
+#ifdef MARIA_VERSIONING
+ /*
+ If real recovery: if transaction was committed, move it to some separate
+ list for soon purging.
+ */
+#endif
+ }
+
+ my_free(all_active_trans, MYF(MY_ALLOW_ZERO_PTR));
+ all_active_trans= NULL;
+
+ /*
+ The UNDO phase uses some normal run-time code of ROLLBACK: generates log
+ records, etc; prepare tables for that
+ */
+ addr= translog_get_horizon();
+ for (sid= 0; sid <= SHARE_ID_MAX; sid++)
+ {
+ MARIA_HA *info= all_tables[sid].info;
+ if (info != NULL)
+ {
+ prepare_table_for_close(info, addr);
+ /*
+ But we don't close it; we leave it available for the UNDO phase;
+ it's likely that the UNDO phase will need it.
+ */
+ if (prepare_for_undo_phase)
+ translog_assign_id_to_share_from_recovery(info->s, sid);
+ }
+ }
+ return uncommitted;
+}
+
+
+static int run_undo_phase(uint uncommitted)
+{
+ DBUG_ENTER("run_undo_phase");
+
+ if (uncommitted > 0)
+ {
+ checkpoint_useful= TRUE;
+ if (tracef != stdout)
+ {
+ if (recovery_message_printed == REC_MSG_NONE)
+ print_preamble();
+ fprintf(stderr, "transactions to roll back:");
+ recovery_message_printed= REC_MSG_UNDO;
+ }
+ tprint(tracef, "%u transactions will be rolled back\n", uncommitted);
+ for( ; ; )
+ {
+ char llbuf[22];
+ TRN *trn;
+ if (recovery_message_printed == REC_MSG_UNDO)
+ fprintf(stderr, " %u", uncommitted);
+ if ((uncommitted--) == 0)
+ break;
+ trn= trnman_get_any_trn();
+ DBUG_ASSERT(trn != NULL);
+ llstr(trn->trid, llbuf);
+ tprint(tracef, "Rolling back transaction of long id %s\n", llbuf);
+
+ /* Execute all undo entries */
+ while (trn->undo_lsn)
+ {
+ TRANSLOG_HEADER_BUFFER rec;
+ LOG_DESC *log_desc;
+ if (translog_read_record_header(trn->undo_lsn, &rec) ==
+ RECHEADER_READ_ERROR)
+ DBUG_RETURN(1);
+ log_desc= &log_record_type_descriptor[rec.type];
+ display_record_position(log_desc, &rec, 0);
+ if (log_desc->record_execute_in_undo_phase(&rec, trn))
+ {
+ tprint(tracef, "Got error %d when executing undo\n", my_errno);
+ DBUG_RETURN(1);
+ }
+ }
+
+ if (trnman_rollback_trn(trn))
+ DBUG_RETURN(1);
+ /* We could want to span a few threads (4?) instead of 1 */
+ /* In the future, we want to have this phase *online* */
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief re-enables transactionality, updates is_of_horizon
+
+ @param info table
+ @param horizon address to set is_of_horizon
+*/
+
+static void prepare_table_for_close(MARIA_HA *info, TRANSLOG_ADDRESS horizon)
+{
+ MARIA_SHARE *share= info->s;
+ /*
+ In a fully-forward REDO phase (no checkpoint record),
+ state is now at least as new as the LSN of the current record. It may be
+ newer, in case we are seeing a LOGREC_FILE_ID which tells us to close a
+ table, but that table was later modified further in the log.
+ But if we parsed a checkpoint record, it may be this way in the log:
+ FILE_ID(6->t2)... FILE_ID(6->t1)... CHECKPOINT(6->t1)
+ Checkpoint parsing opened t1 with id 6; first FILE_ID above is going to
+ make t1 close; the first condition below is however false (when checkpoint
+ was taken it increased is_of_horizon) and so it works. For safety we
+ add the second condition.
+ */
+ if (cmp_translog_addr(share->state.is_of_horizon, horizon) < 0 &&
+ cmp_translog_addr(share->lsn_of_file_id, horizon) < 0)
+ {
+ share->state.is_of_horizon= horizon;
+ _ma_state_info_write_sub(share->kfile.file, &share->state, 1);
+ }
+ _ma_reenable_logging_for_table(share);
+ info->trn= NULL; /* safety */
+}
+
+
+static MARIA_HA *get_MARIA_HA_from_REDO_record(const
+ TRANSLOG_HEADER_BUFFER *rec)
+{
+ uint16 sid;
+ pgcache_page_no_t page;
+ MARIA_HA *info;
+ char llbuf[22];
+ my_bool index_page_redo_entry= 0;
+
+ print_redo_phase_progress(rec->lsn);
+ sid= fileid_korr(rec->header);
+ page= page_korr(rec->header + FILEID_STORE_SIZE);
+ switch (rec->type) {
+ /* not all REDO records have a page: */
+ case LOGREC_REDO_INDEX_NEW_PAGE:
+ case LOGREC_REDO_INDEX:
+ case LOGREC_REDO_INDEX_FREE_PAGE:
+ index_page_redo_entry= 1;
+ /* Fall trough*/
+ case LOGREC_REDO_INSERT_ROW_HEAD:
+ case LOGREC_REDO_INSERT_ROW_TAIL:
+ case LOGREC_REDO_PURGE_ROW_HEAD:
+ case LOGREC_REDO_PURGE_ROW_TAIL:
+ llstr(page, llbuf);
+ tprint(tracef, " For page %s of table of short id %u", llbuf, sid);
+ break;
+ /* other types could print their info here too */
+ default:
+ break;
+ }
+ info= all_tables[sid].info;
+#ifndef DBUG_OFF
+ DBUG_ASSERT(current_group_table == NULL || current_group_table == info);
+ current_group_table= info;
+#endif
+ if (info == NULL)
+ {
+ tprint(tracef, ", table skipped, so skipping record\n");
+ return NULL;
+ }
+ tprint(tracef, ", '%s'", info->s->open_file_name);
+ if (cmp_translog_addr(rec->lsn, info->s->lsn_of_file_id) <= 0)
+ {
+ /*
+ This can happen only if processing a record before the checkpoint
+ record.
+ id->name mapping is newer than REDO record: for sure the table subject
+ of the REDO has been flushed and forced (id re-assignment implies this);
+ REDO can be ignored (and must be, as we don't know what this subject
+ table was).
+ */
+ DBUG_ASSERT(cmp_translog_addr(rec->lsn, checkpoint_start) < 0);
+ tprint(tracef, ", table's LOGREC_FILE_ID has LSN (%lu,0x%lx) more recent"
+ " than record, skipping record",
+ LSN_IN_PARTS(info->s->lsn_of_file_id));
+ return NULL;
+ }
+ /* detect if an open instance of a dropped table (internal bug) */
+ DBUG_ASSERT(info->s->last_version != 0);
+ if (cmp_translog_addr(rec->lsn, checkpoint_start) < 0)
+ {
+ uint64 file_and_page_id=
+ (((uint64) (index_page_redo_entry ? all_tables[sid].org_kfile :
+ all_tables[sid].org_dfile)) << 32) | page;
+ struct st_dirty_page *dirty_page= (struct st_dirty_page *)
+ hash_search(&all_dirty_pages,
+ (uchar *)&file_and_page_id, sizeof(file_and_page_id));
+ if ((dirty_page == NULL) ||
+ cmp_translog_addr(rec->lsn, dirty_page->rec_lsn) < 0)
+ {
+ tprint(tracef, ", ignoring because of dirty_pages list\n");
+ return NULL;
+ }
+ }
+
+ /*
+ So we are going to read the page, and if its LSN is older than the
+ record's we will modify the page
+ */
+ tprint(tracef, ", applying record\n");
+ _ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE); /* to flush state on close */
+ return info;
+}
+
+
+static MARIA_HA *get_MARIA_HA_from_UNDO_record(const
+ TRANSLOG_HEADER_BUFFER *rec)
+{
+ uint16 sid;
+ MARIA_HA *info;
+
+ sid= fileid_korr(rec->header + LSN_STORE_SIZE);
+ tprint(tracef, " For table of short id %u", sid);
+ info= all_tables[sid].info;
+#ifndef DBUG_OFF
+ DBUG_ASSERT(current_group_table == NULL || current_group_table == info);
+ current_group_table= info;
+#endif
+ if (info == NULL)
+ {
+ tprint(tracef, ", table skipped, so skipping record\n");
+ return NULL;
+ }
+ tprint(tracef, ", '%s'", info->s->open_file_name);
+ if (cmp_translog_addr(rec->lsn, info->s->lsn_of_file_id) <= 0)
+ {
+ tprint(tracef, ", table's LOGREC_FILE_ID has LSN (%lu,0x%lx) more recent"
+ " than record, skipping record",
+ LSN_IN_PARTS(info->s->lsn_of_file_id));
+ return NULL;
+ }
+ DBUG_ASSERT(info->s->last_version != 0);
+ _ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE); /* to flush state on close */
+ tprint(tracef, ", applying record\n");
+ return info;
+}
+
+
+/**
+ @brief Parses checkpoint record.
+
+ Builds from it the dirty_pages list (a hash), opens tables and maps them to
+ their 2-byte IDs, recreates transactions (not real TRNs though).
+
+ @return LSN from where in the log the REDO phase should start
+ @retval LSN_ERROR error
+ @retval other ok
+*/
+
+static LSN parse_checkpoint_record(LSN lsn)
+{
+ ulong i, nb_dirty_pages;
+ TRANSLOG_HEADER_BUFFER rec;
+ TRANSLOG_ADDRESS start_address;
+ int len;
+ uint nb_active_transactions, nb_committed_transactions, nb_tables;
+ uchar *ptr;
+ LSN minimum_rec_lsn_of_active_transactions, minimum_rec_lsn_of_dirty_pages;
+ struct st_dirty_page *next_dirty_page_in_pool;
+
+ tprint(tracef, "Loading data from checkpoint record at LSN (%lu,0x%lx)\n",
+ LSN_IN_PARTS(lsn));
+ if ((len= translog_read_record_header(lsn, &rec)) == RECHEADER_READ_ERROR)
+ {
+ tprint(tracef, "Cannot find checkpoint record where it should be\n");
+ return LSN_ERROR;
+ }
+
+ enlarge_buffer(&rec);
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec.lsn, 0, rec.record_length,
+ log_record_buffer.str, NULL) !=
+ rec.record_length)
+ {
+ eprint(tracef, "Failed to read record\n");
+ return LSN_ERROR;
+ }
+
+ ptr= log_record_buffer.str;
+ start_address= lsn_korr(ptr);
+ ptr+= LSN_STORE_SIZE;
+
+ /* transactions */
+ nb_active_transactions= uint2korr(ptr);
+ ptr+= 2;
+ tprint(tracef, "%u active transactions\n", nb_active_transactions);
+ minimum_rec_lsn_of_active_transactions= lsn_korr(ptr);
+ ptr+= LSN_STORE_SIZE;
+ max_long_trid= transid_korr(ptr);
+ ptr+= TRANSID_SIZE;
+
+ /*
+ how much brain juice and discussions there was to come to writing this
+ line
+ */
+ set_if_smaller(start_address, minimum_rec_lsn_of_active_transactions);
+
+ for (i= 0; i < nb_active_transactions; i++)
+ {
+ uint16 sid= uint2korr(ptr);
+ TrID long_id;
+ LSN undo_lsn, first_undo_lsn;
+ ptr+= 2;
+ long_id= uint6korr(ptr);
+ ptr+= 6;
+ DBUG_ASSERT(sid > 0 && long_id > 0);
+ undo_lsn= lsn_korr(ptr);
+ ptr+= LSN_STORE_SIZE;
+ first_undo_lsn= lsn_korr(ptr);
+ ptr+= LSN_STORE_SIZE;
+ new_transaction(sid, long_id, undo_lsn, first_undo_lsn);
+ }
+ nb_committed_transactions= uint4korr(ptr);
+ ptr+= 4;
+ tprint(tracef, "%lu committed transactions\n",
+ (ulong)nb_committed_transactions);
+ /* no purging => committed transactions are not important */
+ ptr+= (6 + LSN_STORE_SIZE) * nb_committed_transactions;
+
+ /* tables */
+ nb_tables= uint4korr(ptr);
+ ptr+= 4;
+ tprint(tracef, "%u open tables\n", nb_tables);
+ for (i= 0; i< nb_tables; i++)
+ {
+ char name[FN_REFLEN];
+ File kfile, dfile;
+ LSN first_log_write_lsn;
+ uint name_len;
+ uint16 sid= uint2korr(ptr);
+ ptr+= 2;
+ DBUG_ASSERT(sid > 0);
+ kfile= uint4korr(ptr);
+ ptr+= 4;
+ dfile= uint4korr(ptr);
+ ptr+= 4;
+ first_log_write_lsn= lsn_korr(ptr);
+ ptr+= LSN_STORE_SIZE;
+ name_len= strlen((char *)ptr) + 1;
+ strmake(name, (char *)ptr, sizeof(name)-1);
+ ptr+= name_len;
+ if (new_table(sid, name, kfile, dfile, first_log_write_lsn))
+ return LSN_ERROR;
+ }
+
+ /* dirty pages */
+ nb_dirty_pages= uint8korr(ptr);
+ ptr+= 8;
+ tprint(tracef, "%lu dirty pages\n", nb_dirty_pages);
+ if (hash_init(&all_dirty_pages, &my_charset_bin, nb_dirty_pages,
+ offsetof(struct st_dirty_page, file_and_page_id),
+ sizeof(((struct st_dirty_page *)NULL)->file_and_page_id),
+ NULL, NULL, 0))
+ return LSN_ERROR;
+ dirty_pages_pool=
+ (struct st_dirty_page *)my_malloc(nb_dirty_pages *
+ sizeof(struct st_dirty_page),
+ MYF(MY_WME));
+ if (unlikely(dirty_pages_pool == NULL))
+ return LSN_ERROR;
+ next_dirty_page_in_pool= dirty_pages_pool;
+ minimum_rec_lsn_of_dirty_pages= LSN_MAX;
+ for (i= 0; i < nb_dirty_pages ; i++)
+ {
+ pgcache_page_no_t pageid;
+ LSN rec_lsn;
+ File fileid= uint4korr(ptr);
+ ptr+= 4;
+ pageid= uint4korr(ptr);
+ ptr+= 4;
+ rec_lsn= lsn_korr(ptr);
+ ptr+= LSN_STORE_SIZE;
+ if (new_page(fileid, pageid, rec_lsn, next_dirty_page_in_pool++))
+ return LSN_ERROR;
+ set_if_smaller(minimum_rec_lsn_of_dirty_pages, rec_lsn);
+ }
+ /* after that, there will be no insert/delete into the hash */
+ /*
+ sanity check on record (did we screw up with all those "ptr+=", did the
+ checkpoint write code and checkpoint read code go out of sync?).
+ */
+ if (ptr != (log_record_buffer.str + log_record_buffer.length))
+ {
+ eprint(tracef, "checkpoint record corrupted\n");
+ return LSN_ERROR;
+ }
+ set_if_smaller(start_address, minimum_rec_lsn_of_dirty_pages);
+
+ /*
+ Find LSN higher or equal to this TRANSLOG_ADDRESS, suitable for
+ translog_read_record() functions
+ */
+ checkpoint_start= translog_next_LSN(start_address, LSN_IMPOSSIBLE);
+ if (checkpoint_start == LSN_IMPOSSIBLE)
+ {
+ /*
+ There must be a problem, as our checkpoint record exists and is >= the
+ address which is stored in its first bytes, which is >= start_address.
+ */
+ return LSN_ERROR;
+ }
+ return checkpoint_start;
+}
+
+static int new_page(File fileid, pgcache_page_no_t pageid, LSN rec_lsn,
+ struct st_dirty_page *dirty_page)
+{
+ /* serves as hash key */
+ dirty_page->file_and_page_id= (((uint64)fileid) << 32) | pageid;
+ dirty_page->rec_lsn= rec_lsn;
+ return my_hash_insert(&all_dirty_pages, (uchar *)dirty_page);
+}
+
+
+static int close_all_tables(void)
+{
+ int error= 0;
+ uint count= 0;
+ LIST *list_element, *next_open;
+ MARIA_HA *info;
+ TRANSLOG_ADDRESS addr;
+ DBUG_ENTER("close_all_tables");
+
+ pthread_mutex_lock(&THR_LOCK_maria);
+ if (maria_open_list == NULL)
+ goto end;
+ tprint(tracef, "Closing all tables\n");
+ if (tracef != stdout)
+ {
+ if (recovery_message_printed == REC_MSG_NONE)
+ print_preamble();
+ for (count= 0, list_element= maria_open_list ;
+ list_element ; count++, (list_element= list_element->next))
+ fprintf(stderr, "tables to flush:");
+ recovery_message_printed= REC_MSG_FLUSH;
+ }
+ /*
+ Since the end of end_of_redo_phase(), we may have written new records
+ (if UNDO phase ran) and thus the state is newer than at
+ end_of_redo_phase(), we need to bump is_of_horizon again.
+ */
+ addr= translog_get_horizon();
+ for (list_element= maria_open_list ; ; list_element= next_open)
+ {
+ if (recovery_message_printed == REC_MSG_FLUSH)
+ fprintf(stderr, " %u", count--);
+ if (list_element == NULL)
+ break;
+ next_open= list_element->next;
+ info= (MARIA_HA*)list_element->data;
+ pthread_mutex_unlock(&THR_LOCK_maria); /* ok, UNDO phase not online yet */
+ /*
+ Tables which we see here are exactly those which were open at time of
+ crash. They might have open_count>0 as Checkpoint maybe flushed their
+ state while they were used. As Recovery corrected them, don't alarm the
+ user, don't ask for a table check:
+ */
+ info->s->state.open_count= 0;
+ prepare_table_for_close(info, addr);
+ error|= maria_close(info);
+ pthread_mutex_lock(&THR_LOCK_maria);
+ }
+end:
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ DBUG_RETURN(error);
+}
+
+
+/**
+ @brief Close all table instances with a certain name which are present in
+ all_tables.
+
+ @param name Name of table
+ @param addr Log address passed to prepare_table_for_close()
+*/
+
+static my_bool close_one_table(const char *name, TRANSLOG_ADDRESS addr)
+{
+ my_bool res= 0;
+ /* There are no other threads using the tables, so we don't need any locks */
+ struct st_table_for_recovery *internal_table, *end;
+ for (internal_table= all_tables, end= internal_table + SHARE_ID_MAX + 1;
+ internal_table < end ;
+ internal_table++)
+ {
+ MARIA_HA *info= internal_table->info;
+ if ((info != NULL) && !strcmp(info->s->open_file_name, name))
+ {
+ prepare_table_for_close(info, addr);
+ if (maria_close(info))
+ res= 1;
+ internal_table->info= NULL;
+ }
+ }
+ return res;
+}
+
+
+/**
+ Temporarily disables logging for this table.
+
+ If that makes the log incomplete, writes a LOGREC_INCOMPLETE_LOG to the log
+ to warn log readers.
+
+ @param info table
+ @param log_incomplete if that disabling makes the log incomplete
+
+ @note for example in the REDO phase we disable logging but that does not
+ make the log incomplete.
+*/
+void _ma_tmp_disable_logging_for_table(MARIA_HA *info,
+ my_bool log_incomplete)
+{
+ MARIA_SHARE *share= info->s;
+ if (log_incomplete)
+ {
+ uchar log_data[FILEID_STORE_SIZE];
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
+ LSN lsn;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+ translog_write_record(&lsn, LOGREC_INCOMPLETE_LOG,
+ info->trn, info, sizeof(log_data),
+ TRANSLOG_INTERNAL_PARTS + 1, log_array,
+ log_data, NULL);
+ }
+ /* if we disabled before writing the record, record wouldn't reach log */
+ share->now_transactional= FALSE;
+ share->page_type= PAGECACHE_PLAIN_PAGE;
+}
+
+static void print_redo_phase_progress(TRANSLOG_ADDRESS addr)
+{
+ static int end_logno= FILENO_IMPOSSIBLE, end_offset, percentage_printed= 0;
+ static ulonglong initial_remainder= -1;
+ int cur_logno, cur_offset;
+ ulonglong local_remainder;
+ int percentage_done;
+
+ if (tracef == stdout)
+ return;
+ if (recovery_message_printed == REC_MSG_NONE)
+ {
+ print_preamble();
+ fprintf(stderr, "recovered pages: 0%%");
+ procent_printed= 1;
+ recovery_message_printed= REC_MSG_REDO;
+ }
+ if (end_logno == FILENO_IMPOSSIBLE)
+ {
+ LSN end_addr= translog_get_horizon();
+ end_logno= LSN_FILE_NO(end_addr);
+ end_offset= LSN_OFFSET(end_addr);
+ }
+ cur_logno= LSN_FILE_NO(addr);
+ cur_offset= LSN_OFFSET(addr);
+ local_remainder= (cur_logno == end_logno) ? (end_offset - cur_offset) :
+ (((longlong)log_file_size) - cur_offset +
+ max(end_logno - cur_logno - 1, 0) * ((longlong)log_file_size) +
+ end_offset);
+ if (initial_remainder == (ulonglong)(-1))
+ initial_remainder= local_remainder;
+ percentage_done= ((initial_remainder - local_remainder) * ULL(100) /
+ initial_remainder);
+ if ((percentage_done - percentage_printed) >= 10)
+ {
+ percentage_printed= percentage_done;
+ fprintf(stderr, " %d%%", percentage_done);
+ procent_printed= 1;
+ }
+}
+
+#ifdef MARIA_EXTERNAL_LOCKING
+#error Marias Checkpoint and Recovery are really not ready for it
+#endif
+
+/*
+Recovery of the state : how it works
+=====================================
+
+Here we ignore Checkpoints for a start.
+
+The state (MARIA_HA::MARIA_SHARE::MARIA_STATE_INFO) is updated in
+memory frequently (at least at every row write/update/delete) but goes
+to disk at few moments: maria_close() when closing the last open
+instance, and a few rare places like CHECK/REPAIR/ALTER
+(non-transactional tables also do it at maria_lock_database() but we
+needn't cover them here).
+
+In case of crash, state on disk is likely to be older than what it was
+in memory, the REDO phase needs to recreate the state as it was in
+memory at the time of crash. When we say Recovery here we will always
+mean "REDO phase".
+
+For example MARIA_STATUS_INFO::records (count of records). It is updated at
+the end of every row write/update/delete/delete_all. When Recovery sees the
+sign of such row operation (UNDO or REDO), it may need to update the records'
+count if that count does not reflect that operation (is older). How to know
+the age of the state compared to the log record: every time the state
+goes to disk at runtime, its member "is_of_horizon" is updated to the
+current end-of-log horizon. So Recovery just needs to compare is_of_horizon
+and the record's LSN to know if it should modify "records".
+
+Other operations like ALTER TABLE DISABLE KEYS update the state but
+don't write log records, thus the REDO phase cannot repeat their
+effect on the state in case of crash. But we make them sync the state
+as soon as they have finished. This reduces the window for a problem.
+
+It looks like only one thread at a time updates the state in memory or
+on disk. We assume that the upper level (normally MySQL) has protection
+against issuing HA_EXTRA_(FORCE_REOPEN|PREPARE_FOR_RENAME) so that these
+are not issued while there are any running transactions on the given table.
+If this is not done, we may write a corrupted state to disk.
+
+With checkpoints
+================
+
+Checkpoint module needs to read the state in memory and write it to
+disk. This may happen while some other thread is modifying the state
+in memory or on disk. Checkpoint thus may be reading changing data, it
+needs a mutex to not have it corrupted, and concurrent modifiers of
+the state need that mutex too for the same reason.
+"records" is modified for every row write/update/delete, we don't want
+to add a mutex lock/unlock there. So we re-use the mutex lock/unlock
+which is already present in these moments, namely the log's mutex which is
+taken when UNDO_ROW_INSERT|UPDATE|DELETE is written: we update "records" in
+under-log-mutex hooks when writing these records (thus "records" is
+not updated at the end of maria_write/update/delete() anymore).
+Thus Checkpoint takes the log's lock and can read "records" from
+memory an write it to disk and release log's lock.
+We however want to avoid having the disk write under the log's
+lock. So it has to be under another mutex, natural choice is
+intern_lock (as Checkpoint needs it anyway to read MARIA_SHARE::kfile,
+and as maria_close() takes it too). All state writes to disk are
+changed to be protected with intern_lock.
+So Checkpoint takes intern_lock, log's lock, reads "records" from
+memory, releases log's lock, updates is_of_horizon and writes "records" to
+disk, release intern_lock.
+In practice, not only "records" needs to be written but the full
+state. So, Checkpoint reads the full state from memory. Some other
+thread may at this moment be modifying in memory some pieces of the
+state which are not protected by the lock's log (see ma_extra.c
+HA_EXTRA_NO_KEYS), and Checkpoint would be reading a corrupted state
+from memory; to guard against that we extend the intern_lock-zone to
+changes done to the state in memory by HA_EXTRA_NO_KEYS et al, and
+also any change made in memory to create_rename_lsn/state_is_of_horizon.
+Last, we don't want in Checkpoint to do
+ log lock; read state from memory; release log lock;
+for each table, it may hold the log's lock too much in total.
+So, we instead do
+ log lock; read N states from memory; release log lock;
+Thus, the sequence above happens outside of any intern_lock.
+But this re-introduces the problem that some other thread may be changing the
+state in memory and on disk under intern_lock, without log's lock, like
+HA_EXTRA_NO_KEYS, while we read the N states. However, when Checkpoint later
+comes to handling the table under intern_lock, which is serialized with
+HA_EXTRA_NO_KEYS, it can see that is_of_horizon is higher then when the state
+was read from memory under log's lock, and thus can decide to not flush the
+obsolete state it has, knowing that the other thread flushed a more recent
+state already. If on the other hand is_of_horizon is not higher, the read
+state is current and can be flushed. So we have a per-table sequence:
+ lock intern_lock; test if is_of_horizon is higher than when we read the state
+ under log's lock; if no then flush the read state to disk.
+*/
+
+/* some comments and pseudo-code which we keep for later */
+#if 0
+ /*
+ MikaelR suggests: support checkpoints during REDO phase too: do checkpoint
+ after a certain amount of log records have been executed. This helps
+ against repeated crashes. Those checkpoints could not be user-requested
+ (as engine is not communicating during the REDO phase), so they would be
+ automatic: this changes the original assumption that we don't write to the
+ log while in the REDO phase, but why not. How often should we checkpoint?
+ */
+
+ /*
+ We want to have two steps:
+ engine->recover_with_max_memory();
+ next_engine->recover_with_max_memory();
+ engine->init_with_normal_memory();
+ next_engine->init_with_normal_memory();
+ So: in recover_with_max_memory() allocate a giant page cache, do REDO
+ phase, then all page cache is flushed and emptied and freed (only retain
+ small structures like TM): take full checkpoint, which is useful if
+ next engine crashes in its recovery the next second.
+ Destroy all shares (maria_close()), then at init_with_normal_memory() we
+ do this:
+ */
+
+ /**** UNDO PHASE *****/
+
+ /*
+ Launch one or more threads to do the background rollback. Don't wait for
+ them to complete their rollback (background rollback; for debugging, we
+ can have an option which waits). Set a counter (total_of_rollback_threads)
+ to the number of threads to lauch.
+
+ Note that InnoDB's rollback-in-background works as long as InnoDB is the
+ last engine to recover, otherwise MySQL will refuse new connections until
+ the last engine has recovered so it's not "background" from the user's
+ point of view. InnoDB is near top of sys_table_types so all others
+ (e.g. BDB) recover after it... So it's really "online rollback" only if
+ InnoDB is the only engine.
+ */
+
+ /* wake up delete/update handler */
+ /* tell the TM that it can now accept new transactions */
+
+ /*
+ mark that checkpoint requests are now allowed.
+ */
+#endif
diff --git a/storage/maria/ma_recovery.h b/storage/maria/ma_recovery.h
new file mode 100644
index 00000000000..f44891a36df
--- /dev/null
+++ b/storage/maria/ma_recovery.h
@@ -0,0 +1,34 @@
+/* Copyright (C) 2006,2007 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ WL#3072 Maria recovery
+ First version written by Guilhem Bichot on 2006-04-27.
+ Does not compile yet.
+*/
+
+/* This is the interface of this module. */
+
+/* Performs recovery of the engine at start */
+
+C_MODE_START
+enum maria_apply_log_way
+{ MARIA_LOG_APPLY, MARIA_LOG_DISPLAY_HEADER, MARIA_LOG_CHECK };
+int maria_recover(void);
+int maria_apply_log(LSN lsn, enum maria_apply_log_way apply,
+ FILE *trace_file,
+ my_bool execute_undo_phase, my_bool skip_DDLs,
+ my_bool take_checkpoints, uint *warnings_count);
+C_MODE_END
diff --git a/storage/maria/ma_rename.c b/storage/maria/ma_rename.c
new file mode 100644
index 00000000000..57d35f6c066
--- /dev/null
+++ b/storage/maria/ma_rename.c
@@ -0,0 +1,139 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Rename a table
+*/
+
+#include "ma_fulltext.h"
+#include "trnman_public.h"
+
+/**
+ @brief renames a table
+
+ @param old_name current name of table
+ @param new_name table should be renamed to this name
+
+ @return Operation status
+ @retval 0 OK
+ @retval !=0 Error
+*/
+
+int maria_rename(const char *old_name, const char *new_name)
+{
+ char from[FN_REFLEN],to[FN_REFLEN];
+ int data_file_rename_error;
+#ifdef USE_RAID
+ uint raid_type=0,raid_chunks=0;
+#endif
+ MARIA_HA *info;
+ MARIA_SHARE *share;
+ myf sync_dir;
+ DBUG_ENTER("maria_rename");
+
+#ifdef EXTRA_DEBUG
+ _ma_check_table_is_closed(old_name,"rename old_table");
+ _ma_check_table_is_closed(new_name,"rename new table2");
+#endif
+ /** @todo LOCK take X-lock on table */
+ if (!(info= maria_open(old_name, O_RDWR, HA_OPEN_FOR_REPAIR)))
+ DBUG_RETURN(my_errno);
+ share= info->s;
+#ifdef USE_RAID
+ raid_type = share->base.raid_type;
+ raid_chunks = share->base.raid_chunks;
+#endif
+
+ /*
+ the renaming of an internal table to the final table (like in ALTER TABLE)
+ is the moment when this table receives its correct create_rename_lsn and
+ this is important; make sure transactionality has been re-enabled.
+ */
+ DBUG_ASSERT(share->now_transactional == share->base.born_transactional);
+ sync_dir= (share->now_transactional && !share->temporary &&
+ !maria_in_recovery) ? MY_SYNC_DIR : 0;
+ if (sync_dir)
+ {
+ LSN lsn;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ uint old_name_len= strlen(old_name)+1, new_name_len= strlen(new_name)+1;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char *)old_name;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= old_name_len;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char *)new_name;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= new_name_len;
+ /*
+ For this record to be of any use for Recovery, we need the upper
+ MySQL layer to be crash-safe, which it is not now (that would require
+ work using the ddl_log of sql/sql_table.cc); when it is, we should
+ reconsider the moment of writing this log record (before or after op,
+ under THR_LOCK_maria or not...), how to use it in Recovery.
+ For now it can serve to apply logs to a backup so we sync it.
+ */
+ if (unlikely(translog_write_record(&lsn, LOGREC_REDO_RENAME_TABLE,
+ &dummy_transaction_object, NULL,
+ old_name_len + new_name_len,
+ sizeof(log_array)/sizeof(log_array[0]),
+ log_array, NULL, NULL) ||
+ translog_flush(lsn)))
+ {
+ maria_close(info);
+ DBUG_RETURN(1);
+ }
+ /*
+ store LSN into file, needed for Recovery to not be confused if a
+ RENAME happened (applying REDOs to the wrong table).
+ */
+ if (_ma_update_create_rename_lsn(share, lsn, TRUE))
+ {
+ maria_close(info);
+ DBUG_RETURN(1);
+ }
+ }
+
+ maria_close(info);
+#ifdef USE_RAID
+#ifdef EXTRA_DEBUG
+ _ma_check_table_is_closed(old_name,"rename raidcheck");
+#endif
+#endif /* USE_RAID */
+
+ fn_format(from,old_name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
+ fn_format(to,new_name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
+ if (my_rename_with_symlink(from, to, MYF(MY_WME | sync_dir)))
+ DBUG_RETURN(my_errno);
+ fn_format(from,old_name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
+ fn_format(to,new_name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
+#ifdef USE_RAID
+ if (raid_type)
+ data_file_rename_error= my_raid_rename(from, to, raid_chunks,
+ MYF(MY_WME | sync_dir));
+ else
+#endif
+ data_file_rename_error=
+ my_rename_with_symlink(from, to, MYF(MY_WME | sync_dir));
+ if (data_file_rename_error)
+ {
+ /*
+ now we have a renamed index file and a non-renamed data file, try to
+ undo the rename of the index file.
+ */
+ data_file_rename_error= my_errno;
+ fn_format(from, old_name, "", MARIA_NAME_IEXT, MYF(MY_UNPACK_FILENAME|MY_APPEND_EXT));
+ fn_format(to, new_name, "", MARIA_NAME_IEXT, MYF(MY_UNPACK_FILENAME|MY_APPEND_EXT));
+ my_rename_with_symlink(to, from, MYF(MY_WME | sync_dir));
+ }
+ DBUG_RETURN(data_file_rename_error);
+
+}
diff --git a/storage/maria/ma_rfirst.c b/storage/maria/ma_rfirst.c
new file mode 100644
index 00000000000..226aaa551f0
--- /dev/null
+++ b/storage/maria/ma_rfirst.c
@@ -0,0 +1,26 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+ /* Read first row through a specfic key */
+
+int maria_rfirst(MARIA_HA *info, uchar *buf, int inx)
+{
+ DBUG_ENTER("maria_rfirst");
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ info->update|= HA_STATE_PREV_FOUND;
+ DBUG_RETURN(maria_rnext(info,buf,inx));
+} /* maria_rfirst */
diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c
new file mode 100644
index 00000000000..e6033cdc4a8
--- /dev/null
+++ b/storage/maria/ma_rkey.c
@@ -0,0 +1,199 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Read record based on a key */
+
+#include "maria_def.h"
+#include "ma_rt_index.h"
+
+ /* Read a record using key */
+ /* Ordinary search_flag is 0 ; Give error if no record with key */
+
+int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key,
+ key_part_map keypart_map, enum ha_rkey_function search_flag)
+{
+ uchar *key_buff;
+ MARIA_SHARE *share= info->s;
+ MARIA_KEYDEF *keyinfo;
+ HA_KEYSEG *last_used_keyseg;
+ uint pack_key_length, use_key_length, nextflag;
+ DBUG_ENTER("maria_rkey");
+ DBUG_PRINT("enter", ("base: 0x%lx buf: 0x%lx inx: %d search_flag: %d",
+ (long) info, (long) buf, inx, search_flag));
+
+ if ((inx = _ma_check_index(info,inx)) < 0)
+ DBUG_RETURN(my_errno);
+
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ info->last_key_func= search_flag;
+ keyinfo= share->keyinfo + inx;
+
+ if (info->once_flags & USE_PACKED_KEYS)
+ {
+ info->once_flags&= ~USE_PACKED_KEYS; /* Reset flag */
+ /*
+ key is already packed!; This happens when we are using a MERGE TABLE
+ In this key 'key_part_map' is the length of the key !
+ */
+ key_buff= info->lastkey+info->s->base.max_key_length;
+ pack_key_length= keypart_map;
+ bmove(key_buff, key, pack_key_length);
+ last_used_keyseg= info->s->keyinfo[inx].seg + info->last_used_keyseg;
+ }
+ else
+ {
+ DBUG_ASSERT(keypart_map);
+ /* Save the packed key for later use in the second buffer of lastkey. */
+ key_buff=info->lastkey+info->s->base.max_key_length;
+ pack_key_length= _ma_pack_key(info,(uint) inx, key_buff, key,
+ keypart_map, &last_used_keyseg);
+ /* Save packed_key_length for use by the MERGE engine. */
+ info->pack_key_length= pack_key_length;
+ info->last_used_keyseg= (uint16) (last_used_keyseg -
+ info->s->keyinfo[inx].seg);
+ DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, keyinfo->seg,
+ key_buff, pack_key_length););
+ }
+
+ if (fast_ma_readinfo(info))
+ goto err;
+ if (share->concurrent_insert)
+ rw_rdlock(&share->key_root_lock[inx]);
+
+ nextflag=maria_read_vec[search_flag];
+ use_key_length=pack_key_length;
+ if (!(nextflag & (SEARCH_FIND | SEARCH_NO_FIND | SEARCH_LAST)))
+ use_key_length=USE_WHOLE_KEY;
+
+ switch (info->s->keyinfo[inx].key_alg) {
+#ifdef HAVE_RTREE_KEYS
+ case HA_KEY_ALG_RTREE:
+ if (maria_rtree_find_first(info,inx,key_buff,use_key_length,nextflag) < 0)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno= HA_ERR_CRASHED;
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ }
+ break;
+#endif
+ case HA_KEY_ALG_BTREE:
+ default:
+ if (!_ma_search(info, keyinfo, key_buff, use_key_length,
+ maria_read_vec[search_flag],
+ info->s->state.key_root[inx]) &&
+ share->concurrent_insert)
+ {
+ /*
+ Found a key, but it might not be usable. We cannot use rows that
+ are inserted by other threads after we got our table lock
+ ("concurrent inserts"). The record may not even be present yet.
+ Keys are inserted into the index(es) before the record is
+ inserted into the data file. When we got our table lock, we
+ saved the current data_file_length. Concurrent inserts always go
+ to the end of the file. So we can test if the found key
+ references a new record.
+ */
+ if (info->cur_row.lastpos >= info->state->data_file_length)
+ {
+ /* The key references a concurrently inserted record. */
+ if (search_flag == HA_READ_KEY_EXACT &&
+ last_used_keyseg == keyinfo->seg + keyinfo->keysegs)
+ {
+ /* Simply ignore the key if it matches exactly. (Bug #29838) */
+ my_errno= HA_ERR_KEY_NOT_FOUND;
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ }
+ else
+ {
+ /*
+ If searching for a partial key (or using >, >=, < or <=) and
+ the data is outside of the data file, we need to continue
+ searching for the first key inside the data file.
+ */
+ do
+ {
+ uint not_used[2];
+ /*
+ Skip rows that are inserted by other threads since we got
+ a lock. Note that this can only happen if we are not
+ searching after a full length exact key, because the keys
+ are sorted according to position.
+ */
+ if (_ma_search_next(info, keyinfo, info->lastkey,
+ info->lastkey_length,
+ maria_readnext_vec[search_flag],
+ info->s->state.key_root[inx]))
+ break; /* purecov: inspected */
+ /*
+ Check that the found key does still match the search.
+ _ma_search_next() delivers the next key regardless of its
+ value.
+ */
+ if (search_flag == HA_READ_KEY_EXACT &&
+ ha_key_cmp(keyinfo->seg, key_buff, info->lastkey,
+ use_key_length, SEARCH_FIND, not_used))
+ {
+ /* purecov: begin inspected */
+ my_errno= HA_ERR_KEY_NOT_FOUND;
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ break;
+ /* purecov: end */
+ }
+ } while (info->cur_row.lastpos >= info->state->data_file_length);
+ }
+ }
+ }
+ }
+ if (share->concurrent_insert)
+ rw_unlock(&share->key_root_lock[inx]);
+
+ if (info->cur_row.lastpos == HA_OFFSET_ERROR)
+ {
+ fast_ma_writeinfo(info);
+ goto err;
+ }
+
+ /* Calculate length of the found key; Used by maria_rnext_same */
+ if ((keyinfo->flag & HA_VAR_LENGTH_KEY) && last_used_keyseg)
+ info->last_rkey_length= _ma_keylength_part(keyinfo, info->lastkey,
+ last_used_keyseg);
+ else
+ info->last_rkey_length= pack_key_length;
+
+ /* Check if we don't want to have record back, only error message */
+ if (!buf)
+ {
+ fast_ma_writeinfo(info);
+ DBUG_RETURN(0);
+ }
+ if (!(*info->read_record)(info, buf, info->cur_row.lastpos))
+ {
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ DBUG_RETURN(0);
+ }
+
+ info->cur_row.lastpos= HA_OFFSET_ERROR; /* Didn't find row */
+
+err:
+ /* Store last used key as a base for read next */
+ memcpy(info->lastkey,key_buff,pack_key_length);
+ info->last_rkey_length= pack_key_length;
+ bzero((char*) info->lastkey+pack_key_length,info->s->base.rec_reflength);
+ info->lastkey_length=pack_key_length+info->s->base.rec_reflength;
+
+ if (search_flag == HA_READ_AFTER_KEY)
+ info->update|=HA_STATE_NEXT_FOUND; /* Previous gives last row */
+ DBUG_RETURN(my_errno);
+} /* _ma_rkey */
diff --git a/storage/maria/ma_rlast.c b/storage/maria/ma_rlast.c
new file mode 100644
index 00000000000..a9a470d37d9
--- /dev/null
+++ b/storage/maria/ma_rlast.c
@@ -0,0 +1,26 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+ /* Read last row with the same key as the previous read. */
+
+int maria_rlast(MARIA_HA *info, uchar *buf, int inx)
+{
+ DBUG_ENTER("maria_rlast");
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ info->update|= HA_STATE_NEXT_FOUND;
+ DBUG_RETURN(maria_rprev(info,buf,inx));
+} /* maria_rlast */
diff --git a/storage/maria/ma_rnext.c b/storage/maria/ma_rnext.c
new file mode 100644
index 00000000000..fcc0f1f6a90
--- /dev/null
+++ b/storage/maria/ma_rnext.c
@@ -0,0 +1,122 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+#include "ma_rt_index.h"
+
+ /*
+ Read next row with the same key as previous read
+ One may have done a write, update or delete of the previous row.
+ NOTE! Even if one changes the previous row, the next read is done
+ based on the position of the last used key!
+ */
+
+int maria_rnext(MARIA_HA *info, uchar *buf, int inx)
+{
+ int error,changed;
+ uint flag;
+ DBUG_ENTER("maria_rnext");
+
+ if ((inx = _ma_check_index(info,inx)) < 0)
+ DBUG_RETURN(my_errno);
+ flag=SEARCH_BIGGER; /* Read next */
+ if (info->cur_row.lastpos == HA_OFFSET_ERROR &&
+ info->update & HA_STATE_PREV_FOUND)
+ flag=0; /* Read first */
+
+ if (fast_ma_readinfo(info))
+ DBUG_RETURN(my_errno);
+ if (info->s->concurrent_insert)
+ rw_rdlock(&info->s->key_root_lock[inx]);
+ changed= _ma_test_if_changed(info);
+ if (!flag)
+ {
+ switch(info->s->keyinfo[inx].key_alg){
+#ifdef HAVE_RTREE_KEYS
+ case HA_KEY_ALG_RTREE:
+ error=maria_rtree_get_first(info,inx,info->lastkey_length);
+ break;
+#endif
+ case HA_KEY_ALG_BTREE:
+ default:
+ error= _ma_search_first(info,info->s->keyinfo+inx,
+ info->s->state.key_root[inx]);
+ break;
+ }
+ }
+ else
+ {
+ switch (info->s->keyinfo[inx].key_alg) {
+#ifdef HAVE_RTREE_KEYS
+ case HA_KEY_ALG_RTREE:
+ /*
+ Note that rtree doesn't support that the table
+ may be changed since last call, so we do need
+ to skip rows inserted by other threads like in btree
+ */
+ error= maria_rtree_get_next(info,inx,info->lastkey_length);
+ break;
+#endif
+ case HA_KEY_ALG_BTREE:
+ default:
+ if (!changed)
+ error= _ma_search_next(info,info->s->keyinfo+inx,info->lastkey,
+ info->lastkey_length,flag,
+ info->s->state.key_root[inx]);
+ else
+ error= _ma_search(info,info->s->keyinfo+inx,info->lastkey,
+ USE_WHOLE_KEY,flag, info->s->state.key_root[inx]);
+ }
+ }
+
+ if (info->s->concurrent_insert)
+ {
+ if (!error)
+ {
+ while (info->cur_row.lastpos >= info->state->data_file_length)
+ {
+ /* Skip rows inserted by other threads since we got a lock */
+ if ((error= _ma_search_next(info,info->s->keyinfo+inx,
+ info->lastkey,
+ info->lastkey_length,
+ SEARCH_BIGGER,
+ info->s->state.key_root[inx])))
+ break;
+ }
+ }
+ rw_unlock(&info->s->key_root_lock[inx]);
+ }
+ /* Don't clear if database-changed */
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ info->update|= HA_STATE_NEXT_FOUND;
+
+ if (error)
+ {
+ if (my_errno == HA_ERR_KEY_NOT_FOUND)
+ my_errno=HA_ERR_END_OF_FILE;
+ }
+ else if (!buf)
+ {
+ DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0);
+ }
+ else if (!(*info->read_record)(info, buf, info->cur_row.lastpos))
+ {
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ DBUG_RETURN(0);
+ }
+ DBUG_PRINT("error",("Got error: %d, errno: %d",error, my_errno));
+ DBUG_RETURN(my_errno);
+} /* maria_rnext */
diff --git a/storage/maria/ma_rnext_same.c b/storage/maria/ma_rnext_same.c
new file mode 100644
index 00000000000..6782cf5b8cf
--- /dev/null
+++ b/storage/maria/ma_rnext_same.c
@@ -0,0 +1,107 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+#include "ma_rt_index.h"
+
+/*
+ Read next row with the same key as previous read, but abort if
+ the key changes.
+ One may have done a write, update or delete of the previous row.
+
+ NOTE! Even if one changes the previous row, the next read is done
+ based on the position of the last used key!
+*/
+
+int maria_rnext_same(MARIA_HA *info, uchar *buf)
+{
+ int error;
+ uint inx,not_used[2];
+ MARIA_KEYDEF *keyinfo;
+ DBUG_ENTER("maria_rnext_same");
+
+ if ((int) (inx= info->lastinx) < 0 ||
+ info->cur_row.lastpos == HA_OFFSET_ERROR)
+ DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX);
+ keyinfo= info->s->keyinfo+inx;
+ if (fast_ma_readinfo(info))
+ DBUG_RETURN(my_errno);
+
+ if (info->s->concurrent_insert)
+ rw_rdlock(&info->s->key_root_lock[inx]);
+
+ switch (keyinfo->key_alg)
+ {
+#ifdef HAVE_RTREE_KEYS
+ case HA_KEY_ALG_RTREE:
+ if ((error=maria_rtree_find_next(info,inx,
+ maria_read_vec[info->last_key_func])))
+ {
+ error=1;
+ my_errno=HA_ERR_END_OF_FILE;
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ break;
+ }
+ break;
+#endif
+ case HA_KEY_ALG_BTREE:
+ default:
+ if (!(info->update & HA_STATE_RNEXT_SAME))
+ {
+ /* First rnext_same; Store old key */
+ memcpy(info->lastkey2,info->lastkey,info->last_rkey_length);
+ }
+ for (;;)
+ {
+ if ((error= _ma_search_next(info,keyinfo,info->lastkey,
+ info->lastkey_length,SEARCH_BIGGER,
+ info->s->state.key_root[inx])))
+ break;
+ if (ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey,
+ (uchar*) info->lastkey2,
+ info->last_rkey_length, SEARCH_FIND, not_used))
+ {
+ error=1;
+ my_errno=HA_ERR_END_OF_FILE;
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ break;
+ }
+ /* Skip rows that are inserted by other threads since we got a lock */
+ if (info->cur_row.lastpos < info->state->data_file_length)
+ break;
+ }
+ }
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->key_root_lock[inx]);
+ /* Don't clear if database-changed */
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ info->update|= HA_STATE_NEXT_FOUND | HA_STATE_RNEXT_SAME;
+
+ if (error)
+ {
+ if (my_errno == HA_ERR_KEY_NOT_FOUND)
+ my_errno=HA_ERR_END_OF_FILE;
+ }
+ else if (!buf)
+ {
+ DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0);
+ }
+ else if (!(*info->read_record)(info, buf, info->cur_row.lastpos))
+ {
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ DBUG_RETURN(0);
+ }
+ DBUG_RETURN(my_errno);
+} /* maria_rnext_same */
diff --git a/storage/maria/ma_rprev.c b/storage/maria/ma_rprev.c
new file mode 100644
index 00000000000..c41c88c154b
--- /dev/null
+++ b/storage/maria/ma_rprev.c
@@ -0,0 +1,88 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+ /*
+ Read previous row with the same key as previous read
+ One may have done a write, update or delete of the previous row.
+ NOTE! Even if one changes the previous row, the next read is done
+ based on the position of the last used key!
+ */
+
+int maria_rprev(MARIA_HA *info, uchar *buf, int inx)
+{
+ int error,changed;
+ register uint flag;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("maria_rprev");
+
+ if ((inx = _ma_check_index(info,inx)) < 0)
+ DBUG_RETURN(my_errno);
+ flag=SEARCH_SMALLER; /* Read previous */
+ if (info->cur_row.lastpos == HA_OFFSET_ERROR &&
+ info->update & HA_STATE_NEXT_FOUND)
+ flag=0; /* Read last */
+
+ if (fast_ma_readinfo(info))
+ DBUG_RETURN(my_errno);
+ changed= _ma_test_if_changed(info);
+ if (share->concurrent_insert)
+ rw_rdlock(&share->key_root_lock[inx]);
+ if (!flag)
+ error= _ma_search_last(info, share->keyinfo+inx,
+ share->state.key_root[inx]);
+ else if (!changed)
+ error= _ma_search_next(info,share->keyinfo+inx,info->lastkey,
+ info->lastkey_length,flag,
+ share->state.key_root[inx]);
+ else
+ error= _ma_search(info,share->keyinfo+inx,info->lastkey,
+ USE_WHOLE_KEY, flag, share->state.key_root[inx]);
+
+ if (share->concurrent_insert)
+ {
+ if (!error)
+ {
+ while (info->cur_row.lastpos >= info->state->data_file_length)
+ {
+ /* Skip rows that are inserted by other threads since we got a lock */
+ if ((error= _ma_search_next(info,share->keyinfo+inx,info->lastkey,
+ info->lastkey_length,
+ SEARCH_SMALLER,
+ share->state.key_root[inx])))
+ break;
+ }
+ }
+ rw_unlock(&share->key_root_lock[inx]);
+ }
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ info->update|= HA_STATE_PREV_FOUND;
+ if (error)
+ {
+ if (my_errno == HA_ERR_KEY_NOT_FOUND)
+ my_errno=HA_ERR_END_OF_FILE;
+ }
+ else if (!buf)
+ {
+ DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0);
+ }
+ else if (!(*info->read_record)(info, buf, info->cur_row.lastpos))
+ {
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ DBUG_RETURN(0);
+ }
+ DBUG_RETURN(my_errno);
+} /* maria_rprev */
diff --git a/storage/maria/ma_rrnd.c b/storage/maria/ma_rrnd.c
new file mode 100644
index 00000000000..24c4bfdd467
--- /dev/null
+++ b/storage/maria/ma_rrnd.c
@@ -0,0 +1,44 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Read a record with random-access. The position to the record must
+ get by MARIA_HA. The next record can be read with pos= MARIA_POS_ERROR */
+
+
+#include "maria_def.h"
+
+/*
+ Read a row based on position.
+
+ RETURN
+ 0 Ok.
+ HA_ERR_RECORD_DELETED Record is deleted.
+ HA_ERR_END_OF_FILE EOF.
+*/
+
+int maria_rrnd(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos)
+{
+ DBUG_ENTER("maria_rrnd");
+
+ DBUG_ASSERT(filepos != HA_OFFSET_ERROR);
+
+ /* Init all but update-flag */
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache))
+ DBUG_RETURN(my_errno);
+
+ info->cur_row.lastpos= filepos; /* Remember for update */
+ DBUG_RETURN((*info->s->read_record)(info, buf, filepos));
+}
diff --git a/storage/maria/ma_rsame.c b/storage/maria/ma_rsame.c
new file mode 100644
index 00000000000..9c9acac013a
--- /dev/null
+++ b/storage/maria/ma_rsame.c
@@ -0,0 +1,69 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+/*
+ Find current row with read on position or read on key
+
+ NOTES
+ If inx >= 0 find record using key
+
+ RETURN
+ 0 Ok
+ HA_ERR_KEY_NOT_FOUND Row is deleted
+ HA_ERR_END_OF_FILE End of file
+*/
+
+
+int maria_rsame(MARIA_HA *info, uchar *record, int inx)
+{
+ DBUG_ENTER("maria_rsame");
+
+ if (inx != -1 && ! maria_is_key_active(info->s->state.key_map, inx))
+ {
+ DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX);
+ }
+ if (info->cur_row.lastpos == HA_OFFSET_ERROR ||
+ info->update & HA_STATE_DELETED)
+ {
+ DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); /* No current record */
+ }
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+
+ /* Read row from data file */
+ if (fast_ma_readinfo(info))
+ DBUG_RETURN(my_errno);
+
+ if (inx >= 0)
+ {
+ info->lastinx=inx;
+ info->lastkey_length= _ma_make_key(info,(uint) inx,info->lastkey,record,
+ info->cur_row.lastpos);
+ if (info->s->concurrent_insert)
+ rw_rdlock(&info->s->key_root_lock[inx]);
+ VOID(_ma_search(info,info->s->keyinfo+inx,info->lastkey, USE_WHOLE_KEY,
+ SEARCH_SAME,
+ info->s->state.key_root[inx]));
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->key_root_lock[inx]);
+ }
+
+ if (!(*info->read_record)(info, record, info->cur_row.lastpos))
+ DBUG_RETURN(0);
+ if (my_errno == HA_ERR_RECORD_DELETED)
+ my_errno=HA_ERR_KEY_NOT_FOUND;
+ DBUG_RETURN(my_errno);
+} /* maria_rsame */
diff --git a/storage/maria/ma_rsamepos.c b/storage/maria/ma_rsamepos.c
new file mode 100644
index 00000000000..186bc80c06d
--- /dev/null
+++ b/storage/maria/ma_rsamepos.c
@@ -0,0 +1,58 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* read record through position and fix key-position */
+/* As maria_rsame but supply a position */
+
+#include "maria_def.h"
+
+
+ /*
+ ** If inx >= 0 update index pointer
+ ** Returns one of the following values:
+ ** 0 = Ok.
+ ** HA_ERR_KEY_NOT_FOUND = Row is deleted
+ ** HA_ERR_END_OF_FILE = End of file
+ */
+
+int maria_rsame_with_pos(MARIA_HA *info, uchar *record, int inx,
+ MARIA_RECORD_POS filepos)
+{
+ DBUG_ENTER("maria_rsame_with_pos");
+ DBUG_PRINT("enter",("index: %d filepos: %ld", inx, (long) filepos));
+
+ if (inx < -1 ||
+ (inx >= 0 && ! maria_is_key_active(info->s->state.key_map, inx)))
+ {
+ DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX);
+ }
+
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ if ((*info->s->read_record)(info, record, filepos))
+ {
+ if (my_errno == HA_ERR_RECORD_DELETED)
+ my_errno=HA_ERR_KEY_NOT_FOUND;
+ DBUG_RETURN(my_errno);
+ }
+ info->cur_row.lastpos= filepos;
+ info->lastinx= inx;
+ if (inx >= 0)
+ {
+ info->lastkey_length= _ma_make_key(info,(uint) inx,info->lastkey,record,
+ info->cur_row.lastpos);
+ info->update|=HA_STATE_KEY_CHANGED; /* Don't use indexposition */
+ }
+ DBUG_RETURN(0);
+} /* maria_rsame_pos */
diff --git a/storage/maria/ma_rt_index.c b/storage/maria/ma_rt_index.c
new file mode 100644
index 00000000000..29a9bab4f72
--- /dev/null
+++ b/storage/maria/ma_rt_index.c
@@ -0,0 +1,1196 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB
+ & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+#ifdef HAVE_RTREE_KEYS
+
+#include "ma_rt_index.h"
+#include "ma_rt_key.h"
+#include "ma_rt_mbr.h"
+
+#define REINSERT_BUFFER_INC 10
+#define PICK_BY_AREA
+/*#define PICK_BY_PERIMETER*/
+
+typedef struct st_page_level
+{
+ uint level;
+ my_off_t offs;
+} stPageLevel;
+
+typedef struct st_page_list
+{
+ ulong n_pages;
+ ulong m_pages;
+ stPageLevel *pages;
+} stPageList;
+
+
+/*
+ Find next key in r-tree according to search_flag recursively
+
+ NOTES
+ Used in maria_rtree_find_first() and maria_rtree_find_next()
+
+ RETURN
+ -1 Error
+ 0 Found
+ 1 Not found
+*/
+
+static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uint search_flag,
+ uint nod_cmp_flag, my_off_t page, int level)
+{
+ MARIA_SHARE *share= info->s;
+ uint nod_flag;
+ int res;
+ uchar *page_buf, *k, *last;
+ int k_len;
+ uint *saved_key= (uint*) (info->maria_rtree_recursion_state) + level;
+
+ if (!(page_buf= (uchar*) my_alloca((uint)keyinfo->block_length)))
+ {
+ my_errno= HA_ERR_OUT_OF_MEM;
+ return -1;
+ }
+ if (!_ma_fetch_keypage(info, keyinfo, page, PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, page_buf, 0, 0))
+ goto err1;
+ nod_flag= _ma_test_if_nod(share, page_buf);
+
+ k_len= keyinfo->keylength - share->base.rec_reflength;
+
+ if (info->maria_rtree_recursion_depth >= level)
+ {
+ k= page_buf + *saved_key;
+ }
+ else
+ {
+ k= rt_PAGE_FIRST_KEY(share, page_buf, nod_flag);
+ }
+ last= rt_PAGE_END(share, page_buf);
+
+ for (; k < last; k= rt_PAGE_NEXT_KEY(share, k, k_len, nod_flag))
+ {
+ if (nod_flag)
+ {
+ /* this is an internal node in the tree */
+ if (!(res= maria_rtree_key_cmp(keyinfo->seg,
+ info->first_mbr_key, k,
+ info->last_rkey_length, nod_cmp_flag)))
+ {
+ switch ((res= maria_rtree_find_req(info, keyinfo, search_flag,
+ nod_cmp_flag,
+ _ma_kpos(nod_flag, k),
+ level + 1)))
+ {
+ case 0: /* found - exit from recursion */
+ *saved_key= k - page_buf;
+ goto ok;
+ case 1: /* not found - continue searching */
+ info->maria_rtree_recursion_depth= level;
+ break;
+ default: /* error */
+ case -1:
+ goto err1;
+ }
+ }
+ }
+ else
+ {
+ /* this is a leaf */
+ if (!maria_rtree_key_cmp(keyinfo->seg, info->first_mbr_key,
+ k, info->last_rkey_length, search_flag))
+ {
+ uchar *after_key= (uchar*) rt_PAGE_NEXT_KEY(share, k, k_len, nod_flag);
+ info->cur_row.lastpos= _ma_dpos(info, 0, after_key);
+ info->lastkey_length= k_len + share->base.rec_reflength;
+ memcpy(info->lastkey, k, info->lastkey_length);
+ info->maria_rtree_recursion_depth= level;
+ *saved_key= last - page_buf;
+
+ if (after_key < last)
+ {
+ info->int_keypos= info->buff;
+ info->int_maxpos= info->buff + (last - after_key);
+ memcpy(info->buff, after_key, last - after_key);
+ info->keyread_buff_used= 0;
+ }
+ else
+ {
+ info->keyread_buff_used= 1;
+ }
+
+ res= 0;
+ goto ok;
+ }
+ }
+ }
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ my_errno= HA_ERR_KEY_NOT_FOUND;
+ res= 1;
+
+ok:
+ my_afree((uchar*)page_buf);
+ return res;
+
+err1:
+ my_afree((uchar*)page_buf);
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ return -1;
+}
+
+
+/*
+ Find first key in r-tree according to search_flag condition
+
+ SYNOPSIS
+ maria_rtree_find_first()
+ info Handler to MARIA file
+ uint keynr Key number to use
+ key Key to search for
+ key_length Length of 'key'
+ search_flag Bitmap of flags how to do the search
+
+ RETURN
+ -1 Error
+ 0 Found
+ 1 Not found
+*/
+
+int maria_rtree_find_first(MARIA_HA *info, uint keynr, uchar *key,
+ uint key_length, uint search_flag)
+{
+ my_off_t root;
+ uint nod_cmp_flag;
+ MARIA_KEYDEF *keyinfo= info->s->keyinfo + keynr;
+
+ if ((root= info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ return -1;
+ }
+
+ /*
+ Save searched key, include data pointer.
+ The data pointer is required if the search_flag contains MBR_DATA.
+ (minimum bounding rectangle)
+ */
+ memcpy(info->first_mbr_key, key, keyinfo->keylength);
+ info->last_rkey_length= key_length;
+
+ info->maria_rtree_recursion_depth= -1;
+ info->keyread_buff_used= 1;
+
+ nod_cmp_flag= ((search_flag & (MBR_EQUAL | MBR_WITHIN)) ?
+ MBR_WITHIN : MBR_INTERSECT);
+ return maria_rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, root,
+ 0);
+}
+
+
+/*
+ Find next key in r-tree according to search_flag condition
+
+ SYNOPSIS
+ maria_rtree_find_next()
+ info Handler to MARIA file
+ uint keynr Key number to use
+ search_flag Bitmap of flags how to do the search
+
+ RETURN
+ -1 Error
+ 0 Found
+ 1 Not found
+*/
+
+int maria_rtree_find_next(MARIA_HA *info, uint keynr, uint search_flag)
+{
+ my_off_t root;
+ uint nod_cmp_flag;
+ MARIA_KEYDEF *keyinfo= info->s->keyinfo + keynr;
+
+ if (info->update & HA_STATE_DELETED)
+ return maria_rtree_find_first(info, keynr, info->lastkey,
+ info->lastkey_length,
+ search_flag);
+
+ if (!info->keyread_buff_used)
+ {
+ uchar *key= info->int_keypos;
+
+ while (key < info->int_maxpos)
+ {
+ if (!maria_rtree_key_cmp(keyinfo->seg,
+ info->first_mbr_key, key,
+ info->last_rkey_length, search_flag))
+ {
+ uchar *after_key= key + keyinfo->keylength;
+
+ info->cur_row.lastpos= _ma_dpos(info, 0, after_key);
+ memcpy(info->lastkey, key, info->lastkey_length);
+
+ if (after_key < info->int_maxpos)
+ info->int_keypos= after_key;
+ else
+ info->keyread_buff_used= 1;
+ return 0;
+ }
+ key+= keyinfo->keylength;
+ }
+ }
+ if ((root= info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ return -1;
+ }
+
+ nod_cmp_flag= ((search_flag & (MBR_EQUAL | MBR_WITHIN)) ?
+ MBR_WITHIN : MBR_INTERSECT);
+ return maria_rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, root, 0);
+}
+
+
+/*
+ Get next key in r-tree recursively
+
+ NOTES
+ Used in maria_rtree_get_first() and maria_rtree_get_next()
+
+ RETURN
+ -1 Error
+ 0 Found
+ 1 Not found
+*/
+
+static int maria_rtree_get_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uint key_length, my_off_t page, int level)
+{
+ MARIA_SHARE *share= info->s;
+ uchar *page_buf, *last, *k;
+ uint nod_flag, k_len;
+ int res;
+ uint *saved_key= (uint*) (info->maria_rtree_recursion_state) + level;
+
+ if (!(page_buf= (uchar*) my_alloca((uint)keyinfo->block_length)))
+ return -1;
+ if (!_ma_fetch_keypage(info, keyinfo, page, PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, page_buf, 0, 0))
+ goto err1;
+ nod_flag= _ma_test_if_nod(share, page_buf);
+
+ k_len= keyinfo->keylength - share->base.rec_reflength;
+
+ if(info->maria_rtree_recursion_depth >= level)
+ {
+ k= page_buf + *saved_key;
+ if (!nod_flag)
+ {
+ /* Only leaf pages contain data references. */
+ /* Need to check next key with data reference. */
+ k= rt_PAGE_NEXT_KEY(share, k, k_len, nod_flag);
+ }
+ }
+ else
+ {
+ k= rt_PAGE_FIRST_KEY(share, page_buf, nod_flag);
+ }
+ last= rt_PAGE_END(share, page_buf);
+
+ for (; k < last; k= rt_PAGE_NEXT_KEY(share, k, k_len, nod_flag))
+ {
+ if (nod_flag)
+ {
+ /* this is an internal node in the tree */
+ switch ((res= maria_rtree_get_req(info, keyinfo, key_length,
+ _ma_kpos(nod_flag, k), level + 1)))
+ {
+ case 0: /* found - exit from recursion */
+ *saved_key= k - page_buf;
+ goto ok;
+ case 1: /* not found - continue searching */
+ info->maria_rtree_recursion_depth= level;
+ break;
+ default:
+ case -1: /* error */
+ goto err1;
+ }
+ }
+ else
+ {
+ /* this is a leaf */
+ uchar *after_key= rt_PAGE_NEXT_KEY(share, k, k_len, nod_flag);
+ info->cur_row.lastpos= _ma_dpos(info, 0, after_key);
+ info->lastkey_length= k_len + share->base.rec_reflength;
+ memcpy(info->lastkey, k, info->lastkey_length);
+
+ info->maria_rtree_recursion_depth= level;
+ *saved_key= k - page_buf;
+
+ if (after_key < last)
+ {
+ info->int_keypos= (uchar*) saved_key;
+ memcpy(info->buff, page_buf, keyinfo->block_length);
+ info->int_maxpos= rt_PAGE_END(share, info->buff);
+ info->keyread_buff_used= 0;
+ }
+ else
+ {
+ info->keyread_buff_used= 1;
+ }
+
+ res= 0;
+ goto ok;
+ }
+ }
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ my_errno= HA_ERR_KEY_NOT_FOUND;
+ res= 1;
+
+ok:
+ my_afree((uchar*)page_buf);
+ return res;
+
+err1:
+ my_afree((uchar*)page_buf);
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ return -1;
+}
+
+
+/*
+ Get first key in r-tree
+
+ RETURN
+ -1 Error
+ 0 Found
+ 1 Not found
+*/
+
+int maria_rtree_get_first(MARIA_HA *info, uint keynr, uint key_length)
+{
+ my_off_t root;
+ MARIA_KEYDEF *keyinfo= info->s->keyinfo + keynr;
+
+ if ((root= info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ return -1;
+ }
+
+ info->maria_rtree_recursion_depth= -1;
+ info->keyread_buff_used= 1;
+
+ return maria_rtree_get_req(info, &keyinfo[keynr], key_length, root, 0);
+}
+
+
+/*
+ Get next key in r-tree
+
+ RETURN
+ -1 Error
+ 0 Found
+ 1 Not found
+*/
+
+int maria_rtree_get_next(MARIA_HA *info, uint keynr, uint key_length)
+{
+ my_off_t root;
+ MARIA_KEYDEF *keyinfo= info->s->keyinfo + keynr;
+
+ if (!info->keyread_buff_used)
+ {
+ uint k_len= keyinfo->keylength - info->s->base.rec_reflength;
+ /* rt_PAGE_NEXT_KEY(info->int_keypos) */
+ uchar *key= info->buff + *(int*)info->int_keypos + k_len +
+ info->s->base.rec_reflength;
+ /* rt_PAGE_NEXT_KEY(key) */
+ uchar *after_key= key + k_len + info->s->base.rec_reflength;
+
+ info->cur_row.lastpos= _ma_dpos(info, 0, after_key);
+ info->lastkey_length= k_len + info->s->base.rec_reflength;
+ memcpy(info->lastkey, key, k_len + info->s->base.rec_reflength);
+
+ *(int*)info->int_keypos= key - info->buff;
+ if (after_key >= info->int_maxpos)
+ {
+ info->keyread_buff_used= 1;
+ }
+
+ return 0;
+ }
+ else
+ {
+ if ((root= info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ return -1;
+ }
+
+ return maria_rtree_get_req(info, &keyinfo[keynr], key_length, root, 0);
+ }
+}
+
+
+/*
+ Choose non-leaf better key for insertion
+*/
+
+#ifdef PICK_BY_PERIMETER
+static uchar *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key,
+ uint key_length, uchar *page_buf,
+ uint nod_flag)
+{
+ double increase;
+ double best_incr= DBL_MAX;
+ double perimeter;
+ double best_perimeter;
+ uchar *best_key;
+ uchar *k= rt_PAGE_FIRST_KEY(page_buf, nod_flag);
+ uchar *last= rt_PAGE_END(info, page_buf);
+
+ LINT_INIT(best_perimeter);
+ LINT_INIT(best_key);
+
+ for (; k < last; k= rt_PAGE_NEXT_KEY(k, key_length, nod_flag))
+ {
+ if ((increase= maria_rtree_perimeter_increase(keyinfo->seg, k, key, key_length,
+ &perimeter)) == -1)
+ return NULL;
+ if ((increase < best_incr)||
+ (increase == best_incr && perimeter < best_perimeter))
+ {
+ best_key= k;
+ best_perimeter= perimeter;
+ best_incr= increase;
+ }
+ }
+ return best_key;
+}
+
+#endif /*PICK_BY_PERIMETER*/
+
+#ifdef PICK_BY_AREA
+static uchar *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key,
+ uint key_length, uchar *page_buf,
+ uint nod_flag)
+{
+ MARIA_SHARE *share= info->s;
+ double increase;
+ double best_incr= DBL_MAX;
+ double area;
+ double best_area;
+ uchar *best_key;
+ uchar *k= rt_PAGE_FIRST_KEY(share, page_buf, nod_flag);
+ uchar *last= rt_PAGE_END(share, page_buf);
+
+ LINT_INIT(best_area);
+ LINT_INIT(best_key);
+
+ for (; k < last; k= rt_PAGE_NEXT_KEY(share, k, key_length, nod_flag))
+ {
+ /* The following is safe as -1.0 is an exact number */
+ if ((increase= maria_rtree_area_increase(keyinfo->seg, k, key, key_length,
+ &area)) == -1.0)
+ return NULL;
+ /* The following should be safe, even if we compare doubles */
+ if (increase < best_incr)
+ {
+ best_key= k;
+ best_area= area;
+ best_incr= increase;
+ }
+ else
+ {
+ /* The following should be safe, even if we compare doubles */
+ if ((increase == best_incr) && (area < best_area))
+ {
+ best_key= k;
+ best_area= area;
+ best_incr= increase;
+ }
+ }
+ }
+ return best_key;
+}
+
+#endif /*PICK_BY_AREA*/
+
+/*
+ Go down and insert key into tree
+
+ RETURN
+ -1 Error
+ 0 Child was not split
+ 1 Child was split
+*/
+
+static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key,
+ uint key_length, my_off_t page,
+ my_off_t *new_page,
+ int ins_level, int level)
+{
+ uint nod_flag;
+ int res;
+ uchar *page_buf, *k;
+ MARIA_PINNED_PAGE *page_link;
+ DBUG_ENTER("maria_rtree_insert_req");
+
+ if (!(page_buf= (uchar*) my_alloca((uint)keyinfo->block_length +
+ HA_MAX_KEY_BUFF)))
+ {
+ my_errno= HA_ERR_OUT_OF_MEM;
+ DBUG_RETURN(-1); /* purecov: inspected */
+ }
+ if (!_ma_fetch_keypage(info, keyinfo, page, PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, page_buf, 0, &page_link))
+ goto err1;
+ nod_flag= _ma_test_if_nod(info->s, page_buf);
+ DBUG_PRINT("rtree", ("page: %lu level: %d ins_level: %d nod_flag: %u",
+ (ulong) page, level, ins_level, nod_flag));
+
+ if ((ins_level == -1 && nod_flag) || /* key: go down to leaf */
+ (ins_level > -1 && ins_level > level)) /* branch: go down to ins_level */
+ {
+ if ((k= maria_rtree_pick_key(info, keyinfo, key, key_length, page_buf,
+ nod_flag)) == NULL)
+ goto err1;
+ switch ((res= maria_rtree_insert_req(info, keyinfo, key, key_length,
+ _ma_kpos(nod_flag, k), new_page,
+ ins_level, level + 1)))
+ {
+ case 0: /* child was not split */
+ {
+ maria_rtree_combine_rect(keyinfo->seg, k, key, k, key_length);
+ page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, page_buf))
+ goto err1;
+ goto ok;
+ }
+ case 1: /* child was split */
+ {
+ uchar *new_key= page_buf + keyinfo->block_length + nod_flag;
+ /* set proper MBR for key */
+ if (maria_rtree_set_key_mbr(info, keyinfo, k, key_length,
+ _ma_kpos(nod_flag, k)))
+ goto err1;
+ /* add new key for new page */
+ _ma_kpointer(info, new_key - nod_flag, *new_page);
+ if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length,
+ *new_page))
+ goto err1;
+ res= maria_rtree_add_key(info, keyinfo, new_key, key_length,
+ page_buf, new_page);
+ page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, page_buf))
+ goto err1;
+ goto ok;
+ }
+ default:
+ case -1: /* error */
+ {
+ goto err1;
+ }
+ }
+ }
+ else
+ {
+ res= maria_rtree_add_key(info, keyinfo, key, key_length, page_buf,
+ new_page);
+ page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, page, PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, page_buf))
+ goto err1;
+ }
+
+ok:
+ my_afree(page_buf);
+ DBUG_RETURN(res);
+
+err1:
+ my_afree(page_buf);
+ DBUG_RETURN(-1); /* purecov: inspected */
+}
+
+
+/*
+ Insert key into the tree
+
+ RETURN
+ -1 Error
+ 0 Root was not split
+ 1 Root was split
+*/
+
+static int maria_rtree_insert_level(MARIA_HA *info, uint keynr, uchar *key,
+ uint key_length, int ins_level)
+{
+ my_off_t old_root;
+ MARIA_SHARE *share= info->s;
+ MARIA_KEYDEF *keyinfo= share->keyinfo + keynr;
+ int res;
+ my_off_t new_page;
+ MARIA_PINNED_PAGE *page_link;
+ DBUG_ENTER("maria_rtree_insert_level");
+
+ if ((old_root= share->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ {
+ MARIA_PINNED_PAGE tmp_page_link;
+ page_link= &tmp_page_link;
+ if ((old_root= _ma_new(info, DFLT_INIT_HITS, &page_link)) ==
+ HA_OFFSET_ERROR)
+ DBUG_RETURN(-1);
+ info->keyread_buff_used= 1;
+ bzero(info->buff, share->keypage_header);
+ _ma_store_keynr(share, info->buff, keynr);
+ _ma_store_page_used(share, info->buff, share->keypage_header);
+
+ res= maria_rtree_add_key(info, keyinfo, key, key_length, info->buff,
+ NULL);
+ if (_ma_write_keypage(info, keyinfo, old_root,
+ page_link->write_lock,
+ DFLT_INIT_HITS, info->buff))
+ DBUG_RETURN(1);
+ share->state.key_root[keynr]= old_root;
+ DBUG_RETURN(res);
+ }
+
+ switch ((res= maria_rtree_insert_req(info, keyinfo, key, key_length,
+ old_root, &new_page, ins_level, 0)))
+ {
+ case 0: /* root was not split */
+ {
+ break;
+ }
+ case 1: /* root was split, grow a new root */
+ {
+ uchar *new_root_buf, *new_key;
+ my_off_t new_root;
+ uint nod_flag= share->base.key_reflength;
+ MARIA_PINNED_PAGE tmp_page_link;
+ page_link= &tmp_page_link;
+
+ DBUG_PRINT("rtree", ("root was split, grow a new root"));
+ if (!(new_root_buf= (uchar*) my_alloca((uint)keyinfo->block_length +
+ HA_MAX_KEY_BUFF)))
+ {
+ my_errno= HA_ERR_OUT_OF_MEM;
+ DBUG_RETURN(-1); /* purecov: inspected */
+ }
+
+ bzero(new_root_buf, share->keypage_header);
+ if (nod_flag)
+ _ma_store_keypage_flag(share, new_root_buf, KEYPAGE_FLAG_ISNOD);
+ _ma_store_keynr(share, new_root_buf, keynr);
+ _ma_store_page_used(share, new_root_buf, share->keypage_header);
+ if ((new_root= _ma_new(info, DFLT_INIT_HITS, &page_link)) ==
+ HA_OFFSET_ERROR)
+ goto err1;
+
+ new_key= new_root_buf + keyinfo->block_length + nod_flag;
+
+ _ma_kpointer(info, new_key - nod_flag, old_root);
+ if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length,
+ old_root))
+ goto err1;
+ if (maria_rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf,
+ NULL)
+ == -1)
+ goto err1;
+ _ma_kpointer(info, new_key - nod_flag, new_page);
+ if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length,
+ new_page))
+ goto err1;
+ if (maria_rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf,
+ NULL)
+ == -1)
+ goto err1;
+ if (_ma_write_keypage(info, keyinfo, new_root, page_link->write_lock,
+ DFLT_INIT_HITS, new_root_buf))
+ goto err1;
+ share->state.key_root[keynr]= new_root;
+ DBUG_PRINT("rtree", ("new root page: %lu level: %d nod_flag: %u",
+ (ulong) new_root, 0,
+ _ma_test_if_nod(share, new_root_buf)));
+
+ my_afree((uchar*)new_root_buf);
+ break;
+err1:
+ my_afree((uchar*)new_root_buf);
+ DBUG_RETURN(-1); /* purecov: inspected */
+ }
+ default:
+ case -1: /* error */
+ {
+ break;
+ }
+ }
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Insert key into the tree - interface function
+
+ RETURN
+ -1 Error
+ 0 OK
+*/
+
+int maria_rtree_insert(MARIA_HA *info, uint keynr, uchar *key, uint key_length)
+{
+ DBUG_ENTER("maria_rtree_insert");
+ DBUG_RETURN((!key_length ||
+ (maria_rtree_insert_level(info, keynr, key, key_length, -1) == -1)) ?
+ -1 : 0);
+}
+
+
+/*
+ Fill reinsert page buffer
+
+ RETURN
+ -1 Error
+ 0 OK
+*/
+
+static int maria_rtree_fill_reinsert_list(stPageList *ReinsertList, my_off_t page,
+ int level)
+{
+ DBUG_ENTER("maria_rtree_fill_reinsert_list");
+ DBUG_PRINT("rtree", ("page: %lu level: %d", (ulong) page, level));
+ if (ReinsertList->n_pages == ReinsertList->m_pages)
+ {
+ ReinsertList->m_pages += REINSERT_BUFFER_INC;
+ if (!(ReinsertList->pages= (stPageLevel*)my_realloc((uchar*)ReinsertList->pages,
+ ReinsertList->m_pages * sizeof(stPageLevel), MYF(MY_ALLOW_ZERO_PTR))))
+ goto err1;
+ }
+ /* save page to ReinsertList */
+ ReinsertList->pages[ReinsertList->n_pages].offs= page;
+ ReinsertList->pages[ReinsertList->n_pages].level= level;
+ ReinsertList->n_pages++;
+ DBUG_RETURN(0);
+
+err1:
+ DBUG_RETURN(-1); /* purecov: inspected */
+}
+
+
+/*
+ Go down and delete key from the tree
+
+ RETURN
+ -1 Error
+ 0 Deleted
+ 1 Not found
+ 2 Empty leaf
+*/
+
+static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key,
+ uint key_length, my_off_t page,
+ uint *page_size,
+ stPageList *ReinsertList, int level)
+{
+ ulong i;
+ uint nod_flag;
+ int res;
+ uchar *page_buf, *last, *k;
+ MARIA_PINNED_PAGE *page_link;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("maria_rtree_delete_req");
+
+ if (!(page_buf= (uchar*) my_alloca((uint)keyinfo->block_length)))
+ {
+ my_errno= HA_ERR_OUT_OF_MEM;
+ DBUG_RETURN(-1); /* purecov: inspected */
+ }
+ if (!_ma_fetch_keypage(info, keyinfo, page, PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, page_buf, 0, &page_link))
+ goto err1;
+ nod_flag= _ma_test_if_nod(share, page_buf);
+ DBUG_PRINT("rtree", ("page: %lu level: %d nod_flag: %u",
+ (ulong) page, level, nod_flag));
+
+ k= rt_PAGE_FIRST_KEY(share, page_buf, nod_flag);
+ last= rt_PAGE_END(share, page_buf);
+
+ for (i= 0;
+ k < last;
+ k= rt_PAGE_NEXT_KEY(share, k, key_length, nod_flag), i++)
+ {
+ if (nod_flag)
+ {
+ /* not leaf */
+ if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_WITHIN))
+ {
+ switch ((res= maria_rtree_delete_req(info, keyinfo, key, key_length,
+ _ma_kpos(nod_flag, k), page_size, ReinsertList, level + 1)))
+ {
+ case 0: /* deleted */
+ {
+ /* test page filling */
+ if (*page_size + key_length >=
+ rt_PAGE_MIN_SIZE(keyinfo->block_length))
+ {
+ /* OK */
+ /* Calculate a new key value (MBR) for the shrinked block. */
+ if (maria_rtree_set_key_mbr(info, keyinfo, k, key_length,
+ _ma_kpos(nod_flag, k)))
+ goto err1;
+ page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, page_buf))
+ goto err1;
+ }
+ else
+ {
+ /*
+ Too small: delete key & add it descendant to reinsert list.
+ Store position and level of the block so that it can be
+ accessed later for inserting the remaining keys.
+ */
+ DBUG_PRINT("rtree", ("too small. move block to reinsert list"));
+ if (maria_rtree_fill_reinsert_list(ReinsertList,
+ _ma_kpos(nod_flag, k),
+ level + 1))
+ goto err1;
+ /*
+ Delete the key that references the block. This makes the
+ block disappear from the index. Hence we need to insert
+ its remaining keys later. Note: if the block is a branch
+ block, we do not only remove this block, but the whole
+ subtree. So we need to re-insert its keys on the same
+ level later to reintegrate the subtrees.
+ */
+ maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag);
+ page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, page_buf))
+ goto err1;
+ *page_size= _ma_get_page_used(share, page_buf);
+ }
+
+ goto ok;
+ }
+ case 1: /* not found - continue searching */
+ {
+ break;
+ }
+ case 2: /* vacuous case: last key in the leaf */
+ {
+ maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag);
+ page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, page_buf))
+ goto err1;
+ *page_size= _ma_get_page_used(share, page_buf);
+ res= 0;
+ goto ok;
+ }
+ default: /* error */
+ case -1:
+ {
+ goto err1;
+ }
+ }
+ }
+ }
+ else
+ {
+ /* leaf */
+ if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length,
+ MBR_EQUAL | MBR_DATA))
+ {
+ page_link->changed= 1;
+
+ maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag);
+ *page_size= _ma_get_page_used(share, page_buf);
+ if (*page_size == info->s->keypage_header)
+ {
+ /* last key in the leaf */
+ res= 2;
+ if (_ma_dispose(info, page, 0))
+ goto err1;
+ }
+ else
+ {
+ res= 0;
+ if (_ma_write_keypage(info, keyinfo, page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, page_buf))
+ goto err1;
+ }
+ goto ok;
+ }
+ }
+ }
+ res= 1;
+
+ok:
+ my_afree((uchar*)page_buf);
+ DBUG_RETURN(res);
+
+err1:
+ my_afree((uchar*)page_buf);
+ DBUG_RETURN(-1); /* purecov: inspected */
+}
+
+
+/*
+ Delete key - interface function
+
+ RETURN
+ -1 Error
+ 0 Deleted
+*/
+
+int maria_rtree_delete(MARIA_HA *info, uint keynr, uchar *key, uint key_length)
+{
+ MARIA_SHARE *share= info->s;
+ uint page_size;
+ stPageList ReinsertList;
+ my_off_t old_root;
+ MARIA_KEYDEF *keyinfo= info->s->keyinfo + keynr;
+ MARIA_PINNED_PAGE *page_link, *root_page_link;
+ DBUG_ENTER("maria_rtree_delete");
+
+ if ((old_root= share->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ DBUG_RETURN(-1); /* purecov: inspected */
+ }
+ DBUG_PRINT("rtree", ("starting deletion at root page: %lu",
+ (ulong) old_root));
+
+ ReinsertList.pages= NULL;
+ ReinsertList.n_pages= 0;
+ ReinsertList.m_pages= 0;
+
+ switch (maria_rtree_delete_req(info, keyinfo, key, key_length, old_root,
+ &page_size, &ReinsertList, 0)) {
+ case 2: /* empty */
+ {
+ share->state.key_root[keynr]= HA_OFFSET_ERROR;
+ DBUG_RETURN(0);
+ }
+ case 0: /* deleted */
+ {
+ uint nod_flag;
+ ulong i;
+ for (i= 0; i < ReinsertList.n_pages; ++i)
+ {
+ uchar *page_buf, *k, *last;
+
+ if (!(page_buf= (uchar*) my_alloca((uint)keyinfo->block_length)))
+ {
+ my_errno= HA_ERR_OUT_OF_MEM;
+ goto err1;
+ }
+ if (!_ma_fetch_keypage(info, keyinfo, ReinsertList.pages[i].offs,
+ PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, page_buf, 0, &page_link))
+ goto err1;
+ nod_flag= _ma_test_if_nod(share, page_buf);
+ DBUG_PRINT("rtree", ("reinserting keys from "
+ "page: %lu level: %d nod_flag: %u",
+ (ulong) ReinsertList.pages[i].offs,
+ ReinsertList.pages[i].level, nod_flag));
+
+ k= rt_PAGE_FIRST_KEY(share, page_buf, nod_flag);
+ last= rt_PAGE_END(share, page_buf);
+ for (; k < last; k= rt_PAGE_NEXT_KEY(share, k, key_length, nod_flag))
+ {
+ int res;
+ if ((res=
+ maria_rtree_insert_level(info, keynr, k, key_length,
+ ReinsertList.pages[i].level)) == -1)
+ {
+ my_afree(page_buf);
+ goto err1;
+ }
+ if (res)
+ {
+ ulong j;
+ DBUG_PRINT("rtree", ("root has been split, adjust levels"));
+ for (j= i; j < ReinsertList.n_pages; j++)
+ {
+ ReinsertList.pages[j].level++;
+ DBUG_PRINT("rtree", ("keys from page: %lu now level: %d",
+ (ulong) ReinsertList.pages[i].offs,
+ ReinsertList.pages[i].level));
+ }
+ }
+ }
+ my_afree(page_buf);
+ page_link->changed= 1;
+ if (_ma_dispose(info, ReinsertList.pages[i].offs, 0))
+ goto err1;
+ }
+ if (ReinsertList.pages)
+ my_free((uchar*) ReinsertList.pages, MYF(0));
+
+ /* check for redundant root (not leaf, 1 child) and eliminate */
+ if ((old_root= share->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ goto err1;
+ if (!_ma_fetch_keypage(info, keyinfo, old_root,
+ PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, info->buff, 0, &root_page_link))
+ goto err1;
+ nod_flag= _ma_test_if_nod(share, info->buff);
+ page_size= _ma_get_page_used(share, info->buff);
+ if (nod_flag && (page_size == share->keypage_header + key_length +
+ nod_flag))
+ {
+ my_off_t new_root= _ma_kpos(nod_flag,
+ rt_PAGE_FIRST_KEY(share, info->buff,
+ nod_flag));
+ root_page_link->changed= 1;
+ if (_ma_dispose(info, old_root, 0))
+ goto err1;
+ share->state.key_root[keynr]= new_root;
+ }
+ info->update= HA_STATE_DELETED;
+ DBUG_RETURN(0);
+
+err1:
+ DBUG_RETURN(-1); /* purecov: inspected */
+ }
+ case 1: /* not found */
+ {
+ my_errno= HA_ERR_KEY_NOT_FOUND;
+ DBUG_RETURN(-1); /* purecov: inspected */
+ }
+ default:
+ case -1: /* error */
+ DBUG_RETURN(-1); /* purecov: inspected */
+ }
+}
+
+
+/*
+ Estimate number of suitable keys in the tree
+
+ RETURN
+ estimated value
+*/
+
+ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, uchar *key,
+ uint key_length, uint flag)
+{
+ MARIA_SHARE *share= info->s;
+ MARIA_KEYDEF *keyinfo= share->keyinfo + keynr;
+ my_off_t root;
+ uint i= 0;
+ uint nod_flag, k_len;
+ uchar *page_buf, *k, *last;
+ double area= 0;
+ ha_rows res= 0;
+
+ if (flag & MBR_DISJOINT)
+ return info->state->records;
+
+ if ((root= share->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ return HA_POS_ERROR;
+ if (!(page_buf= (uchar*) my_alloca((uint)keyinfo->block_length)))
+ return HA_POS_ERROR;
+ if (!_ma_fetch_keypage(info, keyinfo, root, PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, page_buf, 0, 0))
+ goto err1;
+ nod_flag= _ma_test_if_nod(share, page_buf);
+
+ k_len= keyinfo->keylength - share->base.rec_reflength;
+
+ k= rt_PAGE_FIRST_KEY(share, page_buf, nod_flag);
+ last= rt_PAGE_END(share, page_buf);
+
+ for (; k < last; k= rt_PAGE_NEXT_KEY(share, k, k_len, nod_flag), i++)
+ {
+ if (nod_flag)
+ {
+ double k_area= maria_rtree_rect_volume(keyinfo->seg, k, key_length);
+
+ /* The following should be safe, even if we compare doubles */
+ if (k_area == 0)
+ {
+ if (flag & (MBR_CONTAIN | MBR_INTERSECT))
+ {
+ area += 1;
+ }
+ else if (flag & (MBR_WITHIN | MBR_EQUAL))
+ {
+ if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length,
+ MBR_WITHIN))
+ area += 1;
+ }
+ else
+ goto err1;
+ }
+ else
+ {
+ if (flag & (MBR_CONTAIN | MBR_INTERSECT))
+ {
+ area+= maria_rtree_overlapping_area(keyinfo->seg, key, k,
+ key_length) / k_area;
+ }
+ else if (flag & (MBR_WITHIN | MBR_EQUAL))
+ {
+ if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length,
+ MBR_WITHIN))
+ area+= (maria_rtree_rect_volume(keyinfo->seg, key, key_length) /
+ k_area);
+ }
+ else
+ goto err1;
+ }
+ }
+ else
+ {
+ if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, flag))
+ ++res;
+ }
+ }
+ if (nod_flag)
+ {
+ if (i)
+ res= (ha_rows) (area / i * info->state->records);
+ else
+ res= HA_POS_ERROR;
+ }
+
+ my_afree((uchar*)page_buf);
+ return res;
+
+err1:
+ my_afree(page_buf);
+ return HA_POS_ERROR;
+}
+
+#endif /*HAVE_RTREE_KEYS*/
diff --git a/storage/maria/ma_rt_index.h b/storage/maria/ma_rt_index.h
new file mode 100644
index 00000000000..999b41bc805
--- /dev/null
+++ b/storage/maria/ma_rt_index.h
@@ -0,0 +1,49 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB
+ & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _rt_index_h
+#define _rt_index_h
+
+#ifdef HAVE_RTREE_KEYS
+
+#define rt_PAGE_FIRST_KEY(share, page, nod_flag) (page + share->keypage_header + nod_flag)
+#define rt_PAGE_NEXT_KEY(share, key, key_length, nod_flag) (key + key_length +\
+ (nod_flag ? nod_flag : share->base.rec_reflength))
+#define rt_PAGE_END(share, page) (page + _ma_get_page_used(share, page))
+
+#define rt_PAGE_MIN_SIZE(block_length) ((uint)(block_length - KEYPAGE_CHECKSUM_SIZE) / 3)
+
+int maria_rtree_insert(MARIA_HA *info, uint keynr, uchar *key,
+ uint key_length);
+int maria_rtree_delete(MARIA_HA *info, uint keynr, uchar *key,
+ uint key_length);
+
+int maria_rtree_find_first(MARIA_HA *info, uint keynr, uchar *key,
+ uint key_length, uint search_flag);
+int maria_rtree_find_next(MARIA_HA *info, uint keynr, uint search_flag);
+
+int maria_rtree_get_first(MARIA_HA *info, uint keynr, uint key_length);
+int maria_rtree_get_next(MARIA_HA *info, uint keynr, uint key_length);
+
+ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, uchar *key,
+ uint key_length, uint flag);
+
+int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page,
+ uchar *key, uint key_length,
+ my_off_t *new_page_offs);
+
+#endif /*HAVE_RTREE_KEYS*/
+#endif /* _rt_index_h */
diff --git a/storage/maria/ma_rt_key.c b/storage/maria/ma_rt_key.c
new file mode 100644
index 00000000000..a7b43406d4f
--- /dev/null
+++ b/storage/maria/ma_rt_key.c
@@ -0,0 +1,113 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+#ifdef HAVE_RTREE_KEYS
+#include "ma_rt_index.h"
+#include "ma_rt_key.h"
+#include "ma_rt_mbr.h"
+
+/*
+ Add key to the page
+
+ RESULT VALUES
+ -1 Error
+ 0 Not split
+ 1 Split
+*/
+
+int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key,
+ uint key_length, uchar *page_buf, my_off_t *new_page)
+{
+ MARIA_SHARE *share= info->s;
+ uint page_size= _ma_get_page_used(share, page_buf);
+ uint nod_flag= _ma_test_if_nod(share, page_buf);
+ DBUG_ENTER("maria_rtree_add_key");
+
+ if (page_size + key_length + share->base.rec_reflength <=
+ keyinfo->block_length)
+ {
+ /* split won't be necessary */
+ if (nod_flag)
+ {
+ /* save key */
+ DBUG_ASSERT(_ma_kpos(nod_flag, key) < info->state->key_file_length);
+ memcpy(rt_PAGE_END(share, page_buf), key - nod_flag,
+ key_length + nod_flag);
+ page_size+= key_length + nod_flag;
+ }
+ else
+ {
+ /* save key */
+ DBUG_ASSERT(_ma_dpos(info, nod_flag, key + key_length +
+ share->base.rec_reflength) <
+ info->state->data_file_length +
+ share->base.pack_reclength);
+ memcpy(rt_PAGE_END(share, page_buf), key, key_length +
+ share->base.rec_reflength);
+ page_size+= key_length + share->base.rec_reflength;
+ }
+ _ma_store_page_used(share, page_buf, page_size);
+ DBUG_RETURN(0);
+ }
+
+ DBUG_RETURN(maria_rtree_split_page(info, keyinfo, page_buf, key, key_length,
+ new_page) ? -1 : 1);
+}
+
+
+/*
+ Delete key from the page
+*/
+
+int maria_rtree_delete_key(MARIA_HA *info, uchar *page_buf, uchar *key,
+ uint key_length, uint nod_flag)
+{
+ MARIA_SHARE *share= info->s;
+ uint16 page_size= _ma_get_page_used(share, page_buf);
+ uchar *key_start;
+
+ key_start= key - nod_flag;
+ if (!nod_flag)
+ key_length+= share->base.rec_reflength;
+
+ memmove(key_start, key + key_length, page_size - key_length -
+ (key - page_buf));
+ page_size-= key_length + nod_flag;
+
+ _ma_store_page_used(share, page_buf, page_size);
+ return 0;
+}
+
+
+/*
+ Calculate and store key MBR
+*/
+
+int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key,
+ uint key_length, my_off_t child_page)
+{
+ DBUG_ENTER("maria_rtree_set_key_mbr");
+ if (!_ma_fetch_keypage(info, keyinfo, child_page,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, info->buff, 0, 0))
+ DBUG_RETURN(-1);
+
+ DBUG_RETURN(maria_rtree_page_mbr(info, keyinfo->seg,
+ info->buff, key, key_length));
+}
+
+#endif /*HAVE_RTREE_KEYS*/
diff --git a/storage/maria/ma_rt_key.h b/storage/maria/ma_rt_key.h
new file mode 100644
index 00000000000..3f95d3d3e67
--- /dev/null
+++ b/storage/maria/ma_rt_key.h
@@ -0,0 +1,32 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB
+ & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Ramil Kalimullin, who has a shared copyright to this code */
+
+#ifndef _rt_key_h
+#define _rt_key_h
+
+#ifdef HAVE_RTREE_KEYS
+
+int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key,
+ uint key_length, uchar *page_buf, my_off_t *new_page);
+int maria_rtree_delete_key(MARIA_HA *info, uchar *page, uchar *key,
+ uint key_length, uint nod_flag);
+int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key,
+ uint key_length, my_off_t child_page);
+
+#endif /*HAVE_RTREE_KEYS*/
+#endif /* _rt_key_h */
diff --git a/storage/maria/ma_rt_mbr.c b/storage/maria/ma_rt_mbr.c
new file mode 100644
index 00000000000..dbde616401a
--- /dev/null
+++ b/storage/maria/ma_rt_mbr.c
@@ -0,0 +1,807 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB
+ & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+#ifdef HAVE_RTREE_KEYS
+
+#include "ma_rt_index.h"
+#include "ma_rt_mbr.h"
+
+#define INTERSECT_CMP(amin, amax, bmin, bmax) ((amin > bmax) || (bmin > amax))
+#define CONTAIN_CMP(amin, amax, bmin, bmax) ((bmin > amin) || (bmax < amax))
+#define WITHIN_CMP(amin, amax, bmin, bmax) ((amin > bmin) || (amax < bmax))
+#define DISJOINT_CMP(amin, amax, bmin, bmax) ((amin <= bmax) && (bmin <= amax))
+#define EQUAL_CMP(amin, amax, bmin, bmax) ((amin != bmin) || (amax != bmax))
+
+#define FCMP(A, B) ((int)(A) - (int)(B))
+#define p_inc(A, B, X) {A += X; B += X;}
+
+#define RT_CMP(nextflag) \
+ if (nextflag & MBR_INTERSECT) \
+ { \
+ if (INTERSECT_CMP(amin, amax, bmin, bmax)) \
+ return 1; \
+ } \
+ else if (nextflag & MBR_CONTAIN) \
+ { \
+ if (CONTAIN_CMP(amin, amax, bmin, bmax)) \
+ return 1; \
+ } \
+ else if (nextflag & MBR_WITHIN) \
+ { \
+ if (WITHIN_CMP(amin, amax, bmin, bmax)) \
+ return 1; \
+ } \
+ else if (nextflag & MBR_EQUAL) \
+ { \
+ if (EQUAL_CMP(amin, amax, bmin, bmax)) \
+ return 1; \
+ } \
+ else if (nextflag & MBR_DISJOINT) \
+ { \
+ if (DISJOINT_CMP(amin, amax, bmin, bmax)) \
+ return 1; \
+ }\
+ else /* if unknown comparison operator */ \
+ { \
+ DBUG_ASSERT(0); \
+ }
+
+#define RT_CMP_KORR(type, korr_func, len, nextflag) \
+{ \
+ type amin, amax, bmin, bmax; \
+ amin= korr_func(a); \
+ bmin= korr_func(b); \
+ amax= korr_func(a+len); \
+ bmax= korr_func(b+len); \
+ RT_CMP(nextflag); \
+}
+
+#define RT_CMP_GET(type, get_func, len, nextflag) \
+{ \
+ type amin, amax, bmin, bmax; \
+ get_func(amin, a); \
+ get_func(bmin, b); \
+ get_func(amax, a+len); \
+ get_func(bmax, b+len); \
+ RT_CMP(nextflag); \
+}
+
+/*
+ Compares two keys a and b depending on nextflag
+ nextflag can contain these flags:
+ MBR_INTERSECT(a,b) a overlaps b
+ MBR_CONTAIN(a,b) a contains b
+ MBR_DISJOINT(a,b) a disjoint b
+ MBR_WITHIN(a,b) a within b
+ MBR_EQUAL(a,b) All coordinates of MBRs are equal
+ MBR_DATA(a,b) Data reference is the same
+ Returns 0 on success.
+*/
+
+int maria_rtree_key_cmp(HA_KEYSEG *keyseg, uchar *b, uchar *a, uint key_length,
+ uint nextflag)
+{
+ for (; (int) key_length > 0; keyseg += 2 )
+ {
+ uint32 keyseg_length;
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_CMP_KORR(int8, mi_sint1korr, 1, nextflag);
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_CMP_KORR(uint8, mi_uint1korr, 1, nextflag);
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_CMP_KORR(int16, mi_sint2korr, 2, nextflag);
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_CMP_KORR(uint16, mi_uint2korr, 2, nextflag);
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_CMP_KORR(int32, mi_sint3korr, 3, nextflag);
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_CMP_KORR(uint32, mi_uint3korr, 3, nextflag);
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_CMP_KORR(int32, mi_sint4korr, 4, nextflag);
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_CMP_KORR(uint32, mi_uint4korr, 4, nextflag);
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_CMP_KORR(longlong, mi_sint8korr, 8, nextflag)
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_CMP_KORR(ulonglong, mi_uint8korr, 8, nextflag)
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ /* The following should be safe, even if we compare doubles */
+ RT_CMP_GET(float, mi_float4get, 4, nextflag);
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_CMP_GET(double, mi_float8get, 8, nextflag);
+ break;
+ case HA_KEYTYPE_END:
+ goto end;
+ default:
+ return 1;
+ }
+ keyseg_length= keyseg->length * 2;
+ key_length-= keyseg_length;
+ a+= keyseg_length;
+ b+= keyseg_length;
+ }
+
+end:
+ if (nextflag & MBR_DATA)
+ {
+ uchar *end= a + keyseg->length;
+ do
+ {
+ if (*a++ != *b++)
+ return FCMP(a[-1], b[-1]);
+ } while (a != end);
+ }
+ return 0;
+}
+
+#define RT_VOL_KORR(type, korr_func, len, cast) \
+{ \
+ type amin, amax; \
+ amin= korr_func(a); \
+ amax= korr_func(a+len); \
+ res *= (cast(amax) - cast(amin)); \
+}
+
+#define RT_VOL_GET(type, get_func, len, cast) \
+{ \
+ type amin, amax; \
+ get_func(amin, a); \
+ get_func(amax, a+len); \
+ res *= (cast(amax) - cast(amin)); \
+}
+
+/*
+ Calculates rectangle volume
+*/
+double maria_rtree_rect_volume(HA_KEYSEG *keyseg, uchar *a, uint key_length)
+{
+ double res= 1;
+ for (; (int)key_length > 0; keyseg += 2)
+ {
+ uint32 keyseg_length;
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_VOL_KORR(int8, mi_sint1korr, 1, (double));
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_VOL_KORR(uint8, mi_uint1korr, 1, (double));
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_VOL_KORR(int16, mi_sint2korr, 2, (double));
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_VOL_KORR(uint16, mi_uint2korr, 2, (double));
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_VOL_KORR(int32, mi_sint3korr, 3, (double));
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_VOL_KORR(uint32, mi_uint3korr, 3, (double));
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_VOL_KORR(int32, mi_sint4korr, 4, (double));
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_VOL_KORR(uint32, mi_uint4korr, 4, (double));
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_VOL_KORR(longlong, mi_sint8korr, 8, (double));
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_VOL_KORR(longlong, mi_sint8korr, 8, ulonglong2double);
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ RT_VOL_GET(float, mi_float4get, 4, (double));
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_VOL_GET(double, mi_float8get, 8, (double));
+ break;
+ case HA_KEYTYPE_END:
+ key_length= 0;
+ break;
+ default:
+ return -1;
+ }
+ keyseg_length= keyseg->length * 2;
+ key_length-= keyseg_length;
+ a+= keyseg_length;
+ }
+ return res;
+}
+
+#define RT_D_MBR_KORR(type, korr_func, len, cast) \
+{ \
+ type amin, amax; \
+ amin= korr_func(a); \
+ amax= korr_func(a+len); \
+ *res++= cast(amin); \
+ *res++= cast(amax); \
+}
+
+#define RT_D_MBR_GET(type, get_func, len, cast) \
+{ \
+ type amin, amax; \
+ get_func(amin, a); \
+ get_func(amax, a+len); \
+ *res++= cast(amin); \
+ *res++= cast(amax); \
+}
+
+
+/*
+ Creates an MBR as an array of doubles.
+*/
+
+int maria_rtree_d_mbr(HA_KEYSEG *keyseg, uchar *a, uint key_length, double *res)
+{
+ for (; (int)key_length > 0; keyseg += 2)
+ {
+ uint32 keyseg_length;
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_D_MBR_KORR(int8, mi_sint1korr, 1, (double));
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_D_MBR_KORR(uint8, mi_uint1korr, 1, (double));
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_D_MBR_KORR(int16, mi_sint2korr, 2, (double));
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_D_MBR_KORR(uint16, mi_uint2korr, 2, (double));
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_D_MBR_KORR(int32, mi_sint3korr, 3, (double));
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_D_MBR_KORR(uint32, mi_uint3korr, 3, (double));
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_D_MBR_KORR(int32, mi_sint4korr, 4, (double));
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_D_MBR_KORR(uint32, mi_uint4korr, 4, (double));
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_D_MBR_KORR(longlong, mi_sint8korr, 8, (double));
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_D_MBR_KORR(longlong, mi_sint8korr, 8, ulonglong2double);
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ RT_D_MBR_GET(float, mi_float4get, 4, (double));
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_D_MBR_GET(double, mi_float8get, 8, (double));
+ break;
+ case HA_KEYTYPE_END:
+ key_length= 0;
+ break;
+ default:
+ return 1;
+ }
+ keyseg_length= keyseg->length * 2;
+ key_length-= keyseg_length;
+ a+= keyseg_length;
+ }
+ return 0;
+}
+
+#define RT_COMB_KORR(type, korr_func, store_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ amin= korr_func(a); \
+ bmin= korr_func(b); \
+ amax= korr_func(a+len); \
+ bmax= korr_func(b+len); \
+ amin= min(amin, bmin); \
+ amax= max(amax, bmax); \
+ store_func(c, amin); \
+ store_func(c+len, amax); \
+}
+
+#define RT_COMB_GET(type, get_func, store_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ get_func(amin, a); \
+ get_func(bmin, b); \
+ get_func(amax, a+len); \
+ get_func(bmax, b+len); \
+ amin= min(amin, bmin); \
+ amax= max(amax, bmax); \
+ store_func(c, amin); \
+ store_func(c+len, amax); \
+}
+
+/*
+ Creates common minimal bounding rectungle
+ for two input rectagnles a and b
+ Result is written to c
+*/
+
+int maria_rtree_combine_rect(HA_KEYSEG *keyseg, uchar* a, uchar* b, uchar* c,
+ uint key_length)
+{
+ for ( ; (int) key_length > 0 ; keyseg += 2)
+ {
+ uint32 keyseg_length;
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_COMB_KORR(int8, mi_sint1korr, mi_int1store, 1);
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_COMB_KORR(uint8, mi_uint1korr, mi_int1store, 1);
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_COMB_KORR(int16, mi_sint2korr, mi_int2store, 2);
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_COMB_KORR(uint16, mi_uint2korr, mi_int2store, 2);
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_COMB_KORR(int32, mi_sint3korr, mi_int3store, 3);
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_COMB_KORR(uint32, mi_uint3korr, mi_int3store, 3);
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_COMB_KORR(int32, mi_sint4korr, mi_int4store, 4);
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_COMB_KORR(uint32, mi_uint4korr, mi_int4store, 4);
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_COMB_KORR(longlong, mi_sint8korr, mi_int8store, 8);
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_COMB_KORR(ulonglong, mi_uint8korr, mi_int8store, 8);
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ RT_COMB_GET(float, mi_float4get, mi_float4store, 4);
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_COMB_GET(double, mi_float8get, mi_float8store, 8);
+ break;
+ case HA_KEYTYPE_END:
+ return 0;
+ default:
+ return 1;
+ }
+ keyseg_length= keyseg->length * 2;
+ key_length-= keyseg_length;
+ a+= keyseg_length;
+ b+= keyseg_length;
+ c+= keyseg_length;
+ }
+ return 0;
+}
+
+
+#define RT_OVL_AREA_KORR(type, korr_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ amin= korr_func(a); \
+ bmin= korr_func(b); \
+ amax= korr_func(a+len); \
+ bmax= korr_func(b+len); \
+ amin= max(amin, bmin); \
+ amax= min(amax, bmax); \
+ if (amin >= amax) \
+ return 0; \
+ res *= amax - amin; \
+}
+
+#define RT_OVL_AREA_GET(type, get_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ get_func(amin, a); \
+ get_func(bmin, b); \
+ get_func(amax, a+len); \
+ get_func(bmax, b+len); \
+ amin= max(amin, bmin); \
+ amax= min(amax, bmax); \
+ if (amin >= amax) \
+ return 0; \
+ res *= amax - amin; \
+}
+
+/*
+Calculates overlapping area of two MBRs a & b
+*/
+double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, uchar* a, uchar* b,
+ uint key_length)
+{
+ double res= 1;
+ for (; (int) key_length > 0 ; keyseg += 2)
+ {
+ uint32 keyseg_length;
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_OVL_AREA_KORR(int8, mi_sint1korr, 1);
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_OVL_AREA_KORR(uint8, mi_uint1korr, 1);
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_OVL_AREA_KORR(int16, mi_sint2korr, 2);
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_OVL_AREA_KORR(uint16, mi_uint2korr, 2);
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_OVL_AREA_KORR(int32, mi_sint3korr, 3);
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_OVL_AREA_KORR(uint32, mi_uint3korr, 3);
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_OVL_AREA_KORR(int32, mi_sint4korr, 4);
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_OVL_AREA_KORR(uint32, mi_uint4korr, 4);
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_OVL_AREA_KORR(longlong, mi_sint8korr, 8);
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_OVL_AREA_KORR(longlong, mi_sint8korr, 8);
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ RT_OVL_AREA_GET(float, mi_float4get, 4);
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_OVL_AREA_GET(double, mi_float8get, 8);
+ break;
+ case HA_KEYTYPE_END:
+ return res;
+ default:
+ return -1;
+ }
+ keyseg_length= keyseg->length * 2;
+ key_length-= keyseg_length;
+ a+= keyseg_length;
+ b+= keyseg_length;
+ }
+ return res;
+}
+
+#define RT_AREA_INC_KORR(type, korr_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ amin= korr_func(a); \
+ bmin= korr_func(b); \
+ amax= korr_func(a+len); \
+ bmax= korr_func(b+len); \
+ a_area *= (((double)amax) - ((double)amin)); \
+ loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+}
+
+#define RT_AREA_INC_GET(type, get_func, len)\
+{\
+ type amin, amax, bmin, bmax; \
+ get_func(amin, a); \
+ get_func(bmin, b); \
+ get_func(amax, a+len); \
+ get_func(bmax, b+len); \
+ a_area *= (((double)amax) - ((double)amin)); \
+ loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+}
+
+/*
+ Calculates MBR_AREA(a+b) - MBR_AREA(a)
+*/
+
+double maria_rtree_area_increase(HA_KEYSEG *keyseg, uchar *a, uchar *b,
+ uint key_length, double *ab_area)
+{
+ double a_area= 1.0;
+ double loc_ab_area= 1.0;
+
+ *ab_area= 1.0;
+ for (; (int)key_length > 0; keyseg += 2)
+ {
+ uint32 keyseg_length;
+
+ if (keyseg->null_bit) /* Handle NULL part */
+ return -1;
+
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_AREA_INC_KORR(int8, mi_sint1korr, 1);
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_AREA_INC_KORR(uint8, mi_uint1korr, 1);
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_AREA_INC_KORR(int16, mi_sint2korr, 2);
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_AREA_INC_KORR(uint16, mi_uint2korr, 2);
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_AREA_INC_KORR(int32, mi_sint3korr, 3);
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_AREA_INC_KORR(int32, mi_uint3korr, 3);
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_AREA_INC_KORR(int32, mi_sint4korr, 4);
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_AREA_INC_KORR(uint32, mi_uint4korr, 4);
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_AREA_INC_KORR(longlong, mi_sint8korr, 8);
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_AREA_INC_KORR(longlong, mi_sint8korr, 8);
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ RT_AREA_INC_GET(float, mi_float4get, 4);
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_AREA_INC_GET(double, mi_float8get, 8);
+ break;
+ case HA_KEYTYPE_END:
+ goto safe_end;
+ default:
+ return -1;
+ }
+ keyseg_length= keyseg->length * 2;
+ key_length-= keyseg_length;
+ a+= keyseg_length;
+ b+= keyseg_length;
+ }
+safe_end:
+ *ab_area= loc_ab_area;
+ return loc_ab_area - a_area;
+}
+
+#define RT_PERIM_INC_KORR(type, korr_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ amin= korr_func(a); \
+ bmin= korr_func(b); \
+ amax= korr_func(a+len); \
+ bmax= korr_func(b+len); \
+ a_perim+= (((double)amax) - ((double)amin)); \
+ *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+}
+
+#define RT_PERIM_INC_GET(type, get_func, len)\
+{\
+ type amin, amax, bmin, bmax; \
+ get_func(amin, a); \
+ get_func(bmin, b); \
+ get_func(amax, a+len); \
+ get_func(bmax, b+len); \
+ a_perim+= (((double)amax) - ((double)amin)); \
+ *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+}
+
+/*
+Calculates MBR_PERIMETER(a+b) - MBR_PERIMETER(a)
+*/
+double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, uchar* a, uchar* b,
+ uint key_length, double *ab_perim)
+{
+ double a_perim= 0.0;
+
+ *ab_perim= 0.0;
+ for (; (int)key_length > 0; keyseg += 2)
+ {
+ uint32 keyseg_length;
+
+ if (keyseg->null_bit) /* Handle NULL part */
+ return -1;
+
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_PERIM_INC_KORR(int8, mi_sint1korr, 1);
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_PERIM_INC_KORR(uint8, mi_uint1korr, 1);
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_PERIM_INC_KORR(int16, mi_sint2korr, 2);
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_PERIM_INC_KORR(uint16, mi_uint2korr, 2);
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_PERIM_INC_KORR(int32, mi_sint3korr, 3);
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_PERIM_INC_KORR(int32, mi_uint3korr, 3);
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_PERIM_INC_KORR(int32, mi_sint4korr, 4);
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_PERIM_INC_KORR(uint32, mi_uint4korr, 4);
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_PERIM_INC_KORR(longlong, mi_sint8korr, 8);
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_PERIM_INC_KORR(longlong, mi_sint8korr, 8);
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ RT_PERIM_INC_GET(float, mi_float4get, 4);
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_PERIM_INC_GET(double, mi_float8get, 8);
+ break;
+ case HA_KEYTYPE_END:
+ return *ab_perim - a_perim;
+ default:
+ return -1;
+ }
+ keyseg_length= keyseg->length * 2;
+ key_length-= keyseg_length;
+ a+= keyseg_length;
+ b+= keyseg_length;
+ }
+ return *ab_perim - a_perim;
+}
+
+
+#define RT_PAGE_MBR_KORR(share, type, korr_func, store_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ amin= korr_func(k + inc); \
+ amax= korr_func(k + inc + len); \
+ k= rt_PAGE_NEXT_KEY(share, k, k_len, nod_flag); \
+ for (; k < last; k= rt_PAGE_NEXT_KEY(share, k, k_len, nod_flag)) \
+{ \
+ bmin= korr_func(k + inc); \
+ bmax= korr_func(k + inc + len); \
+ if (amin > bmin) \
+ amin= bmin; \
+ if (amax < bmax) \
+ amax= bmax; \
+} \
+ store_func(c, amin); \
+ c += len; \
+ store_func(c, amax); \
+ c += len; \
+ inc += 2 * len; \
+}
+
+#define RT_PAGE_MBR_GET(share, type, get_func, store_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ get_func(amin, k + inc); \
+ get_func(amax, k + inc + len); \
+ k= rt_PAGE_NEXT_KEY(share, k, k_len, nod_flag); \
+ for (; k < last; k= rt_PAGE_NEXT_KEY(share, k, k_len, nod_flag)) \
+{ \
+ get_func(bmin, k + inc); \
+ get_func(bmax, k + inc + len); \
+ if (amin > bmin) \
+ amin= bmin; \
+ if (amax < bmax) \
+ amax= bmax; \
+} \
+ store_func(c, amin); \
+ c += len; \
+ store_func(c, amax); \
+ c += len; \
+ inc += 2 * len; \
+}
+
+/*
+ Calculates key page total MBR= MBR(key1) + MBR(key2) + ...
+*/
+int maria_rtree_page_mbr(MARIA_HA *info, HA_KEYSEG *keyseg, uchar *page_buf,
+ uchar *c, uint key_length)
+{
+ MARIA_SHARE *share= info->s;
+ uint inc= 0;
+ uint k_len= key_length;
+ uint nod_flag= _ma_test_if_nod(share, page_buf);
+ uchar *k;
+ uchar *last= rt_PAGE_END(share, page_buf);
+
+ for (; (int)key_length > 0; keyseg += 2)
+ {
+ key_length -= keyseg->length * 2;
+
+ /* Handle NULL part */
+ if (keyseg->null_bit)
+ {
+ return 1;
+ }
+
+ k= rt_PAGE_FIRST_KEY(share, page_buf, nod_flag);
+
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_PAGE_MBR_KORR(share, int8, mi_sint1korr, mi_int1store, 1);
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_PAGE_MBR_KORR(share, uint8, mi_uint1korr, mi_int1store, 1);
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_PAGE_MBR_KORR(share, int16, mi_sint2korr, mi_int2store, 2);
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_PAGE_MBR_KORR(share, uint16, mi_uint2korr, mi_int2store, 2);
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_PAGE_MBR_KORR(share, int32, mi_sint3korr, mi_int3store, 3);
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_PAGE_MBR_KORR(share, uint32, mi_uint3korr, mi_int3store, 3);
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_PAGE_MBR_KORR(share, int32, mi_sint4korr, mi_int4store, 4);
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_PAGE_MBR_KORR(share, uint32, mi_uint4korr, mi_int4store, 4);
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_PAGE_MBR_KORR(share, longlong, mi_sint8korr, mi_int8store, 8);
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_PAGE_MBR_KORR(share, ulonglong, mi_uint8korr, mi_int8store, 8);
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ RT_PAGE_MBR_GET(share, float, mi_float4get, mi_float4store, 4);
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_PAGE_MBR_GET(share, double, mi_float8get, mi_float8store, 8);
+ break;
+ case HA_KEYTYPE_END:
+ return 0;
+ default:
+ return 1;
+ }
+ }
+ return 0;
+}
+
+#endif /*HAVE_RTREE_KEYS*/
diff --git a/storage/maria/ma_rt_mbr.h b/storage/maria/ma_rt_mbr.h
new file mode 100644
index 00000000000..ad855518e62
--- /dev/null
+++ b/storage/maria/ma_rt_mbr.h
@@ -0,0 +1,38 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB
+ & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _rt_mbr_h
+#define _rt_mbr_h
+
+#ifdef HAVE_RTREE_KEYS
+
+int maria_rtree_key_cmp(HA_KEYSEG *keyseg, uchar *a, uchar *b, uint key_length,
+ uint nextflag);
+int maria_rtree_combine_rect(HA_KEYSEG *keyseg,uchar *, uchar *, uchar*,
+ uint key_length);
+double maria_rtree_rect_volume(HA_KEYSEG *keyseg, uchar*, uint key_length);
+int maria_rtree_d_mbr(HA_KEYSEG *keyseg, uchar *a, uint key_length,
+ double *res);
+double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, uchar *a, uchar *b,
+ uint key_length);
+double maria_rtree_area_increase(HA_KEYSEG *keyseg, uchar *a, uchar *b,
+ uint key_length, double *ab_area);
+double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, uchar* a, uchar* b,
+ uint key_length, double *ab_perim);
+int maria_rtree_page_mbr(MARIA_HA *info, HA_KEYSEG *keyseg, uchar *page_buf,
+ uchar* c, uint key_length);
+#endif /*HAVE_RTREE_KEYS*/
+#endif /* _rt_mbr_h */
diff --git a/storage/maria/ma_rt_split.c b/storage/maria/ma_rt_split.c
new file mode 100644
index 00000000000..c6228cd65f3
--- /dev/null
+++ b/storage/maria/ma_rt_split.c
@@ -0,0 +1,374 @@
+/* Copyright (C) 2006 MySQL AB & Alexey Botchkov & MySQL Finland AB
+ & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+#ifdef HAVE_RTREE_KEYS
+
+#include "ma_rt_index.h"
+#include "ma_rt_key.h"
+#include "ma_rt_mbr.h"
+
+typedef struct
+{
+ double square;
+ int n_node;
+ uchar *key;
+ double *coords;
+} SplitStruct;
+
+inline static double *reserve_coords(double **d_buffer, int n_dim)
+{
+ double *coords= *d_buffer;
+ (*d_buffer)+= n_dim * 2;
+ return coords;
+}
+
+static void mbr_join(double *a, const double *b, int n_dim)
+{
+ double *end= a + n_dim * 2;
+ do
+ {
+ if (a[0] > b[0])
+ a[0]= b[0];
+
+ if (a[1] < b[1])
+ a[1]= b[1];
+
+ a+= 2;
+ b+= 2;
+ } while (a != end);
+}
+
+/*
+Counts the square of mbr which is a join of a and b
+*/
+static double mbr_join_square(const double *a, const double *b, int n_dim)
+{
+ const double *end= a + n_dim * 2;
+ double square= 1.0;
+ do
+ {
+ square *=
+ ((a[1] < b[1]) ? b[1] : a[1]) - ((a[0] > b[0]) ? b[0] : a[0]);
+
+ a+= 2;
+ b+= 2;
+ } while (a != end);
+
+ return square;
+}
+
+static double count_square(const double *a, int n_dim)
+{
+ const double *end= a + n_dim * 2;
+ double square= 1.0;
+ do
+ {
+ square *= a[1] - a[0];
+ a+= 2;
+ } while (a != end);
+ return square;
+}
+
+inline static void copy_coords(double *dst, const double *src, int n_dim)
+{
+ memcpy(dst, src, sizeof(double) * (n_dim * 2));
+}
+
+/*
+Select two nodes to collect group upon
+*/
+static void pick_seeds(SplitStruct *node, int n_entries,
+ SplitStruct **seed_a, SplitStruct **seed_b, int n_dim)
+{
+ SplitStruct *cur1;
+ SplitStruct *lim1= node + (n_entries - 1);
+ SplitStruct *cur2;
+ SplitStruct *lim2= node + n_entries;
+
+ double max_d= -DBL_MAX;
+ double d;
+
+ for (cur1= node; cur1 < lim1; cur1++)
+ {
+ for (cur2=cur1 + 1; cur2 < lim2; cur2++)
+ {
+
+ d= mbr_join_square(cur1->coords, cur2->coords, n_dim) - cur1->square -
+ cur2->square;
+ if (d > max_d)
+ {
+ max_d= d;
+ *seed_a= cur1;
+ *seed_b= cur2;
+ }
+ }
+ }
+}
+
+/*
+Select next node and group where to add
+*/
+static void pick_next(SplitStruct *node, int n_entries, double *g1, double *g2,
+ SplitStruct **choice, int *n_group, int n_dim)
+{
+ SplitStruct *cur= node;
+ SplitStruct *end= node + n_entries;
+
+ double max_diff= -DBL_MAX;
+
+ for (; cur < end; cur++)
+ {
+ double diff;
+ double abs_diff;
+
+ if (cur->n_node)
+ {
+ continue;
+ }
+
+ diff= mbr_join_square(g1, cur->coords, n_dim) -
+ mbr_join_square(g2, cur->coords, n_dim);
+
+ abs_diff= fabs(diff);
+ if (abs_diff > max_diff)
+ {
+ max_diff= abs_diff;
+ *n_group= 1 + (diff > 0);
+ *choice= cur;
+ }
+ }
+}
+
+/*
+Mark not-in-group entries as n_group
+*/
+static void mark_all_entries(SplitStruct *node, int n_entries, int n_group)
+{
+ SplitStruct *cur= node;
+ SplitStruct *end= node + n_entries;
+
+ for (; cur < end; cur++)
+ {
+ if (cur->n_node)
+ {
+ continue;
+ }
+ cur->n_node= n_group;
+ }
+}
+
+static int split_maria_rtree_node(SplitStruct *node, int n_entries,
+ int all_size, /* Total key's size */
+ int key_size,
+ int min_size, /* Minimal group size */
+ int size1, int size2 /* initial group sizes */,
+ double **d_buffer, int n_dim)
+{
+ SplitStruct *cur;
+ SplitStruct *a;
+ SplitStruct *b;
+ double *g1= reserve_coords(d_buffer, n_dim);
+ double *g2= reserve_coords(d_buffer, n_dim);
+ SplitStruct *next;
+ int next_node;
+ int i;
+ SplitStruct *end= node + n_entries;
+ LINT_INIT(a);
+ LINT_INIT(b);
+ LINT_INIT(next);
+ LINT_INIT(next_node);
+
+ if (all_size < min_size * 2)
+ {
+ return 1;
+ }
+
+ cur= node;
+ for (; cur < end; cur++)
+ {
+ cur->square= count_square(cur->coords, n_dim);
+ cur->n_node= 0;
+ }
+
+ pick_seeds(node, n_entries, &a, &b, n_dim);
+ a->n_node= 1;
+ b->n_node= 2;
+
+
+ copy_coords(g1, a->coords, n_dim);
+ size1+= key_size;
+ copy_coords(g2, b->coords, n_dim);
+ size2+= key_size;
+
+
+ for (i=n_entries - 2; i>0; --i)
+ {
+ if (all_size - (size2 + key_size) < min_size) /* Can't write into group 2 */
+ {
+ mark_all_entries(node, n_entries, 1);
+ break;
+ }
+
+ if (all_size - (size1 + key_size) < min_size) /* Can't write into group 1 */
+ {
+ mark_all_entries(node, n_entries, 2);
+ break;
+ }
+
+ pick_next(node, n_entries, g1, g2, &next, &next_node, n_dim);
+ if (next_node == 1)
+ {
+ size1+= key_size;
+ mbr_join(g1, next->coords, n_dim);
+ }
+ else
+ {
+ size2+= key_size;
+ mbr_join(g2, next->coords, n_dim);
+ }
+ next->n_node= next_node;
+ }
+
+ return 0;
+}
+
+int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *page, uchar *key,
+ uint key_length, my_off_t *new_page_offs)
+{
+ MARIA_SHARE *share= info->s;
+ int n1, n2; /* Number of items in groups */
+ SplitStruct *task;
+ SplitStruct *cur;
+ SplitStruct *stop;
+ double *coord_buf;
+ double *next_coord;
+ double *old_coord;
+ int n_dim;
+ uchar *source_cur, *cur1, *cur2;
+ uchar *new_page;
+ int err_code= 0;
+ uint nod_flag= _ma_test_if_nod(share, page);
+ uint full_length= key_length + (nod_flag ? nod_flag :
+ share->base.rec_reflength);
+ int max_keys= ((_ma_get_page_used(share, page) - share->keypage_header) /
+ (full_length));
+ MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
+ DBUG_ENTER("maria_rtree_split_page");
+ DBUG_PRINT("rtree", ("splitting block"));
+
+ n_dim= keyinfo->keysegs / 2;
+
+ if (!(coord_buf= (double*) my_alloca(n_dim * 2 * sizeof(double) *
+ (max_keys + 1 + 4) +
+ sizeof(SplitStruct) * (max_keys + 1))))
+ DBUG_RETURN(-1); /* purecov: inspected */
+
+ task= (SplitStruct *)(coord_buf + n_dim * 2 * (max_keys + 1 + 4));
+
+ next_coord= coord_buf;
+
+ stop= task + max_keys;
+ source_cur= rt_PAGE_FIRST_KEY(share, page, nod_flag);
+
+ for (cur= task;
+ cur < stop;
+ cur++, source_cur= rt_PAGE_NEXT_KEY(share, source_cur, key_length,
+ nod_flag))
+ {
+ cur->coords= reserve_coords(&next_coord, n_dim);
+ cur->key= source_cur;
+ maria_rtree_d_mbr(keyinfo->seg, source_cur, key_length, cur->coords);
+ }
+
+ cur->coords= reserve_coords(&next_coord, n_dim);
+ maria_rtree_d_mbr(keyinfo->seg, key, key_length, cur->coords);
+ cur->key= key;
+
+ old_coord= next_coord;
+
+ if (split_maria_rtree_node(task, max_keys + 1,
+ _ma_get_page_used(share, page) + full_length + 2,
+ full_length,
+ rt_PAGE_MIN_SIZE(keyinfo->block_length),
+ 2, 2, &next_coord, n_dim))
+ {
+ err_code= 1;
+ goto split_err;
+ }
+
+ if (!(new_page= (uchar*) my_alloca((uint)keyinfo->block_length)))
+ {
+ err_code= -1;
+ goto split_err;
+ }
+
+ stop= task + (max_keys + 1);
+ cur1= rt_PAGE_FIRST_KEY(share, page, nod_flag);
+ cur2= rt_PAGE_FIRST_KEY(share, new_page, nod_flag);
+
+ n1= n2= 0;
+ for (cur= task; cur < stop; cur++)
+ {
+ uchar *to;
+ if (cur->n_node == 1)
+ {
+ to= cur1;
+ cur1= rt_PAGE_NEXT_KEY(share, cur1, key_length, nod_flag);
+ n1++;
+ }
+ else
+ {
+ to= cur2;
+ cur2= rt_PAGE_NEXT_KEY(share, cur2, key_length, nod_flag);
+ n2++;
+ }
+ if (to != cur->key)
+ memcpy(to - nod_flag, cur->key - nod_flag, full_length);
+ }
+
+ bzero(new_page, share->keypage_header);
+ if (nod_flag)
+ _ma_store_keypage_flag(share, new_page, KEYPAGE_FLAG_ISNOD);
+ _ma_store_keynr(share, new_page, keyinfo->key_nr);
+ _ma_store_page_used(share, page, share->keypage_header + n1 * full_length)
+ _ma_store_page_used(share, new_page, share->keypage_header +
+ n2 * full_length);
+
+ if ((*new_page_offs= _ma_new(info, DFLT_INIT_HITS, &page_link)) ==
+ HA_OFFSET_ERROR)
+ err_code= -1;
+ else
+ err_code= _ma_write_keypage(info, keyinfo, *new_page_offs,
+ page_link->write_lock,
+ DFLT_INIT_HITS, new_page);
+ DBUG_PRINT("rtree", ("split new block: %lu", (ulong) *new_page_offs));
+
+ my_afree((uchar*)new_page);
+
+split_err:
+ /**
+ @todo the cast below is useless (coord_buf is uchar*); at the moment we
+ changed all "byte" to "uchar", some casts became useless and should be
+ removed.
+ */
+ my_afree((uchar*) coord_buf);
+ DBUG_RETURN(err_code);
+}
+
+#endif /*HAVE_RTREE_KEYS*/
diff --git a/storage/maria/ma_rt_test.c b/storage/maria/ma_rt_test.c
new file mode 100644
index 00000000000..e41ebdce500
--- /dev/null
+++ b/storage/maria/ma_rt_test.c
@@ -0,0 +1,475 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Testing of the basic functions of a MARIA rtree table */
+/* Written by Alex Barkov who has a shared copyright to this code */
+
+
+#include "maria.h"
+
+#ifdef HAVE_RTREE_KEYS
+
+#include "ma_rt_index.h"
+
+#define MAX_REC_LENGTH 1024
+#define ndims 2
+#define KEYALG HA_KEY_ALG_RTREE
+
+static int read_with_pos(MARIA_HA * file, int silent);
+static void create_record(char *record,uint rownr);
+static void create_record1(char *record,uint rownr);
+static void print_record(char * record,my_off_t offs,const char * tail);
+static int run_test(const char *filename);
+
+static double rt_data[]=
+{
+ /*1*/ 0,10,0,10,
+ /*2*/ 5,15,0,10,
+ /*3*/ 0,10,5,15,
+ /*4*/ 10,20,10,20,
+ /*5*/ 0,10,0,10,
+ /*6*/ 5,15,0,10,
+ /*7*/ 0,10,5,15,
+ /*8*/ 10,20,10,20,
+ /*9*/ 0,10,0,10,
+ /*10*/ 5,15,0,10,
+ /*11*/ 0,10,5,15,
+ /*12*/ 10,20,10,20,
+ /*13*/ 0,10,0,10,
+ /*14*/ 5,15,0,10,
+ /*15*/ 0,10,5,15,
+ /*16*/ 10,20,10,20,
+ /*17*/ 5,15,0,10,
+ /*18*/ 0,10,5,15,
+ /*19*/ 10,20,10,20,
+ /*20*/ 0,10,0,10,
+
+ /*1*/ 100,110,0,10,
+ /*2*/ 105,115,0,10,
+ /*3*/ 100,110,5,15,
+ /*4*/ 110,120,10,20,
+ /*5*/ 100,110,0,10,
+ /*6*/ 105,115,0,10,
+ /*7*/ 100,110,5,15,
+ /*8*/ 110,120,10,20,
+ /*9*/ 100,110,0,10,
+ /*10*/ 105,115,0,10,
+ /*11*/ 100,110,5,15,
+ /*12*/ 110,120,10,20,
+ /*13*/ 100,110,0,10,
+ /*14*/ 105,115,0,10,
+ /*15*/ 100,110,5,15,
+ /*16*/ 110,120,10,20,
+ /*17*/ 105,115,0,10,
+ /*18*/ 100,110,5,15,
+ /*19*/ 110,120,10,20,
+ /*20*/ 100,110,0,10,
+ -1
+};
+
+int main(int argc __attribute__((unused)),char *argv[] __attribute__((unused)))
+{
+ MY_INIT(argv[0]);
+ maria_init();
+ exit(run_test("rt_test"));
+}
+
+
+static int run_test(const char *filename)
+{
+ MARIA_HA *file;
+ MARIA_UNIQUEDEF uniquedef;
+ MARIA_CREATE_INFO create_info;
+ MARIA_COLUMNDEF recinfo[20];
+ MARIA_KEYDEF keyinfo[20];
+ HA_KEYSEG keyseg[20];
+ key_range range;
+
+ int silent=0;
+ int opt_unique=0;
+ int create_flag=0;
+ int key_type=HA_KEYTYPE_DOUBLE;
+ int key_length=8;
+ int null_fields=0;
+ int nrecords=sizeof(rt_data)/(sizeof(double)*4);/* 3000;*/
+ int rec_length=0;
+ int uniques=0;
+ int i;
+ int error;
+ int row_count=0;
+ char record[MAX_REC_LENGTH];
+ char read_record[MAX_REC_LENGTH];
+ int upd= 10;
+ ha_rows hrows;
+
+ /* Define a column for NULLs and DEL markers*/
+
+ recinfo[0].type=FIELD_NORMAL;
+ recinfo[0].length=1; /* For NULL bits */
+ rec_length=1;
+
+ /* Define 2*ndims columns for coordinates*/
+
+ for (i=1; i<=2*ndims ;i++)
+ {
+ recinfo[i].type=FIELD_NORMAL;
+ recinfo[i].length=key_length;
+ rec_length+=key_length;
+ }
+
+ /* Define a key with 2*ndims segments */
+
+ keyinfo[0].seg=keyseg;
+ keyinfo[0].keysegs=2*ndims;
+ keyinfo[0].flag=0;
+ keyinfo[0].key_alg=KEYALG;
+
+ for (i=0; i<2*ndims; i++)
+ {
+ keyinfo[0].seg[i].type= key_type;
+ keyinfo[0].seg[i].flag=0; /* Things like HA_REVERSE_SORT */
+ keyinfo[0].seg[i].start= (key_length*i)+1;
+ keyinfo[0].seg[i].length=key_length;
+ keyinfo[0].seg[i].null_bit= null_fields ? 2 : 0;
+ keyinfo[0].seg[i].null_pos=0;
+ keyinfo[0].seg[i].language=default_charset_info->number;
+ }
+
+ if (!silent)
+ printf("- Creating isam-file\n");
+
+ bzero((char*) &create_info,sizeof(create_info));
+ create_info.max_rows=10000000;
+
+ if (maria_create(filename,
+ DYNAMIC_RECORD,
+ 1, /* keys */
+ keyinfo,
+ 1+2*ndims+opt_unique, /* columns */
+ recinfo,uniques,&uniquedef,&create_info,create_flag))
+ goto err;
+
+ if (!silent)
+ printf("- Open isam-file\n");
+
+ if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED)))
+ goto err;
+
+ if (!silent)
+ printf("- Writing key:s\n");
+
+ for (i=0; i<nrecords; i++ )
+ {
+ create_record(record,i);
+ error=maria_write(file,record);
+ print_record(record,maria_position(file),"\n");
+ if (!error)
+ {
+ row_count++;
+ }
+ else
+ {
+ printf("maria_write: %d\n", error);
+ goto err;
+ }
+ }
+
+ if ((error=read_with_pos(file,silent)))
+ goto err;
+
+ if (!silent)
+ printf("- Reading rows with key\n");
+
+ for (i=0 ; i < nrecords ; i++)
+ {
+ my_errno=0;
+ create_record(record,i);
+
+ bzero((char*) read_record,MAX_REC_LENGTH);
+ error=maria_rkey(file,read_record,0,record+1,0,HA_READ_MBR_EQUAL);
+
+ if (error && error!=HA_ERR_KEY_NOT_FOUND)
+ {
+ printf(" maria_rkey: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ if (error == HA_ERR_KEY_NOT_FOUND)
+ {
+ print_record(record,maria_position(file)," NOT FOUND\n");
+ continue;
+ }
+ print_record(read_record,maria_position(file),"\n");
+ }
+
+ if (!silent)
+ printf("- Deleting rows\n");
+ for (i=0; i < nrecords/4; i++)
+ {
+ my_errno=0;
+ bzero((char*) read_record,MAX_REC_LENGTH);
+ error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR);
+ if (error)
+ {
+ printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file),"\n");
+
+ error=maria_delete(file,read_record);
+ if (error)
+ {
+ printf("pos: %2d maria_delete: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ }
+
+ if (!silent)
+ printf("- Updating rows with position\n");
+ for (i=0; i < (nrecords - nrecords/4) ; i++)
+ {
+ my_errno=0;
+ bzero((char*) read_record,MAX_REC_LENGTH);
+ error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR);
+ if (error)
+ {
+ if (error==HA_ERR_RECORD_DELETED)
+ continue;
+ printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file),"");
+ create_record(record,i+nrecords*upd);
+ printf("\t-> ");
+ print_record(record,maria_position(file),"\n");
+ error=maria_update(file,read_record,record);
+ if (error)
+ {
+ printf("pos: %2d maria_update: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ }
+
+ if ((error=read_with_pos(file,silent)))
+ goto err;
+
+ if (!silent)
+ printf("- Test maria_rkey then a sequence of maria_rnext_same\n");
+
+ create_record(record, nrecords*4/5);
+ print_record(record,0," search for\n");
+
+ if ((error=maria_rkey(file,read_record,0,record+1,0,HA_READ_MBR_INTERSECT)))
+ {
+ printf("maria_rkey: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file)," maria_rkey\n");
+ row_count=1;
+
+ for (;;)
+ {
+ if ((error=maria_rnext_same(file,read_record)))
+ {
+ if (error==HA_ERR_END_OF_FILE)
+ break;
+ printf("maria_next: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file)," maria_rnext_same\n");
+ row_count++;
+ }
+ printf(" %d rows\n",row_count);
+
+ if (!silent)
+ printf("- Test maria_rfirst then a sequence of maria_rnext\n");
+
+ error=maria_rfirst(file,read_record,0);
+ if (error)
+ {
+ printf("maria_rfirst: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ row_count=1;
+ print_record(read_record,maria_position(file)," maria_frirst\n");
+
+ for (i=0;i<nrecords;i++)
+ {
+ if ((error=maria_rnext(file,read_record,0)))
+ {
+ if (error==HA_ERR_END_OF_FILE)
+ break;
+ printf("maria_next: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file)," maria_rnext\n");
+ row_count++;
+ }
+ printf(" %d rows\n",row_count);
+
+ if (!silent)
+ printf("- Test maria_records_in_range()\n");
+
+ create_record1(record, nrecords*4/5);
+ print_record(record,0,"\n");
+
+ range.key= record+1;
+ range.length= 1000; /* Big enough */
+ range.flag= HA_READ_MBR_INTERSECT;
+ hrows= maria_records_in_range(file,0, &range, (key_range*) 0);
+ printf(" %ld rows\n", (long) hrows);
+
+ if (maria_close(file)) goto err;
+ maria_end();
+ my_end(MY_CHECK_ERROR);
+
+ return 0;
+
+err:
+ printf("got error: %3d when using maria-database\n",my_errno);
+ return 1; /* skip warning */
+}
+
+
+
+static int read_with_pos (MARIA_HA * file,int silent)
+{
+ int error;
+ int i;
+ char read_record[MAX_REC_LENGTH];
+
+ if (!silent)
+ printf("- Reading rows with position\n");
+ for (i=0;;i++)
+ {
+ my_errno=0;
+ bzero((char*) read_record,MAX_REC_LENGTH);
+ error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR);
+ if (error)
+ {
+ if (error==HA_ERR_END_OF_FILE)
+ break;
+ if (error==HA_ERR_RECORD_DELETED)
+ continue;
+ printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno);
+ return error;
+ }
+ print_record(read_record,maria_position(file),"\n");
+ }
+ return 0;
+}
+
+
+#ifdef NOT_USED
+static void bprint_record(char * record,
+ my_off_t offs __attribute__((unused)),
+ const char * tail)
+{
+ int i;
+ char * pos;
+ i=(unsigned char)record[0];
+ printf("%02X ",i);
+
+ for( pos=record+1, i=0; i<32; i++,pos++){
+ int b=(unsigned char)*pos;
+ printf("%02X",b);
+ }
+ printf("%s",tail);
+}
+#endif
+
+
+static void print_record(char * record,
+ my_off_t offs __attribute__((unused)),
+ const char * tail)
+{
+ int i;
+ char * pos;
+ double c;
+
+ printf(" rec=(%d)",(unsigned char)record[0]);
+ for ( pos=record+1, i=0; i<2*ndims; i++)
+ {
+ memcpy(&c,pos,sizeof(c));
+ float8get(c,pos);
+ printf(" %.14g ",c);
+ pos+=sizeof(c);
+ }
+ printf("pos=%ld",(long int)offs);
+ printf("%s",tail);
+}
+
+
+
+static void create_record1(char *record,uint rownr)
+{
+ int i;
+ char * pos;
+ double c=rownr+10;
+
+ bzero((char*) record,MAX_REC_LENGTH);
+ record[0]=0x01; /* DEL marker */
+
+ for ( pos=record+1, i=0; i<2*ndims; i++)
+ {
+ memcpy(pos,&c,sizeof(c));
+ float8store(pos,c);
+ pos+=sizeof(c);
+ }
+}
+
+#ifdef NOT_USED
+
+static void create_record0(char *record,uint rownr)
+{
+ int i;
+ char * pos;
+ double c=rownr+10;
+ double c0=0;
+
+ bzero((char*) record,MAX_REC_LENGTH);
+ record[0]=0x01; /* DEL marker */
+
+ for ( pos=record+1, i=0; i<ndims; i++)
+ {
+ memcpy(pos,&c0,sizeof(c0));
+ float8store(pos,c0);
+ pos+=sizeof(c0);
+ memcpy(pos,&c,sizeof(c));
+ float8store(pos,c);
+ pos+=sizeof(c);
+ }
+}
+
+#endif
+
+static void create_record(char *record,uint rownr)
+{
+ int i;
+ char *pos;
+ double *data= rt_data+rownr*4;
+ record[0]=0x01; /* DEL marker */
+ for ( pos=record+1, i=0; i<ndims*2; i++)
+ {
+ float8store(pos,data[i]);
+ pos+=8;
+ }
+}
+
+#else
+int main(int argc __attribute__((unused)),char *argv[] __attribute__((unused)))
+{
+ exit(0);
+}
+#endif /*HAVE_RTREE_KEYS*/
diff --git a/storage/maria/ma_scan.c b/storage/maria/ma_scan.c
new file mode 100644
index 00000000000..48e9e3400ce
--- /dev/null
+++ b/storage/maria/ma_scan.c
@@ -0,0 +1,73 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Read through all rows sequntially */
+
+#include "maria_def.h"
+
+int maria_scan_init(register MARIA_HA *info)
+{
+ DBUG_ENTER("maria_scan_init");
+
+ info->cur_row.nextpos= info->s->pack.header_length; /* Read first record */
+ info->lastinx= -1; /* Can't forward or backward */
+ if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache))
+ DBUG_RETURN(my_errno);
+
+ if ((*info->s->scan_init)(info))
+ DBUG_RETURN(my_errno);
+ DBUG_RETURN(0);
+}
+
+/*
+ Read a row based on position.
+
+ SYNOPSIS
+ maria_scan()
+ info Maria handler
+ record Read data here
+
+ RETURN
+ 0 ok
+ HA_ERR_END_OF_FILE End of file
+ # Error code
+*/
+
+int maria_scan(MARIA_HA *info, uchar *record)
+{
+ DBUG_ENTER("maria_scan");
+ /* Init all but update-flag */
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ DBUG_RETURN((*info->s->scan)(info, record, info->cur_row.nextpos, 1));
+}
+
+
+void maria_scan_end(MARIA_HA *info)
+{
+ (*info->s->scan_end)(info);
+}
+
+
+int _ma_def_scan_remember_pos(MARIA_HA *info, MARIA_RECORD_POS *lastpos)
+{
+ *lastpos= info->cur_row.lastpos;
+ return 0;
+}
+
+
+void _ma_def_scan_restore_pos(MARIA_HA *info, MARIA_RECORD_POS lastpos)
+{
+ info->cur_row.nextpos= lastpos;
+}
diff --git a/storage/maria/ma_search.c b/storage/maria/ma_search.c
new file mode 100644
index 00000000000..a76e9dfd63d
--- /dev/null
+++ b/storage/maria/ma_search.c
@@ -0,0 +1,2032 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* key handling functions */
+
+#include "ma_fulltext.h"
+#include "m_ctype.h"
+
+static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *page,
+ uchar *key, uchar *keypos,
+ uint *return_key_length);
+
+ /* Check index */
+
+int _ma_check_index(MARIA_HA *info, int inx)
+{
+ if (inx < 0 || ! maria_is_key_active(info->s->state.key_map, inx))
+ {
+ my_errno=HA_ERR_WRONG_INDEX;
+ return -1;
+ }
+ if (info->lastinx != inx) /* Index changed */
+ {
+ info->lastinx = inx;
+ info->page_changed=1;
+ info->update= ((info->update & (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED)) |
+ HA_STATE_NEXT_FOUND | HA_STATE_PREV_FOUND);
+ }
+ if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache))
+ return(-1);
+ return(inx);
+} /* _ma_check_index */
+
+
+/**
+ @breif Search after row by a key
+
+ @note
+ Position to row is stored in info->lastpos
+
+ @return
+ @retval 0 ok (key found)
+ @retval -1 Not found
+ @retval 1 If one should continue search on higher level
+*/
+
+int _ma_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uchar *key, uint key_len, uint nextflag, register my_off_t pos)
+{
+ my_bool last_key;
+ int error,flag;
+ uint nod_flag, used_length;
+ uchar *keypos,*maxpos;
+ uchar lastkey[HA_MAX_KEY_BUFF],*buff;
+ DBUG_ENTER("_ma_search");
+ DBUG_PRINT("enter",("pos: %lu nextflag: %u lastpos: %lu",
+ (ulong) pos, nextflag, (ulong) info->cur_row.lastpos));
+ DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key,key_len););
+
+ if (pos == HA_OFFSET_ERROR)
+ {
+ my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ if (!(nextflag & (SEARCH_SMALLER | SEARCH_BIGGER | SEARCH_LAST)))
+ DBUG_RETURN(-1); /* Not found ; return error */
+ DBUG_RETURN(1); /* Search at upper levels */
+ }
+
+ if (!(buff= _ma_fetch_keypage(info,keyinfo, pos,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, info->keyread_buff,
+ test(!(nextflag & SEARCH_SAVE_BUFF)), 0)))
+ goto err;
+ DBUG_DUMP("page", buff, _ma_get_page_used(info->s, buff));
+
+ flag=(*keyinfo->bin_search)(info,keyinfo,buff,key,key_len,nextflag,
+ &keypos,lastkey, &last_key);
+ if (flag == MARIA_FOUND_WRONG_KEY)
+ DBUG_RETURN(-1);
+ _ma_get_used_and_nod(info->s, buff, used_length, nod_flag);
+ maxpos= buff + used_length -1;
+
+ if (flag)
+ {
+ if ((error= _ma_search(info,keyinfo,key,key_len,nextflag,
+ _ma_kpos(nod_flag,keypos))) <= 0)
+ DBUG_RETURN(error);
+
+ if (flag >0)
+ {
+ if (nextflag & (SEARCH_SMALLER | SEARCH_LAST) &&
+ keypos == buff + info->s->keypage_header + nod_flag)
+ DBUG_RETURN(1); /* Bigger than key */
+ }
+ else if (nextflag & SEARCH_BIGGER && keypos >= maxpos)
+ DBUG_RETURN(1); /* Smaller than key */
+ }
+ else
+ {
+ if ((nextflag & SEARCH_FIND) && nod_flag &&
+ ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME ||
+ key_len != USE_WHOLE_KEY))
+ {
+ if ((error= _ma_search(info,keyinfo,key,key_len,SEARCH_FIND,
+ _ma_kpos(nod_flag,keypos))) >= 0 ||
+ my_errno != HA_ERR_KEY_NOT_FOUND)
+ DBUG_RETURN(error);
+ info->last_keypage= HA_OFFSET_ERROR; /* Buffer not in mem */
+ }
+ }
+ if (pos != info->last_keypage)
+ {
+ uchar *old_buff=buff;
+ if (!(buff= _ma_fetch_keypage(info,keyinfo, pos,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,DFLT_INIT_HITS,
+ info->keyread_buff,
+ test(!(nextflag & SEARCH_SAVE_BUFF)), 0)))
+ goto err;
+ keypos=buff+(keypos-old_buff);
+ maxpos=buff+(maxpos-old_buff);
+ }
+
+ if ((nextflag & (SEARCH_SMALLER | SEARCH_LAST)) && flag != 0)
+ {
+ uint not_used[2];
+ if (_ma_get_prev_key(info,keyinfo, buff, info->lastkey, keypos,
+ &info->lastkey_length))
+ goto err;
+ if (!(nextflag & SEARCH_SMALLER) &&
+ ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key, key_len,
+ SEARCH_FIND, not_used))
+ {
+ my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */
+ goto err;
+ }
+ }
+ else
+ {
+ info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey);
+ if (!info->lastkey_length)
+ goto err;
+ memcpy(info->lastkey,lastkey,info->lastkey_length);
+ }
+ info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length);
+ /* Save position for a possible read next / previous */
+ info->int_keypos= info->keyread_buff+ (keypos-buff);
+ info->int_maxpos= info->keyread_buff+ (maxpos-buff);
+ info->int_nod_flag=nod_flag;
+ info->int_keytree_version=keyinfo->version;
+ info->last_search_keypage=info->last_keypage;
+ info->page_changed=0;
+ /* Set marker that buffer was used (Marker for mi_search_next()) */
+ info->keyread_buff_used= (info->keyread_buff != buff);
+
+ DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos));
+ DBUG_RETURN(0);
+
+err:
+ DBUG_PRINT("exit",("Error: %d",my_errno));
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ info->page_changed=1;
+ DBUG_RETURN (-1);
+} /* _ma_search */
+
+
+ /* Search after key in page-block */
+ /* If packed key puts smaller or identical key in buff */
+ /* ret_pos point to where find or bigger key starts */
+ /* ARGSUSED */
+
+int _ma_bin_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uchar *page, const uchar *key, uint key_len,
+ uint comp_flag, uchar **ret_pos,
+ uchar *buff __attribute__((unused)), my_bool *last_key)
+{
+ int flag;
+ uint start, mid, end, save_end, totlength, nod_flag, used_length;
+ uint not_used[2];
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_bin_search");
+
+ LINT_INIT(flag);
+ _ma_get_used_and_nod(share, page, used_length, nod_flag);
+
+ totlength= keyinfo->keylength + nod_flag;
+ DBUG_ASSERT(used_length >= share->keypage_header + nod_flag + totlength);
+
+ start=0;
+ mid=1;
+ save_end= end= ((used_length - nod_flag - share->keypage_header) /
+ totlength-1);
+ DBUG_PRINT("test",("page_length: %u end: %u", used_length, end));
+ page+= share->keypage_header + nod_flag;
+
+ while (start != end)
+ {
+ mid= (start+end)/2;
+ if ((flag=ha_key_cmp(keyinfo->seg,(uchar*) page+(uint) mid*totlength,
+ (uchar*) key, key_len, comp_flag, not_used))
+ >= 0)
+ end=mid;
+ else
+ start=mid+1;
+ }
+ if (mid != start)
+ flag=ha_key_cmp(keyinfo->seg, (uchar*) page+(uint) start*totlength,
+ (uchar*) key, key_len, comp_flag, not_used);
+ if (flag < 0)
+ start++; /* point at next, bigger key */
+ *ret_pos= (char*) (page+(uint) start*totlength);
+ *last_key= end == save_end;
+ DBUG_PRINT("exit",("flag: %d keypos: %d",flag,start));
+ DBUG_RETURN(flag);
+} /* _ma_bin_search */
+
+
+/*
+ Locate a packed key in a key page.
+
+ SYNOPSIS
+ _ma_seq_search()
+ info Open table information.
+ keyinfo Key definition information.
+ page Key page (beginning).
+ key Search key.
+ key_len Length to use from search key or USE_WHOLE_KEY
+ comp_flag Search flags like SEARCH_SAME etc.
+ ret_pos RETURN Position in key page behind this key.
+ buff RETURN Copy of previous or identical unpacked key.
+ last_key RETURN If key is last in page.
+
+ DESCRIPTION
+ Used instead of _ma_bin_search() when key is packed.
+ Puts smaller or identical key in buff.
+ Key is searched sequentially.
+
+ RETURN
+ > 0 Key in 'buff' is smaller than search key.
+ 0 Key in 'buff' is identical to search key.
+ < 0 Not found.
+*/
+
+int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uchar *page, const uchar *key, uint key_len,
+ uint comp_flag, uchar **ret_pos,
+ uchar *buff, my_bool *last_key)
+{
+ MARIA_SHARE *share= info->s;
+ int flag;
+ uint nod_flag, length, used_length, not_used[2];
+ uchar t_buff[HA_MAX_KEY_BUFF], *end;
+ DBUG_ENTER("_ma_seq_search");
+
+ LINT_INIT(flag);
+ LINT_INIT(length);
+
+ _ma_get_used_and_nod(share, page, used_length, nod_flag);
+ end= page + used_length;
+ page+= share->keypage_header + nod_flag;
+ *ret_pos= (uchar*) page;
+ t_buff[0]=0; /* Avoid bugs */
+ while (page < end)
+ {
+ length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,t_buff);
+ if (length == 0 || page > end)
+ {
+ maria_print_error(share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_PRINT("error",
+ ("Found wrong key: length: %u page: 0x%lx end: 0x%lx",
+ length, (long) page, (long) end));
+ DBUG_RETURN(MARIA_FOUND_WRONG_KEY);
+ }
+ if ((flag= ha_key_cmp(keyinfo->seg, (uchar*) t_buff,(uchar*) key,
+ key_len,comp_flag, not_used)) >= 0)
+ break;
+#ifdef EXTRA_DEBUG
+ DBUG_PRINT("loop",("page: 0x%lx key: '%s' flag: %d", (long) page, t_buff,
+ flag));
+#endif
+ memcpy(buff,t_buff,length);
+ *ret_pos=page;
+ }
+ if (flag == 0)
+ memcpy(buff,t_buff,length); /* Result is first key */
+ *last_key= page == end;
+ DBUG_PRINT("exit",("flag: %d ret_pos: 0x%lx", flag, (long) *ret_pos));
+ DBUG_RETURN(flag);
+} /* _ma_seq_search */
+
+
+int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uchar *page, const uchar *key, uint key_len,
+ uint nextflag, uchar **ret_pos, uchar *buff,
+ my_bool *last_key)
+{
+ MARIA_SHARE *share= info->s;
+ /*
+ my_flag is raw comparison result to be changed according to
+ SEARCH_NO_FIND,SEARCH_LAST and HA_REVERSE_SORT flags.
+ flag is the value returned by ha_key_cmp and as treated as final
+ */
+ int flag=0, my_flag=-1;
+ uint nod_flag, used_length, length, len, matched, cmplen, kseg_len;
+ uint prefix_len,suffix_len;
+ int key_len_skip, seg_len_pack, key_len_left;
+ uchar *end;
+ uchar *vseg, *saved_vseg, *saved_from;
+ uchar *sort_order= keyinfo->seg->charset->sort_order;
+ uchar tt_buff[HA_MAX_KEY_BUFF+2], *t_buff=tt_buff+2;
+ uchar *saved_to;
+ const uchar *kseg;
+ uint saved_length=0, saved_prefix_len=0;
+ uint length_pack;
+ DBUG_ENTER("_ma_prefix_search");
+
+ LINT_INIT(length);
+ LINT_INIT(prefix_len);
+ LINT_INIT(seg_len_pack);
+ LINT_INIT(saved_from);
+ LINT_INIT(saved_to);
+ LINT_INIT(saved_vseg);
+
+ t_buff[0]=0; /* Avoid bugs */
+ _ma_get_used_and_nod(share, page, used_length, nod_flag);
+ end= page + used_length;
+ page+= share->keypage_header + nod_flag;
+ *ret_pos= page;
+ kseg= key;
+
+ get_key_pack_length(kseg_len, length_pack, kseg);
+ key_len_skip=length_pack+kseg_len;
+ key_len_left=(int) key_len- (int) key_len_skip;
+ /* If key_len is 0, then lenght_pack is 1, then key_len_left is -1. */
+ cmplen=(key_len_left>=0) ? kseg_len : key_len-length_pack;
+ DBUG_PRINT("info",("key: '%.*s'",kseg_len,kseg));
+
+ /*
+ Keys are compressed the following way:
+
+ If the max length of first key segment <= 127 bytes the prefix is
+ 1 uchar else it's 2 byte
+
+ (prefix) length The high bit is set if this is a prefix for the prev key.
+ [suffix length] Packed length of suffix if the previous was a prefix.
+ (suffix) data Key data bytes (past the common prefix or whole segment).
+ [next-key-seg] Next key segments (([packed length], data), ...)
+ pointer Reference to the data file (last_keyseg->length).
+ */
+
+ matched=0; /* how many char's from prefix were alredy matched */
+ len=0; /* length of previous key unpacked */
+
+ while (page < end)
+ {
+ uint packed= *page & 128;
+
+ vseg= (uchar*) page;
+ if (keyinfo->seg->length >= 127)
+ {
+ suffix_len=mi_uint2korr(vseg) & 32767;
+ vseg+=2;
+ }
+ else
+ suffix_len= *vseg++ & 127;
+
+ if (packed)
+ {
+ if (suffix_len == 0)
+ {
+ /* == 0x80 or 0x8000, same key, prefix length == old key length. */
+ prefix_len=len;
+ }
+ else
+ {
+ /* > 0x80 or 0x8000, this is prefix lgt, packed suffix lgt follows. */
+ prefix_len=suffix_len;
+ get_key_length(suffix_len,vseg);
+ }
+ }
+ else
+ {
+ /* Not packed. No prefix used from last key. */
+ prefix_len=0;
+ }
+
+ len=prefix_len+suffix_len;
+ seg_len_pack=get_pack_length(len);
+ t_buff=tt_buff+3-seg_len_pack;
+ store_key_length(t_buff,len);
+
+ if (prefix_len > saved_prefix_len)
+ memcpy(t_buff+seg_len_pack+saved_prefix_len,saved_vseg,
+ prefix_len-saved_prefix_len);
+ saved_vseg=vseg;
+ saved_prefix_len=prefix_len;
+
+ DBUG_PRINT("loop",("page: '%.*s%.*s'",prefix_len,t_buff+seg_len_pack,
+ suffix_len,vseg));
+ {
+ uchar *from= vseg+suffix_len;
+ HA_KEYSEG *keyseg;
+ uint l;
+
+ for (keyseg=keyinfo->seg+1 ; keyseg->type ; keyseg++ )
+ {
+
+ if (keyseg->flag & HA_NULL_PART)
+ {
+ if (!(*from++))
+ continue;
+ }
+ if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK))
+ {
+ get_key_length(l,from);
+ }
+ else
+ l=keyseg->length;
+
+ from+=l;
+ }
+ from+= keyseg->length;
+ page= (uchar*) from+nod_flag;
+ length= (uint) (from-vseg);
+ }
+
+ if (page > end)
+ {
+ maria_print_error(share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_PRINT("error",
+ ("Found wrong key: length: %u page: 0x%lx end: %lx",
+ length, (long) page, (long) end));
+ DBUG_RETURN(MARIA_FOUND_WRONG_KEY);
+ }
+
+ if (matched >= prefix_len)
+ {
+ /* We have to compare. But we can still skip part of the key */
+ uint left;
+ const uchar *k= kseg+prefix_len;
+
+ /*
+ If prefix_len > cmplen then we are in the end-space comparison
+ phase. Do not try to acces the key any more ==> left= 0.
+ */
+ left= ((len <= cmplen) ? suffix_len :
+ ((prefix_len < cmplen) ? cmplen - prefix_len : 0));
+
+ matched=prefix_len+left;
+
+ if (sort_order)
+ {
+ for (my_flag=0;left;left--)
+ if ((my_flag= (int) sort_order[*vseg++] - (int) sort_order[*k++]))
+ break;
+ }
+ else
+ {
+ for (my_flag=0;left;left--)
+ if ((my_flag= (int) *vseg++ - (int) *k++))
+ break;
+ }
+
+ if (my_flag>0) /* mismatch */
+ break;
+ if (my_flag==0) /* match */
+ {
+ /*
+ ** len cmplen seg_left_len more_segs
+ ** < matched=len; continue search
+ ** > = prefix ? found : (matched=len; continue search)
+ ** > < - ok, found
+ ** = < - ok, found
+ ** = = - ok, found
+ ** = = + next seg
+ */
+ if (len < cmplen)
+ {
+ if ((keyinfo->seg->type != HA_KEYTYPE_TEXT &&
+ keyinfo->seg->type != HA_KEYTYPE_VARTEXT1 &&
+ keyinfo->seg->type != HA_KEYTYPE_VARTEXT2))
+ my_flag= -1;
+ else
+ {
+ /* We have to compare k and vseg as if they were space extended */
+ const uchar *k_end= k+ (cmplen - len);
+ for ( ; k < k_end && *k == ' '; k++) ;
+ if (k == k_end)
+ goto cmp_rest; /* should never happen */
+ if ((uchar) *k < (uchar) ' ')
+ {
+ my_flag= 1; /* Compared string is smaller */
+ break;
+ }
+ my_flag= -1; /* Continue searching */
+ }
+ }
+ else if (len > cmplen)
+ {
+ uchar *vseg_end;
+ if ((nextflag & SEARCH_PREFIX) && key_len_left == 0)
+ goto fix_flag;
+
+ /* We have to compare k and vseg as if they were space extended */
+ for (vseg_end= vseg + (len-cmplen) ;
+ vseg < vseg_end && *vseg == (uchar) ' ';
+ vseg++, matched++) ;
+ DBUG_ASSERT(vseg < vseg_end);
+
+ if ((uchar) *vseg > (uchar) ' ')
+ {
+ my_flag= 1; /* Compared string is smaller */
+ break;
+ }
+ my_flag= -1; /* Continue searching */
+ }
+ else
+ {
+ cmp_rest:
+ if (key_len_left>0)
+ {
+ uint not_used[2];
+ if ((flag = ha_key_cmp(keyinfo->seg+1,vseg,
+ k, key_len_left, nextflag, not_used)) >= 0)
+ break;
+ }
+ else
+ {
+ /*
+ at this line flag==-1 if the following lines were already
+ visited and 0 otherwise, i.e. flag <=0 here always !!!
+ */
+ fix_flag:
+ DBUG_ASSERT(flag <= 0);
+ if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST))
+ flag=(nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1;
+ if (flag>=0)
+ break;
+ }
+ }
+ }
+ matched-=left;
+ }
+ /* else (matched < prefix_len) ---> do nothing. */
+
+ memcpy(buff,t_buff,saved_length=seg_len_pack+prefix_len);
+ saved_to= buff+saved_length;
+ saved_from= saved_vseg;
+ saved_length=length;
+ *ret_pos=page;
+ }
+ if (my_flag)
+ flag=(keyinfo->seg->flag & HA_REVERSE_SORT) ? -my_flag : my_flag;
+ if (flag == 0)
+ {
+ memcpy(buff,t_buff,saved_length=seg_len_pack+prefix_len);
+ saved_to= buff+saved_length;
+ saved_from= saved_vseg;
+ saved_length=length;
+ }
+ if (saved_length)
+ memcpy(saved_to, (uchar*) saved_from, saved_length);
+
+ *last_key= page == end;
+
+ DBUG_PRINT("exit",("flag: %d ret_pos: 0x%lx", flag, (long) *ret_pos));
+ DBUG_RETURN(flag);
+} /* _ma_prefix_search */
+
+
+ /* Get pos to a key_block */
+
+my_off_t _ma_kpos(uint nod_flag, uchar *after_key)
+{
+ after_key-=nod_flag;
+ switch (nod_flag) {
+#if SIZEOF_OFF_T > 4
+ case 7:
+ return mi_uint7korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH;
+ case 6:
+ return mi_uint6korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH;
+ case 5:
+ return mi_uint5korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH;
+#else
+ case 7:
+ after_key++;
+ case 6:
+ after_key++;
+ case 5:
+ after_key++;
+#endif
+ case 4:
+ return ((my_off_t) mi_uint4korr(after_key))*MARIA_MIN_KEY_BLOCK_LENGTH;
+ case 3:
+ return ((my_off_t) mi_uint3korr(after_key))*MARIA_MIN_KEY_BLOCK_LENGTH;
+ case 2:
+ return (my_off_t) (mi_uint2korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH);
+ case 1:
+ return (uint) (*after_key)*MARIA_MIN_KEY_BLOCK_LENGTH;
+ case 0: /* At leaf page */
+ default: /* Impossible */
+ return(HA_OFFSET_ERROR);
+ }
+} /* _kpos */
+
+
+ /* Save pos to a key_block */
+
+void _ma_kpointer(register MARIA_HA *info, register uchar *buff, my_off_t pos)
+{
+ pos/=MARIA_MIN_KEY_BLOCK_LENGTH;
+ switch (info->s->base.key_reflength) {
+#if SIZEOF_OFF_T > 4
+ case 7: mi_int7store(buff,pos); break;
+ case 6: mi_int6store(buff,pos); break;
+ case 5: mi_int5store(buff,pos); break;
+#else
+ case 7: *buff++=0;
+ /* fall trough */
+ case 6: *buff++=0;
+ /* fall trough */
+ case 5: *buff++=0;
+ /* fall trough */
+#endif
+ case 4: mi_int4store(buff,pos); break;
+ case 3: mi_int3store(buff,pos); break;
+ case 2: mi_int2store(buff,(uint) pos); break;
+ case 1: buff[0]= (uchar) pos; break;
+ default: abort(); /* impossible */
+ }
+} /* _ma_kpointer */
+
+
+ /* Calc pos to a data-record from a key */
+
+MARIA_RECORD_POS _ma_dpos(MARIA_HA *info, uint nod_flag,
+ const uchar *after_key)
+{
+ my_off_t pos;
+ after_key-=(nod_flag + info->s->rec_reflength);
+ switch (info->s->rec_reflength) {
+#if SIZEOF_OFF_T > 4
+ case 8: pos= (my_off_t) mi_uint8korr(after_key); break;
+ case 7: pos= (my_off_t) mi_uint7korr(after_key); break;
+ case 6: pos= (my_off_t) mi_uint6korr(after_key); break;
+ case 5: pos= (my_off_t) mi_uint5korr(after_key); break;
+#else
+ case 8: pos= (my_off_t) mi_uint4korr(after_key+4); break;
+ case 7: pos= (my_off_t) mi_uint4korr(after_key+3); break;
+ case 6: pos= (my_off_t) mi_uint4korr(after_key+2); break;
+ case 5: pos= (my_off_t) mi_uint4korr(after_key+1); break;
+#endif
+ case 4: pos= (my_off_t) mi_uint4korr(after_key); break;
+ case 3: pos= (my_off_t) mi_uint3korr(after_key); break;
+ case 2: pos= (my_off_t) mi_uint2korr(after_key); break;
+ default:
+ pos=0L; /* Shut compiler up */
+ }
+ return info->s->keypos_to_recpos(info, pos);
+}
+
+
+/* Calc position from a record pointer ( in delete link chain ) */
+
+MARIA_RECORD_POS _ma_rec_pos(MARIA_HA *info, uchar *ptr)
+{
+ MARIA_SHARE *s= info->s;
+
+ my_off_t pos;
+ switch (s->rec_reflength) {
+#if SIZEOF_OFF_T > 4
+ case 8:
+ pos= (my_off_t) mi_uint8korr(ptr);
+ if (pos == HA_OFFSET_ERROR)
+ return HA_OFFSET_ERROR; /* end of list */
+ break;
+ case 7:
+ pos= (my_off_t) mi_uint7korr(ptr);
+ if (pos == (((my_off_t) 1) << 56) -1)
+ return HA_OFFSET_ERROR; /* end of list */
+ break;
+ case 6:
+ pos= (my_off_t) mi_uint6korr(ptr);
+ if (pos == (((my_off_t) 1) << 48) -1)
+ return HA_OFFSET_ERROR; /* end of list */
+ break;
+ case 5:
+ pos= (my_off_t) mi_uint5korr(ptr);
+ if (pos == (((my_off_t) 1) << 40) -1)
+ return HA_OFFSET_ERROR; /* end of list */
+ break;
+#else
+ case 8:
+ case 7:
+ case 6:
+ case 5:
+ ptr+= (s->rec_reflength-4);
+ /* fall through */
+#endif
+ case 4:
+ pos= (my_off_t) mi_uint4korr(ptr);
+ if (pos == (my_off_t) (uint32) ~0L)
+ return HA_OFFSET_ERROR;
+ break;
+ case 3:
+ pos= (my_off_t) mi_uint3korr(ptr);
+ if (pos == (my_off_t) (1 << 24) -1)
+ return HA_OFFSET_ERROR;
+ break;
+ case 2:
+ pos= (my_off_t) mi_uint2korr(ptr);
+ if (pos == (my_off_t) (1 << 16) -1)
+ return HA_OFFSET_ERROR;
+ break;
+ default: abort(); /* Impossible */
+ }
+ return (*s->keypos_to_recpos)(info, pos);
+}
+
+
+/* save position to record */
+
+void _ma_dpointer(MARIA_HA *info, uchar *buff, my_off_t pos)
+{
+ if (pos != HA_OFFSET_ERROR)
+ pos= (*info->s->recpos_to_keypos)(info, pos);
+
+ switch (info->s->rec_reflength) {
+#if SIZEOF_OFF_T > 4
+ case 8: mi_int8store(buff,pos); break;
+ case 7: mi_int7store(buff,pos); break;
+ case 6: mi_int6store(buff,pos); break;
+ case 5: mi_int5store(buff,pos); break;
+#else
+ case 8: *buff++=0;
+ /* fall trough */
+ case 7: *buff++=0;
+ /* fall trough */
+ case 6: *buff++=0;
+ /* fall trough */
+ case 5: *buff++=0;
+ /* fall trough */
+#endif
+ case 4: mi_int4store(buff,pos); break;
+ case 3: mi_int3store(buff,pos); break;
+ case 2: mi_int2store(buff,(uint) pos); break;
+ default: abort(); /* Impossible */
+ }
+} /* _ma_dpointer */
+
+
+my_off_t _ma_static_keypos_to_recpos(MARIA_HA *info, my_off_t pos)
+{
+ return pos * info->s->base.pack_reclength;
+}
+
+
+my_off_t _ma_static_recpos_to_keypos(MARIA_HA *info, my_off_t pos)
+{
+ return pos / info->s->base.pack_reclength;
+}
+
+my_off_t _ma_transparent_recpos(MARIA_HA *info __attribute__((unused)),
+ my_off_t pos)
+{
+ return pos;
+}
+
+my_off_t _ma_transaction_keypos_to_recpos(MARIA_HA *info
+ __attribute__((unused)),
+ my_off_t pos)
+{
+ /* We need one bit to store if there is transid's after position */
+ return pos >> 1;
+}
+
+my_off_t _ma_transaction_recpos_to_keypos(MARIA_HA *info
+ __attribute__((unused)),
+ my_off_t pos)
+{
+ return pos << 1;
+}
+
+/*
+ @brief Get key from key-block
+
+ @param nod_flag Is set to nod length if we on nod
+ @param page Points at previous key; Its advanced to point at next key
+ @param key Should contain previous key
+
+ @notes
+ Same as _ma_get_key but used with fixed length keys
+
+ @retval Returns length of found key + pointers
+ */
+
+uint _ma_get_static_key(register MARIA_KEYDEF *keyinfo, uint nod_flag,
+ register uchar **page, uchar *key)
+{
+ memcpy((uchar*) key,(uchar*) *page,
+ (size_t) (keyinfo->keylength+nod_flag));
+ *page+=keyinfo->keylength+nod_flag;
+ return(keyinfo->keylength);
+} /* _ma_get_static_key */
+
+
+/*
+ get key witch is packed against previous key or key with a NULL column.
+
+ SYNOPSIS
+ _ma_get_pack_key()
+ keyinfo key definition information.
+ nod_flag If nod: Length of node pointer, else zero.
+ page_pos RETURN position in key page behind this key.
+ key IN/OUT in: prev key, out: unpacked key.
+
+ RETURN
+ key_length + length of data pointer
+*/
+
+uint _ma_get_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag,
+ register uchar **page_pos, register uchar *key)
+{
+ reg1 HA_KEYSEG *keyseg;
+ uchar *start_key,*page=*page_pos;
+ uint length;
+
+ start_key=key;
+ for (keyseg=keyinfo->seg ; keyseg->type ;keyseg++)
+ {
+ if (keyseg->flag & HA_PACK_KEY)
+ {
+ /* key with length, packed to previous key */
+ uchar *start= key;
+ uint packed= *page & 128,tot_length,rest_length;
+ if (keyseg->length >= 127)
+ {
+ length=mi_uint2korr(page) & 32767;
+ page+=2;
+ }
+ else
+ length= *page++ & 127;
+
+ if (packed)
+ {
+ if (length > (uint) keyseg->length)
+ {
+ maria_print_error(keyinfo->share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ return 0; /* Error */
+ }
+ if (length == 0) /* Same key */
+ {
+ if (keyseg->flag & HA_NULL_PART)
+ *key++=1; /* Can't be NULL */
+ get_key_length(length,key);
+ key+= length; /* Same diff_key as prev */
+ if (length > keyseg->length)
+ {
+ DBUG_PRINT("error",
+ ("Found too long null packed key: %u of %u at 0x%lx",
+ length, keyseg->length, (long) *page_pos));
+ DBUG_DUMP("key",(char*) *page_pos,16);
+ maria_print_error(keyinfo->share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ return 0;
+ }
+ continue;
+ }
+ if (keyseg->flag & HA_NULL_PART)
+ {
+ key++; /* Skip null marker*/
+ start++;
+ }
+
+ get_key_length(rest_length,page);
+ tot_length=rest_length+length;
+
+ /* If the stored length has changed, we must move the key */
+ if (tot_length >= 255 && *start != 255)
+ {
+ /* length prefix changed from a length of one to a length of 3 */
+ bmove_upp(key+length+3, key+length+1, length);
+ *key=255;
+ mi_int2store(key+1,tot_length);
+ key+=3+length;
+ }
+ else if (tot_length < 255 && *start == 255)
+ {
+ bmove(key+1,key+3,length);
+ *key=tot_length;
+ key+=1+length;
+ }
+ else
+ {
+ store_key_length_inc(key,tot_length);
+ key+=length;
+ }
+ memcpy(key,page,rest_length);
+ page+=rest_length;
+ key+=rest_length;
+ continue;
+ }
+ else
+ {
+ if (keyseg->flag & HA_NULL_PART)
+ {
+ if (!length--) /* Null part */
+ {
+ *key++=0;
+ continue;
+ }
+ *key++=1; /* Not null */
+ }
+ }
+ if (length > (uint) keyseg->length)
+ {
+ DBUG_PRINT("error",("Found too long packed key: %u of %u at 0x%lx",
+ length, keyseg->length, (long) *page_pos));
+ DBUG_DUMP("key",(char*) *page_pos,16);
+ maria_print_error(keyinfo->share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ return 0; /* Error */
+ }
+ store_key_length_inc(key,length);
+ }
+ else
+ {
+ if (keyseg->flag & HA_NULL_PART)
+ {
+ if (!(*key++ = *page++))
+ continue;
+ }
+ if (keyseg->flag &
+ (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK))
+ {
+ uchar *tmp=page;
+ get_key_length(length,tmp);
+ length+=(uint) (tmp-page);
+ }
+ else
+ length=keyseg->length;
+ }
+ memcpy((uchar*) key,(uchar*) page,(size_t) length);
+ key+=length;
+ page+=length;
+ }
+ length=keyseg->length+nod_flag;
+ bmove((uchar*) key,(uchar*) page,length);
+ *page_pos= page+length;
+ return ((uint) (key-start_key)+keyseg->length);
+} /* _ma_get_pack_key */
+
+
+
+/* key that is packed relatively to previous */
+
+uint _ma_get_binary_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag,
+ register uchar **page_pos, register uchar *key)
+{
+ reg1 HA_KEYSEG *keyseg;
+ uchar *start_key,*page,*page_end,*from,*from_end;
+ uint length,tmp;
+ DBUG_ENTER("_ma_get_binary_pack_key");
+
+ page= *page_pos;
+ page_end=page+HA_MAX_KEY_BUFF+1;
+ start_key=key;
+
+ /*
+ Keys are compressed the following way:
+
+ prefix length Packed length of prefix common with prev key. (1 or 3 bytes)
+ for each key segment:
+ [is null] Null indicator if can be null (1 byte, zero means null)
+ [length] Packed length if varlength (1 or 3 bytes)
+ key segment 'length' bytes of key segment value
+ pointer Reference to the data file (last_keyseg->length).
+
+ get_key_length() is a macro. It gets the prefix length from 'page'
+ and puts it into 'length'. It increments 'page' by 1 or 3, depending
+ on the packed length of the prefix length.
+ */
+ get_key_length(length,page);
+ if (length)
+ {
+ if (length > keyinfo->maxlength)
+ {
+ DBUG_PRINT("error",
+ ("Found too long binary packed key: %u of %u at 0x%lx",
+ length, keyinfo->maxlength, (long) *page_pos));
+ DBUG_DUMP("key",(char*) *page_pos,16);
+ maria_print_error(keyinfo->share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(0); /* Wrong key */
+ }
+ /* Key is packed against prev key, take prefix from prev key. */
+ from= key;
+ from_end= key + length;
+ }
+ else
+ {
+ /* Key is not packed against prev key, take all from page buffer. */
+ from= page;
+ from_end= page_end;
+ }
+
+ /*
+ The trouble is that key can be split in two parts:
+ The first part (prefix) is in from .. from_end - 1.
+ The second part starts at page.
+ The split can be at every byte position. So we need to check for
+ the end of the first part before using every byte.
+ */
+ for (keyseg=keyinfo->seg ; keyseg->type ;keyseg++)
+ {
+ if (keyseg->flag & HA_NULL_PART)
+ {
+ /* If prefix is used up, switch to rest. */
+ if (from == from_end)
+ {
+ from=page;
+ from_end=page_end;
+ }
+ if (!(*key++ = *from++))
+ continue; /* Null part */
+ }
+ if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK))
+ {
+ /* If prefix is used up, switch to rest. */
+ if (from == from_end) { from=page; from_end=page_end; }
+ /* Get length of dynamic length key part */
+ if ((length= (uint) (uchar) (*key++ = *from++)) == 255)
+ {
+ /* If prefix is used up, switch to rest. */
+ if (from == from_end) { from=page; from_end=page_end; }
+ length= ((uint) (uchar) ((*key++ = *from++))) << 8;
+ /* If prefix is used up, switch to rest. */
+ if (from == from_end) { from=page; from_end=page_end; }
+ length+= (uint) (uchar) ((*key++ = *from++));
+ }
+ }
+ else
+ length=keyseg->length;
+
+ if ((tmp=(uint) (from_end-from)) <= length)
+ {
+ key+=tmp; /* Use old key */
+ length-=tmp;
+ from=page; from_end=page_end;
+ }
+ DBUG_ASSERT((int) length >= 0);
+ DBUG_PRINT("info",("key: 0x%lx from: 0x%lx length: %u",
+ (long) key, (long) from, length));
+ memmove((uchar*) key, (uchar*) from, (size_t) length);
+ key+=length;
+ from+=length;
+ }
+ /*
+ Last segment (type == 0) contains length of data pointer.
+ If we have mixed key blocks with data pointer and key block pointer,
+ we have to copy both.
+ */
+ length=keyseg->length+nod_flag;
+ if ((tmp=(uint) (from_end-from)) <= length)
+ {
+ /* Remaining length is less or equal max possible length. */
+ memcpy(key+tmp,page,length-tmp); /* Get last part of key */
+ *page_pos= page+length-tmp;
+ }
+ else
+ {
+ /*
+ Remaining length is greater than max possible length.
+ This can happen only if we switched to the new key bytes already.
+ 'page_end' is calculated with MI_MAX_KEY_BUFF. So it can be far
+ behind the real end of the key.
+ */
+ if (from_end != page_end)
+ {
+ DBUG_PRINT("error",("Error when unpacking key"));
+ maria_print_error(keyinfo->share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(0); /* Error */
+ }
+ /* Copy data pointer and, if appropriate, key block pointer. */
+ memcpy((uchar*) key,(uchar*) from,(size_t) length);
+ *page_pos= from+length;
+ }
+ DBUG_RETURN((uint) (key-start_key)+keyseg->length);
+}
+
+
+/*
+ @brief Get key at position without knowledge of previous key
+
+ @return pointer to next key
+*/
+
+uchar *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page,
+ uchar *key, uchar *keypos, uint *return_key_length)
+{
+ uint nod_flag;
+ DBUG_ENTER("_ma_get_key");
+
+ nod_flag=_ma_test_if_nod(info->s, page);
+ if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)))
+ {
+ bmove((uchar*) key,(uchar*) keypos,keyinfo->keylength+nod_flag);
+ DBUG_RETURN(keypos+keyinfo->keylength+nod_flag);
+ }
+ else
+ {
+ page+= info->s->keypage_header + nod_flag;
+ key[0]=0; /* safety */
+ while (page <= keypos)
+ {
+ *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key);
+ if (*return_key_length == 0)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(0);
+ }
+ }
+ }
+ DBUG_PRINT("exit",("page: 0x%lx length: %u", (long) page,
+ *return_key_length));
+ DBUG_RETURN(page);
+} /* _ma_get_key */
+
+
+/*
+ @brief Get key at position without knowledge of previous key
+
+ @return
+ @retval 0 ok
+ @retval 1 error
+*/
+
+static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *page, uchar *key, uchar *keypos,
+ uint *return_key_length)
+{
+ uint nod_flag;
+ DBUG_ENTER("_ma_get_prev_key");
+
+ nod_flag=_ma_test_if_nod(info->s, page);
+ if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)))
+ {
+ *return_key_length=keyinfo->keylength;
+ bmove((uchar*) key,(uchar*) keypos- *return_key_length-nod_flag,
+ *return_key_length);
+ DBUG_RETURN(0);
+ }
+ else
+ {
+ page+= info->s->keypage_header + nod_flag;
+ key[0]=0; /* safety */
+ while (page < keypos)
+ {
+ *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key);
+ if (*return_key_length == 0)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(1);
+ }
+ }
+ }
+ DBUG_RETURN(0);
+} /* _ma_get_key */
+
+
+/*
+ @brief Get last key from key-page before 'endpos'
+
+ @note
+ endpos may be either end of buffer or start of a key
+
+ @return
+ @retval pointer to where key starts
+*/
+
+uchar *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page,
+ uchar *lastkey, uchar *endpos, uint *return_key_length)
+{
+ uint nod_flag;
+ uchar *lastpos;
+ DBUG_ENTER("_ma_get_last_key");
+ DBUG_PRINT("enter",("page: 0x%lx endpos: 0x%lx", (long) page,
+ (long) endpos));
+
+ nod_flag= _ma_test_if_nod(info->s, page);
+ if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)))
+ {
+ lastpos=endpos-keyinfo->keylength-nod_flag;
+ *return_key_length=keyinfo->keylength;
+ if (lastpos > page)
+ bmove((uchar*) lastkey,(uchar*) lastpos,keyinfo->keylength+nod_flag);
+ }
+ else
+ {
+ page+= info->s->keypage_header + nod_flag;
+ lastpos= page;
+ lastkey[0]=0;
+ while (page < endpos)
+ {
+ lastpos=page;
+ *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,lastkey);
+ if (*return_key_length == 0)
+ {
+ DBUG_PRINT("error",("Couldn't find last key: page: 0x%lx",
+ (long) page));
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(0);
+ }
+ }
+ }
+ DBUG_PRINT("exit",("lastpos: 0x%lx length: %u", (long) lastpos,
+ *return_key_length));
+ DBUG_RETURN(lastpos);
+} /* _ma_get_last_key */
+
+
+/* Calculate length of key */
+
+uint _ma_keylength(MARIA_KEYDEF *keyinfo, register const uchar *key)
+{
+ reg1 HA_KEYSEG *keyseg;
+ const uchar *start;
+
+ if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)))
+ return (keyinfo->keylength);
+
+ start= key;
+ for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++)
+ {
+ if (keyseg->flag & HA_NULL_PART)
+ if (!*key++)
+ continue;
+ if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH_PART))
+ {
+ uint length;
+ get_key_length(length,key);
+ key+=length;
+ }
+ else
+ key+= keyseg->length;
+ }
+ return((uint) (key-start)+keyseg->length);
+} /* _ma_keylength */
+
+
+/*
+ Calculate length of part key.
+
+ Used in maria_rkey() to find the key found for the key-part that was used.
+ This is needed in case of multi-byte character sets where we may search
+ after '0xDF' but find 'ss'
+*/
+
+uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register const uchar *key,
+ HA_KEYSEG *end)
+{
+ reg1 HA_KEYSEG *keyseg;
+ const uchar *start= key;
+
+ for (keyseg=keyinfo->seg ; keyseg != end ; keyseg++)
+ {
+ if (keyseg->flag & HA_NULL_PART)
+ if (!*key++)
+ continue;
+ if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH_PART))
+ {
+ uint length;
+ get_key_length(length,key);
+ key+=length;
+ }
+ else
+ key+= keyseg->length;
+ }
+ return (uint) (key-start);
+}
+
+
+/* Move a key */
+
+uchar *_ma_move_key(MARIA_KEYDEF *keyinfo, uchar *to, const uchar *from)
+{
+ reg1 uint length;
+ memcpy(to, from, (size_t) (length= _ma_keylength(keyinfo, from)));
+ return to+length;
+}
+
+
+/*
+ Find next/previous record with same key
+
+ WARNING
+ This can't be used when database is touched after last read
+*/
+
+int _ma_search_next(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uchar *key, uint key_length, uint nextflag, my_off_t pos)
+{
+ int error;
+ uint nod_flag;
+ uchar lastkey[HA_MAX_KEY_BUFF];
+ DBUG_ENTER("_ma_search_next");
+ DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos: %lu page_changed %d keyread_buff_used: %d",
+ nextflag, (ulong) info->cur_row.lastpos,
+ (ulong) info->int_keypos,
+ info->page_changed, info->keyread_buff_used));
+ DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key,key_length););
+
+ /* Force full read if we are at last key or if we are not on a leaf
+ and the key tree has changed since we used it last time
+ Note that even if the key tree has changed since last read, we can use
+ the last read data from the leaf if we haven't used the buffer for
+ something else.
+ */
+
+ if (((nextflag & SEARCH_BIGGER) && info->int_keypos >= info->int_maxpos) ||
+ info->page_changed ||
+ (info->int_keytree_version != keyinfo->version &&
+ (info->int_nod_flag || info->keyread_buff_used)))
+ DBUG_RETURN(_ma_search(info,keyinfo,key, USE_WHOLE_KEY,
+ nextflag | SEARCH_SAVE_BUFF, pos));
+
+ if (info->keyread_buff_used)
+ {
+ if (!_ma_fetch_keypage(info, keyinfo, info->last_search_keypage,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, info->keyread_buff, 0, 0))
+ DBUG_RETURN(-1);
+ info->keyread_buff_used=0;
+ }
+
+ /* Last used buffer is in info->keyread_buff */
+ nod_flag= _ma_test_if_nod(info->s, info->keyread_buff);
+
+ if (nextflag & SEARCH_BIGGER) /* Next key */
+ {
+ my_off_t tmp_pos= _ma_kpos(nod_flag,info->int_keypos);
+ if (tmp_pos != HA_OFFSET_ERROR)
+ {
+ if ((error= _ma_search(info,keyinfo,key, USE_WHOLE_KEY,
+ nextflag | SEARCH_SAVE_BUFF, tmp_pos)) <=0)
+ DBUG_RETURN(error);
+ }
+ memcpy(lastkey,key,key_length);
+ if (!(info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,
+ &info->int_keypos,lastkey)))
+ DBUG_RETURN(-1);
+ }
+ else /* Previous key */
+ {
+ uint length;
+ /* Find start of previous key */
+ info->int_keypos= _ma_get_last_key(info,keyinfo,info->keyread_buff,lastkey,
+ info->int_keypos, &length);
+ if (!info->int_keypos)
+ DBUG_RETURN(-1);
+ if (info->int_keypos == info->keyread_buff + info->s->keypage_header)
+ DBUG_RETURN(_ma_search(info,keyinfo,key, USE_WHOLE_KEY,
+ nextflag | SEARCH_SAVE_BUFF, pos));
+ if ((error= _ma_search(info,keyinfo,key, USE_WHOLE_KEY,
+ nextflag | SEARCH_SAVE_BUFF,
+ _ma_kpos(nod_flag,info->int_keypos))) <= 0)
+ DBUG_RETURN(error);
+
+ /* QQ: We should be able to optimize away the following call */
+ if (! _ma_get_last_key(info,keyinfo,info->keyread_buff,lastkey,
+ info->int_keypos,&info->lastkey_length))
+ DBUG_RETURN(-1);
+ }
+ memcpy(info->lastkey,lastkey,info->lastkey_length);
+ info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length);
+ DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos));
+ DBUG_RETURN(0);
+} /* _ma_search_next */
+
+
+ /* Search after position for the first row in an index */
+ /* This is stored in info->cur_row.lastpos */
+
+int _ma_search_first(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ register my_off_t pos)
+{
+ uint nod_flag;
+ uchar *page;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_search_first");
+
+ if (pos == HA_OFFSET_ERROR)
+ {
+ my_errno=HA_ERR_KEY_NOT_FOUND;
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ DBUG_RETURN(-1);
+ }
+
+ do
+ {
+ if (!_ma_fetch_keypage(info, keyinfo, pos, PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, info->keyread_buff, 0, 0))
+ {
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ DBUG_RETURN(-1);
+ }
+ nod_flag=_ma_test_if_nod(share, info->keyread_buff);
+ page= info->keyread_buff + share->keypage_header + nod_flag;
+ } while ((pos= _ma_kpos(nod_flag,page)) != HA_OFFSET_ERROR);
+
+ if (!(info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,
+ info->lastkey)))
+ DBUG_RETURN(-1); /* Crashed */
+
+ info->int_keypos=page;
+ info->int_maxpos= (info->keyread_buff +
+ _ma_get_page_used(share, info->keyread_buff)-1);
+ info->int_nod_flag=nod_flag;
+ info->int_keytree_version=keyinfo->version;
+ info->last_search_keypage=info->last_keypage;
+ info->page_changed=info->keyread_buff_used=0;
+ info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length);
+
+ DBUG_PRINT("exit",("found key at %lu", (ulong) info->cur_row.lastpos));
+ DBUG_RETURN(0);
+} /* _ma_search_first */
+
+
+ /* Search after position for the last row in an index */
+ /* This is stored in info->cur_row.lastpos */
+
+int _ma_search_last(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ register my_off_t pos)
+{
+ uint nod_flag;
+ uchar *buff,*end_of_page;
+ DBUG_ENTER("_ma_search_last");
+
+ if (pos == HA_OFFSET_ERROR)
+ {
+ my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ DBUG_RETURN(-1);
+ }
+
+ buff=info->keyread_buff;
+ do
+ {
+ uint used_length;
+ if (!_ma_fetch_keypage(info, keyinfo, pos, PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, buff, 0, 0))
+ {
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ DBUG_RETURN(-1);
+ }
+ _ma_get_used_and_nod(info->s, buff, used_length, nod_flag);
+ end_of_page= buff + used_length;
+ } while ((pos= _ma_kpos(nod_flag, end_of_page)) != HA_OFFSET_ERROR);
+
+ if (!_ma_get_last_key(info, keyinfo, buff, info->lastkey, end_of_page,
+ &info->lastkey_length))
+ DBUG_RETURN(-1);
+ info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length);
+ info->int_keypos= info->int_maxpos= end_of_page;
+ info->int_nod_flag=nod_flag;
+ info->int_keytree_version=keyinfo->version;
+ info->last_search_keypage=info->last_keypage;
+ info->page_changed=info->keyread_buff_used=0;
+
+ DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos));
+ DBUG_RETURN(0);
+} /* _ma_search_last */
+
+
+
+/****************************************************************************
+**
+** Functions to store and pack a key in a page
+**
+** maria_calc_xx_key_length takes the following arguments:
+** nod_flag If nod: Length of nod-pointer
+** next_key Position to pos after the new key in buffer
+** org_key Key that was before the next key in buffer
+** prev_key Last key before current key
+** key Key that will be stored
+** s_temp Information how next key will be packed
+****************************************************************************/
+
+/* Static length key */
+
+int
+_ma_calc_static_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag,
+ uchar *next_pos __attribute__((unused)),
+ uchar *org_key __attribute__((unused)),
+ uchar *prev_key __attribute__((unused)),
+ const uchar *key, MARIA_KEY_PARAM *s_temp)
+{
+ s_temp->key= key;
+ return (int) (s_temp->move_length= keyinfo->keylength + nod_flag);
+}
+
+/* Variable length key */
+
+int
+_ma_calc_var_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag,
+ uchar *next_pos __attribute__((unused)),
+ uchar *org_key __attribute__((unused)),
+ uchar *prev_key __attribute__((unused)),
+ const uchar *key, MARIA_KEY_PARAM *s_temp)
+{
+ s_temp->key= key;
+ return (int) (s_temp->move_length= _ma_keylength(keyinfo,key)+nod_flag);
+}
+
+/**
+ @brief Calc length needed to store prefixed compressed keys
+
+ @info
+ Variable length first segment which is prefix compressed
+ (maria_chk reports 'packed + stripped')
+
+ Keys are compressed the following way:
+
+ If the max length of first key segment <= 127 bytes the prefix is
+ 1 uchar else it's 2 byte
+
+ prefix byte(s) The high bit is set if this is a prefix for the prev key
+ length Packed length if the previous was a prefix byte
+ [length] data bytes ('length' bytes)
+ next-key-seg Next key segments
+
+ If the first segment can have NULL:
+ The length is 0 for NULLS and 1+length for not null columns.
+*/
+
+int
+_ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ uchar *next_key,
+ uchar *org_key, uchar *prev_key, const uchar *key,
+ MARIA_KEY_PARAM *s_temp)
+{
+ reg1 HA_KEYSEG *keyseg;
+ int length;
+ uint key_length,ref_length,org_key_length=0,
+ length_pack,new_key_length,diff_flag,pack_marker;
+ const uchar *start,*end,*key_end;
+ uchar *sort_order;
+ bool same_length;
+
+ length_pack=s_temp->ref_length=s_temp->n_ref_length=s_temp->n_length=0;
+ same_length=0; keyseg=keyinfo->seg;
+ key_length= _ma_keylength(keyinfo,key)+nod_flag;
+
+ sort_order=0;
+ if ((keyinfo->flag & HA_FULLTEXT) &&
+ ((keyseg->type == HA_KEYTYPE_TEXT) ||
+ (keyseg->type == HA_KEYTYPE_VARTEXT1) ||
+ (keyseg->type == HA_KEYTYPE_VARTEXT2)) &&
+ !use_strnxfrm(keyseg->charset))
+ sort_order= keyseg->charset->sort_order;
+
+ /* diff flag contains how many bytes is needed to pack key */
+ if (keyseg->length >= 127)
+ {
+ diff_flag=2;
+ pack_marker=32768;
+ }
+ else
+ {
+ diff_flag= 1;
+ pack_marker=128;
+ }
+ s_temp->pack_marker=pack_marker;
+
+ /* Handle the case that the first part have NULL values */
+ if (keyseg->flag & HA_NULL_PART)
+ {
+ if (!*key++)
+ {
+ s_temp->key= key;
+ s_temp->key_length= 0;
+ s_temp->totlength= key_length-1+diff_flag;
+ s_temp->next_key_pos= 0; /* No next key */
+ return (s_temp->move_length= s_temp->totlength);
+ }
+ s_temp->store_not_null=1;
+ key_length--; /* We don't store NULL */
+ if (prev_key && !*prev_key++)
+ org_key=prev_key=0; /* Can't pack against prev */
+ else if (org_key)
+ org_key++; /* Skip NULL */
+ }
+ else
+ s_temp->store_not_null=0;
+ s_temp->prev_key= org_key;
+
+ /* The key part will start with a packed length */
+
+ get_key_pack_length(new_key_length,length_pack,key);
+ end= key_end= key+ new_key_length;
+ start= key;
+
+ /* Calc how many characters are identical between this and the prev. key */
+ if (prev_key)
+ {
+ get_key_length(org_key_length,prev_key);
+ s_temp->prev_key=prev_key; /* Pointer at data */
+ /* Don't use key-pack if length == 0 */
+ if (new_key_length && new_key_length == org_key_length)
+ same_length=1;
+ else if (new_key_length > org_key_length)
+ end= key + org_key_length;
+
+ if (sort_order) /* SerG */
+ {
+ while (key < end &&
+ sort_order[* (uchar*) key] == sort_order[* (uchar*) prev_key])
+ {
+ key++; prev_key++;
+ }
+ }
+ else
+ {
+ while (key < end && *key == *prev_key)
+ {
+ key++; prev_key++;
+ }
+ }
+ }
+
+ s_temp->key=key;
+ s_temp->key_length= (uint) (key_end-key);
+
+ if (same_length && key == key_end)
+ {
+ /* identical variable length key */
+ s_temp->ref_length= pack_marker;
+ length=(int) key_length-(int) (key_end-start)-length_pack;
+ length+= diff_flag;
+ if (next_key)
+ { /* Can't combine with next */
+ s_temp->n_length= *next_key; /* Needed by _ma_store_key */
+ next_key=0;
+ }
+ }
+ else
+ {
+ if (start != key)
+ { /* Starts as prev key */
+ ref_length= (uint) (key-start);
+ s_temp->ref_length= ref_length + pack_marker;
+ length= (int) (key_length - ref_length);
+
+ length-= length_pack;
+ length+= diff_flag;
+ length+= ((new_key_length-ref_length) >= 255) ? 3 : 1;/* Rest_of_key */
+ }
+ else
+ {
+ s_temp->key_length+=s_temp->store_not_null; /* If null */
+ length= key_length - length_pack+ diff_flag;
+ }
+ }
+ s_temp->totlength=(uint) length;
+ s_temp->prev_length=0;
+ DBUG_PRINT("test",("tot_length: %u length: %d uniq_key_length: %u",
+ key_length, length, s_temp->key_length));
+
+ /* If something after that hasn't length=0, test if we can combine */
+ if ((s_temp->next_key_pos=next_key))
+ {
+ uint packed,n_length;
+
+ packed = *next_key & 128;
+ if (diff_flag == 2)
+ {
+ n_length= mi_uint2korr(next_key) & 32767; /* Length of next key */
+ next_key+=2;
+ }
+ else
+ n_length= *next_key++ & 127;
+ if (!packed)
+ n_length-= s_temp->store_not_null;
+
+ if (n_length || packed) /* Don't pack 0 length keys */
+ {
+ uint next_length_pack, new_ref_length=s_temp->ref_length;
+
+ if (packed)
+ {
+ /* If first key and next key is packed (only on delete) */
+ if (!prev_key && org_key)
+ {
+ get_key_length(org_key_length,org_key);
+ key=start;
+ if (sort_order) /* SerG */
+ {
+ while (key < end &&
+ sort_order[*(uchar*) key] == sort_order[*(uchar*) org_key])
+ {
+ key++; org_key++;
+ }
+ }
+ else
+ {
+ while (key < end && *key == *org_key)
+ {
+ key++; org_key++;
+ }
+ }
+ if ((new_ref_length= (uint) (key - start)))
+ new_ref_length+=pack_marker;
+ }
+
+ if (!n_length)
+ {
+ /*
+ We put a different key between two identical variable length keys
+ Extend next key to have same prefix as this key
+ */
+ if (new_ref_length) /* prefix of previus key */
+ { /* make next key longer */
+ s_temp->part_of_prev_key= new_ref_length;
+ s_temp->prev_length= org_key_length -
+ (new_ref_length-pack_marker);
+ s_temp->n_ref_length= s_temp->part_of_prev_key;
+ s_temp->n_length= s_temp->prev_length;
+ n_length= get_pack_length(s_temp->prev_length);
+ s_temp->prev_key+= (new_ref_length - pack_marker);
+ length+= s_temp->prev_length + n_length;
+ }
+ else
+ { /* Can't use prev key */
+ s_temp->part_of_prev_key=0;
+ s_temp->prev_length= org_key_length;
+ s_temp->n_ref_length=s_temp->n_length= org_key_length;
+ length+= org_key_length;
+ }
+ return (s_temp->move_length= (int) length);
+ }
+
+ ref_length=n_length;
+ /* Get information about not packed key suffix */
+ get_key_pack_length(n_length,next_length_pack,next_key);
+
+ /* Test if new keys has fewer characters that match the previous key */
+ if (!new_ref_length)
+ { /* Can't use prev key */
+ s_temp->part_of_prev_key= 0;
+ s_temp->prev_length= ref_length;
+ s_temp->n_ref_length= s_temp->n_length= n_length+ref_length;
+ return s_temp->move_length= ((int) length+ref_length-
+ next_length_pack);
+ }
+ if (ref_length+pack_marker > new_ref_length)
+ {
+ uint new_pack_length=new_ref_length-pack_marker;
+ /* We must copy characters from the original key to the next key */
+ s_temp->part_of_prev_key= new_ref_length;
+ s_temp->prev_length= ref_length - new_pack_length;
+ s_temp->n_ref_length=s_temp->n_length=n_length + s_temp->prev_length;
+ s_temp->prev_key+= new_pack_length;
+ length-= (next_length_pack - get_pack_length(s_temp->n_length));
+ return s_temp->move_length= ((int) length + s_temp->prev_length);
+ }
+ }
+ else
+ {
+ /* Next key wasn't a prefix of previous key */
+ ref_length=0;
+ next_length_pack=0;
+ }
+ DBUG_PRINT("test",("length: %d next_key: 0x%lx", length,
+ (long) next_key));
+
+ {
+ uint tmp_length;
+ key=(start+=ref_length);
+ if (key+n_length < key_end) /* Normalize length based */
+ key_end= key+n_length;
+ if (sort_order) /* SerG */
+ {
+ while (key < key_end &&
+ sort_order[*(uchar*) key] == sort_order[*(uchar*) next_key])
+ {
+ key++; next_key++;
+ }
+ }
+ else
+ {
+ while (key < key_end && *key == *next_key)
+ {
+ key++; next_key++;
+ }
+ }
+ if (!(tmp_length=(uint) (key-start)))
+ { /* Key can't be re-packed */
+ s_temp->next_key_pos=0;
+ return (s_temp->move_length= length);
+ }
+ ref_length+=tmp_length;
+ n_length-=tmp_length;
+ length-=tmp_length+next_length_pack; /* We gained these chars */
+ }
+ if (n_length == 0 && ref_length == new_key_length)
+ {
+ s_temp->n_ref_length=pack_marker; /* Same as prev key */
+ }
+ else
+ {
+ s_temp->n_ref_length=ref_length | pack_marker;
+ length+= get_pack_length(n_length);
+ s_temp->n_length=n_length;
+ }
+ }
+ }
+ return (s_temp->move_length= length);
+}
+
+
+/* Length of key which is prefix compressed */
+
+int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ uchar *next_key,
+ uchar *org_key, uchar *prev_key,
+ const uchar *key,
+ MARIA_KEY_PARAM *s_temp)
+{
+ uint length,key_length,ref_length;
+
+ s_temp->totlength=key_length= _ma_keylength(keyinfo,key)+nod_flag;
+#ifdef HAVE_purify
+ s_temp->n_length= s_temp->n_ref_length=0; /* For valgrind */
+#endif
+ s_temp->key=key;
+ s_temp->prev_key=org_key;
+ if (prev_key) /* If not first key in block */
+ {
+ /* pack key against previous key */
+ /*
+ As keys may be identical when running a sort in maria_chk, we
+ have to guard against the case where keys may be identical
+ */
+ const uchar *end;
+ end=key+key_length;
+ for ( ; *key == *prev_key && key < end; key++,prev_key++) ;
+ s_temp->ref_length= ref_length=(uint) (key-s_temp->key);
+ length=key_length - ref_length + get_pack_length(ref_length);
+ }
+ else
+ {
+ /* No previous key */
+ s_temp->ref_length=ref_length=0;
+ length=key_length+1;
+ }
+ if ((s_temp->next_key_pos=next_key)) /* If another key after */
+ {
+ /* pack key against next key */
+ uint next_length,next_length_pack;
+ get_key_pack_length(next_length,next_length_pack,next_key);
+
+ /* If first key and next key is packed (only on delete) */
+ if (!prev_key && org_key && next_length)
+ {
+ const uchar *end;
+ for (key= s_temp->key, end=key+next_length ;
+ *key == *org_key && key < end;
+ key++,org_key++) ;
+ ref_length= (uint) (key - s_temp->key);
+ }
+
+ if (next_length > ref_length)
+ {
+ /* We put a key with different case between two keys with the same prefix
+ Extend next key to have same prefix as
+ this key */
+ s_temp->n_ref_length= ref_length;
+ s_temp->prev_length= next_length-ref_length;
+ s_temp->prev_key+= ref_length;
+ return s_temp->move_length= ((int) (length+ s_temp->prev_length -
+ next_length_pack +
+ get_pack_length(ref_length)));
+ }
+ /* Check how many characters are identical to next key */
+ key= s_temp->key+next_length;
+ while (*key++ == *next_key++) ;
+ if ((ref_length= (uint) (key - s_temp->key)-1) == next_length)
+ {
+ s_temp->next_key_pos=0;
+ return (s_temp->move_length= length); /* Can't pack next key */
+ }
+ s_temp->prev_length=0;
+ s_temp->n_ref_length=ref_length;
+ return s_temp->move_length= (int) (length-(ref_length - next_length) -
+ next_length_pack +
+ get_pack_length(ref_length));
+ }
+ return (s_temp->move_length= (int) length);
+}
+
+
+/*
+** store a key packed with _ma_calc_xxx_key_length in page-buffert
+*/
+
+/* store key without compression */
+
+void _ma_store_static_key(MARIA_KEYDEF *keyinfo __attribute__((unused)),
+ register uchar *key_pos,
+ register MARIA_KEY_PARAM *s_temp)
+{
+ memcpy(key_pos, s_temp->key,(size_t) s_temp->move_length);
+ s_temp->changed_length= s_temp->move_length;
+}
+
+
+/* store variable length key with prefix compression */
+
+#define store_pack_length(test,pos,length) { \
+ if (test) { *((pos)++) = (uchar) (length); } else \
+ { *((pos)++) = (uchar) ((length) >> 8); *((pos)++) = (uchar) (length); } }
+
+
+void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)),
+ register uchar *key_pos,
+ register MARIA_KEY_PARAM *s_temp)
+{
+ uint length;
+ uchar *org_key_pos= key_pos;
+
+ if (s_temp->ref_length)
+ {
+ /* Packed against previous key */
+ store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->ref_length);
+ /* If not same key after */
+ if (s_temp->ref_length != s_temp->pack_marker)
+ store_key_length_inc(key_pos,s_temp->key_length);
+ }
+ else
+ {
+ /* Not packed against previous key */
+ store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->key_length);
+ }
+ bmove(key_pos, s_temp->key,
+ (length= s_temp->totlength - (uint) (key_pos-org_key_pos)));
+
+ key_pos+= length;
+
+ if (!s_temp->next_key_pos) /* No following key */
+ goto end;
+
+ if (s_temp->prev_length)
+ {
+ /* Extend next key because new key didn't have same prefix as prev key */
+ if (s_temp->part_of_prev_key)
+ {
+ store_pack_length(s_temp->pack_marker == 128,key_pos,
+ s_temp->part_of_prev_key);
+ store_key_length_inc(key_pos,s_temp->n_length);
+ }
+ else
+ {
+ s_temp->n_length+= s_temp->store_not_null;
+ store_pack_length(s_temp->pack_marker == 128,key_pos,
+ s_temp->n_length);
+ }
+ memcpy(key_pos, s_temp->prev_key, s_temp->prev_length);
+ key_pos+= s_temp->prev_length;
+ }
+ else if (s_temp->n_ref_length)
+ {
+ store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->n_ref_length);
+ if (s_temp->n_ref_length != s_temp->pack_marker)
+ {
+ /* Not identical key */
+ store_key_length_inc(key_pos,s_temp->n_length);
+ }
+ }
+ else
+ {
+ s_temp->n_length+= s_temp->store_not_null;
+ store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->n_length);
+ }
+
+end:
+ s_temp->changed_length= (uint) (key_pos - org_key_pos);
+}
+
+
+/* variable length key with prefix compression */
+
+void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)),
+ register uchar *key_pos,
+ register MARIA_KEY_PARAM *s_temp)
+{
+ uchar *org_key_pos= key_pos;
+ size_t length= s_temp->totlength - s_temp->ref_length;
+
+ store_key_length_inc(key_pos,s_temp->ref_length);
+ memcpy(key_pos, s_temp->key+s_temp->ref_length, length);
+ key_pos+= length;
+
+ if (s_temp->next_key_pos)
+ {
+ store_key_length_inc(key_pos,s_temp->n_ref_length);
+ if (s_temp->prev_length) /* If we must extend key */
+ {
+ memcpy(key_pos,s_temp->prev_key,s_temp->prev_length);
+ key_pos+= s_temp->prev_length;
+ }
+ }
+ s_temp->changed_length= (uint) (key_pos - org_key_pos);
+}
diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c
new file mode 100644
index 00000000000..64f451982a9
--- /dev/null
+++ b/storage/maria/ma_sort.c
@@ -0,0 +1,1059 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Creates a index for a database by reading keys, sorting them and outputing
+ them in sorted order through MARIA_SORT_INFO functions.
+*/
+
+#include "ma_fulltext.h"
+#if defined(MSDOS) || defined(__WIN__)
+#include <fcntl.h>
+#else
+#include <stddef.h>
+#endif
+#include <queues.h>
+
+/* static variables */
+
+#undef MIN_SORT_MEMORY
+#undef MYF_RW
+#undef DISK_BUFFER_SIZE
+
+#define MERGEBUFF 15
+#define MERGEBUFF2 31
+#define MIN_SORT_MEMORY (4096-MALLOC_OVERHEAD)
+#define MYF_RW MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL)
+#define DISK_BUFFER_SIZE (IO_SIZE*16)
+
+
+/*
+ Pointers of functions for store and read keys from temp file
+*/
+
+extern void print_error _VARARGS((const char *fmt,...));
+
+/* Functions defined in this file */
+
+static ha_rows find_all_keys(MARIA_SORT_PARAM *info,uint keys,
+ uchar **sort_keys,
+ DYNAMIC_ARRAY *buffpek,int *maxbuffer,
+ IO_CACHE *tempfile,
+ IO_CACHE *tempfile_for_exceptions);
+static int write_keys(MARIA_SORT_PARAM *info, uchar **sort_keys,
+ uint count, BUFFPEK *buffpek,IO_CACHE *tempfile);
+static int write_key(MARIA_SORT_PARAM *info, uchar *key,
+ IO_CACHE *tempfile);
+static int write_index(MARIA_SORT_PARAM *info, uchar **sort_keys,
+ uint count);
+static int merge_many_buff(MARIA_SORT_PARAM *info,uint keys,
+ uchar **sort_keys,
+ BUFFPEK *buffpek,int *maxbuffer,
+ IO_CACHE *t_file);
+static uint read_to_buffer(IO_CACHE *fromfile,BUFFPEK *buffpek,
+ uint sort_length);
+static int merge_buffers(MARIA_SORT_PARAM *info,uint keys,
+ IO_CACHE *from_file, IO_CACHE *to_file,
+ uchar **sort_keys, BUFFPEK *lastbuff,
+ BUFFPEK *Fb, BUFFPEK *Tb);
+static int merge_index(MARIA_SORT_PARAM *,uint, uchar **,BUFFPEK *, int,
+ IO_CACHE *);
+static int flush_maria_ft_buf(MARIA_SORT_PARAM *info);
+
+static int write_keys_varlen(MARIA_SORT_PARAM *info, uchar **sort_keys,
+ uint count, BUFFPEK *buffpek,
+ IO_CACHE *tempfile);
+static uint read_to_buffer_varlen(IO_CACHE *fromfile,BUFFPEK *buffpek,
+ uint sort_length);
+static int write_merge_key(MARIA_SORT_PARAM *info, IO_CACHE *to_file,
+ uchar *key, uint sort_length, uint count);
+static int write_merge_key_varlen(MARIA_SORT_PARAM *info,
+ IO_CACHE *to_file, uchar *key,
+ uint sort_length, uint count);
+static inline int
+my_var_write(MARIA_SORT_PARAM *info, IO_CACHE *to_file, uchar *bufs);
+
+/*
+ Creates a index of sorted keys
+
+ SYNOPSIS
+ _ma_create_index_by_sort()
+ info Sort parameters
+ no_messages Set to 1 if no output
+ sortbuff_size Size of sortbuffer to allocate
+
+ RESULT
+ 0 ok
+ <> 0 Error
+*/
+
+int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages,
+ size_t sortbuff_size)
+{
+ int error,maxbuffer,skr;
+ size_t memavl,old_memavl;
+ uint keys,sort_length;
+ DYNAMIC_ARRAY buffpek;
+ ha_rows records;
+ uchar **sort_keys;
+ IO_CACHE tempfile, tempfile_for_exceptions;
+ DBUG_ENTER("_ma_create_index_by_sort");
+ DBUG_PRINT("enter",("sort_buff_size: %lu sort_length: %d max_records: %lu",
+ (ulong) sortbuff_size, info->key_length,
+ (ulong) info->sort_info->max_records));
+
+ if (info->keyinfo->flag & HA_VAR_LENGTH_KEY)
+ {
+ info->write_keys= write_keys_varlen;
+ info->read_to_buffer=read_to_buffer_varlen;
+ info->write_key=write_merge_key_varlen;
+ }
+ else
+ {
+ info->write_keys= write_keys;
+ info->read_to_buffer=read_to_buffer;
+ info->write_key=write_merge_key;
+ }
+
+ my_b_clear(&tempfile);
+ my_b_clear(&tempfile_for_exceptions);
+ bzero((char*) &buffpek,sizeof(buffpek));
+ sort_keys= (uchar **) NULL; error= 1;
+ maxbuffer=1;
+
+ memavl=max(sortbuff_size,MIN_SORT_MEMORY);
+ records= info->sort_info->max_records;
+ sort_length= info->key_length;
+ LINT_INIT(keys);
+
+ while (memavl >= MIN_SORT_MEMORY)
+ {
+ if ((records < UINT_MAX32) &&
+ ((my_off_t) (records + 1) *
+ (sort_length + sizeof(char*)) <= (my_off_t) memavl))
+ keys= (uint)records+1;
+ else
+ do
+ {
+ skr=maxbuffer;
+ if (memavl < sizeof(BUFFPEK)*(uint) maxbuffer ||
+ (keys=(memavl-sizeof(BUFFPEK)*(uint) maxbuffer)/
+ (sort_length+sizeof(char*))) <= 1 ||
+ keys < (uint) maxbuffer)
+ {
+ _ma_check_print_error(info->sort_info->param,
+ "maria_sort_buffer_size is too small");
+ goto err;
+ }
+ }
+ while ((maxbuffer= (int) (records/(keys-1)+1)) != skr);
+
+ if ((sort_keys=(uchar**) my_malloc(keys*(sort_length+sizeof(char*))+
+ HA_FT_MAXBYTELEN, MYF(0))))
+ {
+ if (my_init_dynamic_array(&buffpek, sizeof(BUFFPEK), maxbuffer,
+ maxbuffer/2))
+ {
+ my_free((uchar*) sort_keys,MYF(0));
+ sort_keys= 0;
+ }
+ else
+ break;
+ }
+ old_memavl=memavl;
+ if ((memavl=memavl/4*3) < MIN_SORT_MEMORY && old_memavl > MIN_SORT_MEMORY)
+ memavl=MIN_SORT_MEMORY;
+ }
+ if (memavl < MIN_SORT_MEMORY)
+ {
+ _ma_check_print_error(info->sort_info->param, "Maria sort buffer"
+ " too small"); /* purecov: tested */
+ goto err; /* purecov: tested */
+ }
+ (*info->lock_in_memory)(info->sort_info->param);/* Everything is allocated */
+
+ if (!no_messages)
+ printf(" - Searching for keys, allocating buffer for %d keys\n",keys);
+
+ if ((records=find_all_keys(info,keys,sort_keys,&buffpek,&maxbuffer,
+ &tempfile,&tempfile_for_exceptions))
+ == HA_POS_ERROR)
+ goto err; /* purecov: tested */
+ if (maxbuffer == 0)
+ {
+ if (!no_messages)
+ printf(" - Dumping %lu keys\n", (ulong) records);
+ if (write_index(info,sort_keys, (uint) records))
+ goto err; /* purecov: inspected */
+ }
+ else
+ {
+ keys=(keys*(sort_length+sizeof(char*)))/sort_length;
+ if (maxbuffer >= MERGEBUFF2)
+ {
+ if (!no_messages)
+ printf(" - Merging %lu keys\n", (ulong) records); /* purecov: tested */
+ if (merge_many_buff(info,keys,sort_keys,
+ dynamic_element(&buffpek,0,BUFFPEK *),&maxbuffer,&tempfile))
+ goto err; /* purecov: inspected */
+ }
+ if (flush_io_cache(&tempfile) ||
+ reinit_io_cache(&tempfile,READ_CACHE,0L,0,0))
+ goto err; /* purecov: inspected */
+ if (!no_messages)
+ printf(" - Last merge and dumping keys\n"); /* purecov: tested */
+ if (merge_index(info,keys,sort_keys,dynamic_element(&buffpek,0,BUFFPEK *),
+ maxbuffer,&tempfile))
+ goto err; /* purecov: inspected */
+ }
+
+ if (flush_maria_ft_buf(info) || _ma_flush_pending_blocks(info))
+ goto err;
+
+ if (my_b_inited(&tempfile_for_exceptions))
+ {
+ MARIA_HA *idx=info->sort_info->info;
+ uint keyno=info->key;
+ uint key_length, ref_length=idx->s->rec_reflength;
+
+ if (!no_messages)
+ printf(" - Adding exceptions\n"); /* purecov: tested */
+ if (flush_io_cache(&tempfile_for_exceptions) ||
+ reinit_io_cache(&tempfile_for_exceptions,READ_CACHE,0L,0,0))
+ goto err;
+
+ while (!my_b_read(&tempfile_for_exceptions,(uchar*)&key_length,
+ sizeof(key_length))
+ && !my_b_read(&tempfile_for_exceptions,(uchar*)sort_keys,
+ (uint) key_length))
+ {
+ if (_ma_ck_write(idx,keyno,(uchar*) sort_keys,key_length-ref_length))
+ goto err;
+ }
+ }
+
+ error =0;
+
+err:
+ if (sort_keys)
+ my_free((uchar*) sort_keys,MYF(0));
+ delete_dynamic(&buffpek);
+ close_cached_file(&tempfile);
+ close_cached_file(&tempfile_for_exceptions);
+
+ DBUG_RETURN(error ? -1 : 0);
+} /* _ma_create_index_by_sort */
+
+
+/* Search after all keys and place them in a temp. file */
+
+static ha_rows find_all_keys(MARIA_SORT_PARAM *info, uint keys,
+ uchar **sort_keys, DYNAMIC_ARRAY *buffpek,
+ int *maxbuffer, IO_CACHE *tempfile,
+ IO_CACHE *tempfile_for_exceptions)
+{
+ int error;
+ uint idx;
+ DBUG_ENTER("find_all_keys");
+
+ idx=error=0;
+ sort_keys[0]= (uchar*) (sort_keys+keys);
+
+ while (!(error=(*info->key_read)(info,sort_keys[idx])))
+ {
+ if (info->real_key_length > info->key_length)
+ {
+ if (write_key(info,sort_keys[idx],tempfile_for_exceptions))
+ DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */
+ continue;
+ }
+
+ if (++idx == keys)
+ {
+ if (info->write_keys(info,sort_keys,idx-1,
+ (BUFFPEK *)alloc_dynamic(buffpek),
+ tempfile))
+ DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */
+
+ sort_keys[0]=(uchar*) (sort_keys+keys);
+ memcpy(sort_keys[0],sort_keys[idx-1],(size_t) info->key_length);
+ idx=1;
+ }
+ sort_keys[idx]=sort_keys[idx-1]+info->key_length;
+ }
+ if (error > 0)
+ DBUG_RETURN(HA_POS_ERROR); /* Aborted by get_key */ /* purecov: inspected */
+ if (buffpek->elements)
+ {
+ if (info->write_keys(info,sort_keys,idx,(BUFFPEK *)alloc_dynamic(buffpek),
+ tempfile))
+ DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */
+ *maxbuffer=buffpek->elements-1;
+ }
+ else
+ *maxbuffer=0;
+
+ DBUG_RETURN((*maxbuffer)*(keys-1)+idx);
+} /* find_all_keys */
+
+
+#ifdef THREAD
+/* Search after all keys and place them in a temp. file */
+
+pthread_handler_t _ma_thr_find_all_keys(void *arg)
+{
+ MARIA_SORT_PARAM *sort_param= (MARIA_SORT_PARAM*) arg;
+ int error;
+ size_t memavl,old_memavl;
+ uint sort_length;
+ ulong idx, maxbuffer, keys;
+ uchar **sort_keys=0;
+
+ LINT_INIT(keys);
+
+ error=1;
+
+ if (my_thread_init())
+ goto err;
+
+ { /* Add extra block since DBUG_ENTER declare variables */
+ DBUG_ENTER("_ma_thr_find_all_keys");
+ DBUG_PRINT("enter", ("master: %d", sort_param->master));
+ if (sort_param->sort_info->got_error)
+ goto err;
+
+ if (sort_param->keyinfo->flag & HA_VAR_LENGTH_KEY)
+ {
+ sort_param->write_keys= write_keys_varlen;
+ sort_param->read_to_buffer= read_to_buffer_varlen;
+ sort_param->write_key= write_merge_key_varlen;
+ }
+ else
+ {
+ sort_param->write_keys= write_keys;
+ sort_param->read_to_buffer= read_to_buffer;
+ sort_param->write_key= write_merge_key;
+ }
+
+ my_b_clear(&sort_param->tempfile);
+ my_b_clear(&sort_param->tempfile_for_exceptions);
+ bzero((char*) &sort_param->buffpek,sizeof(sort_param->buffpek));
+ bzero((char*) &sort_param->unique, sizeof(sort_param->unique));
+
+ memavl= max(sort_param->sortbuff_size, MIN_SORT_MEMORY);
+ idx= (uint)sort_param->sort_info->max_records;
+ sort_length= sort_param->key_length;
+ maxbuffer= 1;
+
+ while (memavl >= MIN_SORT_MEMORY)
+ {
+ if ((my_off_t) (idx+1)*(sort_length+sizeof(char*)) <= (my_off_t) memavl)
+ keys= idx+1;
+ else
+ {
+ ulong skr;
+ do
+ {
+ skr= maxbuffer;
+ if (memavl < sizeof(BUFFPEK)*maxbuffer ||
+ (keys=(memavl-sizeof(BUFFPEK)*maxbuffer)/
+ (sort_length+sizeof(char*))) <= 1 ||
+ keys < maxbuffer)
+ {
+ _ma_check_print_error(sort_param->sort_info->param,
+ "maria_sort_buffer_size is too small");
+ goto err;
+ }
+ }
+ while ((maxbuffer= (int) (idx/(keys-1)+1)) != skr);
+ }
+ if ((sort_keys= (uchar **)
+ my_malloc(keys*(sort_length+sizeof(char*))+
+ ((sort_param->keyinfo->flag & HA_FULLTEXT) ?
+ HA_FT_MAXBYTELEN : 0), MYF(0))))
+ {
+ if (my_init_dynamic_array(&sort_param->buffpek, sizeof(BUFFPEK),
+ maxbuffer, maxbuffer/2))
+ {
+ my_free((uchar*) sort_keys,MYF(0));
+ sort_keys= (uchar **) NULL; /* for err: label */
+ }
+ else
+ break;
+ }
+ old_memavl= memavl;
+ if ((memavl= memavl/4*3) < MIN_SORT_MEMORY &&
+ old_memavl > MIN_SORT_MEMORY)
+ memavl= MIN_SORT_MEMORY;
+ }
+ if (memavl < MIN_SORT_MEMORY)
+ {
+ _ma_check_print_error(sort_param->sort_info->param,
+ "Maria sort buffer too small");
+ goto err; /* purecov: tested */
+ }
+
+ if (sort_param->sort_info->param->testflag & T_VERBOSE)
+ printf("Key %d - Allocating buffer for %lu keys\n",
+ sort_param->key+1, (ulong) keys);
+ sort_param->sort_keys= sort_keys;
+
+ idx= error= 0;
+ sort_keys[0]= (uchar*) (sort_keys+keys);
+
+ DBUG_PRINT("info", ("reading keys"));
+ while (!(error= sort_param->sort_info->got_error) &&
+ !(error= (*sort_param->key_read)(sort_param, sort_keys[idx])))
+ {
+ if (sort_param->real_key_length > sort_param->key_length)
+ {
+ if (write_key(sort_param,sort_keys[idx],
+ &sort_param->tempfile_for_exceptions))
+ goto err;
+ continue;
+ }
+
+ if (++idx == keys)
+ {
+ if (sort_param->write_keys(sort_param, sort_keys, idx - 1,
+ (BUFFPEK *)alloc_dynamic(&sort_param->
+ buffpek),
+ &sort_param->tempfile))
+ goto err;
+ sort_keys[0]= (uchar*) (sort_keys+keys);
+ memcpy(sort_keys[0], sort_keys[idx - 1],
+ (size_t) sort_param->key_length);
+ idx= 1;
+ }
+ sort_keys[idx]=sort_keys[idx - 1] + sort_param->key_length;
+ }
+ if (error > 0)
+ goto err;
+ if (sort_param->buffpek.elements)
+ {
+ if (sort_param->write_keys(sort_param,sort_keys, idx,
+ (BUFFPEK *) alloc_dynamic(&sort_param->
+ buffpek),
+ &sort_param->tempfile))
+ goto err;
+ sort_param->keys= (sort_param->buffpek.elements - 1) * (keys - 1) + idx;
+ }
+ else
+ sort_param->keys= idx;
+
+ sort_param->sort_keys_length= keys;
+ goto ok;
+
+err:
+ DBUG_PRINT("error", ("got some error"));
+ sort_param->sort_info->got_error= 1; /* no need to protect with a mutex */
+ my_free((uchar*) sort_keys,MYF(MY_ALLOW_ZERO_PTR));
+ sort_param->sort_keys=0;
+ delete_dynamic(& sort_param->buffpek);
+ close_cached_file(&sort_param->tempfile);
+ close_cached_file(&sort_param->tempfile_for_exceptions);
+
+ok:
+ free_root(&sort_param->wordroot, MYF(0));
+ /*
+ Detach from the share if the writer is involved. Avoid others to
+ be blocked. This includes a flush of the write buffer. This will
+ also indicate EOF to the readers.
+ */
+ if (sort_param->sort_info->info->rec_cache.share)
+ remove_io_thread(&sort_param->sort_info->info->rec_cache);
+
+ /* Readers detach from the share if any. Avoid others to be blocked. */
+ if (sort_param->read_cache.share)
+ remove_io_thread(&sort_param->read_cache);
+
+ pthread_mutex_lock(&sort_param->sort_info->mutex);
+ if (!--sort_param->sort_info->threads_running)
+ pthread_cond_signal(&sort_param->sort_info->cond);
+ pthread_mutex_unlock(&sort_param->sort_info->mutex);
+ DBUG_PRINT("exit", ("======== ending thread ========"));
+ }
+ my_thread_end();
+ return NULL;
+}
+
+
+int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param)
+{
+ MARIA_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
+ ulong length, keys;
+ double *rec_per_key_part= param->new_rec_per_key_part;
+ int got_error=sort_info->got_error;
+ uint i;
+ MARIA_HA *info=sort_info->info;
+ MARIA_SHARE *share= info->s;
+ MARIA_SORT_PARAM *sinfo;
+ uchar *mergebuf=0;
+ DBUG_ENTER("_ma_thr_write_keys");
+ LINT_INIT(length);
+
+ for (i= 0, sinfo= sort_param ;
+ i < sort_info->total_keys ;
+ i++, rec_per_key_part+=sinfo->keyinfo->keysegs, sinfo++)
+ {
+ if (!sinfo->sort_keys)
+ {
+ got_error=1;
+ my_free(sinfo->rec_buff, MYF(MY_ALLOW_ZERO_PTR));
+ continue;
+ }
+ if (!got_error)
+ {
+ maria_set_key_active(share->state.key_map, sinfo->key);
+
+ if (!sinfo->buffpek.elements)
+ {
+ if (param->testflag & T_VERBOSE)
+ {
+ printf("Key %d - Dumping %u keys\n",sinfo->key+1, sinfo->keys);
+ fflush(stdout);
+ }
+ if (write_index(sinfo, sinfo->sort_keys, sinfo->keys) ||
+ flush_maria_ft_buf(sinfo) || _ma_flush_pending_blocks(sinfo))
+ got_error=1;
+ }
+ if (!got_error && param->testflag & T_STATISTICS)
+ maria_update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique,
+ param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+ sinfo->notnull: NULL,
+ (ulonglong) info->state->records);
+ }
+ my_free((uchar*) sinfo->sort_keys,MYF(0));
+ my_free(sinfo->rec_buff, MYF(MY_ALLOW_ZERO_PTR));
+ sinfo->sort_keys=0;
+ }
+
+ for (i= 0, sinfo= sort_param ;
+ i < sort_info->total_keys ;
+ i++,
+ delete_dynamic(&sinfo->buffpek),
+ close_cached_file(&sinfo->tempfile),
+ close_cached_file(&sinfo->tempfile_for_exceptions),
+ sinfo++)
+ {
+ if (got_error)
+ continue;
+ if (sinfo->keyinfo->flag & HA_VAR_LENGTH_KEY)
+ {
+ sinfo->write_keys=write_keys_varlen;
+ sinfo->read_to_buffer=read_to_buffer_varlen;
+ sinfo->write_key=write_merge_key_varlen;
+ }
+ else
+ {
+ sinfo->write_keys=write_keys;
+ sinfo->read_to_buffer=read_to_buffer;
+ sinfo->write_key=write_merge_key;
+ }
+ if (sinfo->buffpek.elements)
+ {
+ uint maxbuffer=sinfo->buffpek.elements-1;
+ if (!mergebuf)
+ {
+ length=param->sort_buffer_length;
+ while (length >= MIN_SORT_MEMORY && !mergebuf)
+ {
+ mergebuf=my_malloc(length, MYF(0));
+ length=length*3/4;
+ }
+ if (!mergebuf)
+ {
+ got_error=1;
+ continue;
+ }
+ }
+ keys=length/sinfo->key_length;
+ if (maxbuffer >= MERGEBUFF2)
+ {
+ if (param->testflag & T_VERBOSE)
+ printf("Key %d - Merging %u keys\n",sinfo->key+1, sinfo->keys);
+ if (merge_many_buff(sinfo, keys, (uchar **) mergebuf,
+ dynamic_element(&sinfo->buffpek, 0, BUFFPEK *),
+ (int*) &maxbuffer, &sinfo->tempfile))
+ {
+ got_error=1;
+ continue;
+ }
+ }
+ if (flush_io_cache(&sinfo->tempfile) ||
+ reinit_io_cache(&sinfo->tempfile,READ_CACHE,0L,0,0))
+ {
+ got_error=1;
+ continue;
+ }
+ if (param->testflag & T_VERBOSE)
+ printf("Key %d - Last merge and dumping keys\n", sinfo->key+1);
+ if (merge_index(sinfo, keys, (uchar**) mergebuf,
+ dynamic_element(&sinfo->buffpek,0,BUFFPEK *),
+ maxbuffer,&sinfo->tempfile) ||
+ flush_maria_ft_buf(sinfo) ||
+ _ma_flush_pending_blocks(sinfo))
+ {
+ got_error=1;
+ continue;
+ }
+ }
+ if (my_b_inited(&sinfo->tempfile_for_exceptions))
+ {
+ uint key_length;
+
+ if (param->testflag & T_VERBOSE)
+ printf("Key %d - Dumping 'long' keys\n", sinfo->key+1);
+
+ if (flush_io_cache(&sinfo->tempfile_for_exceptions) ||
+ reinit_io_cache(&sinfo->tempfile_for_exceptions,READ_CACHE,0L,0,0))
+ {
+ got_error=1;
+ continue;
+ }
+
+ while (!got_error &&
+ !my_b_read(&sinfo->tempfile_for_exceptions,(uchar*)&key_length,
+ sizeof(key_length)))
+ {
+ uchar maria_ft_buf[HA_FT_MAXBYTELEN + HA_FT_WLEN + 10];
+ if (key_length > sizeof(maria_ft_buf) ||
+ my_b_read(&sinfo->tempfile_for_exceptions, (uchar*)maria_ft_buf,
+ (uint)key_length) ||
+ _ma_ck_write(info, sinfo->key, maria_ft_buf,
+ key_length - info->s->rec_reflength))
+ got_error=1;
+ }
+ }
+ }
+ my_free((uchar*) mergebuf,MYF(MY_ALLOW_ZERO_PTR));
+ DBUG_RETURN(got_error);
+}
+#endif /* THREAD */
+
+/* Write all keys in memory to file for later merge */
+
+static int write_keys(MARIA_SORT_PARAM *info, register uchar **sort_keys,
+ uint count, BUFFPEK *buffpek, IO_CACHE *tempfile)
+{
+ uchar **end;
+ uint sort_length=info->key_length;
+ DBUG_ENTER("write_keys");
+
+ my_qsort2((uchar*) sort_keys,count,sizeof(uchar*),(qsort2_cmp) info->key_cmp,
+ info);
+ if (!my_b_inited(tempfile) &&
+ open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST",
+ DISK_BUFFER_SIZE, info->sort_info->param->myf_rw))
+ DBUG_RETURN(1); /* purecov: inspected */
+
+ buffpek->file_pos=my_b_tell(tempfile);
+ buffpek->count=count;
+
+ for (end=sort_keys+count ; sort_keys != end ; sort_keys++)
+ {
+ if (my_b_write(tempfile, *sort_keys, (uint) sort_length))
+ DBUG_RETURN(1); /* purecov: inspected */
+ }
+ DBUG_RETURN(0);
+} /* write_keys */
+
+
+static inline int
+my_var_write(MARIA_SORT_PARAM *info, IO_CACHE *to_file, uchar *bufs)
+{
+ int err;
+ uint16 len= _ma_keylength(info->keyinfo, bufs);
+
+ /* The following is safe as this is a local file */
+ if ((err= my_b_write(to_file, (uchar*)&len, sizeof(len))))
+ return (err);
+ if ((err= my_b_write(to_file,bufs, (uint) len)))
+ return (err);
+ return (0);
+}
+
+
+static int write_keys_varlen(MARIA_SORT_PARAM *info,
+ register uchar **sort_keys,
+ uint count, BUFFPEK *buffpek,
+ IO_CACHE *tempfile)
+{
+ uchar **end;
+ int err;
+ DBUG_ENTER("write_keys_varlen");
+
+ my_qsort2((uchar*) sort_keys,count,sizeof(uchar*),(qsort2_cmp) info->key_cmp,
+ info);
+ if (!my_b_inited(tempfile) &&
+ open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST",
+ DISK_BUFFER_SIZE, info->sort_info->param->myf_rw))
+ DBUG_RETURN(1); /* purecov: inspected */
+
+ buffpek->file_pos=my_b_tell(tempfile);
+ buffpek->count=count;
+ for (end=sort_keys+count ; sort_keys != end ; sort_keys++)
+ {
+ if ((err= my_var_write(info,tempfile, *sort_keys)))
+ DBUG_RETURN(err);
+ }
+ DBUG_RETURN(0);
+} /* write_keys_varlen */
+
+
+static int write_key(MARIA_SORT_PARAM *info, uchar *key,
+ IO_CACHE *tempfile)
+{
+ uint key_length=info->real_key_length;
+ DBUG_ENTER("write_key");
+
+ if (!my_b_inited(tempfile) &&
+ open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST",
+ DISK_BUFFER_SIZE, info->sort_info->param->myf_rw))
+ DBUG_RETURN(1);
+
+ if (my_b_write(tempfile, (uchar*)&key_length,sizeof(key_length)) ||
+ my_b_write(tempfile, key, (uint) key_length))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+} /* write_key */
+
+
+/* Write index */
+
+static int write_index(MARIA_SORT_PARAM *info,
+ register uchar **sort_keys,
+ register uint count)
+{
+ DBUG_ENTER("write_index");
+
+ my_qsort2((uchar*) sort_keys,(size_t) count,sizeof(uchar*),
+ (qsort2_cmp) info->key_cmp,info);
+ while (count--)
+ {
+ if ((*info->key_write)(info, *sort_keys++))
+ DBUG_RETURN(-1); /* purecov: inspected */
+ }
+ DBUG_RETURN(0);
+} /* write_index */
+
+
+ /* Merge buffers to make < MERGEBUFF2 buffers */
+
+static int merge_many_buff(MARIA_SORT_PARAM *info, uint keys,
+ uchar **sort_keys, BUFFPEK *buffpek,
+ int *maxbuffer, IO_CACHE *t_file)
+{
+ register int i;
+ IO_CACHE t_file2, *from_file, *to_file, *temp;
+ BUFFPEK *lastbuff;
+ DBUG_ENTER("merge_many_buff");
+
+ if (*maxbuffer < MERGEBUFF2)
+ DBUG_RETURN(0); /* purecov: inspected */
+ if (flush_io_cache(t_file) ||
+ open_cached_file(&t_file2,my_tmpdir(info->tmpdir),"ST",
+ DISK_BUFFER_SIZE, info->sort_info->param->myf_rw))
+ DBUG_RETURN(1); /* purecov: inspected */
+
+ from_file= t_file ; to_file= &t_file2;
+ while (*maxbuffer >= MERGEBUFF2)
+ {
+ reinit_io_cache(from_file,READ_CACHE,0L,0,0);
+ reinit_io_cache(to_file,WRITE_CACHE,0L,0,0);
+ lastbuff=buffpek;
+ for (i=0 ; i <= *maxbuffer-MERGEBUFF*3/2 ; i+=MERGEBUFF)
+ {
+ if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++,
+ buffpek+i,buffpek+i+MERGEBUFF-1))
+ goto cleanup;
+ }
+ if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++,
+ buffpek+i,buffpek+ *maxbuffer))
+ break; /* purecov: inspected */
+ if (flush_io_cache(to_file))
+ break; /* purecov: inspected */
+ temp=from_file; from_file=to_file; to_file=temp;
+ *maxbuffer= (int) (lastbuff-buffpek)-1;
+ }
+cleanup:
+ close_cached_file(to_file); /* This holds old result */
+ if (to_file == t_file)
+ *t_file=t_file2; /* Copy result file */
+
+ DBUG_RETURN(*maxbuffer >= MERGEBUFF2); /* Return 1 if interrupted */
+} /* merge_many_buff */
+
+
+/*
+ Read data to buffer
+
+ SYNOPSIS
+ read_to_buffer()
+ fromfile File to read from
+ buffpek Where to read from
+ sort_length max length to read
+ RESULT
+ > 0 Ammount of bytes read
+ -1 Error
+*/
+
+static uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
+ uint sort_length)
+{
+ register uint count;
+ uint length;
+
+ if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
+ {
+ if (my_pread(fromfile->file,(uchar*) buffpek->base,
+ (length= sort_length*count),buffpek->file_pos,MYF_RW))
+ return((uint) -1); /* purecov: inspected */
+ buffpek->key=buffpek->base;
+ buffpek->file_pos+= length; /* New filepos */
+ buffpek->count-= count;
+ buffpek->mem_count= count;
+ }
+ return (count*sort_length);
+} /* read_to_buffer */
+
+static uint read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek,
+ uint sort_length)
+{
+ register uint count;
+ uint16 length_of_key = 0;
+ uint idx;
+ uchar *buffp;
+
+ if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
+ {
+ buffp= buffpek->base;
+
+ for (idx=1;idx<=count;idx++)
+ {
+ if (my_pread(fromfile->file,(uchar*)&length_of_key,sizeof(length_of_key),
+ buffpek->file_pos,MYF_RW))
+ return((uint) -1);
+ buffpek->file_pos+=sizeof(length_of_key);
+ if (my_pread(fromfile->file,(uchar*) buffp,length_of_key,
+ buffpek->file_pos,MYF_RW))
+ return((uint) -1);
+ buffpek->file_pos+=length_of_key;
+ buffp = buffp + sort_length;
+ }
+ buffpek->key=buffpek->base;
+ buffpek->count-= count;
+ buffpek->mem_count= count;
+ }
+ return (count*sort_length);
+} /* read_to_buffer_varlen */
+
+
+static int write_merge_key_varlen(MARIA_SORT_PARAM *info,
+ IO_CACHE *to_file, uchar* key,
+ uint sort_length, uint count)
+{
+ uint idx;
+ uchar *bufs = key;
+
+ for (idx=1;idx<=count;idx++)
+ {
+ int err;
+ if ((err= my_var_write(info, to_file, bufs)))
+ return (err);
+ bufs=bufs+sort_length;
+ }
+ return(0);
+}
+
+
+static int write_merge_key(MARIA_SORT_PARAM *info __attribute__((unused)),
+ IO_CACHE *to_file, uchar *key,
+ uint sort_length, uint count)
+{
+ return my_b_write(to_file, key, (size_t) sort_length*count);
+}
+
+/*
+ Merge buffers to one buffer
+ If to_file == 0 then use info->key_write
+*/
+
+static int NEAR_F
+merge_buffers(MARIA_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
+ IO_CACHE *to_file, uchar **sort_keys, BUFFPEK *lastbuff,
+ BUFFPEK *Fb, BUFFPEK *Tb)
+{
+ int error;
+ uint sort_length,maxcount;
+ ha_rows count;
+ my_off_t to_start_filepos;
+ uchar *strpos;
+ BUFFPEK *buffpek,**refpek;
+ QUEUE queue;
+ volatile int *killed= _ma_killed_ptr(info->sort_info->param);
+ DBUG_ENTER("merge_buffers");
+
+ count=error=0;
+ maxcount=keys/((uint) (Tb-Fb) +1);
+ LINT_INIT(to_start_filepos);
+ if (to_file)
+ to_start_filepos=my_b_tell(to_file);
+ strpos= (uchar*) sort_keys;
+ sort_length=info->key_length;
+
+ if (init_queue(&queue,(uint) (Tb-Fb)+1,offsetof(BUFFPEK,key),0,
+ (int (*)(void*, uchar *,uchar*)) info->key_cmp,
+ (void*) info))
+ DBUG_RETURN(1); /* purecov: inspected */
+
+ for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
+ {
+ count+= buffpek->count;
+ buffpek->base= strpos;
+ buffpek->max_keys=maxcount;
+ strpos+= (uint) (error=(int) info->read_to_buffer(from_file,buffpek,
+ sort_length));
+ if (error == -1)
+ goto err; /* purecov: inspected */
+ queue_insert(&queue,(uchar*) buffpek);
+ }
+
+ while (queue.elements > 1)
+ {
+ for (;;)
+ {
+ if (*killed)
+ {
+ error=1; goto err;
+ }
+ buffpek=(BUFFPEK*) queue_top(&queue);
+ if (to_file)
+ {
+ if (info->write_key(info,to_file,(uchar*) buffpek->key,
+ (uint) sort_length,1))
+ {
+ error=1; goto err; /* purecov: inspected */
+ }
+ }
+ else
+ {
+ if ((*info->key_write)(info,(void*) buffpek->key))
+ {
+ error=1; goto err; /* purecov: inspected */
+ }
+ }
+ buffpek->key+=sort_length;
+ if (! --buffpek->mem_count)
+ {
+ if (!(error=(int) info->read_to_buffer(from_file,buffpek,sort_length)))
+ {
+ uchar *base= buffpek->base;
+ uint max_keys=buffpek->max_keys;
+
+ VOID(queue_remove(&queue,0));
+
+ /* Put room used by buffer to use in other buffer */
+ for (refpek= (BUFFPEK**) &queue_top(&queue);
+ refpek <= (BUFFPEK**) &queue_end(&queue);
+ refpek++)
+ {
+ buffpek= *refpek;
+ if (buffpek->base+buffpek->max_keys*sort_length == base)
+ {
+ buffpek->max_keys+=max_keys;
+ break;
+ }
+ else if (base+max_keys*sort_length == buffpek->base)
+ {
+ buffpek->base=base;
+ buffpek->max_keys+=max_keys;
+ break;
+ }
+ }
+ break; /* One buffer have been removed */
+ }
+ }
+ else if (error == -1)
+ goto err; /* purecov: inspected */
+ queue_replaced(&queue); /* Top element has been replaced */
+ }
+ }
+ buffpek=(BUFFPEK*) queue_top(&queue);
+ buffpek->base= (uchar*) sort_keys;
+ buffpek->max_keys=keys;
+ do
+ {
+ if (to_file)
+ {
+ if (info->write_key(info,to_file,(uchar*) buffpek->key,
+ sort_length,buffpek->mem_count))
+ {
+ error=1; goto err; /* purecov: inspected */
+ }
+ }
+ else
+ {
+ register uchar *end;
+ strpos= buffpek->key;
+ for (end= strpos+buffpek->mem_count*sort_length;
+ strpos != end ;
+ strpos+=sort_length)
+ {
+ if ((*info->key_write)(info, (uchar*) strpos))
+ {
+ error=1; goto err; /* purecov: inspected */
+ }
+ }
+ }
+ }
+ while ((error=(int) info->read_to_buffer(from_file,buffpek,sort_length)) !=
+ -1 && error != 0);
+
+ lastbuff->count=count;
+ if (to_file)
+ lastbuff->file_pos=to_start_filepos;
+err:
+ delete_queue(&queue);
+ DBUG_RETURN(error);
+} /* merge_buffers */
+
+
+ /* Do a merge to output-file (save only positions) */
+
+static int NEAR_F
+merge_index(MARIA_SORT_PARAM *info, uint keys, uchar **sort_keys,
+ BUFFPEK *buffpek, int maxbuffer, IO_CACHE *tempfile)
+{
+ DBUG_ENTER("merge_index");
+ if (merge_buffers(info,keys,tempfile,(IO_CACHE*) 0,sort_keys,buffpek,buffpek,
+ buffpek+maxbuffer))
+ DBUG_RETURN(1); /* purecov: inspected */
+ DBUG_RETURN(0);
+} /* merge_index */
+
+
+static int flush_maria_ft_buf(MARIA_SORT_PARAM *info)
+{
+ int err=0;
+ if (info->sort_info->ft_buf)
+ {
+ err=_ma_sort_ft_buf_flush(info);
+ my_free((uchar*)info->sort_info->ft_buf, MYF(0));
+ info->sort_info->ft_buf=0;
+ }
+ return err;
+}
diff --git a/storage/maria/ma_sp_defs.h b/storage/maria/ma_sp_defs.h
new file mode 100644
index 00000000000..a70695bea3a
--- /dev/null
+++ b/storage/maria/ma_sp_defs.h
@@ -0,0 +1,47 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB
+ & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _SP_DEFS_H
+#define _SP_DEFS_H
+
+#define SPDIMS 2
+#define SPTYPE HA_KEYTYPE_DOUBLE
+#define SPLEN 8
+
+#ifdef HAVE_SPATIAL
+
+enum wkbType
+{
+ wkbPoint = 1,
+ wkbLineString = 2,
+ wkbPolygon = 3,
+ wkbMultiPoint = 4,
+ wkbMultiLineString = 5,
+ wkbMultiPolygon = 6,
+ wkbGeometryCollection = 7
+};
+
+enum wkbByteOrder
+{
+ wkbXDR = 0, /* Big Endian */
+ wkbNDR = 1 /* Little Endian */
+};
+
+uint _ma_sp_make_key(register MARIA_HA *info, uint keynr, uchar *key,
+ const uchar *record, my_off_t filepos);
+
+#endif /*HAVE_SPATIAL*/
+#endif /* _SP_DEFS_H */
diff --git a/storage/maria/ma_sp_key.c b/storage/maria/ma_sp_key.c
new file mode 100644
index 00000000000..5f83b432228
--- /dev/null
+++ b/storage/maria/ma_sp_key.c
@@ -0,0 +1,286 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+#ifdef HAVE_SPATIAL
+
+#include "ma_sp_defs.h"
+
+static int sp_add_point_to_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order, double *mbr);
+static int sp_get_point_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order, double *mbr);
+static int sp_get_linestring_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order, double *mbr);
+static int sp_get_polygon_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order, double *mbr);
+static int sp_get_geometry_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ double *mbr, int top);
+static int sp_mbr_from_wkb(uchar (*wkb), uint size, uint n_dims, double *mbr);
+
+uint _ma_sp_make_key(register MARIA_HA *info, uint keynr, uchar *key,
+ const uchar *record, my_off_t filepos)
+{
+ HA_KEYSEG *keyseg;
+ MARIA_KEYDEF *keyinfo = &info->s->keyinfo[keynr];
+ uint len = 0;
+ uchar *pos;
+ uint dlen;
+ uchar *dptr;
+ double mbr[SPDIMS * 2];
+ uint i;
+
+ keyseg = &keyinfo->seg[-1];
+ pos = (uchar*)record + keyseg->start;
+
+ dlen = _ma_calc_blob_length(keyseg->bit_start, pos);
+ memcpy_fixed(&dptr, pos + keyseg->bit_start, sizeof(char*));
+ if (!dptr)
+ {
+ my_errno= HA_ERR_NULL_IN_SPATIAL;
+ return 0;
+ }
+ sp_mbr_from_wkb(dptr + 4, dlen - 4, SPDIMS, mbr); /* SRID */
+
+ for (i = 0, keyseg = keyinfo->seg; keyseg->type; keyseg++, i++)
+ {
+ uint length = keyseg->length, start= keyseg->start;
+ double val;
+
+ DBUG_ASSERT(length == sizeof(double));
+ DBUG_ASSERT(!(start % sizeof(double)));
+ DBUG_ASSERT(start < sizeof(mbr));
+ DBUG_ASSERT(keyseg->type == HA_KEYTYPE_DOUBLE);
+
+ val= mbr[start / sizeof (double)];
+#ifdef HAVE_ISNAN
+ if (isnan(val))
+ {
+ bzero(key, length);
+ key+= length;
+ len+= length;
+ continue;
+ }
+#endif
+
+ if (keyseg->flag & HA_SWAP_KEY)
+ {
+ uchar buf[sizeof(double)];
+
+ float8store(buf, val);
+ pos= &buf[length];
+ while (pos > buf)
+ *key++ = *--pos;
+ }
+ else
+ {
+ float8store((uchar *)key, val);
+ key += length;
+ }
+ len+= length;
+ }
+ _ma_dpointer(info, key, filepos);
+ return len;
+}
+
+/*
+Calculate minimal bounding rectangle (mbr) of the spatial object
+stored in "well-known binary representation" (wkb) format.
+*/
+static int sp_mbr_from_wkb(uchar *wkb, uint size, uint n_dims, double *mbr)
+{
+ uint i;
+
+ for (i=0; i < n_dims; ++i)
+ {
+ mbr[i * 2] = DBL_MAX;
+ mbr[i * 2 + 1] = -DBL_MAX;
+ }
+
+ return sp_get_geometry_mbr(&wkb, wkb + size, n_dims, mbr, 1);
+}
+
+/*
+ Add one point stored in wkb to mbr
+*/
+
+static int sp_add_point_to_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order __attribute__((unused)),
+ double *mbr)
+{
+ double ord;
+ double *mbr_end= mbr + n_dims * 2;
+
+ while (mbr < mbr_end)
+ {
+ if ((*wkb) > end - 8)
+ return -1;
+ float8get(ord, (const uchar*) *wkb);
+ (*wkb)+= 8;
+ if (ord < *mbr)
+ *mbr= ord;
+ mbr++;
+ if (ord > *mbr)
+ *mbr= ord;
+ mbr++;
+ }
+ return 0;
+}
+
+
+static int sp_get_point_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order, double *mbr)
+{
+ return sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr);
+}
+
+
+static int sp_get_linestring_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order, double *mbr)
+{
+ uint n_points;
+
+ n_points = uint4korr(*wkb);
+ (*wkb) += 4;
+ for (; n_points > 0; --n_points)
+ {
+ /* Add next point to mbr */
+ if (sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr))
+ return -1;
+ }
+ return 0;
+}
+
+
+static int sp_get_polygon_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order, double *mbr)
+{
+ uint n_linear_rings;
+ uint n_points;
+
+ n_linear_rings = uint4korr((*wkb));
+ (*wkb) += 4;
+
+ for (; n_linear_rings > 0; --n_linear_rings)
+ {
+ n_points = uint4korr((*wkb));
+ (*wkb) += 4;
+ for (; n_points > 0; --n_points)
+ {
+ /* Add next point to mbr */
+ if (sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr))
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int sp_get_geometry_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ double *mbr, int top)
+{
+ int res;
+ uchar byte_order;
+ uint wkb_type;
+
+ byte_order = *(*wkb);
+ ++(*wkb);
+
+ wkb_type = uint4korr((*wkb));
+ (*wkb) += 4;
+
+ switch ((enum wkbType) wkb_type)
+ {
+ case wkbPoint:
+ res = sp_get_point_mbr(wkb, end, n_dims, byte_order, mbr);
+ break;
+ case wkbLineString:
+ res = sp_get_linestring_mbr(wkb, end, n_dims, byte_order, mbr);
+ break;
+ case wkbPolygon:
+ res = sp_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr);
+ break;
+ case wkbMultiPoint:
+ {
+ uint n_items;
+ n_items = uint4korr((*wkb));
+ (*wkb) += 4;
+ for (; n_items > 0; --n_items)
+ {
+ byte_order = *(*wkb);
+ ++(*wkb);
+ (*wkb) += 4;
+ if (sp_get_point_mbr(wkb, end, n_dims, byte_order, mbr))
+ return -1;
+ }
+ res = 0;
+ break;
+ }
+ case wkbMultiLineString:
+ {
+ uint n_items;
+ n_items = uint4korr((*wkb));
+ (*wkb) += 4;
+ for (; n_items > 0; --n_items)
+ {
+ byte_order = *(*wkb);
+ ++(*wkb);
+ (*wkb) += 4;
+ if (sp_get_linestring_mbr(wkb, end, n_dims, byte_order, mbr))
+ return -1;
+ }
+ res = 0;
+ break;
+ }
+ case wkbMultiPolygon:
+ {
+ uint n_items;
+ n_items = uint4korr((*wkb));
+ (*wkb) += 4;
+ for (; n_items > 0; --n_items)
+ {
+ byte_order = *(*wkb);
+ ++(*wkb);
+ (*wkb) += 4;
+ if (sp_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr))
+ return -1;
+ }
+ res = 0;
+ break;
+ }
+ case wkbGeometryCollection:
+ {
+ uint n_items;
+
+ if (!top)
+ return -1;
+
+ n_items = uint4korr((*wkb));
+ (*wkb) += 4;
+ for (; n_items > 0; --n_items)
+ {
+ if (sp_get_geometry_mbr(wkb, end, n_dims, mbr, 0))
+ return -1;
+ }
+ res = 0;
+ break;
+ }
+ default:
+ res = -1;
+ }
+ return res;
+}
+
+#endif /*HAVE_SPATIAL*/
diff --git a/storage/maria/ma_sp_test.c b/storage/maria/ma_sp_test.c
new file mode 100644
index 00000000000..b8c00753acb
--- /dev/null
+++ b/storage/maria/ma_sp_test.c
@@ -0,0 +1,568 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Testing of the basic functions of a MARIA spatial table */
+/* Written by Alex Barkov, who has a shared copyright to this code */
+
+#include "maria.h"
+
+#ifdef HAVE_SPATIAL
+#include "ma_sp_defs.h"
+
+#define MAX_REC_LENGTH 1024
+#define KEYALG HA_KEY_ALG_RTREE
+
+static void create_linestring(uchar *record,uint rownr);
+static void print_record(uchar * record,my_off_t offs,const char * tail);
+
+static void create_key(uchar *key,uint rownr);
+static void print_key(const uchar *key,const char * tail);
+
+static int run_test(const char *filename);
+static int read_with_pos(MARIA_HA * file, int silent);
+
+static int maria_rtree_CreateLineStringWKB(double *ords, uint n_dims, uint n_points,
+ uchar *wkb);
+static void maria_rtree_PrintWKB(uchar *wkb, uint n_dims);
+
+static char blob_key[MAX_REC_LENGTH];
+
+
+int main(int argc __attribute__((unused)),char *argv[])
+{
+ MY_INIT(argv[0]);
+ maria_init();
+ exit(run_test("sp_test"));
+}
+
+
+int run_test(const char *filename)
+{
+ MARIA_HA *file;
+ MARIA_UNIQUEDEF uniquedef;
+ MARIA_CREATE_INFO create_info;
+ MARIA_COLUMNDEF recinfo[20];
+ MARIA_KEYDEF keyinfo[20];
+ HA_KEYSEG keyseg[20];
+ key_range min_range, max_range;
+ int silent=0;
+ int create_flag=0;
+ int null_fields=0;
+ int nrecords=30;
+ int uniques=0;
+ int i;
+ int error;
+ int row_count=0;
+ uchar record[MAX_REC_LENGTH];
+ uchar key[MAX_REC_LENGTH];
+ uchar read_record[MAX_REC_LENGTH];
+ int upd=10;
+ ha_rows hrows;
+
+ /* Define a column for NULLs and DEL markers*/
+
+ recinfo[0].type=FIELD_NORMAL;
+ recinfo[0].length=1; /* For NULL bits */
+
+
+ /* Define spatial column */
+
+ recinfo[1].type=FIELD_BLOB;
+ recinfo[1].length=4 + portable_sizeof_char_ptr;
+
+
+
+ /* Define a key with 1 spatial segment */
+
+ keyinfo[0].seg=keyseg;
+ keyinfo[0].keysegs=1;
+ keyinfo[0].flag=HA_SPATIAL;
+ keyinfo[0].key_alg=KEYALG;
+
+ keyinfo[0].seg[0].type= HA_KEYTYPE_BINARY;
+ keyinfo[0].seg[0].flag=0;
+ keyinfo[0].seg[0].start= 1;
+ keyinfo[0].seg[0].length=1; /* Spatial ignores it anyway */
+ keyinfo[0].seg[0].null_bit= null_fields ? 2 : 0;
+ keyinfo[0].seg[0].null_pos=0;
+ keyinfo[0].seg[0].language=default_charset_info->number;
+ keyinfo[0].seg[0].bit_start=4; /* Long BLOB */
+
+
+ if (!silent)
+ printf("- Creating isam-file\n");
+
+ bzero((char*) &create_info,sizeof(create_info));
+ create_info.max_rows=10000000;
+
+ if (maria_create(filename,
+ DYNAMIC_RECORD,
+ 1, /* keys */
+ keyinfo,
+ 2, /* columns */
+ recinfo,uniques,&uniquedef,&create_info,create_flag))
+ goto err;
+
+ if (!silent)
+ printf("- Open isam-file\n");
+
+ if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED)))
+ goto err;
+
+ if (!silent)
+ printf("- Writing key:s\n");
+
+ for (i=0; i<nrecords; i++ )
+ {
+ create_linestring(record,i);
+ error=maria_write(file,record);
+ print_record(record,maria_position(file),"\n");
+ if (!error)
+ {
+ row_count++;
+ }
+ else
+ {
+ printf("maria_write: %d\n", error);
+ goto err;
+ }
+ }
+
+ if ((error=read_with_pos(file,silent)))
+ goto err;
+
+ if (!silent)
+ printf("- Deleting rows with position\n");
+ for (i=0; i < nrecords/4; i++)
+ {
+ my_errno=0;
+ bzero((char*) read_record,MAX_REC_LENGTH);
+ error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR);
+ if (error)
+ {
+ printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file),"\n");
+ error=maria_delete(file,read_record);
+ if (error)
+ {
+ printf("pos: %2d maria_delete: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ }
+
+ if (!silent)
+ printf("- Updating rows with position\n");
+ for (i=0; i < nrecords/2 ; i++)
+ {
+ my_errno=0;
+ bzero((char*) read_record,MAX_REC_LENGTH);
+ error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR);
+ if (error)
+ {
+ if (error==HA_ERR_RECORD_DELETED)
+ continue;
+ printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file),"");
+ create_linestring(record,i+nrecords*upd);
+ printf("\t-> ");
+ print_record(record,maria_position(file),"\n");
+ error=maria_update(file,read_record,record);
+ if (error)
+ {
+ printf("pos: %2d maria_update: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ }
+
+ if ((error=read_with_pos(file,silent)))
+ goto err;
+
+ if (!silent)
+ printf("- Test maria_rkey then a sequence of maria_rnext_same\n");
+
+ create_key(key, nrecords*4/5);
+ print_key(key," search for INTERSECT\n");
+
+ if ((error=maria_rkey(file,read_record,0,key,0,HA_READ_MBR_INTERSECT)))
+ {
+ printf("maria_rkey: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file)," maria_rkey\n");
+ row_count=1;
+
+ for (;;)
+ {
+ if ((error=maria_rnext_same(file,read_record)))
+ {
+ if (error==HA_ERR_END_OF_FILE)
+ break;
+ printf("maria_next: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file)," maria_rnext_same\n");
+ row_count++;
+ }
+ printf(" %d rows\n",row_count);
+
+ if (!silent)
+ printf("- Test maria_rfirst then a sequence of maria_rnext\n");
+
+ error=maria_rfirst(file,read_record,0);
+ if (error)
+ {
+ printf("maria_rfirst: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ row_count=1;
+ print_record(read_record,maria_position(file)," maria_frirst\n");
+
+ for(i=0;i<nrecords;i++) {
+ if ((error=maria_rnext(file,read_record,0)))
+ {
+ if (error==HA_ERR_END_OF_FILE)
+ break;
+ printf("maria_next: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file)," maria_rnext\n");
+ row_count++;
+ }
+ printf(" %d rows\n",row_count);
+
+ if (!silent)
+ printf("- Test maria_records_in_range()\n");
+
+ create_key(key, nrecords*upd);
+ print_key(key," INTERSECT\n");
+ min_range.key= key;
+ min_range.length= 1000; /* Big enough */
+ min_range.flag= HA_READ_MBR_INTERSECT;
+ max_range.key= record+1;
+ max_range.length= 1000; /* Big enough */
+ max_range.flag= HA_READ_KEY_EXACT;
+ hrows= maria_records_in_range(file,0, &min_range, &max_range);
+ printf(" %ld rows\n", (long) hrows);
+
+ if (maria_close(file)) goto err;
+ maria_end();
+ my_end(MY_CHECK_ERROR);
+
+ return 0;
+
+err:
+ printf("got error: %3d when using maria-database\n",my_errno);
+ maria_end();
+ return 1; /* skip warning */
+}
+
+
+static int read_with_pos (MARIA_HA * file,int silent)
+{
+ int error;
+ int i;
+ uchar read_record[MAX_REC_LENGTH];
+ int rows=0;
+
+ if (!silent)
+ printf("- Reading rows with position\n");
+ for (i=0;;i++)
+ {
+ my_errno=0;
+ bzero((char*) read_record,MAX_REC_LENGTH);
+ error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR);
+ if (error)
+ {
+ if (error==HA_ERR_END_OF_FILE)
+ break;
+ if (error==HA_ERR_RECORD_DELETED)
+ continue;
+ printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno);
+ return error;
+ }
+ rows++;
+ print_record(read_record,maria_position(file),"\n");
+ }
+ printf(" %d rows\n",rows);
+ return 0;
+}
+
+
+#ifdef NOT_USED
+static void bprint_record(uchar * record,
+ my_off_t offs __attribute__((unused)),
+ const char * tail)
+{
+ int i;
+ char * pos;
+ i=(unsigned char)record[0];
+ printf("%02X ",i);
+
+ for( pos=record+1, i=0; i<32; i++,pos++)
+ {
+ int b=(unsigned char)*pos;
+ printf("%02X",b);
+ }
+ printf("%s",tail);
+}
+#endif
+
+
+static void print_record(uchar * record, my_off_t offs,const char * tail)
+{
+ uchar *pos;
+ char *ptr;
+ uint len;
+
+ printf(" rec=(%d)",(unsigned char)record[0]);
+ pos=record+1;
+ len=sint4korr(pos);
+ pos+=4;
+ printf(" len=%d ",len);
+ memcpy_fixed(&ptr,pos,sizeof(char*));
+ if (ptr)
+ maria_rtree_PrintWKB((uchar*) ptr,SPDIMS);
+ else
+ printf("<NULL> ");
+ printf(" offs=%ld ",(long int)offs);
+ printf("%s",tail);
+}
+
+
+#ifdef NOT_USED
+static void create_point(uchar *record,uint rownr)
+{
+ uint tmp;
+ char *ptr;
+ char *pos=record;
+ double x[200];
+ int i;
+
+ for(i=0;i<SPDIMS;i++)
+ x[i]=rownr;
+
+ bzero((char*) record,MAX_REC_LENGTH);
+ *pos=0x01; /* DEL marker */
+ pos++;
+
+ memset(blob_key,0,sizeof(blob_key));
+ tmp=maria_rtree_CreatePointWKB(x,SPDIMS,blob_key);
+
+ int4store(pos,tmp);
+ pos+=4;
+
+ ptr=blob_key;
+ memcpy_fixed(pos,&ptr,sizeof(char*));
+}
+#endif
+
+
+static void create_linestring(uchar *record,uint rownr)
+{
+ uint tmp;
+ char *ptr;
+ uchar *pos= record;
+ double x[200];
+ int i,j;
+ int npoints=2;
+
+ for(j=0;j<npoints;j++)
+ for(i=0;i<SPDIMS;i++)
+ x[i+j*SPDIMS]=rownr*j;
+
+ bzero((char*) record,MAX_REC_LENGTH);
+ *pos=0x01; /* DEL marker */
+ pos++;
+
+ memset(blob_key,0,sizeof(blob_key));
+ tmp=maria_rtree_CreateLineStringWKB(x,SPDIMS,npoints, (uchar*) blob_key);
+
+ int4store(pos,tmp);
+ pos+=4;
+
+ ptr=blob_key;
+ memcpy_fixed(pos,&ptr,sizeof(char*));
+}
+
+
+static void create_key(uchar *key,uint rownr)
+{
+ double c=rownr;
+ uchar *pos;
+ uint i;
+
+ bzero(key,MAX_REC_LENGTH);
+ for ( pos=key, i=0; i<2*SPDIMS; i++)
+ {
+ float8store(pos,c);
+ pos+=sizeof(c);
+ }
+}
+
+static void print_key(const uchar *key,const char * tail)
+{
+ double c;
+ uint i;
+
+ printf(" key=");
+ for (i=0; i<2*SPDIMS; i++)
+ {
+ float8get(c,key);
+ key+=sizeof(c);
+ printf("%.14g ",c);
+ }
+ printf("%s",tail);
+}
+
+
+#ifdef NOT_USED
+
+static int maria_rtree_CreatePointWKB(double *ords, uint n_dims, uchar *wkb)
+{
+ uint i;
+
+ *wkb = wkbXDR;
+ ++wkb;
+ int4store(wkb, wkbPoint);
+ wkb += 4;
+
+ for (i=0; i < n_dims; ++i)
+ {
+ float8store(wkb, ords[i]);
+ wkb += 8;
+ }
+ return 5 + n_dims * 8;
+}
+#endif
+
+
+static int maria_rtree_CreateLineStringWKB(double *ords, uint n_dims, uint n_points,
+ uchar *wkb)
+{
+ uint i;
+ uint n_ords = n_dims * n_points;
+
+ *wkb = wkbXDR;
+ ++wkb;
+ int4store(wkb, wkbLineString);
+ wkb += 4;
+ int4store(wkb, n_points);
+ wkb += 4;
+ for (i=0; i < n_ords; ++i)
+ {
+ float8store(wkb, ords[i]);
+ wkb += 8;
+ }
+ return 9 + n_points * n_dims * 8;
+}
+
+
+static void maria_rtree_PrintWKB(uchar *wkb, uint n_dims)
+{
+ uint wkb_type;
+
+ ++wkb;
+ wkb_type = uint4korr(wkb);
+ wkb += 4;
+
+ switch ((enum wkbType)wkb_type)
+ {
+ case wkbPoint:
+ {
+ uint i;
+ double ord;
+
+ printf("POINT(");
+ for (i=0; i < n_dims; ++i)
+ {
+ float8get(ord, wkb);
+ wkb += 8;
+ printf("%.14g", ord);
+ if (i < n_dims - 1)
+ printf(" ");
+ else
+ printf(")");
+ }
+ break;
+ }
+ case wkbLineString:
+ {
+ uint p, i;
+ uint n_points;
+ double ord;
+
+ printf("LineString(");
+ n_points = uint4korr(wkb);
+ wkb += 4;
+ for (p=0; p < n_points; ++p)
+ {
+ for (i=0; i < n_dims; ++i)
+ {
+ float8get(ord, wkb);
+ wkb += 8;
+ printf("%.14g", ord);
+ if (i < n_dims - 1)
+ printf(" ");
+ }
+ if (p < n_points - 1)
+ printf(", ");
+ else
+ printf(")");
+ }
+ break;
+ }
+ case wkbPolygon:
+ {
+ printf("POLYGON(...)");
+ break;
+ }
+ case wkbMultiPoint:
+ {
+ printf("MULTIPOINT(...)");
+ break;
+ }
+ case wkbMultiLineString:
+ {
+ printf("MULTILINESTRING(...)");
+ break;
+ }
+ case wkbMultiPolygon:
+ {
+ printf("MULTIPOLYGON(...)");
+ break;
+ }
+ case wkbGeometryCollection:
+ {
+ printf("GEOMETRYCOLLECTION(...)");
+ break;
+ }
+ default:
+ {
+ printf("UNKNOWN GEOMETRY TYPE");
+ break;
+ }
+ }
+}
+
+#else
+int main(int argc __attribute__((unused)),char *argv[] __attribute__((unused)))
+{
+ exit(0);
+}
+#endif /*HAVE_SPATIAL*/
diff --git a/storage/maria/ma_static.c b/storage/maria/ma_static.c
new file mode 100644
index 00000000000..33f6e9f9fbe
--- /dev/null
+++ b/storage/maria/ma_static.c
@@ -0,0 +1,82 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Static variables for MARIA library. All definied here for easy making of
+ a shared library
+*/
+
+#ifndef _global_h
+#include "maria_def.h"
+#include "trnman.h"
+#endif
+
+LIST *maria_open_list=0;
+uchar maria_file_magic[]=
+{ (uchar) 254, (uchar) 254, (uchar) 9, '\001', };
+uchar maria_pack_file_magic[]=
+{ (uchar) 254, (uchar) 254, (uchar) 10, '\001', };
+/* Unique number for this maria instance */
+uchar maria_uuid[MY_UUID_SIZE];
+uint maria_quick_table_bits=9;
+ulong maria_block_size= MARIA_KEY_BLOCK_LENGTH;
+my_bool maria_flush= 0, maria_single_user= 0;
+my_bool maria_delay_key_write= 0, maria_page_checksums= 1;
+#if defined(THREAD) && !defined(DONT_USE_RW_LOCKS)
+ulong maria_concurrent_insert= 2;
+#else
+ulong maria_concurrent_insert= 0;
+#endif
+my_off_t maria_max_temp_length= MAX_FILE_SIZE;
+ulong maria_bulk_insert_tree_size=8192*1024;
+ulong maria_data_pointer_size= 4;
+
+PAGECACHE maria_pagecache_var;
+PAGECACHE *maria_pagecache= &maria_pagecache_var;
+
+PAGECACHE maria_log_pagecache_var;
+PAGECACHE *maria_log_pagecache= &maria_log_pagecache_var;
+MY_TMPDIR *maria_tmpdir; /* Tempdir for redo */
+
+/**
+ @brief when transactionality does not matter we can use this transaction
+
+ Used in external programs like ma_test*, and also internally inside
+ libmaria when there is no transaction around and the operation isn't
+ transactional (CREATE/DROP/RENAME/OPTIMIZE/REPAIR).
+*/
+TRN dummy_transaction_object;
+
+/* Enough for comparing if number is zero */
+uchar maria_zero_string[]= {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+/*
+ read_vec[] is used for converting between P_READ_KEY.. and SEARCH_
+ Position is , == , >= , <= , > , <
+*/
+
+uint maria_read_vec[]=
+{
+ SEARCH_FIND, SEARCH_FIND | SEARCH_BIGGER, SEARCH_FIND | SEARCH_SMALLER,
+ SEARCH_NO_FIND | SEARCH_BIGGER, SEARCH_NO_FIND | SEARCH_SMALLER,
+ SEARCH_FIND | SEARCH_PREFIX, SEARCH_LAST, SEARCH_LAST | SEARCH_SMALLER,
+ MBR_CONTAIN, MBR_INTERSECT, MBR_WITHIN, MBR_DISJOINT, MBR_EQUAL
+};
+
+uint maria_readnext_vec[]=
+{
+ SEARCH_BIGGER, SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_BIGGER, SEARCH_SMALLER,
+ SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_SMALLER
+};
diff --git a/storage/maria/ma_statrec.c b/storage/maria/ma_statrec.c
new file mode 100644
index 00000000000..b189bce67da
--- /dev/null
+++ b/storage/maria/ma_statrec.c
@@ -0,0 +1,290 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+ /* Functions to handle fixed-length-records */
+
+#include "maria_def.h"
+
+
+my_bool _ma_write_static_record(MARIA_HA *info, const uchar *record)
+{
+ uchar temp[8]; /* max pointer length */
+ if (info->s->state.dellink != HA_OFFSET_ERROR &&
+ !info->append_insert_at_end)
+ {
+ my_off_t filepos=info->s->state.dellink;
+ info->rec_cache.seek_not_done=1; /* We have done a seek */
+ if (info->s->file_read(info, &temp[0],info->s->base.rec_reflength,
+ info->s->state.dellink+1,
+ MYF(MY_NABP)))
+ goto err;
+ info->s->state.dellink= _ma_rec_pos(info, temp);
+ info->state->del--;
+ info->state->empty-=info->s->base.pack_reclength;
+ if (info->s->file_write(info, record, info->s->base.reclength,
+ filepos, MYF(MY_NABP)))
+ goto err;
+ }
+ else
+ {
+ if (info->state->data_file_length > info->s->base.max_data_file_length-
+ info->s->base.pack_reclength)
+ {
+ my_errno=HA_ERR_RECORD_FILE_FULL;
+ return(2);
+ }
+ if (info->opt_flag & WRITE_CACHE_USED)
+ { /* Cash in use */
+ if (my_b_write(&info->rec_cache, record,
+ info->s->base.reclength))
+ goto err;
+ if (info->s->base.pack_reclength != info->s->base.reclength)
+ {
+ uint length=info->s->base.pack_reclength - info->s->base.reclength;
+ bzero(temp,length);
+ if (my_b_write(&info->rec_cache, temp,length))
+ goto err;
+ }
+ }
+ else
+ {
+ info->rec_cache.seek_not_done=1; /* We have done a seek */
+ if (info->s->file_write(info, record, info->s->base.reclength,
+ info->state->data_file_length,
+ info->s->write_flag))
+ goto err;
+ if (info->s->base.pack_reclength != info->s->base.reclength)
+ {
+ uint length=info->s->base.pack_reclength - info->s->base.reclength;
+ bzero(temp,length);
+ if (info->s->file_write(info, temp,length,
+ info->state->data_file_length+
+ info->s->base.reclength,
+ info->s->write_flag))
+ goto err;
+ }
+ }
+ info->state->data_file_length+=info->s->base.pack_reclength;
+ info->s->state.split++;
+ }
+ return 0;
+ err:
+ return 1;
+}
+
+my_bool _ma_update_static_record(MARIA_HA *info, MARIA_RECORD_POS pos,
+ const uchar *oldrec __attribute__ ((unused)),
+ const uchar *record)
+{
+ info->rec_cache.seek_not_done=1; /* We have done a seek */
+ return (info->s->file_write(info,
+ record, info->s->base.reclength,
+ pos,
+ MYF(MY_NABP)) != 0);
+}
+
+
+my_bool _ma_delete_static_record(MARIA_HA *info,
+ const uchar *record __attribute__ ((unused)))
+{
+ uchar temp[9]; /* 1+sizeof(uint32) */
+ info->state->del++;
+ info->state->empty+=info->s->base.pack_reclength;
+ temp[0]= '\0'; /* Mark that record is deleted */
+ _ma_dpointer(info,temp+1,info->s->state.dellink);
+ info->s->state.dellink= info->cur_row.lastpos;
+ info->rec_cache.seek_not_done=1;
+ return (info->s->file_write(info, temp, 1+info->s->rec_reflength,
+ info->cur_row.lastpos, MYF(MY_NABP)) != 0);
+}
+
+
+my_bool _ma_cmp_static_record(register MARIA_HA *info,
+ register const uchar *old)
+{
+ DBUG_ENTER("_ma_cmp_static_record");
+
+ /* We are going to do changes; dont let anybody disturb */
+ dont_break(); /* Dont allow SIGHUP or SIGINT */
+
+ if (info->opt_flag & WRITE_CACHE_USED)
+ {
+ if (flush_io_cache(&info->rec_cache))
+ {
+ DBUG_RETURN(1);
+ }
+ info->rec_cache.seek_not_done=1; /* We have done a seek */
+ }
+
+ if ((info->opt_flag & READ_CHECK_USED))
+ { /* If check isn't disabled */
+ info->rec_cache.seek_not_done=1; /* We have done a seek */
+ if (info->s->file_read(info, info->rec_buff, info->s->base.reclength,
+ info->cur_row.lastpos, MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ if (memcmp(info->rec_buff, old, (uint) info->s->base.reclength))
+ {
+ DBUG_DUMP("read",old,info->s->base.reclength);
+ DBUG_DUMP("disk",info->rec_buff,info->s->base.reclength);
+ my_errno=HA_ERR_RECORD_CHANGED; /* Record have changed */
+ DBUG_RETURN(1);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+my_bool _ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ const uchar *record, MARIA_RECORD_POS pos)
+{
+ DBUG_ENTER("_ma_cmp_static_unique");
+
+ info->rec_cache.seek_not_done=1; /* We have done a seek */
+ if (info->s->file_read(info, info->rec_buff, info->s->base.reclength,
+ pos, MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ DBUG_RETURN(_ma_unique_comp(def, record, (uchar*) info->rec_buff,
+ def->null_are_equal));
+}
+
+
+/*
+ Read a fixed-length-record
+
+ RETURN
+ 0 Ok
+ 1 record delete
+ -1 on read-error or locking-error
+*/
+
+int _ma_read_static_record(register MARIA_HA *info, register uchar *record,
+ MARIA_RECORD_POS pos)
+{
+ int error;
+
+ if (pos != HA_OFFSET_ERROR)
+ {
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->rec_cache.pos_in_file <= pos &&
+ flush_io_cache(&info->rec_cache))
+ return(my_errno);
+ info->rec_cache.seek_not_done=1; /* We have done a seek */
+
+ error=info->s->file_read(info, record,info->s->base.reclength,
+ pos, MYF(MY_NABP));
+ if (! error)
+ {
+ fast_ma_writeinfo(info);
+ if (!*record)
+ {
+ /* Record is deleted */
+ return ((my_errno=HA_ERR_RECORD_DELETED));
+ }
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ return(0);
+ }
+ }
+ fast_ma_writeinfo(info); /* No such record */
+ return(my_errno);
+}
+
+
+
+int _ma_read_rnd_static_record(MARIA_HA *info, uchar *buf,
+ MARIA_RECORD_POS filepos,
+ my_bool skip_deleted_blocks)
+{
+ int locked,error,cache_read;
+ uint cache_length;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_read_rnd_static_record");
+
+ cache_read=0;
+ cache_length=0;
+ if (info->opt_flag & READ_CACHE_USED)
+ { /* Cache in use */
+ if (filepos == my_b_tell(&info->rec_cache) &&
+ (skip_deleted_blocks || !filepos))
+ {
+ cache_read=1; /* Read record using cache */
+ cache_length=(uint) (info->rec_cache.read_end - info->rec_cache.read_pos);
+ }
+ else
+ info->rec_cache.seek_not_done=1; /* Filepos is changed */
+ }
+ locked=0;
+ if (info->lock_type == F_UNLCK)
+ {
+ if (filepos >= info->state->data_file_length)
+ { /* Test if new records */
+ if (_ma_readinfo(info,F_RDLCK,0))
+ DBUG_RETURN(my_errno);
+ locked=1;
+ }
+ else
+ { /* We don't nead new info */
+#ifndef UNSAFE_LOCKING
+ if ((! cache_read || share->base.reclength > cache_length) &&
+ share->tot_locks == 0)
+ { /* record not in cache */
+ locked=1;
+ }
+#else
+ info->tmp_lock_type=F_RDLCK;
+#endif
+ }
+ }
+ if (filepos >= info->state->data_file_length)
+ {
+ DBUG_PRINT("test",("filepos: %ld (%ld) records: %ld del: %ld",
+ (long) filepos/share->base.reclength, (long) filepos,
+ (long) info->state->records, (long) info->state->del));
+ fast_ma_writeinfo(info);
+ DBUG_RETURN(my_errno=HA_ERR_END_OF_FILE);
+ }
+ info->cur_row.lastpos= filepos;
+ info->cur_row.nextpos= filepos+share->base.pack_reclength;
+
+ if (! cache_read) /* No cacheing */
+ {
+ error= _ma_read_static_record(info, buf, filepos);
+ DBUG_RETURN(error);
+ }
+
+ /* Read record with cacheing */
+ error=my_b_read(&info->rec_cache,(uchar*) buf,share->base.reclength);
+ if (info->s->base.pack_reclength != info->s->base.reclength && !error)
+ {
+ char tmp[8]; /* Skill fill bytes */
+ error=my_b_read(&info->rec_cache,(uchar*) tmp,
+ info->s->base.pack_reclength - info->s->base.reclength);
+ }
+ if (locked)
+ VOID(_ma_writeinfo(info,0)); /* Unlock keyfile */
+ if (!error)
+ {
+ if (!buf[0])
+ { /* Record is removed */
+ DBUG_RETURN(my_errno=HA_ERR_RECORD_DELETED);
+ }
+ /* Found and may be updated */
+ info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
+ DBUG_RETURN(0);
+ }
+ /* my_errno should be set if rec_cache.error == -1 */
+ if (info->rec_cache.error != -1 || my_errno == 0)
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ DBUG_RETURN(my_errno); /* Something wrong (EOF?) */
+}
diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c
new file mode 100644
index 00000000000..363bceb7067
--- /dev/null
+++ b/storage/maria/ma_test1.c
@@ -0,0 +1,885 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Testing of the basic functions of a MARIA table */
+
+#include "maria_def.h"
+#include <my_getopt.h>
+#include <m_string.h>
+#include "ma_control_file.h"
+#include "ma_loghandler.h"
+#include "ma_checkpoint.h"
+#include "trnman.h"
+
+extern PAGECACHE *maria_log_pagecache;
+extern const char *maria_data_root;
+
+#define MAX_REC_LENGTH 1024
+
+static void usage();
+
+static int rec_pointer_size=0, flags[50], testflag, checkpoint;
+static int key_field=FIELD_SKIP_PRESPACE,extra_field=FIELD_SKIP_ENDSPACE;
+static int key_type=HA_KEYTYPE_NUM;
+static int create_flag=0;
+static ulong blob_length;
+static enum data_file_type record_type= DYNAMIC_RECORD;
+
+static uint insert_count, update_count, remove_count;
+static uint pack_keys=0, pack_seg=0, key_length;
+static uint unique_key=HA_NOSAME;
+static uint die_in_middle_of_transaction;
+static my_bool pagecacheing, null_fields, silent, skip_update, opt_unique;
+static my_bool verbose, skip_delete, transactional;
+static MARIA_COLUMNDEF recinfo[4];
+static MARIA_KEYDEF keyinfo[10];
+static HA_KEYSEG keyseg[10];
+static HA_KEYSEG uniqueseg[10];
+
+static int run_test(const char *filename);
+static void get_options(int argc, char *argv[]);
+static void create_key(uchar *key,uint rownr);
+static void create_record(uchar *record,uint rownr);
+static void update_record(uchar *record);
+
+
+/*
+ These are here only for testing of recovery with undo. We are not
+ including maria_def.h here as this test is also to be an example of
+ how to use maria outside of the maria directory
+*/
+
+extern int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index,
+ enum flush_type flush_type_for_data,
+ enum flush_type flush_type_for_index);
+#define MARIA_FLUSH_DATA 1
+
+
+int main(int argc,char *argv[])
+{
+ MY_INIT(argv[0]);
+ my_init();
+ get_options(argc,argv);
+ maria_data_root= ".";
+ /* Maria requires that we always have a page cache */
+ if (maria_init() ||
+ (init_pagecache(maria_pagecache, maria_block_size * 16, 0, 0,
+ maria_block_size, MY_WME) == 0) ||
+ ma_control_file_create_or_open() ||
+ (init_pagecache(maria_log_pagecache,
+ TRANSLOG_PAGECACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE, MY_WME) == 0) ||
+ translog_init(maria_data_root, TRANSLOG_FILE_SIZE,
+ 0, 0, maria_log_pagecache,
+ TRANSLOG_DEFAULT_FLAGS, 0) ||
+ (transactional && (trnman_init(0) || ma_checkpoint_init(0))))
+ {
+ fprintf(stderr, "Error in initialization\n");
+ exit(1);
+ }
+
+ exit(run_test("test1"));
+}
+
+
+static int run_test(const char *filename)
+{
+ MARIA_HA *file;
+ int i,j= 0,error,deleted,rec_length,uniques=0;
+ uint offset_to_key;
+ ha_rows found,row_count;
+ uchar record[MAX_REC_LENGTH],key[MAX_REC_LENGTH],read_record[MAX_REC_LENGTH];
+ MARIA_UNIQUEDEF uniquedef;
+ MARIA_CREATE_INFO create_info;
+
+ if (die_in_middle_of_transaction)
+ null_fields= 1;
+
+ bzero((char*) recinfo,sizeof(recinfo));
+ bzero((char*) &create_info,sizeof(create_info));
+
+ /* First define 2 columns */
+ create_info.null_bytes= 1;
+ recinfo[0].type= key_field;
+ recinfo[0].length= (key_field == FIELD_BLOB ? 4+portable_sizeof_char_ptr :
+ key_length);
+ if (key_field == FIELD_VARCHAR)
+ recinfo[0].length+= HA_VARCHAR_PACKLENGTH(key_length);
+ recinfo[1].type=extra_field;
+ recinfo[1].length= (extra_field == FIELD_BLOB ? 4 + portable_sizeof_char_ptr : 24);
+ if (extra_field == FIELD_VARCHAR)
+ recinfo[1].length+= HA_VARCHAR_PACKLENGTH(recinfo[1].length);
+ recinfo[1].null_bit= null_fields ? 2 : 0;
+
+ if (opt_unique)
+ {
+ recinfo[2].type=FIELD_CHECK;
+ recinfo[2].length=MARIA_UNIQUE_HASH_LENGTH;
+ }
+ rec_length= recinfo[0].length+recinfo[1].length+recinfo[2].length;
+
+ if (key_type == HA_KEYTYPE_VARTEXT1 &&
+ key_length > 255)
+ key_type= HA_KEYTYPE_VARTEXT2;
+
+ /* Define a key over the first column */
+ keyinfo[0].seg=keyseg;
+ keyinfo[0].keysegs=1;
+ keyinfo[0].block_length= 0; /* Default block length */
+ keyinfo[0].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[0].seg[0].type= key_type;
+ keyinfo[0].seg[0].flag= pack_seg;
+ keyinfo[0].seg[0].start=1;
+ keyinfo[0].seg[0].length=key_length;
+ keyinfo[0].seg[0].null_bit= null_fields ? 2 : 0;
+ keyinfo[0].seg[0].null_pos=0;
+ keyinfo[0].seg[0].language= default_charset_info->number;
+ if (pack_seg & HA_BLOB_PART)
+ {
+ keyinfo[0].seg[0].bit_start=4; /* Length of blob length */
+ }
+ keyinfo[0].flag = (uint8) (pack_keys | unique_key);
+
+ bzero((uchar*) flags,sizeof(flags));
+ if (opt_unique)
+ {
+ uint start;
+ uniques=1;
+ bzero((char*) &uniquedef,sizeof(uniquedef));
+ bzero((char*) uniqueseg,sizeof(uniqueseg));
+ uniquedef.seg=uniqueseg;
+ uniquedef.keysegs=2;
+
+ /* Make a unique over all columns (except first NULL fields) */
+ for (i=0, start=1 ; i < 2 ; i++)
+ {
+ uniqueseg[i].start=start;
+ start+=recinfo[i].length;
+ uniqueseg[i].length=recinfo[i].length;
+ uniqueseg[i].language= default_charset_info->number;
+ }
+ uniqueseg[0].type= key_type;
+ uniqueseg[0].null_bit= null_fields ? 2 : 0;
+ uniqueseg[1].type= HA_KEYTYPE_TEXT;
+ if (extra_field == FIELD_BLOB)
+ {
+ uniqueseg[1].length=0; /* The whole blob */
+ uniqueseg[1].bit_start=4; /* long blob */
+ uniqueseg[1].flag|= HA_BLOB_PART;
+ }
+ else if (extra_field == FIELD_VARCHAR)
+ {
+ uniqueseg[1].flag|= HA_VAR_LENGTH_PART;
+ uniqueseg[1].type= (HA_VARCHAR_PACKLENGTH(recinfo[1].length-1) == 1 ?
+ HA_KEYTYPE_VARTEXT1 : HA_KEYTYPE_VARTEXT2);
+ }
+ }
+ else
+ uniques=0;
+
+ offset_to_key= test(null_fields);
+ if (key_field == FIELD_BLOB || key_field == FIELD_VARCHAR)
+ offset_to_key+= 2;
+
+ if (!silent)
+ printf("- Creating maria file\n");
+ create_info.max_rows=(ulong) (rec_pointer_size ?
+ (1L << (rec_pointer_size*8))/40 :
+ 0);
+ create_info.transactional= transactional;
+ if (maria_create(filename, record_type, 1, keyinfo,2+opt_unique,recinfo,
+ uniques, &uniquedef, &create_info,
+ create_flag))
+ goto err;
+ if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED)))
+ goto err;
+ if (!silent)
+ printf("- Writing key:s\n");
+
+ if (maria_begin(file))
+ goto err;
+ my_errno=0;
+ row_count=deleted=0;
+ for (i=49 ; i>=1 ; i-=2 )
+ {
+ if (insert_count-- == 0)
+ {
+ if (testflag)
+ break;
+ VOID(maria_close(file));
+ exit(0);
+ }
+ j=i%25 +1;
+ create_record(record,j);
+ error=maria_write(file,record);
+ if (!error)
+ row_count++;
+ flags[j]=1;
+ if (verbose || error)
+ printf("J= %2d maria_write: %d errno: %d\n", j,error,my_errno);
+ }
+
+ if (maria_commit(file) || maria_begin(file))
+ goto err;
+
+ if (checkpoint == 1 && ma_checkpoint_execute(CHECKPOINT_MEDIUM, FALSE))
+ goto err;
+
+ if (testflag == 1)
+ goto end;
+
+ /* Insert 2 rows with null values */
+ if (null_fields)
+ {
+ create_record(record,0);
+ error=maria_write(file,record);
+ if (!error)
+ row_count++;
+ if (verbose || error)
+ printf("J= NULL maria_write: %d errno: %d\n", error,my_errno);
+ error=maria_write(file,record);
+ if (!error)
+ row_count++;
+ if (verbose || error)
+ printf("J= NULL maria_write: %d errno: %d\n", error,my_errno);
+ flags[0]=2;
+ }
+
+ if (checkpoint == 2 && ma_checkpoint_execute(CHECKPOINT_MEDIUM, FALSE))
+ goto err;
+
+ if (testflag == 2)
+ {
+ printf("Terminating after inserts\n");
+ goto end;
+ }
+
+ if (maria_commit(file) || maria_begin(file))
+ goto err;
+
+ if (!skip_update)
+ {
+ if (opt_unique)
+ {
+ if (!silent)
+ printf("- Checking unique constraint\n");
+ create_record(record,j); /* Check last created row */
+ if (!maria_write(file,record) || my_errno != HA_ERR_FOUND_DUPP_UNIQUE)
+ {
+ printf("unique check failed\n");
+ }
+ }
+ if (!silent)
+ printf("- Updating rows\n");
+
+ /* Update first last row to force extend of file */
+ if (maria_rsame(file,read_record,-1))
+ {
+ printf("Can't find last row with maria_rsame\n");
+ }
+ else
+ {
+ memcpy(record,read_record,rec_length);
+ update_record(record);
+ if (maria_update(file,read_record,record))
+ {
+ printf("Can't update last row: %.*s\n",
+ keyinfo[0].seg[0].length,read_record+1);
+ }
+ }
+
+ /* Read through all rows and update them */
+ assert(maria_scan_init(file) == 0);
+
+ found=0;
+ while ((error= maria_scan(file,read_record)) == 0)
+ {
+ if (--update_count == 0) { VOID(maria_close(file)) ; exit(0) ; }
+ memcpy(record,read_record,rec_length);
+ update_record(record);
+ if (maria_update(file,read_record,record))
+ {
+ printf("Can't update row: %.*s, error: %d\n",
+ keyinfo[0].seg[0].length,record+1,my_errno);
+ }
+ found++;
+ }
+ if (found != row_count)
+ printf("Found %ld of %ld rows\n", (ulong) found, (ulong) row_count);
+ maria_scan_end(file);
+ }
+
+ if (checkpoint == 3 && ma_checkpoint_execute(CHECKPOINT_MEDIUM, FALSE))
+ goto err;
+
+ if (testflag == 3)
+ {
+ printf("Terminating after updates\n");
+ goto end;
+ }
+ if (!silent)
+ printf("- Reopening file\n");
+ if (maria_commit(file))
+ goto err;
+ if (maria_close(file))
+ goto err;
+ if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED)))
+ goto err;
+ if (maria_begin(file))
+ goto err;
+ if (!skip_delete)
+ {
+ if (!silent)
+ printf("- Removing keys\n");
+
+ for (i=0 ; i <= 10 ; i++)
+ {
+ /*
+ If you want to debug the problem in ma_test_recovery with BLOBs
+ (see @todo there), you can break out of the loop after just one
+ delete, it is enough, like this:
+ if (i==1) break;
+ */
+ /* testing */
+ if (remove_count-- == 0)
+ {
+ fprintf(stderr,
+ "delete-rows number of rows deleted; Going down hard!\n");
+ goto end;
+ }
+ j=i*2;
+ if (!flags[j])
+ continue;
+ create_key(key,j);
+ my_errno=0;
+ if ((error = maria_rkey(file, read_record, 0, key,
+ HA_WHOLE_KEY, HA_READ_KEY_EXACT)))
+ {
+ if (verbose || (flags[j] >= 1 ||
+ (error && my_errno != HA_ERR_KEY_NOT_FOUND)))
+ printf("key: '%.*s' maria_rkey: %3d errno: %3d\n",
+ (int) key_length,key+offset_to_key,error,my_errno);
+ }
+ else
+ {
+ error=maria_delete(file,read_record);
+ if (verbose || error)
+ printf("key: '%.*s' maria_delete: %3d errno: %3d\n",
+ (int) key_length, key+offset_to_key, error, my_errno);
+ if (! error)
+ {
+ deleted++;
+ flags[j]--;
+ }
+ }
+ }
+ }
+
+ if (checkpoint == 4 && ma_checkpoint_execute(CHECKPOINT_MEDIUM, FALSE))
+ goto err;
+
+ if (testflag == 4)
+ {
+ printf("Terminating after deletes\n");
+ goto end;
+ }
+
+ if (!silent)
+ printf("- Reading rows with key\n");
+ record[1]= 0; /* For nicer printf */
+ for (i=0 ; i <= 25 ; i++)
+ {
+ create_key(key,i);
+ my_errno=0;
+ error=maria_rkey(file,read_record,0,key,HA_WHOLE_KEY,HA_READ_KEY_EXACT);
+ if (verbose ||
+ (error == 0 && flags[i] == 0 && unique_key) ||
+ (error && (flags[i] != 0 || my_errno != HA_ERR_KEY_NOT_FOUND)))
+ {
+ printf("key: '%.*s' maria_rkey: %3d errno: %3d record: %s\n",
+ (int) key_length,key+offset_to_key,error,my_errno,record+1);
+ }
+ }
+
+ if (!silent)
+ printf("- Reading rows with position\n");
+ if (maria_scan_init(file))
+ {
+ fprintf(stderr, "maria_scan_init failed\n");
+ goto err;
+ }
+
+ for (i=1,found=0 ; i <= 30 ; i++)
+ {
+ my_errno=0;
+ if ((error= maria_scan(file, read_record)) == HA_ERR_END_OF_FILE)
+ {
+ if (found != row_count-deleted)
+ printf("Found only %ld of %ld rows\n", (ulong) found,
+ (ulong) (row_count - deleted));
+ break;
+ }
+ if (!error)
+ found++;
+ if (verbose || (error != 0 && error != HA_ERR_RECORD_DELETED &&
+ error != HA_ERR_END_OF_FILE))
+ {
+ printf("pos: %2d maria_rrnd: %3d errno: %3d record: %s\n",
+ i-1,error,my_errno,read_record+1);
+ }
+ }
+ maria_scan_end(file);
+
+end:
+ if (die_in_middle_of_transaction)
+ {
+ /* As commit record is not done, UNDO entries needs to be rolled back */
+ switch (die_in_middle_of_transaction) {
+ case 1:
+ /*
+ Flush changed pages go to disk. That will also flush log. Recovery
+ will skip REDOs and apply UNDOs.
+ */
+ _ma_flush_table_files(file, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
+ FLUSH_RELEASE, FLUSH_RELEASE);
+ break;
+ case 2:
+ /*
+ Just flush log. Pages are likely to not be on disk. Recovery will
+ then execute REDOs and UNDOs.
+ */
+ if (translog_flush(file->trn->undo_lsn))
+ goto err;
+ break;
+ case 3:
+ /*
+ Flush nothing. Pages and log are likely to not be on disk. Recovery
+ will then do nothing.
+ */
+ break;
+ case 4:
+ /*
+ Flush changed data pages go to disk. Changed index pages are not
+ flushed. Recovery will skip some REDOs and apply UNDOs.
+ */
+ _ma_flush_table_files(file, MARIA_FLUSH_DATA, FLUSH_RELEASE,
+ FLUSH_RELEASE);
+ /*
+ We have to flush log separately as the redo for the last key page
+ may not be flushed
+ */
+ if (translog_flush(file->trn->undo_lsn))
+ goto err;
+ break;
+ }
+ printf("Dying on request without maria_commit()/maria_close()\n");
+ exit(0);
+ }
+
+ if (maria_commit(file))
+ goto err;
+ if (maria_close(file))
+ goto err;
+ maria_end();
+ my_end(MY_CHECK_ERROR);
+
+ return (0);
+err:
+ printf("got error: %3d when using maria-database\n",my_errno);
+ return 1; /* skip warning */
+}
+
+
+static void create_key_part(uchar *key,uint rownr)
+{
+ if (!unique_key)
+ rownr&=7; /* Some identical keys */
+ if (keyinfo[0].seg[0].type == HA_KEYTYPE_NUM)
+ {
+ sprintf((char*) key,"%*d",keyinfo[0].seg[0].length,rownr);
+ }
+ else if (keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT1 ||
+ keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT2)
+ { /* Alpha record */
+ /* Create a key that may be easily packed */
+ bfill(key,keyinfo[0].seg[0].length,rownr < 10 ? 'A' : 'B');
+ sprintf((char*) key+keyinfo[0].seg[0].length-2,"%-2d",rownr);
+ if ((rownr & 7) == 0)
+ {
+ /* Change the key to force a unpack of the next key */
+ bfill(key+3,keyinfo[0].seg[0].length-5,rownr < 10 ? 'a' : 'b');
+ }
+ }
+ else
+ { /* Alpha record */
+ if (keyinfo[0].seg[0].flag & HA_SPACE_PACK)
+ sprintf((char*) key,"%-*d",keyinfo[0].seg[0].length,rownr);
+ else
+ {
+ /* Create a key that may be easily packed */
+ bfill(key,keyinfo[0].seg[0].length,rownr < 10 ? 'A' : 'B');
+ sprintf((char*) key+keyinfo[0].seg[0].length-2,"%-2d",rownr);
+ if ((rownr & 7) == 0)
+ {
+ /* Change the key to force a unpack of the next key */
+ key[1]= (rownr < 10 ? 'a' : 'b');
+ }
+ }
+ }
+}
+
+
+static void create_key(uchar *key,uint rownr)
+{
+ if (keyinfo[0].seg[0].null_bit)
+ {
+ if (rownr == 0)
+ {
+ key[0]=1; /* null key */
+ key[1]=0; /* For easy print of key */
+ return;
+ }
+ *key++=0;
+ }
+ if (keyinfo[0].seg[0].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
+ {
+ uint tmp;
+ create_key_part(key+2,rownr);
+ tmp=strlen((char*) key+2);
+ int2store(key,tmp);
+ }
+ else
+ create_key_part(key,rownr);
+}
+
+
+static uchar blob_key[MAX_REC_LENGTH];
+static uchar blob_record[MAX_REC_LENGTH+20*20];
+
+
+static void create_record(uchar *record,uint rownr)
+{
+ uchar *pos;
+ bzero((char*) record,MAX_REC_LENGTH);
+ record[0]=1; /* delete marker */
+ if (rownr == 0 && keyinfo[0].seg[0].null_bit)
+ record[0]|=keyinfo[0].seg[0].null_bit; /* Null key */
+
+ pos=record+1;
+ if (recinfo[0].type == FIELD_BLOB)
+ {
+ uint tmp;
+ uchar *ptr;
+ create_key_part(blob_key,rownr);
+ tmp=strlen((char*) blob_key);
+ int4store(pos,tmp);
+ ptr=blob_key;
+ memcpy_fixed(pos+4,&ptr,sizeof(char*));
+ pos+=recinfo[0].length;
+ }
+ else if (recinfo[0].type == FIELD_VARCHAR)
+ {
+ uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[0].length-1);
+ create_key_part(pos+pack_length,rownr);
+ tmp= strlen((char*) pos+pack_length);
+ if (pack_length == 1)
+ *(uchar*) pos= (uchar) tmp;
+ else
+ int2store(pos,tmp);
+ pos+= recinfo[0].length;
+ }
+ else
+ {
+ create_key_part(pos,rownr);
+ pos+=recinfo[0].length;
+ }
+ if (recinfo[1].type == FIELD_BLOB)
+ {
+ uint tmp;
+ uchar *ptr;;
+ sprintf((char*) blob_record,"... row: %d", rownr);
+ strappend((char*) blob_record,max(MAX_REC_LENGTH-rownr,10),' ');
+ tmp=strlen((char*) blob_record);
+ int4store(pos,tmp);
+ ptr=blob_record;
+ memcpy_fixed(pos+4,&ptr,sizeof(char*));
+ }
+ else if (recinfo[1].type == FIELD_VARCHAR)
+ {
+ uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1);
+ sprintf((char*) pos+pack_length, "... row: %d", rownr);
+ tmp= strlen((char*) pos+pack_length);
+ if (pack_length == 1)
+ *pos= (uchar) tmp;
+ else
+ int2store(pos,tmp);
+ }
+ else
+ {
+ sprintf((char*) pos,"... row: %d", rownr);
+ strappend((char*) pos,recinfo[1].length,' ');
+ }
+}
+
+/* change row to test re-packing of rows and reallocation of keys */
+
+static void update_record(uchar *record)
+{
+ uchar *pos=record+1;
+ if (recinfo[0].type == FIELD_BLOB)
+ {
+ uchar *column,*ptr;
+ int length;
+ length=uint4korr(pos); /* Long blob */
+ memcpy_fixed(&column,pos+4,sizeof(char*));
+ memcpy(blob_key,column,length); /* Move old key */
+ ptr=blob_key;
+ memcpy_fixed(pos+4,&ptr,sizeof(char*)); /* Store pointer to new key */
+ if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM)
+ default_charset_info->cset->casedn(default_charset_info,
+ (char*) blob_key, length,
+ (char*) blob_key, length);
+ pos+=recinfo[0].length;
+ }
+ else if (recinfo[0].type == FIELD_VARCHAR)
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[0].length-1);
+ uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos);
+ default_charset_info->cset->casedn(default_charset_info,
+ (char*) pos + pack_length, length,
+ (char*) pos + pack_length, length);
+ pos+=recinfo[0].length;
+ }
+ else
+ {
+ if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM)
+ default_charset_info->cset->casedn(default_charset_info,
+ (char*) pos, keyinfo[0].seg[0].length,
+ (char*) pos, keyinfo[0].seg[0].length);
+ pos+=recinfo[0].length;
+ }
+
+ if (recinfo[1].type == FIELD_BLOB)
+ {
+ uchar *column;
+ int length;
+ length=uint4korr(pos);
+ memcpy_fixed(&column,pos+4,sizeof(char*));
+ memcpy(blob_record,column,length);
+ bfill(blob_record+length,20,'.'); /* Make it larger */
+ length+=20;
+ int4store(pos,length);
+ column=blob_record;
+ memcpy_fixed(pos+4,&column,sizeof(char*));
+ }
+ else if (recinfo[1].type == FIELD_VARCHAR)
+ {
+ /* Second field is longer than 10 characters */
+ uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1);
+ uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos);
+ pos= record+ recinfo[1].offset;
+ bfill(pos+pack_length+length,recinfo[1].length-length-pack_length,'.');
+ length=recinfo[1].length-pack_length;
+ if (pack_length == 1)
+ *(uchar*) pos= (uchar) length;
+ else
+ int2store(pos,length);
+ }
+ else
+ {
+ bfill(pos+recinfo[1].length-10,10,'.');
+ }
+}
+
+
+static struct my_option my_long_options[] =
+{
+ {"checkpoint", 'H', "Checkpoint at specified stage", (uchar**) &checkpoint,
+ (uchar**) &checkpoint, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"checksum", 'c', "Undocumented",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+#ifndef DBUG_OFF
+ {"debug", '#', "Undocumented",
+ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+ {"delete-rows", 'd', "Abort after this many rows has been deleted",
+ (uchar**) &remove_count, (uchar**) &remove_count, 0, GET_UINT, REQUIRED_ARG,
+ 1000, 0, 0, 0, 0, 0},
+ {"help", '?', "Display help and exit",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"insert-rows", 'i', "Undocumented", (uchar**) &insert_count,
+ (uchar**) &insert_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0},
+ {"key-alpha", 'a', "Use a key of type HA_KEYTYPE_TEXT",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"key-binary-pack", 'B', "Undocumented",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"key-blob", 'b', "Undocumented",
+ (uchar**) &blob_length, (uchar**) &blob_length,
+ 0, GET_ULONG, OPT_ARG, 0, 0, 0, 0, 0, 0},
+ {"key-cache", 'K', "Undocumented", (uchar**) &pagecacheing,
+ (uchar**) &pagecacheing, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"key-length", 'k', "Undocumented", (uchar**) &key_length,
+ (uchar**) &key_length, 0, GET_UINT, REQUIRED_ARG, 6, 0, 0, 0, 0, 0},
+ {"key-multiple", 'm', "Don't use unique keys",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"key-prefix_pack", 'P', "Undocumented",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"key-space_pack", 'p', "Undocumented",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"key-varchar", 'w', "Test VARCHAR keys",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"null-fields", 'N', "Define fields with NULL",
+ (uchar**) &null_fields, (uchar**) &null_fields, 0, GET_BOOL, NO_ARG,
+ 0, 0, 0, 0, 0, 0},
+ {"row-fixed-size", 'S', "Fixed size records",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"rows-in-block", 'M', "Store rows in block format",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"row-pointer-size", 'R', "Undocumented", (uchar**) &rec_pointer_size,
+ (uchar**) &rec_pointer_size, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"silent", 's', "Undocumented",
+ (uchar**) &silent, (uchar**) &silent, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0,
+ 0, 0},
+ {"skip-delete", 'U', "Don't test deletes", (uchar**) &skip_delete,
+ (uchar**) &skip_delete, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"skip-update", 'D', "Don't test updates", (uchar**) &skip_update,
+ (uchar**) &skip_update, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"testflag", 't', "Stop test at specified stage", (uchar**) &testflag,
+ (uchar**) &testflag, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"test-undo", 'A',
+ "Abort hard. Used for testing recovery with undo",
+ (uchar**) &die_in_middle_of_transaction,
+ (uchar**) &die_in_middle_of_transaction,
+ 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"transactional", 'T',
+ "Test in transactional mode. (Only works with block format)",
+ (uchar**) &transactional, (uchar**) &transactional, 0, GET_BOOL, NO_ARG,
+ 0, 0, 0, 0, 0, 0},
+ {"unique", 'C', "Undocumented", (uchar**) &opt_unique,
+ (uchar**) &opt_unique, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"update-rows", 'u', "Max number of rows to update", (uchar**) &update_count,
+ (uchar**) &update_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0},
+ {"verbose", 'v', "Be more verbose", (uchar**) &verbose,
+ (uchar**) &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"version", 'V', "Print version number and exit",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+
+static my_bool
+get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
+ char *argument __attribute__((unused)))
+{
+ switch(optid) {
+ case 'a':
+ key_type= HA_KEYTYPE_TEXT;
+ break;
+ case 'c':
+ create_flag|= HA_CREATE_CHECKSUM | HA_CREATE_PAGE_CHECKSUM;
+ break;
+ case 'R': /* Length of record pointer */
+ if (rec_pointer_size > 3)
+ rec_pointer_size=0;
+ break;
+ case 'P':
+ pack_keys= HA_PACK_KEY; /* Use prefix compression */
+ break;
+ case 'B':
+ pack_keys= HA_BINARY_PACK_KEY; /* Use binary compression */
+ break;
+ case 'M':
+ record_type= BLOCK_RECORD;
+ break;
+ case 'S':
+ if (key_field == FIELD_VARCHAR)
+ {
+ create_flag=0; /* Static sized varchar */
+ record_type= STATIC_RECORD;
+ }
+ else if (key_field != FIELD_BLOB)
+ {
+ key_field=FIELD_NORMAL; /* static-size record */
+ extra_field=FIELD_NORMAL;
+ record_type= STATIC_RECORD;
+ }
+ break;
+ case 'p':
+ pack_keys=HA_PACK_KEY; /* Use prefix + space packing */
+ pack_seg=HA_SPACE_PACK;
+ key_type=HA_KEYTYPE_TEXT;
+ break;
+ case 'm':
+ unique_key=0;
+ break;
+ case 'b':
+ key_field=FIELD_BLOB; /* blob key */
+ extra_field= FIELD_BLOB;
+ pack_seg|= HA_BLOB_PART;
+ key_type= HA_KEYTYPE_VARTEXT1;
+ if (record_type == STATIC_RECORD)
+ record_type= DYNAMIC_RECORD;
+ break;
+ case 'k':
+ if (key_length < 4 || key_length > HA_MAX_KEY_LENGTH)
+ {
+ fprintf(stderr,"Wrong key length\n");
+ exit(1);
+ }
+ break;
+ case 'w':
+ key_field=FIELD_VARCHAR; /* varchar keys */
+ extra_field= FIELD_VARCHAR;
+ key_type= HA_KEYTYPE_VARTEXT1;
+ pack_seg|= HA_VAR_LENGTH_PART;
+ if (record_type == STATIC_RECORD)
+ record_type= DYNAMIC_RECORD;
+ break;
+ case 'K': /* Use key cacheing */
+ pagecacheing=1;
+ break;
+ case 'V':
+ printf("test1 Ver 1.2 \n");
+ exit(0);
+ case '#':
+ DBUG_PUSH(argument);
+ break;
+ case '?':
+ usage();
+ exit(1);
+ }
+ return 0;
+}
+
+
+/* Read options */
+
+static void get_options(int argc, char *argv[])
+{
+ int ho_error;
+
+ if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option)))
+ exit(ho_error);
+
+ return;
+} /* get options */
+
+
+static void usage()
+{
+ printf("Usage: %s [options]\n\n", my_progname);
+ my_print_help(my_long_options);
+ my_print_variables(my_long_options);
+}
diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c
new file mode 100644
index 00000000000..b196455e950
--- /dev/null
+++ b/storage/maria/ma_test2.c
@@ -0,0 +1,1197 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Test av isam-databas: stor test */
+
+#ifndef USE_MY_FUNC /* We want to be able to dbug this !! */
+#define USE_MY_FUNC
+#endif
+#ifdef DBUG_OFF
+#undef DBUG_OFF
+#endif
+#ifndef SAFEMALLOC
+#define SAFEMALLOC
+#endif
+#include "maria_def.h"
+#include "trnman.h"
+#include <m_ctype.h>
+#include <my_bit.h>
+#include "ma_checkpoint.h"
+
+#define STANDARD_LENGTH 37
+#define MARIA_KEYS 6
+#define MAX_PARTS 4
+#if !defined(MSDOS) && !defined(labs)
+#define labs(a) abs(a)
+#endif
+
+static void get_options(int argc, char *argv[]);
+static uint rnd(uint max_value);
+static void fix_length(uchar *record,uint length);
+static void put_blob_in_record(uchar *blob_pos,char **blob_buffer,
+ ulong *length);
+static void copy_key(MARIA_HA *info, uint inx, uchar *record, uchar *key);
+
+static int verbose=0,testflag=0,
+ first_key=0,async_io=0,pagecacheing=0,write_cacheing=0,locking=0,
+ rec_pointer_size=0,pack_fields=1,silent=0,
+ opt_quick_mode=0, transactional= 0, skip_update= 0,
+ die_in_middle_of_transaction= 0;
+static int pack_seg=HA_SPACE_PACK,pack_type=HA_PACK_KEY,remove_count=-1;
+static int create_flag= 0, srand_arg= 0, checkpoint= 0;
+static ulong pagecache_size=8192*32;
+static enum data_file_type record_type= DYNAMIC_RECORD;
+
+static uint keys=MARIA_KEYS,recant=1000;
+static uint use_blob=0;
+static uint16 key1[1001],key3[5000];
+static uchar record[300],record2[300],key[100],key2[100];
+static uchar read_record[300],read_record2[300],read_record3[300];
+static HA_KEYSEG glob_keyseg[MARIA_KEYS][MAX_PARTS];
+
+ /* Test program */
+
+int main(int argc, char *argv[])
+{
+ uint i;
+ int j,n1,n2,n3,error,k;
+ uint write_count,update,dupp_keys,opt_delete,start,length,blob_pos,
+ reclength,ant,found_parts;
+ my_off_t lastpos;
+ ha_rows range_records,records;
+ MARIA_HA *file;
+ MARIA_KEYDEF keyinfo[10];
+ MARIA_COLUMNDEF recinfo[10];
+ MARIA_INFO info;
+ const char *filename;
+ char *blob_buffer;
+ MARIA_CREATE_INFO create_info;
+ MY_INIT(argv[0]);
+
+ filename= "test2";
+ get_options(argc,argv);
+ if (! async_io)
+ my_disable_async_io=1;
+
+ maria_data_root= ".";
+ /* Maria requires that we always have a page cache */
+ if (maria_init() ||
+ (init_pagecache(maria_pagecache, pagecache_size, 0, 0,
+ maria_block_size, MY_WME) == 0) ||
+ ma_control_file_create_or_open() ||
+ (init_pagecache(maria_log_pagecache,
+ TRANSLOG_PAGECACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE, MY_WME) == 0) ||
+ translog_init(maria_data_root, TRANSLOG_FILE_SIZE,
+ 0, 0, maria_log_pagecache,
+ TRANSLOG_DEFAULT_FLAGS, 0) ||
+ (transactional && (trnman_init(0) || ma_checkpoint_init(0))))
+ {
+ fprintf(stderr, "Error in initialization");
+ exit(1);
+ }
+
+ reclength=STANDARD_LENGTH+60+(use_blob ? 8 : 0);
+ blob_pos=STANDARD_LENGTH+60;
+ keyinfo[0].seg= &glob_keyseg[0][0];
+ keyinfo[0].seg[0].start=0;
+ keyinfo[0].seg[0].length=6;
+ keyinfo[0].seg[0].type=HA_KEYTYPE_TEXT;
+ keyinfo[0].seg[0].language= default_charset_info->number;
+ keyinfo[0].seg[0].flag=(uint8) pack_seg;
+ keyinfo[0].seg[0].null_bit=0;
+ keyinfo[0].seg[0].null_pos=0;
+ keyinfo[0].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[0].keysegs=1;
+ keyinfo[0].flag = pack_type;
+ keyinfo[0].block_length= 0; /* Default block length */
+ keyinfo[1].seg= &glob_keyseg[1][0];
+ keyinfo[1].seg[0].start=7;
+ keyinfo[1].seg[0].length=6;
+ keyinfo[1].seg[0].type=HA_KEYTYPE_BINARY;
+ keyinfo[1].seg[0].flag=0;
+ keyinfo[1].seg[0].null_bit=0;
+ keyinfo[1].seg[0].null_pos=0;
+ keyinfo[1].seg[1].start=0; /* two part key */
+ keyinfo[1].seg[1].length=6;
+ keyinfo[1].seg[1].type=HA_KEYTYPE_NUM;
+ keyinfo[1].seg[1].flag=HA_REVERSE_SORT;
+ keyinfo[1].seg[1].null_bit=0;
+ keyinfo[1].seg[1].null_pos=0;
+ keyinfo[1].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[1].keysegs=2;
+ keyinfo[1].flag =0;
+ keyinfo[1].block_length= MARIA_MIN_KEY_BLOCK_LENGTH; /* Diff blocklength */
+ keyinfo[2].seg= &glob_keyseg[2][0];
+ keyinfo[2].seg[0].start=12;
+ keyinfo[2].seg[0].length=8;
+ keyinfo[2].seg[0].type=HA_KEYTYPE_BINARY;
+ keyinfo[2].seg[0].flag=HA_REVERSE_SORT;
+ keyinfo[2].seg[0].null_bit=0;
+ keyinfo[2].seg[0].null_pos=0;
+ keyinfo[2].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[2].keysegs=1;
+ keyinfo[2].flag =HA_NOSAME;
+ keyinfo[2].block_length= 0; /* Default block length */
+ keyinfo[3].seg= &glob_keyseg[3][0];
+ keyinfo[3].seg[0].start=0;
+ keyinfo[3].seg[0].length=reclength-(use_blob ? 8 : 0);
+ keyinfo[3].seg[0].type=HA_KEYTYPE_TEXT;
+ keyinfo[3].seg[0].language=default_charset_info->number;
+ keyinfo[3].seg[0].flag=(uint8) pack_seg;
+ keyinfo[3].seg[0].null_bit=0;
+ keyinfo[3].seg[0].null_pos=0;
+ keyinfo[3].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[3].keysegs=1;
+ keyinfo[3].flag = pack_type;
+ keyinfo[3].block_length= 0; /* Default block length */
+ keyinfo[4].seg= &glob_keyseg[4][0];
+ keyinfo[4].seg[0].start=0;
+ keyinfo[4].seg[0].length=5;
+ keyinfo[4].seg[0].type=HA_KEYTYPE_TEXT;
+ keyinfo[4].seg[0].language=default_charset_info->number;
+ keyinfo[4].seg[0].flag=0;
+ keyinfo[4].seg[0].null_bit=0;
+ keyinfo[4].seg[0].null_pos=0;
+ keyinfo[4].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[4].keysegs=1;
+ keyinfo[4].flag = pack_type;
+ keyinfo[4].block_length= 0; /* Default block length */
+ keyinfo[5].seg= &glob_keyseg[5][0];
+ keyinfo[5].seg[0].start=0;
+ keyinfo[5].seg[0].length=4;
+ keyinfo[5].seg[0].type=HA_KEYTYPE_TEXT;
+ keyinfo[5].seg[0].language=default_charset_info->number;
+ keyinfo[5].seg[0].flag=pack_seg;
+ keyinfo[5].seg[0].null_bit=0;
+ keyinfo[5].seg[0].null_pos=0;
+ keyinfo[5].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[5].keysegs=1;
+ keyinfo[5].flag = pack_type;
+ keyinfo[5].block_length= 0; /* Default block length */
+
+ recinfo[0].type=pack_fields ? FIELD_SKIP_PRESPACE : 0;
+ recinfo[0].length=7;
+ recinfo[0].null_bit=0;
+ recinfo[0].null_pos=0;
+ recinfo[1].type=pack_fields ? FIELD_SKIP_PRESPACE : 0;
+ recinfo[1].length=5;
+ recinfo[1].null_bit=0;
+ recinfo[1].null_pos=0;
+ recinfo[2].type=pack_fields ? FIELD_SKIP_PRESPACE : 0;
+ recinfo[2].length=9;
+ recinfo[2].null_bit=0;
+ recinfo[2].null_pos=0;
+ recinfo[3].type=FIELD_NORMAL;
+ recinfo[3].length=STANDARD_LENGTH-7-5-9-4;
+ recinfo[3].null_bit=0;
+ recinfo[3].null_pos=0;
+ recinfo[4].type=pack_fields ? FIELD_SKIP_ZERO : 0;
+ recinfo[4].length=4;
+ recinfo[4].null_bit=0;
+ recinfo[4].null_pos=0;
+ recinfo[5].type=pack_fields ? FIELD_SKIP_ENDSPACE : 0;
+ recinfo[5].length=60;
+ recinfo[5].null_bit=0;
+ recinfo[5].null_pos=0;
+ if (use_blob)
+ {
+ recinfo[6].type=FIELD_BLOB;
+ recinfo[6].length=4+portable_sizeof_char_ptr;
+ recinfo[6].null_bit=0;
+ recinfo[6].null_pos=0;
+ }
+
+ write_count=update=dupp_keys=opt_delete=0;
+ blob_buffer=0;
+
+ for (i=1000 ; i>0 ; i--) key1[i]=0;
+ for (i=4999 ; i>0 ; i--) key3[i]=0;
+
+ if (!silent)
+ printf("- Creating maria-file\n");
+ file= 0;
+ bzero((char*) &create_info,sizeof(create_info));
+ create_info.max_rows=(ha_rows) (rec_pointer_size ?
+ (1L << (rec_pointer_size*8))/
+ reclength : 0);
+ create_info.reloc_rows=(ha_rows) 100;
+ create_info.transactional= transactional;
+ if (maria_create(filename, record_type, keys,&keyinfo[first_key],
+ use_blob ? 7 : 6, &recinfo[0],
+ 0,(MARIA_UNIQUEDEF*) 0,
+ &create_info,create_flag))
+ goto err;
+ if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED)))
+ goto err;
+ maria_begin(file);
+ if (testflag == 1)
+ goto end;
+ if (checkpoint == 1 && ma_checkpoint_execute(CHECKPOINT_MEDIUM, FALSE))
+ goto err;
+ if (!silent)
+ printf("- Writing key:s\n");
+ if (locking)
+ maria_lock_database(file,F_WRLCK);
+ if (write_cacheing)
+ maria_extra(file,HA_EXTRA_WRITE_CACHE,0);
+ if (opt_quick_mode)
+ maria_extra(file,HA_EXTRA_QUICK,0);
+
+ for (i=0 ; i < recant ; i++)
+ {
+ ulong blob_length;
+ n1=rnd(1000); n2=rnd(100); n3=rnd(5000);
+ sprintf((char*) record,"%6d:%4d:%8d:Pos: %4d ",n1,n2,n3,write_count);
+ int4store(record+STANDARD_LENGTH-4,(long) i);
+ fix_length(record,(uint) STANDARD_LENGTH+rnd(60));
+ put_blob_in_record(record+blob_pos,&blob_buffer, &blob_length);
+ DBUG_PRINT("test",("record: %d blob_length: %lu", i, blob_length));
+
+ if (maria_write(file,record))
+ {
+ if (my_errno != HA_ERR_FOUND_DUPP_KEY || key3[n3] == 0)
+ {
+ printf("Error: %d in write at record: %d\n",my_errno,i);
+ goto err;
+ }
+ if (verbose) printf(" Double key: %d at record# %d\n", n3, i);
+ }
+ else
+ {
+ if (key3[n3] == 1 && first_key <3 && first_key+keys >= 3)
+ {
+ printf("Error: Didn't get error when writing second key: '%8d'\n",n3);
+ goto err;
+ }
+ write_count++; key1[n1]++; key3[n3]=1;
+ }
+
+ /* Check if we can find key without flushing database */
+ if (i % 10 == 0)
+ {
+ for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ;
+ if (!j)
+ for (j=999 ; j>0 && key1[j] == 0 ; j--) ;
+ sprintf((char*) key,"%6d",j);
+ if (maria_rkey(file,read_record,0,key,HA_WHOLE_KEY,HA_READ_KEY_EXACT))
+ {
+ printf("Test in loop: Can't find key: \"%s\"\n",key);
+ goto err;
+ }
+ }
+ }
+ if (checkpoint == 2 && ma_checkpoint_execute(CHECKPOINT_MEDIUM, FALSE))
+ goto err;
+
+ if (write_cacheing)
+ {
+ if (maria_extra(file,HA_EXTRA_NO_CACHE,0))
+ {
+ puts("got error from maria_extra(HA_EXTRA_NO_CACHE)");
+ goto err;
+ }
+ }
+
+ if (testflag == 2)
+ goto end;
+
+#ifdef REMOVE_WHEN_WE_HAVE_RESIZE
+ if (pagecacheing)
+ resize_pagecache(maria_pagecache, maria_block_size,
+ pagecache_size * 2, 0, 0);
+#endif
+ if (!silent)
+ printf("- Delete\n");
+ if (srand_arg)
+ srand(srand_arg);
+ for (i=0 ; i<recant/10 ; i++)
+ {
+ for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ;
+ if (j != 0)
+ {
+ sprintf((char*) key,"%6d",j);
+ if (maria_rkey(file,read_record,0,key,HA_WHOLE_KEY,HA_READ_KEY_EXACT))
+ {
+ printf("can't find key1: \"%s\"\n",key);
+ goto err;
+ }
+ if (bcmp(read_record+keyinfo[0].seg[0].start,
+ key, keyinfo[0].seg[0].length))
+ {
+ printf("Found wrong record when searching for key: \"%s\"\n",key);
+ goto err;
+ }
+ if (opt_delete == (uint) remove_count) /* While testing */
+ goto end;
+ if (maria_delete(file,read_record))
+ {
+ printf("error: %d; can't delete record: \"%s\"\n", my_errno,read_record);
+ goto err;
+ }
+ opt_delete++;
+ key1[atoi((char*) read_record+keyinfo[0].seg[0].start)]--;
+ key3[atoi((char*) read_record+keyinfo[2].seg[0].start)]=0;
+ }
+ else
+ puts("Warning: Skipping delete test because no dupplicate keys");
+ }
+ if (testflag == 3)
+ goto end;
+ if (checkpoint == 3 && ma_checkpoint_execute(CHECKPOINT_MEDIUM, FALSE))
+ goto err;
+
+ if (!silent)
+ printf("- Update\n");
+ if (srand_arg)
+ srand(srand_arg);
+ for (i=0 ; i<recant/10 ; i++)
+ {
+ n1=rnd(1000); n2=rnd(100); n3=rnd(5000);
+ sprintf((char*) record2,"%6d:%4d:%8d:XXX: %4d ",n1,n2,n3,update);
+ int4store(record2+STANDARD_LENGTH-4,(long) i);
+ fix_length(record2,(uint) STANDARD_LENGTH+rnd(60));
+
+ for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ;
+ if (j != 0)
+ {
+ sprintf((char*) key,"%6d",j);
+ if (maria_rkey(file,read_record,0,key,HA_WHOLE_KEY,HA_READ_KEY_EXACT))
+ {
+ printf("can't find key1: \"%s\"\n", (char*) key);
+ goto err;
+ }
+ if (bcmp(read_record+keyinfo[0].seg[0].start,
+ key, keyinfo[0].seg[0].length))
+ {
+ printf("Found wrong record when searching for key: \"%s\"; Found \"%.*s\"\n",
+ key, keyinfo[0].seg[0].length,
+ read_record+keyinfo[0].seg[0].start);
+ goto err;
+ }
+ if (use_blob)
+ {
+ ulong blob_length;
+ if (i & 1)
+ put_blob_in_record(record+blob_pos,&blob_buffer, &blob_length);
+ else
+ bmove(record+blob_pos,read_record+blob_pos,8);
+ }
+ if (skip_update)
+ continue;
+ if (maria_update(file,read_record,record2))
+ {
+ if (my_errno != HA_ERR_FOUND_DUPP_KEY || key3[n3] == 0)
+ {
+ printf("error: %d; can't update:\nFrom: \"%s\"\nTo: \"%s\"\n",
+ my_errno,read_record,record2);
+ goto err;
+ }
+ if (verbose)
+ printf("Double key when tried to update:\nFrom: \"%s\"\nTo: \"%s\"\n",record,record2);
+ }
+ else
+ {
+ key1[atoi((char*) read_record+keyinfo[0].seg[0].start)]--;
+ key3[atoi((char*) read_record+keyinfo[2].seg[0].start)]=0;
+ key1[n1]++; key3[n3]=1;
+ update++;
+ }
+ }
+ }
+ if (testflag == 4)
+ goto end;
+ if (checkpoint == 4 && ma_checkpoint_execute(CHECKPOINT_MEDIUM, FALSE))
+ goto err;
+
+ for (i=999, dupp_keys=j=0 ; i>0 ; i--)
+ {
+ if (key1[i] > dupp_keys)
+ {
+ dupp_keys=key1[i]; j=i;
+ }
+ }
+ sprintf((char*) key,"%6d",j);
+ start=keyinfo[0].seg[0].start;
+ length=keyinfo[0].seg[0].length;
+ if (dupp_keys)
+ {
+ if (!silent)
+ printf("- Same key: first - next -> last - prev -> first\n");
+ DBUG_PRINT("progpos",("first - next -> last - prev -> first"));
+ if (verbose) printf(" Using key: \"%s\" Keys: %d\n",key,dupp_keys);
+
+ if (maria_rkey(file,read_record,0,key,HA_WHOLE_KEY,HA_READ_KEY_EXACT))
+ goto err;
+ if (maria_rsame(file,read_record2,-1))
+ goto err;
+ if (memcmp(read_record,read_record2,reclength) != 0)
+ {
+ printf("maria_rsame didn't find same record\n");
+ goto err;
+ }
+ info.recpos=maria_position(file);
+ if (maria_rfirst(file,read_record2,0) ||
+ maria_rsame_with_pos(file,read_record2,0,info.recpos) ||
+ memcmp(read_record,read_record2,reclength) != 0)
+ {
+ printf("maria_rsame_with_pos didn't find same record\n");
+ goto err;
+ }
+ {
+ info.recpos= maria_position(file);
+ int skr=maria_rnext(file,read_record2,0);
+ if ((skr && my_errno != HA_ERR_END_OF_FILE) ||
+ maria_rprev(file,read_record2,0) ||
+ memcmp(read_record,read_record2,reclength) != 0 ||
+ info.recpos != maria_position(file))
+ {
+ printf("maria_rsame_with_pos lost position\n");
+ goto err;
+ }
+ }
+ ant=1;
+ while (maria_rnext(file,read_record2,0) == 0 &&
+ memcmp(read_record2+start,key,length) == 0) ant++;
+ if (ant != dupp_keys)
+ {
+ printf("next: Found: %d keys of %d\n",ant,dupp_keys);
+ goto err;
+ }
+ ant=0;
+ while (maria_rprev(file,read_record3,0) == 0 &&
+ bcmp(read_record3+start,key,length) == 0) ant++;
+ if (ant != dupp_keys)
+ {
+ printf("prev: Found: %d records of %d\n",ant,dupp_keys);
+ goto err;
+ }
+
+ /* Check of maria_rnext_same */
+ if (maria_rkey(file,read_record,0,key,HA_WHOLE_KEY,HA_READ_KEY_EXACT))
+ goto err;
+ ant=1;
+ while (!maria_rnext_same(file,read_record3) && ant < dupp_keys+10)
+ ant++;
+ if (ant != dupp_keys || my_errno != HA_ERR_END_OF_FILE)
+ {
+ printf("maria_rnext_same: Found: %d records of %d\n",ant,dupp_keys);
+ goto err;
+ }
+ }
+
+ if (!silent)
+ printf("- All keys: first - next -> last - prev -> first\n");
+ DBUG_PRINT("progpos",("All keys: first - next -> last - prev -> first"));
+ ant=1;
+ if (maria_rfirst(file,read_record,0))
+ {
+ printf("Can't find first record\n");
+ goto err;
+ }
+ while ((error=maria_rnext(file,read_record3,0)) == 0 && ant < write_count+10)
+ ant++;
+ if (ant != write_count - opt_delete || error != HA_ERR_END_OF_FILE)
+ {
+ printf("next: I found: %d records of %d (error: %d)\n",
+ ant, write_count - opt_delete, error);
+ goto err;
+ }
+ if (maria_rlast(file,read_record2,0) ||
+ bcmp(read_record2,read_record3,reclength))
+ {
+ printf("Can't find last record\n");
+ DBUG_DUMP("record2",(uchar*) read_record2,reclength);
+ DBUG_DUMP("record3",(uchar*) read_record3,reclength);
+ goto err;
+ }
+ ant=1;
+ while (maria_rprev(file,read_record3,0) == 0 && ant < write_count+10)
+ ant++;
+ if (ant != write_count - opt_delete)
+ {
+ printf("prev: I found: %d records of %d\n",ant,write_count);
+ goto err;
+ }
+ if (bcmp(read_record,read_record3,reclength))
+ {
+ printf("Can't find first record\n");
+ goto err;
+ }
+
+ if (!silent)
+ printf("- Test if: Read first - next - prev - prev - next == first\n");
+ DBUG_PRINT("progpos",("- Read first - next - prev - prev - next == first"));
+ if (maria_rfirst(file,read_record,0) ||
+ maria_rnext(file,read_record3,0) ||
+ maria_rprev(file,read_record3,0) ||
+ maria_rprev(file,read_record3,0) == 0 ||
+ maria_rnext(file,read_record3,0))
+ goto err;
+ if (bcmp(read_record,read_record3,reclength) != 0)
+ printf("Can't find first record\n");
+
+ if (!silent)
+ printf("- Test if: Read last - prev - next - next - prev == last\n");
+ DBUG_PRINT("progpos",("Read last - prev - next - next - prev == last"));
+ if (maria_rlast(file,read_record2,0) ||
+ maria_rprev(file,read_record3,0) ||
+ maria_rnext(file,read_record3,0) ||
+ maria_rnext(file,read_record3,0) == 0 ||
+ maria_rprev(file,read_record3,0))
+ goto err;
+ if (bcmp(read_record2,read_record3,reclength))
+ printf("Can't find last record\n");
+#ifdef NOT_ANYMORE
+ if (!silent)
+ puts("- Test read key-part");
+ strmov(key2,key);
+ for(i=strlen(key2) ; i-- > 1 ;)
+ {
+ key2[i]=0;
+
+ /* The following row is just to catch some bugs in the key code */
+ bzero((char*) file->lastkey,file->s->base.max_key_length*2);
+ if (maria_rkey(file,read_record,0,key2,(uint) i,HA_READ_PREFIX))
+ goto err;
+ if (bcmp(read_record+start,key,(uint) i))
+ {
+ puts("Didn't find right record");
+ goto err;
+ }
+ }
+#endif
+ if (dupp_keys > 2)
+ {
+ if (!silent)
+ printf("- Read key (first) - next - delete - next -> last\n");
+ DBUG_PRINT("progpos",("first - next - delete - next -> last"));
+ if (maria_rkey(file,read_record,0,key,HA_WHOLE_KEY,HA_READ_KEY_EXACT))
+ goto err;
+ if (maria_rnext(file,read_record3,0)) goto err;
+ if (maria_delete(file,read_record3)) goto err;
+ opt_delete++;
+ ant=1;
+ while (maria_rnext(file,read_record3,0) == 0 &&
+ bcmp(read_record3+start,key,length) == 0) ant++;
+ if (ant != dupp_keys-1)
+ {
+ printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-1);
+ goto err;
+ }
+ }
+ if (dupp_keys>4)
+ {
+ if (!silent)
+ printf("- Read last of key - prev - delete - prev -> first\n");
+ DBUG_PRINT("progpos",("last - prev - delete - prev -> first"));
+ if (maria_rprev(file,read_record3,0)) goto err;
+ if (maria_rprev(file,read_record3,0)) goto err;
+ if (maria_delete(file,read_record3)) goto err;
+ opt_delete++;
+ ant=1;
+ while (maria_rprev(file,read_record3,0) == 0 &&
+ bcmp(read_record3+start,key,length) == 0) ant++;
+ if (ant != dupp_keys-2)
+ {
+ printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-2);
+ goto err;
+ }
+ }
+ if (dupp_keys > 6)
+ {
+ if (!silent)
+ printf("- Read first - delete - next -> last\n");
+ DBUG_PRINT("progpos",("first - delete - next -> last"));
+ if (maria_rkey(file,read_record3,0,key,HA_WHOLE_KEY,HA_READ_KEY_EXACT))
+ goto err;
+ if (maria_delete(file,read_record3)) goto err;
+ opt_delete++;
+ ant=1;
+ if (maria_rnext(file,read_record,0))
+ goto err; /* Skall finnas poster */
+ while (maria_rnext(file,read_record3,0) == 0 &&
+ bcmp(read_record3+start,key,length) == 0) ant++;
+ if (ant != dupp_keys-3)
+ {
+ printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-3);
+ goto err;
+ }
+
+ if (!silent)
+ printf("- Read last - delete - prev -> first\n");
+ DBUG_PRINT("progpos",("last - delete - prev -> first"));
+ if (maria_rprev(file,read_record3,0)) goto err;
+ if (maria_delete(file,read_record3)) goto err;
+ opt_delete++;
+ ant=0;
+ while (maria_rprev(file,read_record3,0) == 0 &&
+ bcmp(read_record3+start,key,length) == 0) ant++;
+ if (ant != dupp_keys-4)
+ {
+ printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-4);
+ goto err;
+ }
+ }
+
+ if (!silent)
+ puts("- Test if: Read rrnd - same");
+ DBUG_PRINT("progpos",("Read rrnd - same"));
+ assert(maria_scan_init(file) == 0);
+ for (i=0 ; i < write_count ; i++)
+ {
+ int tmp;
+ if ((tmp= maria_scan(file,read_record)) &&
+ tmp != HA_ERR_END_OF_FILE &&
+ tmp != HA_ERR_RECORD_DELETED)
+ {
+ printf("Got error %d when scanning table\n", tmp);
+ break;
+ }
+ }
+ maria_scan_end(file);
+ if (i != write_count && i != write_count - opt_delete)
+ {
+ printf("Found wrong number of rows while scanning table\n");
+ goto err;
+ }
+
+ bmove(read_record2,read_record,reclength);
+ for (i=min(2,keys) ; i-- > 0 ;)
+ {
+ if (maria_rsame(file,read_record2,(int) i)) goto err;
+ if (bcmp(read_record,read_record2,reclength) != 0)
+ {
+ printf("maria_rsame didn't find same record\n");
+ goto err;
+ }
+ }
+ if (!silent)
+ puts("- Test maria_records_in_range");
+ maria_status(file,&info,HA_STATUS_VARIABLE);
+ for (i=0 ; i < info.keys ; i++)
+ {
+ key_range min_key, max_key;
+ if (maria_rfirst(file,read_record,(int) i) ||
+ maria_rlast(file,read_record2,(int) i))
+ goto err;
+ copy_key(file,(uint) i,(uchar*) read_record,(uchar*) key);
+ copy_key(file,(uint) i,(uchar*) read_record2,(uchar*) key2);
+ min_key.key= key;
+ min_key.keypart_map= HA_WHOLE_KEY;
+ min_key.flag= HA_READ_KEY_EXACT;
+ max_key.key= key2;
+ max_key.keypart_map= HA_WHOLE_KEY;
+ max_key.flag= HA_READ_AFTER_KEY;
+
+ range_records= maria_records_in_range(file,(int) i, &min_key, &max_key);
+ if (range_records < info.records*8/10 ||
+ range_records > info.records*12/10)
+ {
+ printf("maria_records_range returned %ld; Should be about %ld\n",
+ (long) range_records,(long) info.records);
+ goto err;
+ }
+ if (verbose)
+ {
+ printf("maria_records_range returned %ld; Exact is %ld (diff: %4.2g %%)\n",
+ (long) range_records, (long) info.records,
+ labs((long) range_records - (long) info.records)*100.0/
+ info.records);
+ }
+ }
+ for (i=0 ; i < 5 ; i++)
+ {
+ for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ;
+ for (k=rnd(1000)+1 ; k>0 && key1[k] == 0 ; k--) ;
+ if (j != 0 && k != 0)
+ {
+ key_range min_key, max_key;
+ if (j > k)
+ swap_variables(int, j, k);
+ sprintf((char*) key,"%6d",j);
+ sprintf((char*) key2,"%6d",k);
+
+ min_key.key= key;
+ min_key.keypart_map= HA_WHOLE_KEY;
+ min_key.flag= HA_READ_AFTER_KEY;
+ max_key.key= key2;
+ max_key.keypart_map= HA_WHOLE_KEY;
+ max_key.flag= HA_READ_BEFORE_KEY;
+ range_records= maria_records_in_range(file, 0, &min_key, &max_key);
+ records=0;
+ for (j++ ; j < k ; j++)
+ records+=key1[j];
+ if ((long) range_records < (long) records*7/10-2 ||
+ (long) range_records > (long) records*14/10+2)
+ {
+ printf("maria_records_range for key: %d returned %lu; Should be about %lu\n",
+ i, (ulong) range_records, (ulong) records);
+ goto err;
+ }
+ if (verbose && records)
+ {
+ printf("maria_records_range returned %lu; Exact is %lu (diff: %4.2g %%)\n",
+ (ulong) range_records, (ulong) records,
+ labs((long) range_records-(long) records)*100.0/records);
+
+ }
+ }
+ }
+
+ if (!silent)
+ printf("- maria_info\n");
+ maria_status(file,&info,HA_STATUS_VARIABLE | HA_STATUS_CONST);
+ if (info.records != write_count-opt_delete || info.deleted > opt_delete + update
+ || info.keys != keys)
+ {
+ puts("Wrong info from maria_info");
+ printf("Got: records: %lu delete: %lu i_keys: %d\n",
+ (ulong) info.records, (ulong) info.deleted, info.keys);
+ goto err;
+ }
+ if (verbose)
+ {
+ char buff[80];
+ get_date(buff,3,info.create_time);
+ printf("info: Created %s\n",buff);
+ get_date(buff,3,info.check_time);
+ printf("info: checked %s\n",buff);
+ get_date(buff,3,info.update_time);
+ printf("info: Modified %s\n",buff);
+ }
+
+ maria_panic(HA_PANIC_WRITE);
+ maria_panic(HA_PANIC_READ);
+ if (maria_is_changed(file))
+ puts("Warning: maria_is_changed reported that datafile was changed");
+
+ if (!silent)
+ printf("- maria_extra(CACHE) + maria_rrnd.... + maria_extra(NO_CACHE)\n");
+ if (maria_reset(file) || maria_extra(file,HA_EXTRA_CACHE,0))
+ {
+ if (locking || (!use_blob && !pack_fields))
+ {
+ puts("got error from maria_extra(HA_EXTRA_CACHE)");
+ goto err;
+ }
+ }
+ ant=0;
+ assert(maria_scan_init(file) == 0);
+ while ((error= maria_scan(file,record)) != HA_ERR_END_OF_FILE &&
+ ant < write_count + 10)
+ ant+= error ? 0 : 1;
+ maria_scan_end(file);
+ if (ant != write_count-opt_delete)
+ {
+ printf("scan with cache: I can only find: %d records of %d\n",
+ ant,write_count-opt_delete);
+ maria_scan_end(file);
+ goto err;
+ }
+ if (maria_extra(file,HA_EXTRA_NO_CACHE,0))
+ {
+ puts("got error from maria_extra(HA_EXTRA_NO_CACHE)");
+ maria_scan_end(file);
+ goto err;
+ }
+ maria_scan_end(file);
+
+ ant=0;
+ maria_scan_init(file);
+ while ((error=maria_scan(file,record)) != HA_ERR_END_OF_FILE &&
+ ant < write_count + 10)
+ ant+= error ? 0 : 1;
+ if (ant != write_count-opt_delete)
+ {
+ printf("scan with cache: I can only find: %d records of %d\n",
+ ant,write_count-opt_delete);
+ maria_scan_end(file);
+ goto err;
+ }
+ maria_scan_end(file);
+
+ if (testflag == 5)
+ goto end;
+ if (checkpoint == 5 && ma_checkpoint_execute(CHECKPOINT_MEDIUM, FALSE))
+ goto err;
+
+ if (!silent)
+ printf("- Removing keys\n");
+ DBUG_PRINT("progpos",("Removing keys"));
+ lastpos = HA_OFFSET_ERROR;
+ /* DBUG_POP(); */
+ maria_reset(file);
+ found_parts=0;
+ maria_scan_init(file);
+ while ((error= maria_scan(file,read_record)) != HA_ERR_END_OF_FILE)
+ {
+ info.recpos=maria_position(file);
+ if (lastpos >= info.recpos && lastpos != HA_OFFSET_ERROR)
+ {
+ printf("maria_rrnd didn't advance filepointer; old: %ld, new: %ld\n",
+ (long) lastpos, (long) info.recpos);
+ goto err;
+ }
+ lastpos=info.recpos;
+ if (error == 0)
+ {
+ if (opt_delete == (uint) remove_count) /* While testing */
+ goto end;
+ if (rnd(2) == 1 && maria_rsame(file,read_record,-1))
+ {
+ printf("can't find record %lx\n",(long) info.recpos);
+ goto err;
+ }
+ if (use_blob)
+ {
+ ulong blob_length,pos;
+ uchar *ptr;
+ memcpy_fixed(&ptr, read_record+blob_pos+4, sizeof(ptr));
+ longget(blob_length,read_record+blob_pos);
+ for (pos=0 ; pos < blob_length ; pos++)
+ {
+ if (ptr[pos] != (uchar) (blob_length+pos))
+ {
+ printf("Found blob with wrong info at %ld\n",(long) lastpos);
+ maria_scan_end(file);
+ my_errno= 0;
+ goto err;
+ }
+ }
+ }
+ if (maria_delete(file,read_record))
+ {
+ printf("can't delete record: %6.6s, delete_count: %d\n",
+ read_record, opt_delete);
+ maria_scan_end(file);
+ goto err;
+ }
+ opt_delete++;
+ }
+ else
+ found_parts++;
+ }
+ if (my_errno != HA_ERR_END_OF_FILE && my_errno != HA_ERR_RECORD_DELETED)
+ printf("error: %d from maria_rrnd\n",my_errno);
+ if (write_count != opt_delete)
+ {
+ printf("Deleted only %d of %d records (%d parts)\n",opt_delete,write_count,
+ found_parts);
+ maria_scan_end(file);
+ goto err;
+ }
+end:
+ maria_scan_end(file);
+ if (die_in_middle_of_transaction)
+ {
+ /* As commit record is not done, UNDO entries needs to be rolled back */
+ switch (die_in_middle_of_transaction) {
+ case 1:
+ /*
+ Flush changed data and index pages go to disk
+ That will also flush log. Recovery will skip REDOs and apply UNDOs.
+ */
+ _ma_flush_table_files(file, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
+ FLUSH_RELEASE, FLUSH_RELEASE);
+ break;
+ case 2:
+ /*
+ Just flush log. Pages are likely to not be on disk. Recovery will
+ then execute REDOs and UNDOs.
+ */
+ if (translog_flush(file->trn->undo_lsn))
+ goto err;
+ break;
+ case 3:
+ /*
+ Flush nothing. Pages and log are likely to not be on disk. Recovery
+ will then do nothing.
+ */
+ break;
+ case 4:
+ /*
+ Flush changed data pages go to disk. Changed index pages are not
+ flushed. Recovery will skip some REDOs and apply UNDOs.
+ */
+ _ma_flush_table_files(file, MARIA_FLUSH_DATA, FLUSH_RELEASE,
+ FLUSH_RELEASE);
+ /*
+ We have to flush log separately as the redo for the last key page
+ may not be flushed
+ */
+ if (translog_flush(file->trn->undo_lsn))
+ goto err;
+ break;
+ }
+ printf("Dying on request without maria_commit()/maria_close()\n");
+ exit(0);
+ }
+ if (maria_commit(file))
+ goto err;
+ if (maria_close(file))
+ {
+ file= 0;
+ goto err;
+ }
+ file= 0;
+ maria_panic(HA_PANIC_CLOSE); /* Should close log */
+ if (!silent)
+ {
+ printf("\nFollowing test have been made:\n");
+ printf("Write records: %d\nUpdate records: %d\nSame-key-read: %d\nDelete records: %d\n", write_count,update,dupp_keys,opt_delete);
+ if (rec_pointer_size)
+ printf("Record pointer size: %d\n",rec_pointer_size);
+ printf("maria_block_size: %lu\n", maria_block_size);
+ if (write_cacheing)
+ puts("Key cache resized");
+ if (write_cacheing)
+ puts("Write cacheing used");
+ if (write_cacheing)
+ puts("quick mode");
+ if (async_io && locking)
+ puts("Asyncron io with locking used");
+ else if (locking)
+ puts("Locking used");
+ if (use_blob)
+ puts("blobs used");
+ printf("key cache status: \n\
+blocks used:%10lu\n\
+not flushed:%10lu\n\
+w_requests: %10lu\n\
+writes: %10lu\n\
+r_requests: %10lu\n\
+reads: %10lu\n",
+ maria_pagecache->blocks_used,
+ maria_pagecache->global_blocks_changed,
+ (ulong) maria_pagecache->global_cache_w_requests,
+ (ulong) maria_pagecache->global_cache_write,
+ (ulong) maria_pagecache->global_cache_r_requests,
+ (ulong) maria_pagecache->global_cache_read);
+ }
+ maria_end();
+ my_free(blob_buffer, MYF(MY_ALLOW_ZERO_PTR));
+ my_end(silent ? MY_CHECK_ERROR : MY_CHECK_ERROR | MY_GIVE_INFO);
+ return(0);
+err:
+ printf("got error: %d when using MARIA-database\n",my_errno);
+ if (file)
+ {
+ if (maria_commit(file))
+ goto err;
+ VOID(maria_close(file));
+ }
+ maria_end();
+ return(1);
+} /* main */
+
+
+/* Read options */
+
+static void get_options(int argc, char **argv)
+{
+ char *pos,*progname;
+
+ progname= argv[0];
+
+ while (--argc >0 && *(pos = *(++argv)) == '-' ) {
+ switch(*++pos) {
+ case 'B':
+ pack_type= HA_BINARY_PACK_KEY;
+ break;
+ case 'b':
+ use_blob= 1000;
+ if (*++pos)
+ use_blob= atol(pos);
+ break;
+ case 'K': /* Use key cacheing */
+ pagecacheing=1;
+ if (*++pos)
+ pagecache_size=atol(pos);
+ break;
+ case 'W': /* Use write cacheing */
+ write_cacheing=1;
+ if (*++pos)
+ my_default_record_cache_size=atoi(pos);
+ break;
+ case 'd':
+ remove_count= atoi(++pos);
+ break;
+ case 'i':
+ if (*++pos)
+ srand(srand_arg= atoi(pos));
+ break;
+ case 'L':
+ locking=1;
+ break;
+ case 'A': /* use asyncron io */
+ async_io=1;
+ if (*++pos)
+ my_default_record_cache_size=atoi(pos);
+ break;
+ case 'v': /* verbose */
+ verbose=1;
+ break;
+ case 'm': /* records */
+ if ((recant=atoi(++pos)) < 10 && testflag > 2)
+ {
+ fprintf(stderr,"record count must be >= 10 (if testflag > 2)\n");
+ exit(1);
+ }
+ break;
+ case 'e': /* maria_block_length */
+ case 'E':
+ if ((maria_block_size= atoi(++pos)) < MARIA_MIN_KEY_BLOCK_LENGTH ||
+ maria_block_size > MARIA_MAX_KEY_BLOCK_LENGTH)
+ {
+ fprintf(stderr,"Wrong maria_block_length\n");
+ exit(1);
+ }
+ maria_block_size= my_round_up_to_next_power(maria_block_size);
+ break;
+ case 'f':
+ if ((first_key=atoi(++pos)) < 0 || first_key >= MARIA_KEYS)
+ first_key=0;
+ break;
+ case 'H':
+ checkpoint= atoi(++pos);
+ break;
+ case 'k':
+ if ((keys=(uint) atoi(++pos)) < 1 ||
+ keys > (uint) (MARIA_KEYS-first_key))
+ keys=MARIA_KEYS-first_key;
+ break;
+ case 'M':
+ record_type= BLOCK_RECORD;
+ break;
+ case 'P':
+ pack_type=0; /* Don't use DIFF_LENGTH */
+ pack_seg=0;
+ break;
+ case 'R': /* Length of record pointer */
+ rec_pointer_size=atoi(++pos);
+ if (rec_pointer_size > 7)
+ rec_pointer_size=0;
+ break;
+ case 'S':
+ pack_fields=0; /* Static-length-records */
+ record_type= STATIC_RECORD;
+ break;
+ case 's':
+ silent=1;
+ break;
+ case 't':
+ testflag=atoi(++pos); /* testmod */
+ break;
+ case 'T':
+ transactional= 1;
+ break;
+ case 'u':
+ die_in_middle_of_transaction= atoi(++pos);
+ break;
+ case 'q':
+ opt_quick_mode=1;
+ break;
+ case 'c':
+ create_flag|= HA_CREATE_CHECKSUM | HA_CREATE_PAGE_CHECKSUM;
+ break;
+ case 'D':
+ create_flag|=HA_CREATE_DELAY_KEY_WRITE;
+ break;
+ case 'g':
+ skip_update= TRUE;
+ break;
+ case '?':
+ case 'I':
+ case 'V':
+ printf("%s Ver 1.0 for %s at %s\n",progname,SYSTEM_TYPE,MACHINE_TYPE);
+ puts("By Monty, for your professional use\n");
+ printf("Usage: %s [-?AbBcDIKLPRqSsTVWltv] [-k#] [-f#] [-m#] [-e#] [-E#] [-t#]\n",
+ progname);
+ exit(0);
+ case '#':
+ DBUG_PUSH (++pos);
+ break;
+ default:
+ printf("Illegal option: '%c'\n",*pos);
+ break;
+ }
+ }
+ return;
+} /* get options */
+
+ /* Get a random value 0 <= x <= n */
+
+static uint rnd(uint max_value)
+{
+ return (uint) ((rand() & 32767)/32767.0*max_value);
+} /* rnd */
+
+
+ /* Create a variable length record */
+
+static void fix_length(uchar *rec, uint length)
+{
+ bmove(rec+STANDARD_LENGTH,
+ "0123456789012345678901234567890123456789012345678901234567890",
+ length-STANDARD_LENGTH);
+ strfill((char*) rec+length,STANDARD_LENGTH+60-length,' ');
+} /* fix_length */
+
+
+ /* Put maybe a blob in record */
+
+static void put_blob_in_record(uchar *blob_pos, char **blob_buffer,
+ ulong *blob_length)
+{
+ ulong i,length;
+ *blob_length= 0;
+ if (use_blob)
+ {
+ if (rnd(10) == 0)
+ {
+ if (! *blob_buffer &&
+ !(*blob_buffer=my_malloc((uint) use_blob,MYF(MY_WME))))
+ {
+ use_blob=0;
+ return;
+ }
+ length=rnd(use_blob);
+ for (i=0 ; i < length ; i++)
+ (*blob_buffer)[i]=(char) (length+i);
+ int4store(blob_pos,length);
+ memcpy_fixed(blob_pos+4,(char*) blob_buffer,sizeof(char*));
+ *blob_length= length;
+ }
+ else
+ {
+ int4store(blob_pos,0);
+ }
+ }
+ return;
+}
+
+
+static void copy_key(MARIA_HA *info,uint inx,uchar *rec,uchar *key_buff)
+{
+ HA_KEYSEG *keyseg;
+
+ for (keyseg=info->s->keyinfo[inx].seg ; keyseg->type ; keyseg++)
+ {
+ memcpy(key_buff,rec+keyseg->start,(size_t) keyseg->length);
+ key_buff+=keyseg->length;
+ }
+ return;
+}
diff --git a/storage/maria/ma_test3.c b/storage/maria/ma_test3.c
new file mode 100644
index 00000000000..de8b4c83a51
--- /dev/null
+++ b/storage/maria/ma_test3.c
@@ -0,0 +1,501 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Test av locking */
+
+#ifndef __NETWARE__
+
+#include "maria.h"
+#include <sys/types.h>
+#ifdef HAVE_SYS_WAIT_H
+# include <sys/wait.h>
+#endif
+#ifndef WEXITSTATUS
+# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8)
+#endif
+#ifndef WIFEXITED
+# define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
+#endif
+
+
+#if defined(HAVE_LRAND48)
+#define rnd(X) (lrand48() % X)
+#define rnd_init(X) srand48(X)
+#else
+#define rnd(X) (random() % X)
+#define rnd_init(X) srandom(X)
+#endif
+
+
+const char *filename= "test3";
+uint tests=10,forks=10,pagecacheing=0;
+
+static void get_options(int argc, char *argv[]);
+void start_test(int id);
+int test_read(MARIA_HA *,int),test_write(MARIA_HA *,int,int),
+ test_update(MARIA_HA *,int,int),test_rrnd(MARIA_HA *,int);
+
+struct record {
+ uchar id[8];
+ uchar nr[4];
+ uchar text[10];
+} record;
+
+
+int main(int argc,char **argv)
+{
+ int status,wait_ret;
+ uint i=0;
+ MARIA_KEYDEF keyinfo[10];
+ MARIA_COLUMNDEF recinfo[10];
+ HA_KEYSEG keyseg[10][2];
+ MY_INIT(argv[0]);
+ get_options(argc,argv);
+
+ fprintf(stderr, "WARNING! this program is to test 'external locking'"
+ " (when several processes share a table through file locking)"
+ " which is not supported by Maria at all; expect errors."
+ " We may soon remove this program.\n");
+ maria_init();
+ bzero((char*) keyinfo,sizeof(keyinfo));
+ bzero((char*) recinfo,sizeof(recinfo));
+ bzero((char*) keyseg,sizeof(keyseg));
+ keyinfo[0].seg= &keyseg[0][0];
+ keyinfo[0].seg[0].start=0;
+ keyinfo[0].seg[0].length=8;
+ keyinfo[0].seg[0].type=HA_KEYTYPE_TEXT;
+ keyinfo[0].seg[0].flag=HA_SPACE_PACK;
+ keyinfo[0].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[0].keysegs=1;
+ keyinfo[0].flag = (uint8) HA_PACK_KEY;
+ keyinfo[0].block_length= 0; /* Default block length */
+ keyinfo[1].seg= &keyseg[1][0];
+ keyinfo[1].seg[0].start=8;
+ keyinfo[1].seg[0].length=4; /* Long is always 4 in maria */
+ keyinfo[1].seg[0].type=HA_KEYTYPE_LONG_INT;
+ keyinfo[1].seg[0].flag=0;
+ keyinfo[1].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[1].keysegs=1;
+ keyinfo[1].flag =HA_NOSAME;
+ keyinfo[1].block_length= 0; /* Default block length */
+
+ recinfo[0].type=0;
+ recinfo[0].length=sizeof(record.id);
+ recinfo[1].type=0;
+ recinfo[1].length=sizeof(record.nr);
+ recinfo[2].type=0;
+ recinfo[2].length=sizeof(record.text);
+
+ puts("- Creating maria-file");
+ my_delete(filename,MYF(0)); /* Remove old locks under gdb */
+ if (maria_create(filename,BLOCK_RECORD, 2, &keyinfo[0],2,&recinfo[0],0,
+ (MARIA_UNIQUEDEF*) 0, (MARIA_CREATE_INFO*) 0,0))
+ exit(1);
+
+ rnd_init(0);
+ printf("- Starting %d processes\n",forks); fflush(stdout);
+ for (i=0 ; i < forks; i++)
+ {
+ if (!fork())
+ {
+ start_test(i+1);
+ sleep(1);
+ return 0;
+ }
+ VOID(rnd(1));
+ }
+
+ for (i=0 ; i < forks ; i++)
+ while ((wait_ret=wait(&status)) && wait_ret == -1);
+ maria_end();
+ return 0;
+}
+
+
+static void get_options(int argc, char **argv)
+{
+ char *pos,*progname;
+
+ progname= argv[0];
+
+ while (--argc >0 && *(pos = *(++argv)) == '-' ) {
+ switch(*++pos) {
+ case 'f':
+ forks=atoi(++pos);
+ break;
+ case 't':
+ tests=atoi(++pos);
+ break;
+ case 'K': /* Use key cacheing */
+ pagecacheing=1;
+ break;
+ case 'A': /* All flags */
+ pagecacheing=1;
+ break;
+ case '?':
+ case 'I':
+ case 'V':
+ printf("%s Ver 1.0 for %s at %s\n",progname,SYSTEM_TYPE,MACHINE_TYPE);
+ puts("By Monty, for your professional use\n");
+ puts("Test av locking with threads\n");
+ printf("Usage: %s [-?lKA] [-f#] [-t#]\n",progname);
+ exit(0);
+ case '#':
+ DBUG_PUSH (++pos);
+ break;
+ default:
+ printf("Illegal option: '%c'\n",*pos);
+ break;
+ }
+ }
+ return;
+}
+
+
+void start_test(int id)
+{
+ uint i;
+ int error,lock_type;
+ MARIA_INFO isam_info;
+ MARIA_HA *file,*file1,*file2=0,*lock;
+
+ if (!(file1=maria_open(filename,O_RDWR,HA_OPEN_WAIT_IF_LOCKED)) ||
+ !(file2=maria_open(filename,O_RDWR,HA_OPEN_WAIT_IF_LOCKED)))
+ {
+ fprintf(stderr,"Can't open isam-file: %s\n",filename);
+ exit(1);
+ }
+ if (pagecacheing && rnd(2) == 0)
+ init_pagecache(maria_pagecache, 65536L, 0, 0, MARIA_KEY_BLOCK_LENGTH,
+ MY_WME);
+ printf("Process %d, pid: %d\n",id,getpid()); fflush(stdout);
+
+ for (error=i=0 ; i < tests && !error; i++)
+ {
+ file= (rnd(2) == 1) ? file1 : file2;
+ lock=0 ; lock_type=0;
+ if (rnd(10) == 0)
+ {
+ if (maria_lock_database(lock=(rnd(2) ? file1 : file2),
+ lock_type=(rnd(2) == 0 ? F_RDLCK : F_WRLCK)))
+ {
+ fprintf(stderr,"%2d: start: Can't lock table %d\n",id,my_errno);
+ error=1;
+ break;
+ }
+ }
+ switch (rnd(4)) {
+ case 0: error=test_read(file,id); break;
+ case 1: error=test_rrnd(file,id); break;
+ case 2: error=test_write(file,id,lock_type); break;
+ case 3: error=test_update(file,id,lock_type); break;
+ }
+ if (lock)
+ maria_lock_database(lock,F_UNLCK);
+ }
+ if (!error)
+ {
+ maria_status(file1,&isam_info,HA_STATUS_VARIABLE);
+ printf("%2d: End of test. Records: %ld Deleted: %ld\n",
+ id,(long) isam_info.records, (long) isam_info.deleted);
+ fflush(stdout);
+ }
+
+ maria_close(file1);
+ maria_close(file2);
+ if (error)
+ {
+ printf("%2d: Aborted\n",id); fflush(stdout);
+ exit(1);
+ }
+}
+
+
+int test_read(MARIA_HA *file,int id)
+{
+ uint i,lock,found,next,prev;
+ ulong find;
+
+ lock=0;
+ if (rnd(2) == 0)
+ {
+ lock=1;
+ if (maria_lock_database(file,F_RDLCK))
+ {
+ fprintf(stderr,"%2d: Can't lock table %d\n",id,my_errno);
+ return 1;
+ }
+ }
+
+ found=next=prev=0;
+ for (i=0 ; i < 100 ; i++)
+ {
+ find=rnd(100000);
+ if (!maria_rkey(file,record.id,1,(uchar*) &find, HA_WHOLE_KEY,
+ HA_READ_KEY_EXACT))
+ found++;
+ else
+ {
+ if (my_errno != HA_ERR_KEY_NOT_FOUND)
+ {
+ fprintf(stderr,"%2d: Got error %d from read in read\n",id,my_errno);
+ return 1;
+ }
+ else if (!maria_rnext(file,record.id,1))
+ next++;
+ else
+ {
+ if (my_errno != HA_ERR_END_OF_FILE)
+ {
+ fprintf(stderr,"%2d: Got error %d from rnext in read\n",id,my_errno);
+ return 1;
+ }
+ else if (!maria_rprev(file,record.id,1))
+ prev++;
+ else
+ {
+ if (my_errno != HA_ERR_END_OF_FILE)
+ {
+ fprintf(stderr,"%2d: Got error %d from rnext in read\n",
+ id,my_errno);
+ return 1;
+ }
+ }
+ }
+ }
+ }
+ if (lock)
+ {
+ if (maria_lock_database(file,F_UNLCK))
+ {
+ fprintf(stderr,"%2d: Can't unlock table\n",id);
+ return 1;
+ }
+ }
+ printf("%2d: read: found: %5d next: %5d prev: %5d\n",
+ id,found,next,prev);
+ fflush(stdout);
+ return 0;
+}
+
+
+int test_rrnd(MARIA_HA *file,int id)
+{
+ uint count,lock;
+
+ lock=0;
+ if (rnd(2) == 0)
+ {
+ lock=1;
+ if (maria_lock_database(file,F_RDLCK))
+ {
+ fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno);
+ maria_close(file);
+ return 1;
+ }
+ if (rnd(2) == 0)
+ maria_extra(file,HA_EXTRA_CACHE,0);
+ }
+
+ count=0;
+ if (maria_rrnd(file,record.id,0L))
+ {
+ if (my_errno == HA_ERR_END_OF_FILE)
+ goto end;
+ fprintf(stderr,"%2d: Can't read first record (%d)\n",id,my_errno);
+ return 1;
+ }
+ for (count=1 ; !maria_rrnd(file,record.id,HA_OFFSET_ERROR) ;count++) ;
+ if (my_errno != HA_ERR_END_OF_FILE)
+ {
+ fprintf(stderr,"%2d: Got error %d from rrnd\n",id,my_errno);
+ return 1;
+ }
+
+end:
+ if (lock)
+ {
+ maria_extra(file,HA_EXTRA_NO_CACHE,0);
+ if (maria_lock_database(file,F_UNLCK))
+ {
+ fprintf(stderr,"%2d: Can't unlock table\n",id);
+ exit(0);
+ }
+ }
+ printf("%2d: rrnd: %5d\n",id,count); fflush(stdout);
+ return 0;
+}
+
+
+int test_write(MARIA_HA *file,int id,int lock_type)
+{
+ uint i,tries,count,lock;
+
+ lock=0;
+ if (rnd(2) == 0 || lock_type == F_RDLCK)
+ {
+ lock=1;
+ if (maria_lock_database(file,F_WRLCK))
+ {
+ if (lock_type == F_RDLCK && my_errno == EDEADLK)
+ {
+ printf("%2d: write: deadlock\n",id); fflush(stdout);
+ return 0;
+ }
+ fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno);
+ maria_close(file);
+ return 1;
+ }
+ if (rnd(2) == 0)
+ maria_extra(file,HA_EXTRA_WRITE_CACHE,0);
+ }
+
+ sprintf((char*) record.id,"%7d",getpid());
+ strnmov((char*) record.text,"Testing...", sizeof(record.text));
+
+ tries=(uint) rnd(100)+10;
+ for (i=count=0 ; i < tries ; i++)
+ {
+ uint32 tmp=rnd(80000)+20000;
+ int4store(record.nr,tmp);
+ if (!maria_write(file,record.id))
+ count++;
+ else
+ {
+ if (my_errno != HA_ERR_FOUND_DUPP_KEY)
+ {
+ fprintf(stderr,"%2d: Got error %d (errno %d) from write\n",id,my_errno,
+ errno);
+ return 1;
+ }
+ }
+ }
+ if (lock)
+ {
+ maria_extra(file,HA_EXTRA_NO_CACHE,0);
+ if (maria_lock_database(file,F_UNLCK))
+ {
+ fprintf(stderr,"%2d: Can't unlock table\n",id);
+ exit(0);
+ }
+ }
+ printf("%2d: write: %5d\n",id,count); fflush(stdout);
+ return 0;
+}
+
+
+int test_update(MARIA_HA *file,int id,int lock_type)
+{
+ uint i,lock,found,next,prev,update;
+ uint32 tmp;
+ char find[4];
+ struct record new_record;
+
+ lock=0;
+ if (rnd(2) == 0 || lock_type == F_RDLCK)
+ {
+ lock=1;
+ if (maria_lock_database(file,F_WRLCK))
+ {
+ if (lock_type == F_RDLCK && my_errno == EDEADLK)
+ {
+ printf("%2d: write: deadlock\n",id); fflush(stdout);
+ return 0;
+ }
+ fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno);
+ return 1;
+ }
+ }
+ bzero((char*) &new_record,sizeof(new_record));
+ strmov((char*) new_record.text,"Updated");
+
+ found=next=prev=update=0;
+ for (i=0 ; i < 100 ; i++)
+ {
+ tmp=rnd(100000);
+ int4store(find,tmp);
+ if (!maria_rkey(file,record.id,1,(uchar*) find, HA_WHOLE_KEY,
+ HA_READ_KEY_EXACT))
+ found++;
+ else
+ {
+ if (my_errno != HA_ERR_KEY_NOT_FOUND)
+ {
+ fprintf(stderr,"%2d: Got error %d from read in update\n",id,my_errno);
+ return 1;
+ }
+ else if (!maria_rnext(file,record.id,1))
+ next++;
+ else
+ {
+ if (my_errno != HA_ERR_END_OF_FILE)
+ {
+ fprintf(stderr,"%2d: Got error %d from rnext in update\n",
+ id,my_errno);
+ return 1;
+ }
+ else if (!maria_rprev(file,record.id,1))
+ prev++;
+ else
+ {
+ if (my_errno != HA_ERR_END_OF_FILE)
+ {
+ fprintf(stderr,"%2d: Got error %d from rnext in update\n",
+ id,my_errno);
+ return 1;
+ }
+ continue;
+ }
+ }
+ }
+ memcpy_fixed(new_record.id,record.id,sizeof(record.id));
+ tmp=rnd(20000)+40000;
+ int4store(new_record.nr,tmp);
+ if (!maria_update(file,record.id,new_record.id))
+ update++;
+ else
+ {
+ if (my_errno != HA_ERR_RECORD_CHANGED &&
+ my_errno != HA_ERR_RECORD_DELETED &&
+ my_errno != HA_ERR_FOUND_DUPP_KEY)
+ {
+ fprintf(stderr,"%2d: Got error %d from update\n",id,my_errno);
+ return 1;
+ }
+ }
+ }
+ if (lock)
+ {
+ if (maria_lock_database(file,F_UNLCK))
+ {
+ fprintf(stderr,"Can't unlock table,id, error%d\n",my_errno);
+ return 1;
+ }
+ }
+ printf("%2d: update: %5d\n",id,update); fflush(stdout);
+ return 0;
+}
+
+#else /* __NETWARE__ */
+
+#include <stdio.h>
+
+main()
+{
+ fprintf(stderr,"this test has not been ported to NetWare\n");
+ return 0;
+}
+
+#endif /* __NETWARE__ */
diff --git a/storage/maria/ma_test_all.res b/storage/maria/ma_test_all.res
new file mode 100644
index 00000000000..586aaf68020
--- /dev/null
+++ b/storage/maria/ma_test_all.res
@@ -0,0 +1,14 @@
+Running tests with dynamic row format
+Running tests with static row format
+Running tests with block row format
+Running tests with block row format and transactions
+ma_test2 -s -L -K -R1 -m2000 ; Should give error 135
+Error: 135 in write at record: 1099
+got error: 135 when using MARIA-database
+./maria_chk -sm test2 will warn that 'Datafile is almost full'
+maria_chk: MARIA file test2
+maria_chk: warning: Datafile is almost full, 65516 of 65534 used
+MARIA-table 'test2' is usable but should be fixed
+MARIA RECOVERY TESTS
+ALL RECOVERY TESTS OK
+!!!!!!!! BUT REMEMBER to FIX this BLOB issue !!!!!!!
diff --git a/storage/maria/ma_test_all.sh b/storage/maria/ma_test_all.sh
new file mode 100755
index 00000000000..d084af49604
--- /dev/null
+++ b/storage/maria/ma_test_all.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+#
+# This file is now deprecated and has been replaced by
+# unittest/ma_test_all-t
+#
+
+./unittest/ma_test_all-t
diff --git a/storage/maria/ma_test_recovery b/storage/maria/ma_test_recovery
new file mode 100755
index 00000000000..ee1d8c5366f
--- /dev/null
+++ b/storage/maria/ma_test_recovery
@@ -0,0 +1,213 @@
+#!/bin/sh
+
+#set -x -v
+set -e
+silent="-s"
+if [ -z "$maria_path" ]
+then
+ maria_path="."
+fi
+
+# test data is always put in the current directory or a tmp subdirectory of it
+tmp="./tmp"
+
+if test '!' -d $tmp
+then
+ mkdir $tmp
+fi
+
+echo "MARIA RECOVERY TESTS"
+
+check_table_is_same()
+{
+ # Computes checksum of new table and compares to checksum of old table
+ # Shows any difference in table's state (info from the index's header)
+ # Data/key file length is random in ma_test2 (as it uses srand() which
+ # may differ between machines).
+
+ $maria_path/maria_chk -dvv $table | grep -v "Creation time:" | grep -v "file length"> $tmp/maria_chk_message.txt 2>&1
+
+ $maria_path/maria_chk -s -e --read-only $table
+ checksum2=`$maria_path/maria_chk -dss $table`
+ if test "$checksum" != "$checksum2"
+ then
+ echo "checksum differs for $table before and after recovery"
+ return 1;
+ fi
+
+ diff $tmp/maria_chk_message.good.txt $tmp/maria_chk_message.txt > $tmp/maria_chk_diff.txt || true
+ if [ -s $tmp/maria_chk_diff.txt ]
+ then
+ echo "Differences in maria_chk -dvv, recovery not yet perfect !"
+ echo "========DIFF START======="
+ cat $tmp/maria_chk_diff.txt
+ echo "========DIFF END======="
+ fi
+}
+
+apply_log()
+{
+ # applies log, can verify if applying did write to log or not
+
+ shouldchangelog=$1
+ if [ "$shouldchangelog" != "shouldnotchangelog" ] &&
+ [ "$shouldchangelog" != "shouldchangelog" ] &&
+ [ "$shouldchangelog" != "dontknow" ]
+ then
+ echo "bad argument '$shouldchangelog'"
+ return 1
+ fi
+ log_md5=`md5sum maria_log.*`
+ echo "applying log"
+ $maria_path/maria_read_log -a > $tmp/maria_read_log_$table.txt
+ log_md5_2=`md5sum maria_log.*`
+ if [ "$log_md5" != "$log_md5_2" ]
+ then
+ if [ "$shouldchangelog" == "shouldnotchangelog" ]
+ then
+ echo "maria_read_log should not have modified the log"
+ return 1
+ fi
+ else
+ if [ "$shouldchangelog" == "shouldchangelog" ]
+ then
+ echo "maria_read_log should have modified the log"
+ return 1
+ fi
+ fi
+}
+
+# To not flood the screen, we redirect all the commands below to a text file
+# and just give a final error if their output is not as expected
+
+(
+
+echo "Testing the REDO PHASE ALONE"
+# runs a program inserting/deleting rows, then moves the resulting table
+# elsewhere; applies the log and checks that the data file is
+# identical to the saved original.
+# Does not test the index file as we don't have logging for it yet.
+
+set -- "ma_test1 $silent -M -T -c" "ma_test2 $silent -L -K -W -P -M -T -c" "ma_test2 $silent -M -T -c -b65000"
+while [ $# != 0 ]
+do
+ prog=$1
+ rm -f maria_log.* maria_log_control
+ echo "TEST WITH $prog"
+ $maria_path/$prog
+ # derive table's name from program's name
+ table=`echo $prog | sed -e 's;.*ma_\(test[0-9]\).*;\1;' `
+ $maria_path/maria_chk -dvv $table | grep -v "Creation time:" | grep -v "file length"> $tmp/maria_chk_message.good.txt 2>&1
+ checksum=`$maria_path/maria_chk -dss $table`
+ mv $table.MAD $tmp/$table-good.MAD
+ mv $table.MAI $tmp/$table-good.MAI
+ apply_log "shouldnotchangelog"
+ cmp $table.MAD $tmp/$table-good.MAD
+ cmp $table.MAI $tmp/$table-good.MAI
+ check_table_is_same
+ echo "testing idempotency"
+ apply_log "shouldnotchangelog"
+ cmp $table.MAD $tmp/$table-good.MAD
+ cmp $table.MAI $tmp/$table-good.MAI
+ check_table_is_same
+ shift
+done
+
+echo "Testing the REDO AND UNDO PHASE"
+# The test programs look like:
+# work; commit (time T1); work; exit-without-commit (time T2)
+# We first run the test program and let it exit after T1's commit.
+# Then we run it again and let it exit at T2. Then we compare
+# and expect identity.
+
+for take_checkpoint in "no" "yes"
+do
+for blobs in "" "-b" # we test table without blobs and then table with blobs
+do
+ for test_undo in 1 2 3 4
+ do
+ # first iteration tests rollback of insert, second tests rollback of delete
+ set -- "ma_test1 $silent -M -T -c -N $blobs -H1" "--testflag=1" "--testflag=2 --test-undo=" "ma_test1 $silent -M -T -c -N $blobs -H2" "--testflag=3" "--testflag=4 --test-undo=" "ma_test1 $silent -M -T -c -N $blobs -H2 " "--testflag=2" "--testflag=3 --test-undo=" "ma_test2 $silent -L -K -W -P -M -T -c $blobs -H1" "-t1" "-t2 -u"
+ # -N (create NULL fields) is needed because --test-undo adds it anyway
+ while [ $# != 0 ]
+ do
+ prog=$1
+ if [ "$take_checkpoint" == "no" ]
+ then
+ prog=`echo $prog | sed 's/ -H[0-9]//'`
+ fi
+ commit_run_args=$2
+ abort_run_args=$3;
+ rm -f maria_log.* maria_log_control
+ echo "TEST WITH $prog $commit_run_args (commit at end)"
+ $maria_path/$prog $commit_run_args
+ # derive table's name from program's name
+ table=`echo $prog | sed -e 's;.*ma_\(test[0-9]\).*;\1;' `
+ $maria_path/maria_chk -dvv $table | grep -v "Creation time:" | grep -v "file length"> $tmp/maria_chk_message.good.txt 2>&1
+ checksum=`$maria_path/maria_chk -dss $table`
+ mv $table.MAD $tmp/$table-good.MAD
+ mv $table.MAI $tmp/$table-good.MAI
+ rm maria_log.* maria_log_control
+ echo "TEST WITH $prog $abort_run_args$test_undo (additional aborted work)"
+ $maria_path/$prog $abort_run_args$test_undo
+ cp $table.MAD $tmp/$table.MAD.before_undo
+ cp $table.MAI $tmp/$table.MAI.before_undo
+
+ # The lines below seem unneeded, will be removed soon
+ # We have to copy and restore logs, as running maria_read_log will
+ # change the maria_control_file
+# rm -f $tmp/maria_log.* $tmp/maria_log_control
+# cp $maria_path/maria_log* $tmp
+
+ if [ "$test_undo" != "3" ]
+ then
+ apply_log "shouldchangelog" # should undo aborted work
+ else
+ # probably nothing to undo went to log or data file
+ apply_log "dontknow"
+ fi
+ cp $table.MAD $tmp/$table.MAD.after_undo
+ cp $table.MAI $tmp/$table.MAI.after_undo
+
+ # It is impossible to do a "cmp" between .good and .after_undo,
+ # because the UNDO phase generated log
+ # records whose LSN tagged pages. Another reason is that rolling back
+ # INSERT only marks the rows free, does not empty them (optimization), so
+ # traces of the INSERT+rollback remain.
+
+ check_table_is_same
+ echo "testing idempotency"
+ apply_log "shouldnotchangelog"
+ cmp $table.MAD $tmp/$table.MAD.after_undo
+ # can't do this, creation time differs at least; enable it if you
+ # have a "cmp" which ignores the header.
+# cmp $table.MAI $tmp/$table.MAI.after_undo
+ check_table_is_same
+ echo "testing applying of CLRs to recreate table"
+ rm $table.MA?
+# cp $tmp/maria_log* $maria_path #unneeded
+ apply_log "shouldnotchangelog"
+ cmp $table.MAD $tmp/$table.MAD.after_undo
+ # can't do this, creation time differs at least
+# cmp $table.MAI $tmp/$table.MAI.after_undo
+ check_table_is_same
+ shift 3
+ done
+ rm -f $table.* $tmp/$table* $tmp/maria_chk_*.txt $tmp/maria_read_log_$table.txt
+done
+done
+done
+
+) 2>&1 > $tmp/ma_test_recovery.output
+
+# also note that maria_chk -dvv shows differences for ma_test2 in UNDO phase,
+# this is normal: removing records does not shrink the data/key file,
+# does not put back the "analyzed,optimized keys"(etc) index state.
+diff $maria_path/ma_test_recovery.expected $tmp/ma_test_recovery.output > /dev/null || diff_failed=1
+if [ "$diff_failed" == "1" ]
+ then
+ echo "UNEXPECTED OUTPUT OF TESTS, FAILED"
+ echo "For more info, do diff $maria_path/ma_test_recovery.expected $tmp/ma_test_recovery.output"
+ exit 1
+ fi
+echo "ALL RECOVERY TESTS OK"
diff --git a/storage/maria/ma_test_recovery.expected b/storage/maria/ma_test_recovery.expected
new file mode 100644
index 00000000000..814b0cc2eb8
--- /dev/null
+++ b/storage/maria/ma_test_recovery.expected
@@ -0,0 +1,942 @@
+Testing the REDO PHASE ALONE
+TEST WITH ma_test1 -s -M -T -c
+applying log
+testing idempotency
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c
+applying log
+testing idempotency
+applying log
+TEST WITH ma_test2 -s -M -T -c -b65000
+applying log
+testing idempotency
+applying log
+Testing the REDO AND UNDO PHASE
+TEST WITH ma_test1 -s -M -T -c -N --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N --testflag=2 --test-undo=1 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N --testflag=4 --test-undo=1 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N --testflag=3 --test-undo=1 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t2 -u1 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N --testflag=2 --test-undo=2 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N --testflag=4 --test-undo=2 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N --testflag=3 --test-undo=2 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t2 -u2 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N --testflag=2 --test-undo=3 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N --testflag=4 --test-undo=3 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N --testflag=3 --test-undo=3 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t2 -u3 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N --testflag=2 --test-undo=4 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N --testflag=4 --test-undo=4 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N --testflag=3 --test-undo=4 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t2 -u4 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=2 --test-undo=1 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=4 --test-undo=1 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=3 --test-undo=1 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -t2 -u1 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=2 --test-undo=2 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=4 --test-undo=2 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=3 --test-undo=2 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -t2 -u2 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=2 --test-undo=3 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=4 --test-undo=3 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=3 --test-undo=3 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -t2 -u3 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=2 --test-undo=4 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=4 --test-undo=4 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N -b --testflag=3 --test-undo=4 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -t2 -u4 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N -H1 --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N -H1 --testflag=2 --test-undo=1 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=4 --test-undo=1 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=3 --test-undo=1 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t2 -u1 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N -H1 --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N -H1 --testflag=2 --test-undo=2 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=4 --test-undo=2 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=3 --test-undo=2 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t2 -u2 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N -H1 --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N -H1 --testflag=2 --test-undo=3 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=4 --test-undo=3 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=3 --test-undo=3 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t2 -u3 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N -H1 --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N -H1 --testflag=2 --test-undo=4 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=4 --test-undo=4 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N -H2 --testflag=3 --test-undo=4 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t2 -u4 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N -b -H1 --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N -b -H1 --testflag=2 --test-undo=1 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=4 --test-undo=1 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=3 --test-undo=1 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -H1 -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -H1 -t2 -u1 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N -b -H1 --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N -b -H1 --testflag=2 --test-undo=2 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=4 --test-undo=2 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=3 --test-undo=2 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -H1 -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -H1 -t2 -u2 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N -b -H1 --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N -b -H1 --testflag=2 --test-undo=3 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=4 --test-undo=3 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=3 --test-undo=3 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -H1 -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -H1 -t2 -u3 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+TEST WITH ma_test1 -s -M -T -c -N -b -H1 --testflag=1 (commit at end)
+TEST WITH ma_test1 -s -M -T -c -N -b -H1 --testflag=2 --test-undo=4 (additional aborted work)
+Terminating after inserts
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=3 (commit at end)
+Terminating after updates
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=4 --test-undo=4 (additional aborted work)
+Terminating after deletes
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=2 (commit at end)
+Terminating after inserts
+TEST WITH ma_test1 -s -M -T -c -N -b -H2 --testflag=3 --test-undo=4 (additional aborted work)
+Terminating after updates
+Dying on request without maria_commit()/maria_close()
+applying log
+testing idempotency
+applying log
+testing applying of CLRs to recreate table
+applying log
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -H1 -t1 (commit at end)
+TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b -H1 -t2 -u4 (additional aborted work)
+Dying on request without maria_commit()/maria_close()
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing idempotency
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
+testing applying of CLRs to recreate table
+applying log
+Differences in maria_chk -dvv, recovery not yet perfect !
+========DIFF START=======
+6c6
+< Status: checked,analyzed,optimized keys,sorted index pages
+---
+> Status: changed
+========DIFF END=======
diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c
new file mode 100644
index 00000000000..3ab717887c7
--- /dev/null
+++ b/storage/maria/ma_unique.c
@@ -0,0 +1,235 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Functions to check if a row is unique */
+
+#include "maria_def.h"
+#include <m_ctype.h>
+
+my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, uchar *record,
+ ha_checksum unique_hash, my_off_t disk_pos)
+{
+ my_off_t lastpos=info->cur_row.lastpos;
+ MARIA_KEYDEF *key= &info->s->keyinfo[def->key];
+ uchar *key_buff= info->lastkey2;
+ DBUG_ENTER("_ma_check_unique");
+ DBUG_PRINT("enter",("unique_hash: %lu", (ulong) unique_hash));
+
+ maria_unique_store(record+key->seg->start, unique_hash);
+ _ma_make_key(info,def->key,key_buff,record,0);
+
+ /* The above changed info->lastkey2. Inform maria_rnext_same(). */
+ info->update&= ~HA_STATE_RNEXT_SAME;
+
+ if (_ma_search(info,info->s->keyinfo+def->key,key_buff,
+ MARIA_UNIQUE_HASH_LENGTH,
+ SEARCH_FIND,info->s->state.key_root[def->key]))
+ {
+ info->page_changed=1; /* Can't optimize read next */
+ info->cur_row.lastpos= lastpos;
+ DBUG_RETURN(0); /* No matching rows */
+ }
+
+ for (;;)
+ {
+ if (info->cur_row.lastpos != disk_pos &&
+ !(*info->s->compare_unique)(info,def,record,info->cur_row.lastpos))
+ {
+ my_errno=HA_ERR_FOUND_DUPP_UNIQUE;
+ info->errkey= (int) def->key;
+ info->dup_key_pos= info->cur_row.lastpos;
+ info->page_changed= 1; /* Can't optimize read next */
+ info->cur_row.lastpos= lastpos;
+ DBUG_PRINT("info",("Found duplicate"));
+ DBUG_RETURN(1); /* Found identical */
+ }
+ if (_ma_search_next(info,info->s->keyinfo+def->key, info->lastkey,
+ MARIA_UNIQUE_HASH_LENGTH, SEARCH_BIGGER,
+ info->s->state.key_root[def->key]) ||
+ bcmp((char*) info->lastkey, (char*) key_buff,
+ MARIA_UNIQUE_HASH_LENGTH))
+ {
+ info->page_changed= 1; /* Can't optimize read next */
+ info->cur_row.lastpos= lastpos;
+ DBUG_RETURN(0); /* end of tree */
+ }
+ }
+}
+
+
+/*
+ Calculate a hash for a row
+
+ TODO
+ Add support for bit fields
+*/
+
+ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const uchar *record)
+{
+ const uchar *pos, *end;
+ ha_checksum crc= 0;
+ ulong seed1=0, seed2= 4;
+ HA_KEYSEG *keyseg;
+
+ for (keyseg=def->seg ; keyseg < def->end ; keyseg++)
+ {
+ enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type;
+ uint length=keyseg->length;
+
+ if (keyseg->null_bit)
+ {
+ if (record[keyseg->null_pos] & keyseg->null_bit)
+ {
+ /*
+ Change crc in a way different from an empty string or 0.
+ (This is an optimisation; The code will work even if this isn't
+ done)
+ */
+ crc=((crc << 8) + 511+
+ (crc >> (8*sizeof(ha_checksum)-8)));
+ continue;
+ }
+ }
+ pos= record+keyseg->start;
+ if (keyseg->flag & HA_VAR_LENGTH_PART)
+ {
+ uint pack_length= keyseg->bit_start;
+ uint tmp_length= (pack_length == 1 ? (uint) *(uchar*) pos :
+ uint2korr(pos));
+ pos+= pack_length; /* Skip VARCHAR length */
+ set_if_smaller(length,tmp_length);
+ }
+ else if (keyseg->flag & HA_BLOB_PART)
+ {
+ uint tmp_length= _ma_calc_blob_length(keyseg->bit_start,pos);
+ memcpy_fixed((uchar*) &pos,pos+keyseg->bit_start,sizeof(char*));
+ if (!length || length > tmp_length)
+ length=tmp_length; /* The whole blob */
+ }
+ end= pos+length;
+ if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 ||
+ type == HA_KEYTYPE_VARTEXT2)
+ {
+ keyseg->charset->coll->hash_sort(keyseg->charset,
+ (const uchar*) pos, length, &seed1,
+ &seed2);
+ crc^= seed1;
+ }
+ else
+ while (pos != end)
+ crc=((crc << 8) +
+ (((uchar) *(uchar*) pos++))) +
+ (crc >> (8*sizeof(ha_checksum)-8));
+ }
+ return crc;
+}
+
+
+/*
+ compare unique key for two rows
+
+ TODO
+ Add support for bit fields
+
+ RETURN
+ 0 if both rows have equal unique value
+ 1 Rows are different
+*/
+
+my_bool _ma_unique_comp(MARIA_UNIQUEDEF *def, const uchar *a, const uchar *b,
+ my_bool null_are_equal)
+{
+ const uchar *pos_a, *pos_b, *end;
+ HA_KEYSEG *keyseg;
+
+ for (keyseg=def->seg ; keyseg < def->end ; keyseg++)
+ {
+ enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type;
+ uint a_length, b_length;
+ a_length= b_length= keyseg->length;
+
+ /* If part is NULL it's regarded as different */
+ if (keyseg->null_bit)
+ {
+ uint tmp;
+ if ((tmp=(a[keyseg->null_pos] & keyseg->null_bit)) !=
+ (uint) (b[keyseg->null_pos] & keyseg->null_bit))
+ return 1;
+ if (tmp)
+ {
+ if (!null_are_equal)
+ return 1;
+ continue;
+ }
+ }
+ pos_a= a+keyseg->start;
+ pos_b= b+keyseg->start;
+ if (keyseg->flag & HA_VAR_LENGTH_PART)
+ {
+ uint pack_length= keyseg->bit_start;
+ if (pack_length == 1)
+ {
+ a_length= (uint) *(uchar*) pos_a++;
+ b_length= (uint) *(uchar*) pos_b++;
+ }
+ else
+ {
+ a_length= uint2korr(pos_a);
+ b_length= uint2korr(pos_b);
+ pos_a+= 2; /* Skip VARCHAR length */
+ pos_b+= 2;
+ }
+ set_if_smaller(a_length, keyseg->length); /* Safety */
+ set_if_smaller(b_length, keyseg->length); /* safety */
+ }
+ else if (keyseg->flag & HA_BLOB_PART)
+ {
+ /* Only compare 'length' characters if length != 0 */
+ a_length= _ma_calc_blob_length(keyseg->bit_start,pos_a);
+ b_length= _ma_calc_blob_length(keyseg->bit_start,pos_b);
+ /* Check that a and b are of equal length */
+ if (keyseg->length)
+ {
+ /*
+ This is used in some cases when we are not interested in comparing
+ the whole length of the blob.
+ */
+ set_if_smaller(a_length, keyseg->length);
+ set_if_smaller(b_length, keyseg->length);
+ }
+ memcpy_fixed((uchar*) &pos_a,pos_a+keyseg->bit_start,sizeof(char*));
+ memcpy_fixed((uchar*) &pos_b,pos_b+keyseg->bit_start,sizeof(char*));
+ }
+ if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 ||
+ type == HA_KEYTYPE_VARTEXT2)
+ {
+ if (ha_compare_text(keyseg->charset, (uchar *) pos_a, a_length,
+ (uchar *) pos_b, b_length, 0, 1))
+ return 1;
+ }
+ else
+ {
+ if (a_length != b_length)
+ return 1;
+ end= pos_a+a_length;
+ while (pos_a != end)
+ {
+ if (*pos_a++ != *pos_b++)
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c
new file mode 100644
index 00000000000..3d670e0d966
--- /dev/null
+++ b/storage/maria/ma_update.c
@@ -0,0 +1,241 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Update an old row in a MARIA table */
+
+#include "ma_fulltext.h"
+#include "ma_rt_index.h"
+
+int maria_update(register MARIA_HA *info, const uchar *oldrec, uchar *newrec)
+{
+ int flag,key_changed,save_errno;
+ reg3 my_off_t pos;
+ uint i;
+ uchar old_key[HA_MAX_KEY_BUFF],*new_key;
+ my_bool auto_key_changed= 0;
+ ulonglong changed;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("maria_update");
+ LINT_INIT(new_key);
+ LINT_INIT(changed);
+
+ DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage",
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ DBUG_RETURN(my_errno= HA_ERR_CRASHED););
+ if (!(info->update & HA_STATE_AKTIV))
+ {
+ DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND);
+ }
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ {
+ DBUG_RETURN(my_errno=EACCES);
+ }
+ if (info->state->key_file_length >= share->base.margin_key_file_length)
+ {
+ DBUG_RETURN(my_errno=HA_ERR_INDEX_FILE_FULL);
+ }
+ pos= info->cur_row.lastpos;
+ if (_ma_readinfo(info,F_WRLCK,1))
+ DBUG_RETURN(my_errno);
+
+ if ((*share->compare_record)(info,oldrec))
+ {
+ save_errno= my_errno;
+ DBUG_PRINT("warning", ("Got error from compare record"));
+ goto err_end; /* Record has changed */
+ }
+
+ /* Calculate and check all unique constraints */
+ key_changed=0;
+ for (i=0 ; i < share->state.header.uniques ; i++)
+ {
+ MARIA_UNIQUEDEF *def=share->uniqueinfo+i;
+ if (_ma_unique_comp(def, newrec, oldrec,1) &&
+ _ma_check_unique(info, def, newrec, _ma_unique_hash(def, newrec),
+ pos))
+ {
+ save_errno=my_errno;
+ goto err_end;
+ }
+ }
+ if (_ma_mark_file_changed(info))
+ {
+ save_errno=my_errno;
+ goto err_end;
+ }
+
+ /* Check which keys changed from the original row */
+
+ new_key= info->lastkey2;
+ changed=0;
+ for (i=0 ; i < share->base.keys ; i++)
+ {
+ if (maria_is_key_active(share->state.key_map, i))
+ {
+ if (share->keyinfo[i].flag & HA_FULLTEXT )
+ {
+ if (_ma_ft_cmp(info,i,oldrec, newrec))
+ {
+ if ((int) i == info->lastinx)
+ {
+ /*
+ We are changeing the index we are reading on. Mark that
+ the index data has changed and we need to do a full search
+ when doing read-next
+ */
+ key_changed|=HA_STATE_WRITTEN;
+ }
+ changed|=((ulonglong) 1 << i);
+ if (_ma_ft_update(info,i,old_key,oldrec,newrec,pos))
+ goto err;
+ }
+ }
+ else
+ {
+ uint new_length= _ma_make_key(info,i,new_key,newrec,pos);
+ uint old_length= _ma_make_key(info,i,old_key,oldrec,pos);
+
+ /* The above changed info->lastkey2. Inform maria_rnext_same(). */
+ info->update&= ~HA_STATE_RNEXT_SAME;
+
+ if (new_length != old_length ||
+ memcmp(old_key, new_key, new_length))
+ {
+ if ((int) i == info->lastinx)
+ key_changed|=HA_STATE_WRITTEN; /* Mark that keyfile changed */
+ changed|=((ulonglong) 1 << i);
+ share->keyinfo[i].version++;
+ if (share->keyinfo[i].ck_delete(info,i,old_key,old_length)) goto err;
+ if (share->keyinfo[i].ck_insert(info,i,new_key,new_length)) goto err;
+ if (share->base.auto_key == i+1)
+ auto_key_changed=1;
+ }
+ }
+ }
+ }
+
+ if (share->calc_checksum)
+ {
+ /*
+ We can't use the row based checksum as this doesn't have enough
+ precision (one byte, while the table's is more bytes).
+ At least _ma_check_unique() modifies the 'newrec' record, so checksum
+ has to be computed _after_ it. Nobody apparently modifies 'oldrec'.
+ We need to pass the old row's checksum down to (*update_record)(), we do
+ this via info->new_row.checksum (not intuitive but existing code
+ mandated that cur_row is the new row).
+ If (*update_record)() fails, table will be marked corrupted so no need
+ to revert the live checksum change.
+ */
+ info->state->checksum+= !share->now_transactional *
+ ((info->cur_row.checksum= (*share->calc_checksum)(info, newrec)) -
+ (info->new_row.checksum= (*share->calc_checksum)(info, oldrec)));
+ }
+ {
+ /*
+ Don't update index file if data file is not extended and no status
+ information changed
+ */
+ MARIA_STATUS_INFO state;
+ ha_rows org_split;
+ my_off_t org_delete_link;
+
+ memcpy((char*) &state, (char*) info->state, sizeof(state));
+ org_split= share->state.split;
+ org_delete_link= share->state.dellink;
+ if ((*share->update_record)(info, pos, oldrec, newrec))
+ goto err;
+ }
+ if (auto_key_changed & !share->now_transactional)
+ {
+ const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
+ const uchar *key= newrec + keyseg->start;
+ set_if_bigger(share->state.auto_increment,
+ ma_retrieve_auto_increment(key, keyseg->type));
+ }
+
+ /*
+ We can't yet have HA_STATE_AKTIV here, as block_record dosn't support
+ it
+ */
+ info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED | key_changed);
+
+ /*
+ Every Maria function that updates Maria table must end with
+ call to _ma_writeinfo(). If operation (second param of
+ _ma_writeinfo()) is not 0 it sets share->changed to 1, that is
+ flags that data has changed. If operation is 0, this function
+ equals to no-op in this case.
+
+ ma_update() must always pass !0 value as operation, since even if
+ there is no index change there could be data change.
+ */
+ VOID(_ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE));
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ if (info->invalidator != 0)
+ {
+ DBUG_PRINT("info", ("invalidator... '%s' (update)", share->open_file_name));
+ (*info->invalidator)(share->open_file_name);
+ info->invalidator=0;
+ }
+ DBUG_RETURN(0);
+
+err:
+ DBUG_PRINT("error",("key: %d errno: %d",i,my_errno));
+ save_errno=my_errno;
+ if (my_errno == HA_ERR_FOUND_DUPP_KEY || my_errno == HA_ERR_OUT_OF_MEM ||
+ my_errno == HA_ERR_RECORD_FILE_FULL)
+ {
+ info->errkey= (int) i;
+ flag=0;
+ do
+ {
+ if (((ulonglong) 1 << i) & changed)
+ {
+ if (share->keyinfo[i].flag & HA_FULLTEXT)
+ {
+ if ((flag++ && _ma_ft_del(info,i,new_key,newrec,pos)) ||
+ _ma_ft_add(info,i,old_key,oldrec,pos))
+ break;
+ }
+ else
+ {
+ uint new_length= _ma_make_key(info,i,new_key,newrec,pos);
+ uint old_length= _ma_make_key(info,i,old_key,oldrec,pos);
+ if ((flag++ && _ma_ck_delete(info,i,new_key,new_length)) ||
+ _ma_ck_write(info,i,old_key,old_length))
+ break;
+ }
+ }
+ } while (i-- != 0);
+ }
+ else
+ {
+ maria_print_error(share, HA_ERR_CRASHED);
+ maria_mark_crashed(info);
+ }
+ info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_ROW_CHANGED |
+ key_changed);
+
+ err_end:
+ VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ if (save_errno == HA_ERR_KEY_NOT_FOUND)
+ {
+ maria_print_error(share, HA_ERR_CRASHED);
+ save_errno=HA_ERR_CRASHED;
+ }
+ DBUG_RETURN(my_errno=save_errno);
+} /* maria_update */
diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c
new file mode 100644
index 00000000000..4192009779e
--- /dev/null
+++ b/storage/maria/ma_write.c
@@ -0,0 +1,2115 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Write a row to a MARIA table */
+
+#include "ma_fulltext.h"
+#include "ma_rt_index.h"
+#include "trnman.h"
+#include "ma_key_recover.h"
+#include "ma_blockrec.h"
+
+#define MAX_POINTER_LENGTH 8
+
+ /* Functions declared in this file */
+
+static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uint comp_flag, uchar *key, uint key_length, my_off_t page,
+ my_off_t father_page, uchar *father_buff,
+ MARIA_PINNED_PAGE *father_page_link, uchar *father_keypos,
+ my_bool insert_last);
+static int _ma_balance_page(MARIA_HA *info,MARIA_KEYDEF *keyinfo,
+ uchar *key, uchar *curr_buff, my_off_t page,
+ my_off_t father_page, uchar *father_buff,
+ uchar *father_keypos,
+ MARIA_KEY_PARAM *s_temp);
+static uchar *_ma_find_last_pos(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *page, uchar *key,
+ uint *return_key_length, uchar **after_key);
+static int _ma_ck_write_tree(register MARIA_HA *info, uint keynr,uchar *key,
+ uint key_length);
+static int _ma_ck_write_btree(register MARIA_HA *info, uint keynr,uchar *key,
+ uint key_length);
+static int _ma_ck_write_btree_with_log(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key, uint key_length,
+ my_off_t *root, uint comp_flag);
+static my_bool _ma_log_new(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint page_length, uint key_nr, my_bool root_page);
+static my_bool _ma_log_change(MARIA_HA *info, my_off_t page, uchar *buff,
+ uchar *key_pos, uint length);
+static my_bool _ma_log_split(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint org_length, uint new_length,
+ uchar *key_pos,
+ uint key_length, int move_length,
+ enum en_key_op prefix_or_suffix,
+ uchar *data, uint data_length,
+ uint changed_length);
+static my_bool _ma_log_del_prefix(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint org_length, uint new_length,
+ uchar *key_pos, uint key_length,
+ int move_length);
+static my_bool _ma_log_key_middle(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint new_length,
+ uint data_added_first,
+ uint data_changed_first,
+ uint data_deleted_last,
+ uchar *key_pos,
+ uint key_length, int move_length);
+
+/*
+ @brief Default handler for returing position to new row
+*/
+
+MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info,
+ const uchar *record
+ __attribute__((unused)))
+{
+ return ((info->s->state.dellink != HA_OFFSET_ERROR &&
+ !info->append_insert_at_end) ?
+ info->s->state.dellink :
+ info->state->data_file_length);
+}
+
+my_bool _ma_write_abort_default(MARIA_HA *info __attribute__((unused)))
+{
+ return 0;
+}
+
+
+/* Write new record to a table */
+
+int maria_write(MARIA_HA *info, uchar *record)
+{
+ MARIA_SHARE *share= info->s;
+ uint i;
+ int save_errno;
+ MARIA_RECORD_POS filepos;
+ uchar *buff;
+ my_bool lock_tree= share->concurrent_insert;
+ my_bool fatal_error;
+ DBUG_ENTER("maria_write");
+ DBUG_PRINT("enter",("index_file: %d data_file: %d",
+ share->kfile.file, info->dfile.file));
+
+ DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage",
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ DBUG_RETURN(my_errno= HA_ERR_CRASHED););
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ {
+ DBUG_RETURN(my_errno=EACCES);
+ }
+ if (_ma_readinfo(info,F_WRLCK,1))
+ DBUG_RETURN(my_errno);
+ dont_break(); /* Dont allow SIGHUP or SIGINT */
+
+ if (share->base.reloc == (ha_rows) 1 &&
+ share->base.records == (ha_rows) 1 &&
+ info->state->records == (ha_rows) 1)
+ { /* System file */
+ my_errno=HA_ERR_RECORD_FILE_FULL;
+ goto err2;
+ }
+ if (info->state->key_file_length >= share->base.margin_key_file_length)
+ {
+ my_errno=HA_ERR_INDEX_FILE_FULL;
+ goto err2;
+ }
+ if (_ma_mark_file_changed(info))
+ goto err2;
+
+ /* Calculate and check all unique constraints */
+ for (i=0 ; i < share->state.header.uniques ; i++)
+ {
+ if (_ma_check_unique(info,share->uniqueinfo+i,record,
+ _ma_unique_hash(share->uniqueinfo+i,record),
+ HA_OFFSET_ERROR))
+ goto err2;
+ }
+
+ if ((info->opt_flag & OPT_NO_ROWS))
+ filepos= HA_OFFSET_ERROR;
+ else
+ {
+ /*
+ This may either calculate a record or, or write the record and return
+ the record id
+ */
+ if ((filepos= (*share->write_record_init)(info, record)) ==
+ HA_OFFSET_ERROR)
+ goto err2;
+ }
+
+ /* Write all keys to indextree */
+ buff= info->lastkey2;
+ for (i=0 ; i < share->base.keys ; i++)
+ {
+ if (maria_is_key_active(share->state.key_map, i))
+ {
+ bool local_lock_tree= (lock_tree &&
+ !(info->bulk_insert &&
+ is_tree_inited(&info->bulk_insert[i])));
+ if (local_lock_tree)
+ {
+ rw_wrlock(&share->key_root_lock[i]);
+ share->keyinfo[i].version++;
+ }
+ if (share->keyinfo[i].flag & HA_FULLTEXT )
+ {
+ if (_ma_ft_add(info,i, buff,record,filepos))
+ {
+ if (local_lock_tree)
+ rw_unlock(&share->key_root_lock[i]);
+ DBUG_PRINT("error",("Got error: %d on write",my_errno));
+ goto err;
+ }
+ }
+ else
+ {
+ if (share->keyinfo[i].ck_insert(info,i,buff,
+ _ma_make_key(info,i,buff,record,
+ filepos)))
+ {
+ if (local_lock_tree)
+ rw_unlock(&share->key_root_lock[i]);
+ DBUG_PRINT("error",("Got error: %d on write",my_errno));
+ goto err;
+ }
+ }
+
+ /* The above changed info->lastkey2. Inform maria_rnext_same(). */
+ info->update&= ~HA_STATE_RNEXT_SAME;
+
+ if (local_lock_tree)
+ rw_unlock(&share->key_root_lock[i]);
+ }
+ }
+ if (share->calc_write_checksum)
+ info->cur_row.checksum= (*share->calc_write_checksum)(info,record);
+ if (filepos != HA_OFFSET_ERROR)
+ {
+ if ((*share->write_record)(info,record))
+ goto err;
+ /**
+ @todo when we enable multiple writers, we will have to protect
+ 'records' and 'checksum' somehow.
+ */
+ info->state->checksum+= !share->now_transactional *
+ info->cur_row.checksum;
+ }
+ if ((share->base.auto_key != 0) & !share->now_transactional)
+ {
+ const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
+ const uchar *key= record + keyseg->start;
+ set_if_bigger(share->state.auto_increment,
+ ma_retrieve_auto_increment(key, keyseg->type));
+ }
+ info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_WRITTEN |
+ HA_STATE_ROW_CHANGED);
+ info->state->records+= !share->now_transactional; /*otherwise already done*/
+ info->cur_row.lastpos= filepos;
+ VOID(_ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE));
+ if (info->invalidator != 0)
+ {
+ DBUG_PRINT("info", ("invalidator... '%s' (update)",
+ share->open_file_name));
+ (*info->invalidator)(share->open_file_name);
+ info->invalidator=0;
+ }
+
+ /*
+ Update status of the table. We need to do so after each row write
+ for the log tables, as we want the new row to become visible to
+ other threads as soon as possible. We don't lock mutex here
+ (as it is required by pthread memory visibility rules) as (1) it's
+ not critical to use outdated share->is_log_table value (2) locking
+ mutex here for every write is too expensive.
+ */
+ if (share->is_log_table)
+ _ma_update_status((void*) info);
+
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ DBUG_RETURN(0);
+
+err:
+ save_errno= my_errno;
+ fatal_error= 0;
+ if (my_errno == HA_ERR_FOUND_DUPP_KEY ||
+ my_errno == HA_ERR_RECORD_FILE_FULL ||
+ my_errno == HA_ERR_NULL_IN_SPATIAL ||
+ my_errno == HA_ERR_OUT_OF_MEM)
+ {
+ if (info->bulk_insert)
+ {
+ uint j;
+ for (j=0 ; j < share->base.keys ; j++)
+ maria_flush_bulk_insert(info, j);
+ }
+ info->errkey= (int) i;
+ /*
+ We delete keys in the reverse order of insertion. This is the order that
+ a rollback would do and is important for CLR_ENDs generated by
+ _ma_ft|ck_delete() and write_record_abort() to work (with any other
+ order they would cause wrong jumps in the chain).
+ */
+ while ( i-- > 0)
+ {
+ if (maria_is_key_active(share->state.key_map, i))
+ {
+ bool local_lock_tree= (lock_tree &&
+ !(info->bulk_insert &&
+ is_tree_inited(&info->bulk_insert[i])));
+ if (local_lock_tree)
+ rw_wrlock(&share->key_root_lock[i]);
+ /**
+ @todo RECOVERY BUG
+ The key deletes below should generate CLR_ENDs
+ */
+ if (share->keyinfo[i].flag & HA_FULLTEXT)
+ {
+ if (_ma_ft_del(info,i,buff,record,filepos))
+ {
+ if (local_lock_tree)
+ rw_unlock(&share->key_root_lock[i]);
+ break;
+ }
+ }
+ else
+ {
+ uint key_length= _ma_make_key(info,i,buff,record,filepos);
+ if (_ma_ck_delete(info,i,buff,key_length))
+ {
+ if (local_lock_tree)
+ rw_unlock(&share->key_root_lock[i]);
+ break;
+ }
+ }
+ if (local_lock_tree)
+ rw_unlock(&share->key_root_lock[i]);
+ }
+ }
+ }
+ else
+ fatal_error= 1;
+
+ if ((*share->write_record_abort)(info))
+ fatal_error= 1;
+ if (fatal_error)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info);
+ }
+
+ info->update= (HA_STATE_CHANGED | HA_STATE_WRITTEN | HA_STATE_ROW_CHANGED);
+ my_errno=save_errno;
+err2:
+ save_errno=my_errno;
+ DBUG_ASSERT(save_errno);
+ DBUG_PRINT("error", ("got error: %d", save_errno));
+ VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ DBUG_RETURN(my_errno=save_errno);
+} /* maria_write */
+
+
+ /* Write one key to btree */
+
+int _ma_ck_write(MARIA_HA *info, uint keynr, uchar *key, uint key_length)
+{
+ DBUG_ENTER("_ma_ck_write");
+
+ if (info->bulk_insert && is_tree_inited(&info->bulk_insert[keynr]))
+ {
+ DBUG_RETURN(_ma_ck_write_tree(info, keynr, key, key_length));
+ }
+ DBUG_RETURN(_ma_ck_write_btree(info, keynr, key, key_length));
+} /* _ma_ck_write */
+
+
+/**********************************************************************
+ Insert key into btree (normal case)
+**********************************************************************/
+
+static int _ma_ck_write_btree(register MARIA_HA *info, uint keynr, uchar *key,
+ uint key_length)
+{
+ int error;
+ MARIA_KEYDEF *keyinfo=info->s->keyinfo+keynr;
+ my_off_t *root=&info->s->state.key_root[keynr];
+ DBUG_ENTER("_ma_ck_write_btree");
+
+ error= _ma_ck_write_btree_with_log(info, keyinfo, key, key_length,
+ root, keyinfo->write_comp_flag);
+ if (info->ft1_to_ft2)
+ {
+ if (!error)
+ error= _ma_ft_convert_to_ft2(info, keynr, key);
+ delete_dynamic(info->ft1_to_ft2);
+ my_free((uchar*)info->ft1_to_ft2, MYF(0));
+ info->ft1_to_ft2=0;
+ }
+ DBUG_RETURN(error);
+} /* _ma_ck_write_btree */
+
+
+/**
+ @brief Write a key to the b-tree
+
+ @retval -1 error
+ @retval 0 ok
+*/
+
+static int _ma_ck_write_btree_with_log(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key, uint key_length,
+ my_off_t *root, uint comp_flag)
+{
+ MARIA_SHARE *share= info->s;
+ LSN lsn= LSN_IMPOSSIBLE;
+ int error;
+ my_off_t new_root= *root;
+ uchar key_buff[HA_MAX_KEY_BUFF];
+ DBUG_ENTER("_ma_ck_write_btree_with_log");
+
+ if (share->now_transactional)
+ {
+ /* Save original value as the key may change */
+ memcpy(key_buff, key, key_length + share->rec_reflength);
+ }
+
+ error= _ma_ck_real_write_btree(info, keyinfo, key, key_length, &new_root,
+ comp_flag);
+ if (!error && share->now_transactional)
+ {
+ uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ KEY_NR_STORE_SIZE];
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ struct st_msg_to_write_hook_for_undo_key msg;
+
+ /* Save if we need to write a clr record */
+ info->key_write_undo_lsn[keyinfo->key_nr]= info->trn->undo_lsn;
+ lsn_store(log_data, info->trn->undo_lsn);
+ key_nr_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE,
+ keyinfo->key_nr);
+ key_length+= share->rec_reflength;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) key_buff;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= key_length;
+
+ msg.root= root;
+ msg.value= new_root;
+ msg.auto_increment= 0;
+ if (share->base.auto_key == ((uint)keyinfo->key_nr + 1))
+ {
+ const HA_KEYSEG *keyseg= keyinfo->seg;
+ uchar *to= key_buff;
+ if (keyseg->flag & HA_SWAP_KEY)
+ {
+ /* We put key from log record to "data record" packing format... */
+ uchar reversed[HA_MAX_KEY_BUFF];
+ uchar *key_ptr= to;
+ uchar *key_end= key_ptr + keyseg->length;
+ to= reversed + keyseg->length;
+ do
+ {
+ *--to= *key_ptr++;
+ } while (key_ptr != key_end);
+ }
+ /* ... so that we can read it with: */
+ msg.auto_increment=
+ ma_retrieve_auto_increment(to, keyseg->type);
+ /* and write_hook_for_undo_key_insert() will pick this. */
+ }
+
+ if (translog_write_record(&lsn, LOGREC_UNDO_KEY_INSERT,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length +
+ key_length,
+ TRANSLOG_INTERNAL_PARTS + 2, log_array,
+ log_data + LSN_STORE_SIZE, &msg))
+ error= -1;
+ }
+ else
+ {
+ *root= new_root;
+ _ma_fast_unlock_key_del(info);
+ }
+ _ma_unpin_all_pages_and_finalize_row(info, lsn);
+
+ DBUG_RETURN(error);
+} /* _ma_ck_write_btree_with_log */
+
+
+/**
+ @brief Write a key to the b-tree
+
+ @retval -1 error
+ @retval 0 ok
+*/
+
+int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key, uint key_length, my_off_t *root,
+ uint comp_flag)
+{
+ int error;
+ DBUG_ENTER("_ma_ck_real_write_btree");
+
+ /* key_length parameter is used only if comp_flag is SEARCH_FIND */
+ if (*root == HA_OFFSET_ERROR ||
+ (error= w_search(info, keyinfo, comp_flag, key, key_length,
+ *root, (my_off_t) 0, (uchar*) 0,
+ (MARIA_PINNED_PAGE *) 0, (uchar*) 0, 1)) > 0)
+ error= _ma_enlarge_root(info, keyinfo, key, root);
+ DBUG_RETURN(error);
+} /* _ma_ck_real_write_btree */
+
+
+/**
+ @brief Make a new root with key as only pointer
+
+ @retval -1 error
+ @retval 0 ok
+*/
+
+int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, const uchar *key,
+ my_off_t *root)
+{
+ uint t_length, nod_flag, page_length;
+ MARIA_KEY_PARAM s_temp;
+ MARIA_SHARE *share= info->s;
+ MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
+ int res= 0;
+ DBUG_ENTER("_ma_enlarge_root");
+
+ nod_flag= (*root != HA_OFFSET_ERROR) ? share->base.key_reflength : 0;
+ /* Store pointer to prev page if nod */
+ _ma_kpointer(info, info->buff + share->keypage_header, *root);
+ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(uchar*) 0,
+ (uchar*) 0, (uchar*) 0, key,&s_temp);
+ page_length= share->keypage_header + t_length + nod_flag;
+
+ bzero(info->buff, share->keypage_header);
+ _ma_store_keynr(share, info->buff, keyinfo->key_nr);
+ _ma_store_page_used(share, info->buff, page_length);
+ if (nod_flag)
+ _ma_store_keypage_flag(share, info->buff, KEYPAGE_FLAG_ISNOD);
+ (*keyinfo->store_key)(keyinfo, info->buff + share->keypage_header +
+ nod_flag, &s_temp);
+
+ /* Mark that info->buff was used */
+ info->keyread_buff_used= info->page_changed= 1;
+ if ((*root= _ma_new(info, PAGECACHE_PRIORITY_HIGH, &page_link)) ==
+ HA_OFFSET_ERROR)
+ DBUG_RETURN(-1);
+
+ /*
+ Clear unitialized part of page to avoid valgrind/purify warnings
+ and to get a clean page that is easier to compress and compare with
+ pages generated with redo
+ */
+ bzero(info->buff + page_length, share->block_size - page_length);
+
+
+ if (share->now_transactional &&
+ _ma_log_new(info, *root, info->buff, page_length, keyinfo->key_nr, 1))
+ res= -1;
+ if (_ma_write_keypage(info, keyinfo, *root, page_link->write_lock,
+ PAGECACHE_PRIORITY_HIGH, info->buff))
+ res= -1;
+
+ DBUG_RETURN(res);
+} /* _ma_enlarge_root */
+
+
+/*
+ Search after a position for a key and store it there
+
+ @return
+ @retval -1 error
+ @retval 0 ok
+ @retval > 0 Key should be stored in higher tree
+*/
+
+static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uint comp_flag, uchar *key, uint key_length, my_off_t page,
+ my_off_t father_page, uchar *father_buff,
+ MARIA_PINNED_PAGE *father_page_link, uchar *father_keypos,
+ my_bool insert_last)
+{
+ int error,flag;
+ uint nod_flag, search_key_length;
+ uchar *temp_buff,*keypos;
+ uchar keybuff[HA_MAX_KEY_BUFF];
+ my_bool was_last_key;
+ my_off_t next_page, dup_key_pos;
+ MARIA_PINNED_PAGE *page_link;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("w_search");
+ DBUG_PRINT("enter",("page: %ld", (long) page));
+
+ search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY;
+ if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
+ HA_MAX_KEY_BUFF*2)))
+ DBUG_RETURN(-1);
+ if (!_ma_fetch_keypage(info, keyinfo, page, PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, temp_buff, 0, &page_link))
+ goto err;
+
+ flag=(*keyinfo->bin_search)(info,keyinfo,temp_buff,key,search_key_length,
+ comp_flag, &keypos, keybuff, &was_last_key);
+ nod_flag= _ma_test_if_nod(share, temp_buff);
+ if (flag == 0)
+ {
+ uint tmp_key_length;
+ /* get position to record with duplicated key */
+ tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,keybuff);
+ if (tmp_key_length)
+ dup_key_pos= _ma_dpos(info,0,keybuff+tmp_key_length);
+ else
+ dup_key_pos= HA_OFFSET_ERROR;
+
+ if (keyinfo->flag & HA_FULLTEXT)
+ {
+ uint off;
+ int subkeys;
+
+ get_key_full_length_rdonly(off, keybuff);
+ subkeys=ft_sintXkorr(keybuff+off);
+ comp_flag=SEARCH_SAME;
+ if (subkeys >= 0)
+ {
+ /* normal word, one-level tree structure */
+ flag=(*keyinfo->bin_search)(info, keyinfo, temp_buff, key,
+ USE_WHOLE_KEY, comp_flag,
+ &keypos, keybuff, &was_last_key);
+ }
+ else
+ {
+ /* popular word. two-level tree. going down */
+ my_off_t root=dup_key_pos;
+ keyinfo= &share->ft2_keyinfo;
+ get_key_full_length_rdonly(off, key);
+ key+=off;
+ /* we'll modify key entry 'in vivo' */
+ keypos-= keyinfo->keylength + nod_flag;
+ error= _ma_ck_real_write_btree(info, keyinfo, key, 0,
+ &root, comp_flag);
+ _ma_dpointer(info, keypos+HA_FT_WLEN, root);
+ subkeys--; /* should there be underflow protection ? */
+ DBUG_ASSERT(subkeys < 0);
+ ft_intXstore(keypos, subkeys);
+ if (!error)
+ {
+ page_link->changed= 1;
+ error= _ma_write_keypage(info, keyinfo, page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, temp_buff);
+ }
+ my_afree((uchar*) temp_buff);
+ DBUG_RETURN(error);
+ }
+ }
+ else /* not HA_FULLTEXT, normal HA_NOSAME key */
+ {
+ DBUG_PRINT("warning", ("Duplicate key"));
+ info->dup_key_pos= dup_key_pos;
+ my_afree((uchar*) temp_buff);
+ my_errno=HA_ERR_FOUND_DUPP_KEY;
+ DBUG_RETURN(-1);
+ }
+ }
+ if (flag == MARIA_FOUND_WRONG_KEY)
+ DBUG_RETURN(-1);
+ if (!was_last_key)
+ insert_last=0;
+ next_page= _ma_kpos(nod_flag,keypos);
+ if (next_page == HA_OFFSET_ERROR ||
+ (error= w_search(info, keyinfo, comp_flag, key, key_length, next_page,
+ page, temp_buff, page_link, keypos, insert_last)) > 0)
+ {
+ error= _ma_insert(info, keyinfo, key, temp_buff, keypos, page, keybuff,
+ father_page, father_buff, father_page_link,
+ father_keypos, insert_last);
+ page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, page, PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS,temp_buff))
+ goto err;
+ }
+ my_afree((uchar*) temp_buff);
+ DBUG_RETURN(error);
+err:
+ my_afree((uchar*) temp_buff);
+ DBUG_PRINT("exit",("Error: %d",my_errno));
+ DBUG_RETURN (-1);
+} /* w_search */
+
+
+/*
+ Insert new key.
+
+ SYNOPSIS
+ _ma_insert()
+ info Open table information.
+ keyinfo Key definition information.
+ key New key
+ anc_buff Key page (beginning).
+ key_pos Position in key page where to insert.
+ anc_page Page number for anc_buff
+ key_buff Copy of previous key.
+ father_buff parent key page for balancing.
+ father_key_pos position in parent key page for balancing.
+ father_page position of parent key page in file.
+ insert_last If to append at end of page.
+
+ DESCRIPTION
+ Insert new key at right of key_pos.
+ Note that caller must save anc_buff
+
+ This function writes log records for all changed pages
+ (Including anc_buff and father page)
+
+ RETURN
+ < 0 Error.
+ 0 OK
+ 1 If key contains key to upper level (from balance page)
+ 2 If key contains key to upper level (from split space)
+*/
+
+int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uchar *key, uchar *anc_buff, uchar *key_pos, my_off_t anc_page,
+ uchar *key_buff, my_off_t father_page, uchar *father_buff,
+ MARIA_PINNED_PAGE *father_page_link, uchar *father_key_pos,
+ my_bool insert_last)
+{
+ uint a_length, nod_flag, org_anc_length;
+ int t_length;
+ uchar *endpos, *prev_key;
+ MARIA_KEY_PARAM s_temp;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_insert");
+ DBUG_PRINT("enter",("key_pos: 0x%lx", (ulong) key_pos));
+ DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key,
+ USE_WHOLE_KEY););
+
+ _ma_get_used_and_nod(share, anc_buff, a_length, nod_flag);
+ org_anc_length= a_length;
+ endpos= anc_buff+ a_length;
+ prev_key= (key_pos == anc_buff + share->keypage_header + nod_flag ?
+ (uchar*) 0 : key_buff);
+ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,
+ (key_pos == endpos ? (uchar*) 0 : key_pos),
+ prev_key, prev_key,
+ key,&s_temp);
+#ifndef DBUG_OFF
+ if (key_pos != anc_buff + share->keypage_header + nod_flag &&
+ (keyinfo->flag & (HA_BINARY_PACK_KEY | HA_PACK_KEY)))
+ {
+ DBUG_DUMP("prev_key",(uchar*) key_buff, _ma_keylength(keyinfo,key_buff));
+ }
+ if (keyinfo->flag & HA_PACK_KEY)
+ {
+ DBUG_PRINT("test",("t_length: %d ref_len: %d",
+ t_length,s_temp.ref_length));
+ DBUG_PRINT("test",("n_ref_len: %d n_length: %d key_pos: 0x%lx",
+ s_temp.n_ref_length, s_temp.n_length, (long) s_temp.key));
+ }
+#endif
+ if (t_length > 0)
+ {
+ if (t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH)
+ {
+ maria_print_error(share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(-1);
+ }
+ bmove_upp((uchar*) endpos+t_length,(uchar*) endpos,(uint) (endpos-key_pos));
+ }
+ else
+ {
+ if (-t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH)
+ {
+ maria_print_error(share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(-1);
+ }
+ bmove(key_pos,key_pos-t_length,(uint) (endpos-key_pos)+t_length);
+ }
+ (*keyinfo->store_key)(keyinfo,key_pos,&s_temp);
+ a_length+=t_length;
+ _ma_store_page_used(share, anc_buff, a_length);
+
+ /*
+ Check if the new key fits totally into the the page
+ (anc_buff is big enough to contain a full page + one key)
+ */
+ if (a_length <= (uint) keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE)
+ {
+ if (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE - a_length < 32 &&
+ (keyinfo->flag & HA_FULLTEXT) && key_pos == endpos &&
+ share->base.key_reflength <= share->base.rec_reflength &&
+ share->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))
+ {
+ /*
+ Normal word. One-level tree. Page is almost full.
+ Let's consider converting.
+ We'll compare 'key' and the first key at anc_buff
+ */
+ const uchar *a= key, *b= anc_buff + share->keypage_header + nod_flag;
+ uint alen, blen, ft2len= share->ft2_keyinfo.keylength;
+ /* the very first key on the page is always unpacked */
+ DBUG_ASSERT((*b & 128) == 0);
+#if HA_FT_MAXLEN >= 127
+ blen= mi_uint2korr(b); b+=2;
+#else
+ blen= *(uchar*) b++;
+#endif
+ get_key_length(alen,a);
+ DBUG_ASSERT(info->ft1_to_ft2==0);
+ if (alen == blen &&
+ ha_compare_text(keyinfo->seg->charset, (uchar*) a, alen,
+ (uchar*) b, blen, 0, 0) == 0)
+ {
+ /* Yup. converting */
+ info->ft1_to_ft2=(DYNAMIC_ARRAY *)
+ my_malloc(sizeof(DYNAMIC_ARRAY), MYF(MY_WME));
+ my_init_dynamic_array(info->ft1_to_ft2, ft2len, 300, 50);
+
+ /*
+ Now, adding all keys from the page to dynarray
+ if the page is a leaf (if not keys will be deleted later)
+ */
+ if (!nod_flag)
+ {
+ /*
+ Let's leave the first key on the page, though, because
+ we cannot easily dispatch an empty page here
+ */
+ b+=blen+ft2len+2;
+ for (a=anc_buff+a_length ; b < a ; b+=ft2len+2)
+ insert_dynamic(info->ft1_to_ft2, (uchar*) b);
+
+ /* fixing the page's length - it contains only one key now */
+ _ma_store_page_used(share, anc_buff, share->keypage_header + blen +
+ ft2len + 2);
+ }
+ /* the rest will be done when we're back from recursion */
+ }
+ }
+ else
+ {
+ if (share->now_transactional &&
+ _ma_log_add(info, anc_page, anc_buff, (uint) (endpos - anc_buff),
+ key_pos, s_temp.changed_length, t_length, 0))
+ DBUG_RETURN(-1);
+ }
+ DBUG_RETURN(0); /* There is room on page */
+ }
+ /* Page is full */
+ if (nod_flag)
+ insert_last=0;
+ if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)) &&
+ father_buff && !insert_last && !info->quick_mode)
+ {
+ s_temp.key_pos= key_pos;
+ father_page_link->changed= 1;
+ DBUG_RETURN(_ma_balance_page(info, keyinfo, key, anc_buff, anc_page,
+ father_page, father_buff, father_key_pos,
+ &s_temp));
+ }
+ DBUG_RETURN(_ma_split_page(info, keyinfo, key, anc_page,
+ anc_buff, org_anc_length,
+ key_pos, s_temp.changed_length, t_length,
+ key_buff, insert_last));
+} /* _ma_insert */
+
+
+/**
+ @brief split a full page in two and assign emerging item to key
+
+ @fn _ma_split_page()
+ info Maria handler
+ keyinfo Key handler
+ key Buffer for middle key
+ split_page Address on disk for split_buff
+ split_buff Page buffer for page that should be split
+ org_split_length Original length of split_buff before key was inserted
+ inserted_key_pos Address in buffer where key was inserted
+ changed_length Number of bytes changed at 'inserted_key_pos'
+ move_length Number of bytes buffer was moved when key was inserted
+ key_buff Key buffer to use for temporary storage of key
+ insert_last_key If we are insert key on rightmost key page
+
+ @note
+ split_buff is not stored on disk (caller has to do this)
+
+ @return
+ @retval 2 ok (Middle key up from _ma_insert())
+ @retval -1 error
+*/
+
+int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uchar *key, my_off_t split_page, uchar *split_buff,
+ uint org_split_length,
+ uchar *inserted_key_pos, uint changed_length,
+ int move_length,
+ uchar *key_buff, my_bool insert_last_key)
+{
+ uint length,a_length,key_ref_length,t_length,nod_flag,key_length;
+ uint page_length, split_length;
+ uchar *key_pos,*pos, *after_key, *new_buff;
+ my_off_t new_pos;
+ MARIA_KEY_PARAM s_temp;
+ MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
+ MARIA_SHARE *share= info->s;
+ int res;
+ DBUG_ENTER("maria_split_page");
+
+ LINT_INIT(after_key);
+ DBUG_DUMP("buff", split_buff, _ma_get_page_used(share, split_buff));
+
+ info->page_changed=1; /* Info->buff is used */
+ info->keyread_buff_used=1;
+ new_buff= info->buff;
+ nod_flag= _ma_test_if_nod(share, split_buff);
+ key_ref_length= share->keypage_header + nod_flag;
+ if (insert_last_key)
+ key_pos= _ma_find_last_pos(info, keyinfo, split_buff,
+ key_buff, &key_length,
+ &after_key);
+ else
+ key_pos= _ma_find_half_pos(info, nod_flag, keyinfo, split_buff, key_buff,
+ &key_length, &after_key);
+ if (!key_pos)
+ DBUG_RETURN(-1);
+
+ split_length= (uint) (key_pos - split_buff);
+ a_length= _ma_get_page_used(share, split_buff);
+ _ma_store_page_used(share, split_buff, split_length);
+
+ key_pos=after_key;
+ if (nod_flag)
+ {
+ DBUG_PRINT("test",("Splitting nod"));
+ pos=key_pos-nod_flag;
+ memcpy((uchar*) new_buff + share->keypage_header, (uchar*) pos,
+ (size_t) nod_flag);
+ }
+
+ /* Move middle item to key and pointer to new page */
+ if ((new_pos= _ma_new(info, PAGECACHE_PRIORITY_HIGH, &page_link)) ==
+ HA_OFFSET_ERROR)
+ DBUG_RETURN(-1);
+ _ma_kpointer(info, _ma_move_key(keyinfo,key,key_buff),new_pos);
+
+ /* Store new page */
+ if (!(*keyinfo->get_key)(keyinfo,nod_flag,&key_pos,key_buff))
+ DBUG_RETURN(-1);
+
+ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(uchar *) 0,
+ (uchar*) 0, (uchar*) 0,
+ key_buff, &s_temp);
+ length=(uint) ((split_buff + a_length) - key_pos);
+ memcpy((uchar*) new_buff+key_ref_length+t_length,(uchar*) key_pos,
+ (size_t) length);
+ (*keyinfo->store_key)(keyinfo,new_buff+key_ref_length,&s_temp);
+ page_length= length + t_length + key_ref_length;
+
+ bzero(new_buff, share->keypage_header);
+ if (nod_flag)
+ _ma_store_keypage_flag(share, new_buff, KEYPAGE_FLAG_ISNOD);
+ _ma_store_page_used(share, new_buff, page_length);
+ /* Copy key number */
+ new_buff[share->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_KEYID_SIZE -
+ KEYPAGE_FLAG_SIZE]=
+ split_buff[share->keypage_header - KEYPAGE_USED_SIZE -
+ KEYPAGE_KEYID_SIZE - KEYPAGE_FLAG_SIZE];
+
+ res= 2; /* Middle key up */
+ if (share->now_transactional &&
+ _ma_log_new(info, new_pos, new_buff, page_length, keyinfo->key_nr, 0))
+ res= -1;
+ bzero(new_buff + page_length, share->block_size - page_length);
+
+ if (_ma_write_keypage(info, keyinfo, new_pos, page_link->write_lock,
+ DFLT_INIT_HITS, new_buff))
+ res= -1;
+
+ /* Save changes to split pages */
+ if (share->now_transactional &&
+ _ma_log_split(info, split_page, split_buff, org_split_length,
+ split_length,
+ inserted_key_pos, changed_length, move_length,
+ KEY_OP_NONE, (uchar*) 0, 0, 0))
+ res= -1;
+
+ DBUG_DUMP("key",(uchar*) key, _ma_keylength(keyinfo,key));
+ DBUG_RETURN(res);
+} /* _ma_split_page */
+
+
+/*
+ Calculate how to much to move to split a page in two
+
+ Returns pointer to start of key.
+ key will contain the key.
+ return_key_length will contain the length of key
+ after_key will contain the position to where the next key starts
+*/
+
+uchar *_ma_find_half_pos(MARIA_HA *info, uint nod_flag, MARIA_KEYDEF *keyinfo,
+ uchar *page, uchar *key, uint *return_key_length,
+ uchar **after_key)
+{
+ uint keys,length,key_ref_length;
+ uchar *end,*lastpos;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_find_half_pos");
+
+ key_ref_length= share->keypage_header + nod_flag;
+ length= _ma_get_page_used(share, page) - key_ref_length;
+ page+= key_ref_length; /* Point to first key */
+ if (!(keyinfo->flag &
+ (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY |
+ HA_BINARY_PACK_KEY)))
+ {
+ key_ref_length=keyinfo->keylength+nod_flag;
+ keys=length/(key_ref_length*2);
+ *return_key_length=keyinfo->keylength;
+ end=page+keys*key_ref_length;
+ *after_key=end+key_ref_length;
+ memcpy(key,end,key_ref_length);
+ DBUG_RETURN(end);
+ }
+
+ end=page+length/2-key_ref_length; /* This is aprox. half */
+ *key='\0';
+ do
+ {
+ lastpos=page;
+ if (!(length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key)))
+ DBUG_RETURN(0);
+ } while (page < end);
+ *return_key_length=length;
+ *after_key=page;
+ DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx half: 0x%lx",
+ (long) lastpos, (long) page, (long) end));
+ DBUG_RETURN(lastpos);
+} /* _ma_find_half_pos */
+
+
+/*
+ Split buffer at last key
+ Returns pointer to the start of the key before the last key
+ key will contain the last key
+*/
+
+static uchar *_ma_find_last_pos(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *page,
+ uchar *key, uint *return_key_length,
+ uchar **after_key)
+{
+ uint keys,length,last_length,key_ref_length;
+ uchar *end,*lastpos,*prevpos;
+ uchar key_buff[HA_MAX_KEY_BUFF];
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_find_last_pos");
+
+ key_ref_length= share->keypage_header;
+ length= _ma_get_page_used(share, page) - key_ref_length;
+ page+=key_ref_length;
+ if (!(keyinfo->flag &
+ (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY |
+ HA_BINARY_PACK_KEY)))
+ {
+ keys=length/keyinfo->keylength-2;
+ *return_key_length=length=keyinfo->keylength;
+ end=page+keys*length;
+ *after_key=end+length;
+ memcpy(key,end,length);
+ DBUG_RETURN(end);
+ }
+
+ LINT_INIT(prevpos);
+ LINT_INIT(last_length);
+ end=page+length-key_ref_length;
+ *key='\0';
+ length=0;
+ lastpos=page;
+ while (page < end)
+ {
+ prevpos=lastpos; lastpos=page;
+ last_length=length;
+ memcpy(key, key_buff, length); /* previous key */
+ if (!(length=(*keyinfo->get_key)(keyinfo,0,&page,key_buff)))
+ {
+ maria_print_error(keyinfo->share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(0);
+ }
+ }
+ *return_key_length=last_length;
+ *after_key=lastpos;
+ DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx end: 0x%lx",
+ (long) prevpos,(long) page,(long) end));
+ DBUG_RETURN(prevpos);
+} /* _ma_find_last_pos */
+
+
+/**
+ @brief Balance page with static size keys with page on right/left
+
+ @param key Middle key will be stored here
+
+ @notes
+ Father_buff will always be changed
+ Caller must handle saving of curr_buff
+
+ @return
+ @retval 0 Balance was done (father buff is saved)
+ @retval 1 Middle key up (father buff is not saved)
+ @retval -1 Error
+*/
+
+static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key, uchar *curr_buff,
+ my_off_t curr_page,
+ my_off_t father_page, uchar *father_buff,
+ uchar *father_key_pos, MARIA_KEY_PARAM *s_temp)
+{
+ MARIA_PINNED_PAGE *next_page_link;
+ MARIA_PINNED_PAGE tmp_page_link, *new_page_link= &tmp_page_link;
+ MARIA_SHARE *share= info->s;
+ my_bool right;
+ uint k_length,father_length,father_keylength,nod_flag,curr_keylength;
+ uint right_length,left_length,new_right_length,new_left_length,extra_length;
+ uint keys, tmp_length, extra_buff_length;
+ uchar *pos,*buff,*extra_buff, *parting_key;
+ my_off_t next_page,new_pos;
+ uchar tmp_part_key[HA_MAX_KEY_BUFF];
+ DBUG_ENTER("_ma_balance_page");
+
+ k_length=keyinfo->keylength;
+ father_length= _ma_get_page_used(share, father_buff);
+ father_keylength= k_length + share->base.key_reflength;
+ nod_flag= _ma_test_if_nod(share, curr_buff);
+ curr_keylength=k_length+nod_flag;
+ info->page_changed=1;
+
+ if ((father_key_pos != father_buff+father_length &&
+ (info->state->records & 1)) ||
+ father_key_pos == father_buff+ share->keypage_header +
+ share->base.key_reflength)
+ {
+ right=1;
+ next_page= _ma_kpos(share->base.key_reflength,
+ father_key_pos+father_keylength);
+ buff=info->buff;
+ DBUG_PRINT("info", ("use right page: %lu", (ulong) next_page));
+ }
+ else
+ {
+ right=0;
+ father_key_pos-=father_keylength;
+ next_page= _ma_kpos(share->base.key_reflength,father_key_pos);
+ /* Move curr_buff so that it's on the left */
+ buff= curr_buff;
+ curr_buff= info->buff;
+ DBUG_PRINT("info", ("use left page: %lu", (ulong) next_page));
+ } /* father_key_pos ptr to parting key */
+
+ if (!_ma_fetch_keypage(info,keyinfo, next_page, PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, info->buff, 0, &next_page_link))
+ goto err;
+ next_page_link->changed= 1;
+ DBUG_DUMP("next", info->buff, _ma_get_page_used(share, info->buff));
+
+ /* Test if there is room to share keys */
+ left_length= _ma_get_page_used(share, curr_buff);
+ right_length= _ma_get_page_used(share, buff);
+ keys= ((left_length+right_length-share->keypage_header*2-nod_flag*2)/
+ curr_keylength);
+
+ if ((right ? right_length : left_length) + curr_keylength <=
+ (uint) keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE)
+ {
+ /* Enough space to hold all keys in the two buffers ; Balance bufferts */
+ new_left_length= share->keypage_header+nod_flag+(keys/2)*curr_keylength;
+ new_right_length=share->keypage_header+nod_flag+(((keys+1)/2)*
+ curr_keylength);
+ _ma_store_page_used(share, curr_buff, new_left_length);
+ _ma_store_page_used(share, buff, new_right_length);
+
+ DBUG_PRINT("info", ("left_length: %u -> %u right_length: %u -> %u",
+ left_length, new_left_length,
+ right_length, new_right_length));
+ if (left_length < new_left_length)
+ {
+ uint length;
+ DBUG_PRINT("info", ("move keys to end of buff"));
+
+ /* Move keys buff -> curr_buff */
+ pos=curr_buff+left_length;
+ memcpy(pos,father_key_pos, (size_t) k_length);
+ memcpy(pos+k_length, buff + share->keypage_header,
+ (size_t) (length=new_left_length - left_length - k_length));
+ pos= buff + share->keypage_header + length;
+ memcpy(father_key_pos, pos, (size_t) k_length);
+ bmove(buff + share->keypage_header, pos + k_length, new_right_length);
+
+ if (share->now_transactional)
+ {
+ if (right)
+ {
+ /*
+ Log changes to page on left
+ The original page is on the left and stored in curr_buff
+ We have on the page the newly inserted key and data
+ from buff added last on the page
+ */
+ if (_ma_log_split(info, curr_page, curr_buff,
+ left_length - s_temp->move_length,
+ new_left_length,
+ s_temp->key_pos, s_temp->changed_length,
+ s_temp->move_length,
+ KEY_OP_ADD_SUFFIX,
+ curr_buff + left_length,
+ new_left_length - left_length,
+ new_left_length - left_length+ k_length))
+ goto err;
+ /*
+ Log changes to page on right
+ This contains the original data with some keys deleted from
+ start of page
+ */
+ if (_ma_log_prefix(info, next_page, buff, 0,
+ ((int) new_right_length - (int) right_length)))
+ goto err;
+ }
+ else
+ {
+ /*
+ Log changes to page on right (the original page) which is in buff
+ Data is removed from start of page
+ The inserted key may be in buff or moved to curr_buff
+ */
+ if (_ma_log_del_prefix(info, curr_page, buff,
+ right_length - s_temp->changed_length,
+ new_right_length,
+ s_temp->key_pos, s_temp->changed_length,
+ s_temp->move_length))
+ goto err;
+ /*
+ Log changes to page on left, which has new data added last
+ */
+ if (_ma_log_suffix(info, next_page, curr_buff,
+ left_length, new_left_length))
+ goto err;
+ }
+ }
+ }
+ else
+ {
+ uint length;
+ DBUG_PRINT("info", ("move keys to start of buff"));
+
+ bmove_upp(buff + new_right_length, buff + right_length,
+ right_length - share->keypage_header);
+ length= new_right_length -right_length - k_length;
+ memcpy(buff + share->keypage_header + length, father_key_pos,
+ (size_t) k_length);
+ pos=curr_buff+new_left_length;
+ memcpy(father_key_pos, pos, (size_t) k_length);
+ memcpy(buff + share->keypage_header, pos+k_length, (size_t) length);
+
+ if (share->now_transactional)
+ {
+ if (right)
+ {
+ /*
+ Log changes to page on left
+ The original page is on the left and stored in curr_buff
+ The page is shortened from end and the key may be on the page
+ */
+ if (_ma_log_split(info, curr_page, curr_buff,
+ left_length - s_temp->move_length,
+ new_left_length,
+ s_temp->key_pos, s_temp->changed_length,
+ s_temp->move_length,
+ KEY_OP_NONE, (uchar*) 0, 0, 0))
+ goto err;
+ /*
+ Log changes to page on right
+ This contains the original data, with some data from cur_buff
+ added first
+ */
+ if (_ma_log_prefix(info, next_page, buff,
+ (uint) (new_right_length - right_length),
+ (int) (new_right_length - right_length)))
+ goto err;
+ }
+ else
+ {
+ /*
+ Log changes to page on right (the original page) which is in buff
+ We have on the page the newly inserted key and data
+ from buff added first on the page
+ */
+ uint diff_length= new_right_length - right_length;
+ if (_ma_log_split(info, curr_page, buff,
+ left_length - s_temp->move_length,
+ new_right_length,
+ s_temp->key_pos + diff_length,
+ s_temp->changed_length,
+ s_temp->move_length,
+ KEY_OP_ADD_PREFIX,
+ buff + share->keypage_header,
+ diff_length, diff_length + k_length))
+ goto err;
+ /*
+ Log changes to page on left, which is shortened from end
+ */
+ if (_ma_log_suffix(info, next_page, curr_buff,
+ left_length, new_left_length))
+ goto err;
+ }
+ }
+ }
+
+ /* Log changes to father (one level up) page */
+
+ if (share->now_transactional &&
+ _ma_log_change(info, father_page, father_buff, father_key_pos,
+ k_length))
+ goto err;
+
+ /*
+ next_page_link->changed is marked as true above and fathers
+ page_link->changed is marked as true in caller
+ */
+ if (_ma_write_keypage(info, keyinfo, next_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, info->buff) ||
+ _ma_write_keypage(info, keyinfo, father_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
+ father_buff))
+ goto err;
+ DBUG_RETURN(0);
+ }
+
+ /* curr_buff[] and buff[] are full, lets split and make new nod */
+
+ extra_buff= info->buff+share->base.max_key_block_length;
+ new_left_length= new_right_length= (share->keypage_header + nod_flag +
+ (keys+1) / 3 * curr_keylength);
+ /*
+ 5 is the minum number of keys we can have here. This comes from
+ the fact that each full page can store at least 2 keys and in this case
+ we have a 'split' key, ie 2+2+1 = 5
+ */
+ if (keys == 5) /* Too few keys to balance */
+ new_left_length-=curr_keylength;
+ extra_length= (nod_flag + left_length + right_length -
+ new_left_length - new_right_length - curr_keylength);
+ extra_buff_length= extra_length + share->keypage_header;
+ DBUG_PRINT("info",("left_length: %d right_length: %d new_left_length: %d new_right_length: %d extra_length: %d",
+ left_length, right_length,
+ new_left_length, new_right_length,
+ extra_length));
+ _ma_store_page_used(share, curr_buff, new_left_length);
+ _ma_store_page_used(share, buff, new_right_length);
+
+ bzero(extra_buff, share->keypage_header);
+ if (nod_flag)
+ _ma_store_keypage_flag(share, extra_buff, KEYPAGE_FLAG_ISNOD);
+ /* Copy key number */
+ extra_buff[share->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_KEYID_SIZE -
+ KEYPAGE_FLAG_SIZE]=
+ buff[share->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_KEYID_SIZE -
+ KEYPAGE_FLAG_SIZE];
+ _ma_store_page_used(share, extra_buff, extra_buff_length);
+
+ /* move first largest keys to new page */
+ pos=buff+right_length-extra_length;
+ memcpy(extra_buff + share->keypage_header, pos, extra_length);
+ /* Zero old data from buffer */
+ bzero(extra_buff + extra_buff_length,
+ share->block_size - extra_buff_length);
+
+ /* Save new parting key between buff and extra_buff */
+ memcpy(tmp_part_key, pos-k_length,k_length);
+ /* Make place for new keys */
+ bmove_upp(buff+ new_right_length, pos - k_length,
+ right_length - extra_length - k_length - share->keypage_header);
+ /* Copy keys from left page */
+ pos= curr_buff+new_left_length;
+ memcpy(buff + share->keypage_header, pos + k_length,
+ (size_t) (tmp_length= left_length - new_left_length - k_length));
+ /* Copy old parting key */
+ parting_key= buff + share->keypage_header + tmp_length;
+ memcpy(parting_key, father_key_pos, (size_t) k_length);
+
+ /* Move new parting keys up to caller */
+ memcpy((right ? key : father_key_pos),pos,(size_t) k_length);
+ memcpy((right ? father_key_pos : key),tmp_part_key, k_length);
+
+ if ((new_pos= _ma_new(info, DFLT_INIT_HITS, &new_page_link))
+ == HA_OFFSET_ERROR)
+ goto err;
+ _ma_kpointer(info,key+k_length,new_pos);
+
+ if (share->now_transactional)
+ {
+ if (right)
+ {
+ /*
+ Page order according to key values:
+ orignal_page (curr_buff), next_page (buff), extra_buff
+
+ cur_buff is shortened,
+ buff is getting new keys at start and shortened from end.
+ extra_buff is new page
+
+ Note that extra_buff (largest key parts) will be stored at the
+ place of the original 'right' page (next_page) and right page (buff)
+ will be stored at new_pos.
+
+ This makes the log entries smaller as right_page contains all
+ data to generate the data extra_buff
+ */
+
+ /*
+ Log changes to page on left (page shortened page at end)
+ */
+ if (_ma_log_split(info, curr_page, curr_buff,
+ left_length - s_temp->move_length, new_left_length,
+ s_temp->key_pos, s_temp->changed_length,
+ s_temp->move_length,
+ KEY_OP_NONE, (uchar*) 0, 0, 0))
+ goto err;
+ /*
+ Log changes to right page (stored at next page)
+ This contains the last 'extra_buff' from 'buff'
+ */
+ if (_ma_log_prefix(info, next_page, extra_buff,
+ 0, (int) (extra_buff_length - right_length)))
+ goto err;
+
+ /*
+ Log changes to middle page, which is stored at the new page
+ position
+ */
+ if (_ma_log_new(info, new_pos, buff, new_right_length,
+ keyinfo->key_nr, 0))
+ goto err;
+ }
+ else
+ {
+ /*
+ Log changes to page on right (the original page) which is in buff
+ This contains the original data, with some data from curr_buff
+ added first and shortened at end
+ */
+ int data_added_first= left_length - new_left_length;
+ if (_ma_log_key_middle(info, curr_page, buff,
+ new_right_length,
+ data_added_first,
+ data_added_first,
+ extra_length,
+ s_temp->key_pos,
+ s_temp->changed_length,
+ s_temp->move_length))
+ goto err;
+
+ /* Log changes to page on left, which is shortened from end */
+ if (_ma_log_suffix(info, next_page, curr_buff,
+ left_length, new_left_length))
+ goto err;
+
+ /* Log change to rightmost (new) page */
+ if (_ma_log_new(info, new_pos, extra_buff,
+ extra_buff_length, keyinfo->key_nr, 0))
+ goto err;
+ }
+
+ /* Log changes to father (one level up) page */
+ if (share->now_transactional &&
+ _ma_log_change(info, father_page, father_buff, father_key_pos,
+ k_length))
+ goto err;
+ }
+
+ if (_ma_write_keypage(info, keyinfo, (right ? new_pos : next_page),
+ (right ? new_page_link->write_lock :
+ PAGECACHE_LOCK_LEFT_WRITELOCKED),
+ DFLT_INIT_HITS, info->buff) ||
+ _ma_write_keypage(info, keyinfo, (right ? next_page : new_pos),
+ (!right ? new_page_link->write_lock :
+ PAGECACHE_LOCK_LEFT_WRITELOCKED),
+ DFLT_INIT_HITS, extra_buff))
+ goto err;
+
+ DBUG_RETURN(1); /* Middle key up */
+
+err:
+ DBUG_RETURN(-1);
+} /* _ma_balance_page */
+
+
+/**********************************************************************
+ * Bulk insert code *
+ **********************************************************************/
+
+typedef struct {
+ MARIA_HA *info;
+ uint keynr;
+} bulk_insert_param;
+
+
+static int _ma_ck_write_tree(register MARIA_HA *info, uint keynr, uchar *key,
+ uint key_length)
+{
+ int error;
+ DBUG_ENTER("_ma_ck_write_tree");
+
+ error= (tree_insert(&info->bulk_insert[keynr], key,
+ key_length + info->s->rec_reflength,
+ info->bulk_insert[keynr].custom_arg) ? 0 :
+ HA_ERR_OUT_OF_MEM) ;
+
+ DBUG_RETURN(error);
+} /* _ma_ck_write_tree */
+
+
+/* typeof(_ma_keys_compare)=qsort_cmp2 */
+
+static int keys_compare(bulk_insert_param *param, uchar *key1, uchar *key2)
+{
+ uint not_used[2];
+ return ha_key_cmp(param->info->s->keyinfo[param->keynr].seg,
+ key1, key2, USE_WHOLE_KEY, SEARCH_SAME,
+ not_used);
+}
+
+
+static int keys_free(uchar *key, TREE_FREE mode, bulk_insert_param *param)
+{
+ /*
+ Probably I can use info->lastkey here, but I'm not sure,
+ and to be safe I'd better use local lastkey.
+ */
+ MARIA_SHARE *share= param->info->s;
+ uchar lastkey[HA_MAX_KEY_BUFF];
+ uint keylen;
+ MARIA_KEYDEF *keyinfo;
+
+ switch (mode) {
+ case free_init:
+ if (share->concurrent_insert)
+ {
+ rw_wrlock(&share->key_root_lock[param->keynr]);
+ share->keyinfo[param->keynr].version++;
+ }
+ return 0;
+ case free_free:
+ keyinfo=share->keyinfo+param->keynr;
+ keylen= _ma_keylength(keyinfo, key);
+ memcpy(lastkey, key, keylen);
+ return _ma_ck_write_btree(param->info, param->keynr, lastkey,
+ keylen - share->rec_reflength);
+ case free_end:
+ if (share->concurrent_insert)
+ rw_unlock(&share->key_root_lock[param->keynr]);
+ return 0;
+ }
+ return -1;
+}
+
+
+int maria_init_bulk_insert(MARIA_HA *info, ulong cache_size, ha_rows rows)
+{
+ MARIA_SHARE *share= info->s;
+ MARIA_KEYDEF *key=share->keyinfo;
+ bulk_insert_param *params;
+ uint i, num_keys, total_keylength;
+ ulonglong key_map;
+ DBUG_ENTER("_ma_init_bulk_insert");
+ DBUG_PRINT("enter",("cache_size: %lu", cache_size));
+
+ DBUG_ASSERT(!info->bulk_insert &&
+ (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT));
+
+ maria_clear_all_keys_active(key_map);
+ for (i=total_keylength=num_keys=0 ; i < share->base.keys ; i++)
+ {
+ if (! (key[i].flag & HA_NOSAME) && (share->base.auto_key != i + 1) &&
+ maria_is_key_active(share->state.key_map, i))
+ {
+ num_keys++;
+ maria_set_key_active(key_map, i);
+ total_keylength+=key[i].maxlength+TREE_ELEMENT_EXTRA_SIZE;
+ }
+ }
+
+ if (num_keys==0 ||
+ num_keys * MARIA_MIN_SIZE_BULK_INSERT_TREE > cache_size)
+ DBUG_RETURN(0);
+
+ if (rows && rows*total_keylength < cache_size)
+ cache_size= (ulong)rows;
+ else
+ cache_size/=total_keylength*16;
+
+ info->bulk_insert=(TREE *)
+ my_malloc((sizeof(TREE)*share->base.keys+
+ sizeof(bulk_insert_param)*num_keys),MYF(0));
+
+ if (!info->bulk_insert)
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+
+ params=(bulk_insert_param *)(info->bulk_insert+share->base.keys);
+ for (i=0 ; i < share->base.keys ; i++)
+ {
+ if (maria_is_key_active(key_map, i))
+ {
+ params->info=info;
+ params->keynr=i;
+ /* Only allocate a 16'th of the buffer at a time */
+ init_tree(&info->bulk_insert[i],
+ cache_size * key[i].maxlength,
+ cache_size * key[i].maxlength, 0,
+ (qsort_cmp2)keys_compare, 0,
+ (tree_element_free) keys_free, (void *)params++);
+ }
+ else
+ info->bulk_insert[i].root=0;
+ }
+
+ DBUG_RETURN(0);
+}
+
+void maria_flush_bulk_insert(MARIA_HA *info, uint inx)
+{
+ if (info->bulk_insert)
+ {
+ if (is_tree_inited(&info->bulk_insert[inx]))
+ reset_tree(&info->bulk_insert[inx]);
+ }
+}
+
+void maria_end_bulk_insert(MARIA_HA *info)
+{
+ DBUG_ENTER("maria_end_bulk_insert");
+ if (info->bulk_insert)
+ {
+ uint i;
+ for (i=0 ; i < info->s->base.keys ; i++)
+ {
+ if (is_tree_inited(& info->bulk_insert[i]))
+ delete_tree(&info->bulk_insert[i]);
+ }
+ my_free(info->bulk_insert, MYF(0));
+ info->bulk_insert=0;
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/****************************************************************************
+ Dedicated functions that generate log entries
+****************************************************************************/
+
+/**
+ @brief Log creation of new page
+
+ @note
+ We don't have to store the page_length into the log entry as we can
+ calculate this from the length of the log entry
+
+ @retval 1 error
+ @retval 0 ok
+*/
+
+static my_bool _ma_log_new(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint page_length, uint key_nr, my_bool root_page)
+{
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE
+ +1];
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_log_new");
+ DBUG_PRINT("enter", ("page: %lu", (ulong) page));
+
+ DBUG_ASSERT(share->now_transactional);
+
+ /* Store address of new root page */
+ page/= share->block_size;
+ page_store(log_data + FILEID_STORE_SIZE, page);
+
+ /* Store link to next unused page */
+ if (info->used_key_del == 2)
+ page= 0; /* key_del not changed */
+ else
+ page= ((share->current_key_del == HA_OFFSET_ERROR) ? IMPOSSIBLE_PAGE_NO :
+ share->current_key_del / share->block_size);
+
+ page_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, page);
+ key_nr_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE*2, key_nr);
+ log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE*2 + KEY_NR_STORE_SIZE]=
+ (uchar) root_page;
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+
+ page_length-= LSN_STORE_SIZE;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= buff + LSN_STORE_SIZE;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= page_length;
+
+ if (translog_write_record(&lsn, LOGREC_REDO_INDEX_NEW_PAGE,
+ info->trn, info, sizeof(log_data) + page_length,
+ TRANSLOG_INTERNAL_PARTS + 2, log_array,
+ log_data, NULL))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief
+ Log when some part of the key page changes
+*/
+
+static my_bool _ma_log_change(MARIA_HA *info, my_off_t page, uchar *buff,
+ uchar *key_pos, uint length)
+{
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 6], *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ uint offset= (uint) (key_pos - buff);
+ DBUG_ENTER("_ma_log_change");
+ DBUG_PRINT("enter", ("page: %lu", (ulong) page));
+
+ DBUG_ASSERT(info->s->now_transactional);
+
+ /* Store address of new root page */
+ page/= info->s->block_size;
+ page_store(log_data + FILEID_STORE_SIZE, page);
+ log_pos= log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE;
+ log_pos[0]= KEY_OP_OFFSET;
+ int2store(log_pos+1, offset);
+ log_pos[3]= KEY_OP_CHANGE;
+ int2store(log_pos+4, length);
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= buff + offset;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= length;
+
+ if (translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info, sizeof(log_data) + length,
+ TRANSLOG_INTERNAL_PARTS + 2, log_array,
+ log_data, NULL))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief Write log entry for page splitting
+
+ @note
+ Write log entry for page that has got a key added to the page under
+ one and only one of the following senarios:
+ - Page is shortened from end
+ - Data is added to end of page
+ - Data added at front of page
+
+ @param prefix_or_suffix KEY_OP_NONE Ignored
+ KEY_OP_ADD_PREFIX Add data to start of page
+ KEY_OP_ADD_SUFFIX Add data to end of page
+
+*/
+
+static my_bool _ma_log_split(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint org_length, uint new_length,
+ uchar *key_pos, uint key_length, int move_length,
+ enum en_key_op prefix_or_suffix,
+ uchar *data, uint data_length,
+ uint changed_length)
+{
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3+3+3+3+3+2];
+ uchar *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 3];
+ uint offset= (uint) (key_pos - buff);
+ uint translog_parts, extra_length;
+ DBUG_ENTER("_ma_log_split");
+ DBUG_PRINT("enter", ("page: %lu org_length: %u new_length: %u",
+ (ulong) page, org_length, new_length));
+
+ log_pos= log_data + FILEID_STORE_SIZE;
+ page/= info->s->block_size;
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+
+ if (new_length <= offset || !key_pos)
+ {
+ /*
+ Page was split before inserted key. Write redo entry where
+ we just cut current page at page_length
+ */
+ uint length_offset= org_length - new_length;
+ log_pos[0]= KEY_OP_DEL_SUFFIX;
+ int2store(log_pos+1, length_offset);
+ log_pos+= 3;
+ translog_parts= 1;
+ extra_length= 0;
+ }
+ else
+ {
+ /* Key was added to page which was split after the inserted key */
+ uint max_key_length;
+
+ /*
+ Handle case when split happened directly after the newly inserted key.
+ */
+ max_key_length= new_length - offset;
+ extra_length= min(key_length, max_key_length);
+
+ if ((int) new_length < (int) (org_length + move_length + data_length))
+ {
+ /* Shorten page */
+ uint diff= org_length + move_length + data_length - new_length;
+ log_pos[0]= KEY_OP_DEL_SUFFIX;
+ int2store(log_pos + 1, diff);
+ log_pos+= 3;
+ }
+ else
+ {
+ DBUG_ASSERT(new_length == org_length + move_length + data_length);
+ }
+
+ log_pos[0]= KEY_OP_OFFSET;
+ int2store(log_pos+1, offset);
+ log_pos+= 3;
+
+ if (move_length)
+ {
+ log_pos[0]= KEY_OP_SHIFT;
+ int2store(log_pos+1, move_length);
+ log_pos+= 3;
+ }
+
+ log_pos[0]= KEY_OP_CHANGE;
+ int2store(log_pos+1, extra_length);
+ log_pos+= 3;
+
+ /* Point to original inserted key data */
+ if (prefix_or_suffix == KEY_OP_ADD_PREFIX)
+ key_pos+= data_length;
+
+ translog_parts= 2;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) key_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= extra_length;
+ }
+
+ if (data_length)
+ {
+ /* Add prefix or suffix */
+ log_pos[0]= prefix_or_suffix;
+ int2store(log_pos+1, data_length);
+ log_pos+= 3;
+ if (prefix_or_suffix == KEY_OP_ADD_PREFIX)
+ {
+ int2store(log_pos+1, changed_length);
+ log_pos+= 2;
+ data_length= changed_length;
+ }
+ log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= (char*) data;
+ log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= data_length;
+ translog_parts++;
+ extra_length+= data_length;
+ }
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
+ log_data);
+ DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length +
+ extra_length,
+ TRANSLOG_INTERNAL_PARTS + translog_parts,
+ log_array, log_data, NULL));
+}
+
+
+/**
+ @brief
+ Write log entry for page that has got a key added to the page
+ and page is shortened from start of page
+
+ @fn _ma_log_del_prefix()
+ @param info Maria handler
+ @param page Page number
+ @param buff Page buffer
+ @param org_length Length of buffer when read
+ @param new_length Final length
+ @param key_pos Where on page buffer key was added. This is position
+ before prefix was removed
+ @param key_length How many bytes was changed at 'key_pos'
+ @param move_length How many bytes was moved up when key was added
+
+ @return
+ @retval 0 ok
+ @retval 1 error
+*/
+
+static my_bool _ma_log_del_prefix(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint org_length, uint new_length,
+ uchar *key_pos, uint key_length,
+ int move_length)
+{
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 12], *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ uint offset= (uint) (key_pos - buff);
+ uint diff_length= org_length + move_length - new_length;
+ uint translog_parts, extra_length;
+ DBUG_ENTER("_ma_log_del_prefix");
+ DBUG_PRINT("enter", ("page: %lu org_length: %u new_length: %u",
+ (ulong) page, org_length, new_length));
+
+ DBUG_ASSERT((int) diff_length > 0);
+
+ log_pos= log_data + FILEID_STORE_SIZE;
+ page/= info->s->block_size;
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+
+ translog_parts= 1;
+ extra_length= 0;
+
+ if (offset < diff_length + info->s->keypage_header)
+ {
+ /*
+ Key is not anymore on page. Move data down, but take into account that
+ the original page had grown with 'move_length bytes'
+ */
+ DBUG_ASSERT(offset + key_length <= diff_length + info->s->keypage_header);
+
+ log_pos[0]= KEY_OP_DEL_PREFIX;
+ int2store(log_pos+1, diff_length - move_length);
+ log_pos+= 3;
+ }
+ else
+ {
+ /*
+ Correct position to key, as data before key has been delete and key
+ has thus been moved down
+ */
+ offset-= diff_length;
+ key_pos-= diff_length;
+
+ /* Move data down */
+ log_pos[0]= KEY_OP_DEL_PREFIX;
+ int2store(log_pos+1, diff_length);
+ log_pos+= 3;
+
+ log_pos[0]= KEY_OP_OFFSET;
+ int2store(log_pos+1, offset);
+ log_pos+= 3;
+
+ if (move_length)
+ {
+ log_pos[0]= KEY_OP_SHIFT;
+ int2store(log_pos+1, move_length);
+ log_pos+= 3;
+ }
+ log_pos[0]= KEY_OP_CHANGE;
+ int2store(log_pos+1, key_length);
+ log_pos+= 3;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) key_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= key_length;
+ translog_parts= 2;
+ extra_length= key_length;
+ }
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
+ log_data);
+ DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length +
+ extra_length,
+ TRANSLOG_INTERNAL_PARTS + translog_parts,
+ log_array, log_data, NULL));
+}
+
+
+/**
+ @brief
+ Write log entry for page that has got data added first and
+ data deleted last. Old changed key may be part of page
+*/
+
+static my_bool _ma_log_key_middle(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint new_length,
+ uint data_added_first,
+ uint data_changed_first,
+ uint data_deleted_last,
+ uchar *key_pos,
+ uint key_length, int move_length)
+{
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3+5+3+3+3];
+ uchar *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 4];
+ uint key_offset;
+ uint translog_parts, extra_length;
+ DBUG_ENTER("_ma_log_key_middle");
+ DBUG_PRINT("enter", ("page: %lu", (ulong) page));
+
+ /* new place of key after changes */
+ key_pos+= data_added_first;
+ key_offset= (uint) (key_pos - buff);
+ if (key_offset < new_length)
+ {
+ /* key is on page; Calculate how much of the key is there */
+ uint max_key_length= new_length - key_offset;
+ if (max_key_length < key_length)
+ {
+ /* Key is last on page */
+ key_length= max_key_length;
+ move_length= 0;
+ }
+ /*
+ Take into account that new data was added as part of original key
+ that also needs to be removed from page
+ */
+ data_deleted_last+= move_length;
+ }
+
+ page/= info->s->block_size;
+
+ /* First log changes to page */
+ log_pos= log_data + FILEID_STORE_SIZE;
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+
+ log_pos[0]= KEY_OP_DEL_SUFFIX;
+ int2store(log_pos+1, data_deleted_last);
+ log_pos+= 3;
+
+ log_pos[0]= KEY_OP_ADD_PREFIX;
+ int2store(log_pos+1, data_added_first);
+ int2store(log_pos+3, data_changed_first);
+ log_pos+= 5;
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
+ log_data);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= ((char*) buff +
+ info->s->keypage_header);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= data_changed_first;
+ translog_parts= 2;
+ extra_length= data_changed_first;
+
+ /* If changed key is on page, log those changes too */
+
+ if (key_offset < new_length)
+ {
+ uchar *start_log_pos= log_pos;
+
+ log_pos[0]= KEY_OP_OFFSET;
+ int2store(log_pos+1, key_offset);
+ log_pos+= 3;
+ if (move_length)
+ {
+ log_pos[0]= KEY_OP_SHIFT;
+ int2store(log_pos+1, move_length);
+ log_pos+= 3;
+ }
+ log_pos[0]= KEY_OP_CHANGE;
+ int2store(log_pos+1, key_length);
+ log_pos+= 3;
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 2].str= (char*) start_log_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + 2].length= (uint) (log_pos -
+ start_log_pos);
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 3].str= (char*) key_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + 3].length= key_length;
+ translog_parts+=2;
+ extra_length+= log_array[TRANSLOG_INTERNAL_PARTS + 2].length + key_length;
+ }
+
+ DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length + extra_length,
+ TRANSLOG_INTERNAL_PARTS + translog_parts,
+ log_array, log_data, NULL));
+}
+
+
+#ifdef NOT_NEEDED
+
+/**
+ @brief
+ Write log entry for page that has got data added first and
+ data deleted last
+*/
+
+static my_bool _ma_log_middle(MARIA_HA *info, my_off_t page,
+ uchar *buff,
+ uint data_added_first, uint data_changed_first,
+ uint data_deleted_last)
+{
+ LSN lsn;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3 + 5], *log_pos;
+ DBUG_ENTER("_ma_log_middle");
+ DBUG_PRINT("enter", ("page: %lu", (ulong) page));
+
+ page/= info->s->block_size;
+
+ log_pos= log_data + FILEID_STORE_SIZE;
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+
+ log_pos[0]= KEY_OP_DEL_PREFIX;
+ int2store(log_pos+1, data_deleted_last);
+ log_pos+= 3;
+
+ log_pos[0]= KEY_OP_ADD_PREFIX;
+ int2store(log_pos+1, data_added_first);
+ int2store(log_pos+3, data_changed_first);
+ log_pos+= 5;
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
+ log_data);
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= ((char*) buff +
+ info->s->keypage_header);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= data_changed_first;
+ DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length + data_changed_first,
+ TRANSLOG_INTERNAL_PARTS + 2,
+ log_array, log_data, NULL));
+}
+#endif
diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c
new file mode 100644
index 00000000000..70634ecce80
--- /dev/null
+++ b/storage/maria/maria_chk.c
@@ -0,0 +1,1808 @@
+/* Copyright (C) 2006-2003 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Describe, check and repair of MARIA tables */
+
+#include "ma_fulltext.h"
+#include <myisamchk.h>
+#include <my_bit.h>
+#include <m_ctype.h>
+#include <stdarg.h>
+#include <my_getopt.h>
+#ifdef HAVE_SYS_VADVICE_H
+#include <sys/vadvise.h>
+#endif
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+SET_STACK_SIZE(9000) /* Minimum stack size for program */
+
+#ifndef USE_RAID
+#define my_raid_create(A,B,C,D,E,F,G) my_create(A,B,C,G)
+#define my_raid_delete(A,B,C) my_delete(A,B)
+#endif
+
+static uint decode_bits;
+static char **default_argv;
+static const char *load_default_groups[]= { "maria_chk", 0 };
+static const char *set_collation_name, *opt_tmpdir;
+static CHARSET_INFO *set_collation;
+static int stopwords_inited= 0;
+static MY_TMPDIR maria_chk_tmpdir;
+static my_bool opt_transaction_logging;
+
+static const char *type_names[]=
+{
+ "impossible","char","binary", "short", "long", "float",
+ "double","number","unsigned short",
+ "unsigned long","longlong","ulonglong","int24",
+ "uint24","int8","varchar", "varbin", "varchar2", "varbin2", "bit",
+ "?","?"
+};
+
+static const char *prefix_packed_txt="packed ",
+ *bin_packed_txt="prefix ",
+ *diff_txt="stripped ",
+ *null_txt="NULL",
+ *blob_txt="BLOB ";
+
+static const char *field_pack[]=
+{
+ "","no endspace", "no prespace",
+ "no zeros", "blob", "constant", "table-lockup",
+ "always zero","varchar","unique-hash","?","?"
+};
+
+static const char *record_formats[]=
+{
+ "Fixed length", "Packed", "Compressed", "Block", "?"
+};
+
+static const char *maria_stats_method_str="nulls_unequal";
+
+static void get_options(int *argc,char * * *argv);
+static void print_version(void);
+static void usage(void);
+static int maria_chk(HA_CHECK *param, char *filename);
+static void descript(HA_CHECK *param, register MARIA_HA *info, char *name);
+static int maria_sort_records(HA_CHECK *param, register MARIA_HA *info,
+ char *name, uint sort_key,
+ my_bool write_info, my_bool update_index);
+static int sort_record_index(MARIA_SORT_PARAM *sort_param, MARIA_HA *info,
+ MARIA_KEYDEF *keyinfo,
+ my_off_t page, uchar *buff,uint sortkey,
+ File new_file, my_bool update_index);
+
+HA_CHECK check_param;
+
+ /* Main program */
+
+int main(int argc, char **argv)
+{
+ int error;
+ MY_INIT(argv[0]);
+
+ maria_data_root= ".";
+ maria_chk_init(&check_param);
+ check_param.opt_lock_memory= 1; /* Lock memory if possible */
+ check_param.using_global_keycache = 0;
+ get_options(&argc,(char***) &argv);
+ maria_quick_table_bits=decode_bits;
+ error=0;
+ maria_init();
+
+ /*
+ If we are doing a repair, user may want to store this repair into the log
+ so that the log has a complete history and can be used to replay.
+ */
+ if (opt_transaction_logging && (check_param.testflag & T_REP_ANY) &&
+ (ma_control_file_create_or_open() ||
+ init_pagecache(maria_log_pagecache,
+ TRANSLOG_PAGECACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE, MY_WME) == 0 ||
+ translog_init(maria_data_root, TRANSLOG_FILE_SIZE,
+ 0, 0, maria_log_pagecache,
+ TRANSLOG_DEFAULT_FLAGS, 0)))
+ {
+ _ma_check_print_error(&check_param,
+ "Can't initialize transaction logging. Run "
+ "recovery with switch --skip-transaction-log");
+ error= 1;
+ argc= 1; /* Force loop out */
+ }
+
+ while (--argc >= 0)
+ {
+ int new_error=maria_chk(&check_param, *(argv++));
+ if ((check_param.testflag & T_REP_ANY) != T_REP)
+ check_param.testflag&= ~T_REP;
+ VOID(fflush(stdout));
+ VOID(fflush(stderr));
+ if ((check_param.error_printed | check_param.warning_printed) &&
+ (check_param.testflag & T_FORCE_CREATE) &&
+ (!(check_param.testflag & (T_REP | T_REP_BY_SORT | T_SORT_RECORDS |
+ T_SORT_INDEX))))
+ {
+ ulonglong old_testflag=check_param.testflag;
+ if (!(check_param.testflag & T_REP))
+ check_param.testflag|= T_REP_BY_SORT;
+ check_param.testflag&= ~T_EXTEND; /* Don't needed */
+ error|=maria_chk(&check_param, argv[-1]);
+ check_param.testflag= old_testflag;
+ VOID(fflush(stdout));
+ VOID(fflush(stderr));
+ }
+ else
+ error|=new_error;
+ if (argc && (!(check_param.testflag & T_SILENT) ||
+ check_param.testflag & T_INFO))
+ {
+ puts("\n---------\n");
+ VOID(fflush(stdout));
+ }
+ }
+ if (check_param.total_files > 1)
+ { /* Only if descript */
+ char buff[22],buff2[22];
+ if (!(check_param.testflag & T_SILENT) || check_param.testflag & T_INFO)
+ puts("\n---------");
+ printf("\nTotal of all %d MARIA-files:\nData records: %9s Deleted blocks: %9s\n",check_param.total_files,llstr(check_param.total_records,buff),
+ llstr(check_param.total_deleted,buff2));
+ }
+ free_defaults(default_argv);
+ free_tmpdir(&maria_chk_tmpdir);
+ maria_end();
+ my_end(check_param.testflag & T_INFO ?
+ MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
+ exit(error);
+#ifndef _lint
+ return 0; /* No compiler warning */
+#endif
+} /* main */
+
+enum options_mc {
+ OPT_CHARSETS_DIR=256, OPT_SET_COLLATION,OPT_START_CHECK_POS,
+ OPT_CORRECT_CHECKSUM, OPT_PAGE_BUFFER_SIZE,
+ OPT_KEY_CACHE_BLOCK_SIZE, OPT_MARIA_BLOCK_SIZE,
+ OPT_READ_BUFFER_SIZE, OPT_WRITE_BUFFER_SIZE, OPT_SORT_BUFFER_SIZE,
+ OPT_SORT_KEY_BLOCKS, OPT_DECODE_BITS, OPT_FT_MIN_WORD_LEN,
+ OPT_FT_MAX_WORD_LEN, OPT_FT_STOPWORD_FILE,
+ OPT_MAX_RECORD_LENGTH, OPT_AUTO_CLOSE, OPT_STATS_METHOD, OPT_TRANSACTION_LOG
+};
+
+static struct my_option my_long_options[] =
+{
+ {"analyze", 'a',
+ "Analyze distribution of keys. Will make some joins in MySQL faster. You can check the calculated distribution.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+#ifdef __NETWARE__
+ {"autoclose", OPT_AUTO_CLOSE, "Auto close the screen on exit for Netware.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+ {"block-search", 'b',
+ "No help available.",
+ 0, 0, 0, GET_ULONG, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"backup", 'B',
+ "Make a backup of the .MYD file as 'filename-time.BAK'.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"character-sets-dir", OPT_CHARSETS_DIR,
+ "Directory where character sets are.",
+ (uchar**) &charsets_dir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"check", 'c',
+ "Check table for errors.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"check-only-changed", 'C',
+ "Check only tables that have changed since last check. It also applies to other requested actions (e.g. --analyze will be ignored if the table is already analyzed).",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"correct-checksum", OPT_CORRECT_CHECKSUM,
+ "Correct checksum information for table.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+#ifndef DBUG_OFF
+ {"debug", '#',
+ "Output debug log. Often this is 'd:t:o,filename'.",
+ 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+ {"description", 'd',
+ "Prints some information about table.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"data-file-length", 'D',
+ "Max length of data file (when recreating data-file when it's full).",
+ (uchar**) &check_param.max_data_file_length,
+ (uchar**) &check_param.max_data_file_length,
+ 0, GET_LL, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"extend-check", 'e',
+ "If used when checking a table, ensure that the table is 100 percent consistent, which will take a long time. If used when repairing a table, try to recover every possible row from the data file. Normally this will also find a lot of garbage rows; Don't use this option with repair if you are not totally desperate.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"fast", 'F',
+ "Check only tables that haven't been closed properly. It also applies to other requested actions (e.g. --analyze will be ignored if the table is already analyzed).",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"force", 'f',
+ "Restart with -r if there are any errors in the table. States will be updated as with --update-state.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"HELP", 'H',
+ "Display this help and exit.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"help", '?',
+ "Display this help and exit.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"information", 'i',
+ "Print statistics information about table that is checked.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"keys-used", 'k',
+ "Tell MARIA to update only some specific keys. # is a bit mask of which keys to use. This can be used to get faster inserts.",
+ (uchar**) &check_param.keys_in_use,
+ (uchar**) &check_param.keys_in_use,
+ 0, GET_ULL, REQUIRED_ARG, -1, 0, 0, 0, 0, 0},
+ {"max-record-length", OPT_MAX_RECORD_LENGTH,
+ "Skip rows bigger than this if maria_chk can't allocate memory to hold it",
+ (uchar**) &check_param.max_record_length,
+ (uchar**) &check_param.max_record_length,
+ 0, GET_ULL, REQUIRED_ARG, LONGLONG_MAX, 0, LONGLONG_MAX, 0, 0, 0},
+ {"medium-check", 'm',
+ "Faster than extend-check, but only finds 99.99% of all errors. Should be good enough for most cases.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"quick", 'q', "Faster repair by not modifying the data file.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"read-only", 'T',
+ "Don't mark table as checked.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"recover", 'r',
+ "Can fix almost anything except unique keys that aren't unique.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"parallel-recover", 'p',
+ "Same as '-r' but creates all the keys in parallel.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"safe-recover", 'o',
+ "Uses old recovery method; Slower than '-r' but can handle a couple of cases where '-r' reports that it can't fix the data file.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"sort-recover", 'n',
+ "Force recovering with sorting even if the temporary file was very big.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+#ifdef DEBUG
+ {"start-check-pos", OPT_START_CHECK_POS,
+ "No help available.",
+ 0, 0, 0, GET_ULL, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+ {"set-auto-increment", 'A',
+ "Force auto_increment to start at this or higher value. If no value is given, then sets the next auto_increment value to the highest used value for the auto key + 1.",
+ (uchar**) &check_param.auto_increment_value,
+ (uchar**) &check_param.auto_increment_value,
+ 0, GET_ULL, OPT_ARG, 0, 0, 0, 0, 0, 0},
+ {"set-collation", OPT_SET_COLLATION,
+ "Change the collation used by the index",
+ (uchar**) &set_collation_name, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"set-variable", 'O',
+ "Change the value of a variable. Please note that this option is deprecated; you can set variables directly with --variable-name=value.",
+ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"silent", 's',
+ "Only print errors. One can use two -s to make maria_chk very silent.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"sort-index", 'S',
+ "Sort index blocks. This speeds up 'read-next' in applications.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"sort-records", 'R',
+ "Sort records according to an index. This makes your data much more localized and may speed up things. (It may be VERY slow to do a sort the first time!)",
+ (uchar**) &check_param.opt_sort_key,
+ (uchar**) &check_param.opt_sort_key,
+ 0, GET_UINT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"tmpdir", 't',
+ "Path for temporary files.",
+ (uchar**) &opt_tmpdir,
+ 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"transaction-log", OPT_TRANSACTION_LOG,
+ "Log repair command to transaction log",
+ (uchar**) &opt_transaction_logging, (uchar**) &opt_transaction_logging,
+ 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"update-state", 'U',
+ "Mark tables as crashed if any errors were found.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"unpack", 'u',
+ "Unpack file packed with mariapack.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"verbose", 'v',
+ "Print more information. This can be used with --description and --check. Use many -v for more verbosity!",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"version", 'V', "Print version and exit.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"wait", 'w', "Wait if table is locked.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ { "page_buffer_size", OPT_PAGE_BUFFER_SIZE,
+ "Size of page buffer. Used by --safe-repair",
+ (uchar**) &check_param.use_buffers, (uchar**) &check_param.use_buffers, 0,
+ GET_ULONG, REQUIRED_ARG, (long) USE_BUFFER_INIT, (long) USE_BUFFER_INIT,
+ (long) ~0L, (long) MALLOC_OVERHEAD, (long) IO_SIZE, 0},
+ { "read_buffer_size", OPT_READ_BUFFER_SIZE, "",
+ (uchar**) &check_param.read_buffer_length,
+ (uchar**) &check_param.read_buffer_length, 0, GET_ULONG, REQUIRED_ARG,
+ (long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD,
+ (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0},
+ { "write_buffer_size", OPT_WRITE_BUFFER_SIZE, "",
+ (uchar**) &check_param.write_buffer_length,
+ (uchar**) &check_param.write_buffer_length, 0, GET_ULONG, REQUIRED_ARG,
+ (long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD,
+ (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0},
+ { "sort_buffer_size", OPT_SORT_BUFFER_SIZE,
+ "Size of sort buffer. Used by --recover",
+ (uchar**) &check_param.sort_buffer_length,
+ (uchar**) &check_param.sort_buffer_length, 0, GET_ULONG, REQUIRED_ARG,
+ (long) SORT_BUFFER_INIT, (long) (MIN_SORT_BUFFER + MALLOC_OVERHEAD),
+ (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0},
+ { "sort_key_blocks", OPT_SORT_KEY_BLOCKS, "",
+ (uchar**) &check_param.sort_key_blocks,
+ (uchar**) &check_param.sort_key_blocks, 0, GET_ULONG, REQUIRED_ARG,
+ BUFFERS_WHEN_SORTING, 4L, 100L, 0L, 1L, 0},
+ { "decode_bits", OPT_DECODE_BITS, "", (uchar**) &decode_bits,
+ (uchar**) &decode_bits, 0, GET_UINT, REQUIRED_ARG, 9L, 4L, 17L, 0L, 1L, 0},
+ { "ft_min_word_len", OPT_FT_MIN_WORD_LEN, "", (uchar**) &ft_min_word_len,
+ (uchar**) &ft_min_word_len, 0, GET_ULONG, REQUIRED_ARG, 4, 1, HA_FT_MAXCHARLEN,
+ 0, 1, 0},
+ { "ft_max_word_len", OPT_FT_MAX_WORD_LEN, "", (uchar**) &ft_max_word_len,
+ (uchar**) &ft_max_word_len, 0, GET_ULONG, REQUIRED_ARG, HA_FT_MAXCHARLEN, 10,
+ HA_FT_MAXCHARLEN, 0, 1, 0},
+ { "maria_ft_stopword_file", OPT_FT_STOPWORD_FILE,
+ "Use stopwords from this file instead of built-in list.",
+ (uchar**) &ft_stopword_file, (uchar**) &ft_stopword_file, 0, GET_STR,
+ REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"stats_method", OPT_STATS_METHOD,
+ "Specifies how index statistics collection code should treat NULLs. "
+ "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), "
+ "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".",
+ (uchar**) &maria_stats_method_str, (uchar**) &maria_stats_method_str, 0,
+ GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+
+#include <help_start.h>
+
+static void print_version(void)
+{
+ printf("%s Ver 1.0 for %s at %s\n", my_progname, SYSTEM_TYPE,
+ MACHINE_TYPE);
+ NETWARE_SET_SCREEN_MODE(1);
+}
+
+
+static void usage(void)
+{
+ print_version();
+ puts("By Monty, for your professional use");
+ puts("This software comes with NO WARRANTY: see the PUBLIC for details.\n");
+ puts("Description, check and repair of MARIA tables.");
+ puts("Used without options all tables on the command will be checked for errors");
+ printf("Usage: %s [OPTIONS] tables[.MYI]\n", my_progname_short);
+ printf("\nGlobal options:\n");
+#ifndef DBUG_OFF
+ printf("\
+ -#, --debug=... Output debug log. Often this is 'd:t:o,filename'.\n");
+#endif
+ printf("\
+ -?, --help Display this help and exit.\n\
+ -O, --set-variable var=option.\n\
+ Change the value of a variable. Please note that\n\
+ this option is deprecated; you can set variables\n\
+ directly with '--variable-name=value'.\n\
+ -t, --tmpdir=path Path for temporary files. Multiple paths can be\n\
+ specified, separated by ");
+#if defined( __WIN__) || defined(__NETWARE__)
+ printf("semicolon (;)");
+#else
+ printf("colon (:)");
+#endif
+ printf(", they will be used\n\
+ in a round-robin fashion.\n\
+ -s, --silent Only print errors. One can use two -s to make\n\
+ maria_chk very silent.\n\
+ -v, --verbose Print more information. This can be used with\n\
+ --description and --check. Use many -v for more verbosity.\n\
+ -V, --version Print version and exit.\n\
+ -w, --wait Wait if table is locked.\n\n");
+#ifdef DEBUG
+ puts(" --start-check-pos=# Start reading file at given offset.\n");
+#endif
+
+ puts("Check options (check is the default action for maria_chk):\n\
+ -c, --check Check table for errors.\n\
+ -e, --extend-check Check the table VERY throughly. Only use this in\n\
+ extreme cases as maria_chk should normally be able to\n\
+ find out if the table is ok even without this switch.\n\
+ -F, --fast Check only tables that haven't been closed properly.\n\
+ -C, --check-only-changed\n\
+ Check only tables that have changed since last check.\n\
+ -f, --force Restart with '-r' if there are any errors in the table.\n\
+ States will be updated as with '--update-state'.\n\
+ -i, --information Print statistics information about table that is checked.\n\
+ -m, --medium-check Faster than extend-check, but only finds 99.99% of\n\
+ all errors. Should be good enough for most cases.\n\
+ -U --update-state Mark tables as crashed if you find any errors.\n\
+ -T, --read-only Don't mark table as checked.\n");
+
+ puts("Recover (repair)/ options (When using '-r' or '-o'):\n\
+ -B, --backup Make a backup of the .MYD file as 'filename-time.BAK'.\n\
+ --correct-checksum Correct checksum information for table.\n\
+ -D, --data-file-length=# Max length of data file (when recreating data\n\
+ file when it's full).\n\
+ -e, --extend-check Try to recover every possible row from the data file\n\
+ Normally this will also find a lot of garbage rows;\n\
+ Don't use this option if you are not totally desperate.\n\
+ -f, --force Overwrite old temporary files.\n\
+ -k, --keys-used=# Tell MARIA to update only some specific keys. # is a\n\
+ bit mask of which keys to use. This can be used to\n\
+ get faster inserts.\n\
+ --max-record-length=#\n\
+ Skip rows bigger than this if maria_chk can't allocate\n\
+ memory to hold it.\n\
+ -r, --recover Can fix almost anything except unique keys that aren't\n\
+ unique.\n\
+ -n, --sort-recover Forces recovering with sorting even if the temporary\n\
+ file would be very big.\n\
+ -p, --parallel-recover\n\
+ Uses the same technique as '-r' and '-n', but creates\n\
+ all the keys in parallel, in different threads.\n\
+ -o, --safe-recover Uses old recovery method; Slower than '-r' but can\n\
+ handle a couple of cases where '-r' reports that it\n\
+ can't fix the data file.\n\
+ --transaction-log Log repair command to transaction log. This is needed\n\
+ if one wants to use the maria_read_log to repeat the \n\
+ repair\n\
+ --character-sets-dir=...\n\
+ Directory where character sets are.\n\
+ --set-collation=name\n\
+ Change the collation used by the index.\n\
+ -q, --quick Faster repair by not modifying the data file.\n\
+ One can give a second '-q' to force maria_chk to\n\
+ modify the original datafile in case of duplicate keys.\n\
+ NOTE: Tables where the data file is currupted can't be\n\
+ fixed with this option.\n\
+ -u, --unpack Unpack file packed with mariapack.\n\
+");
+
+ puts("Other actions:\n\
+ -a, --analyze Analyze distribution of keys. Will make some joins in\n\
+ MySQL faster. You can check the calculated distribution\n\
+ by using '--description --verbose table_name'.\n\
+ --stats_method=name Specifies how index statistics collection code should\n\
+ treat NULLs. Possible values of name are \"nulls_unequal\"\n\
+ (default for 4.1/5.0), \"nulls_equal\" (emulate 4.0), and \n\
+ \"nulls_ignored\".\n\
+ -d, --description Prints some information about table.\n\
+ -A, --set-auto-increment[=value]\n\
+ Force auto_increment to start at this or higher value\n\
+ If no value is given, then sets the next auto_increment\n\
+ value to the highest used value for the auto key + 1.\n\
+ -S, --sort-index Sort index blocks. This speeds up 'read-next' in\n\
+ applications.\n\
+ -R, --sort-records=#\n\
+ Sort records according to an index. This makes your\n\
+ data much more localized and may speed up things\n\
+ (It may be VERY slow to do a sort the first time!).\n\
+ -b, --block-search=#\n\
+ Find a record, a block at given offset belongs to.");
+
+ print_defaults("my", load_default_groups);
+ my_print_variables(my_long_options);
+}
+
+#include <help_end.h>
+
+const char *maria_stats_method_names[] = {"nulls_unequal", "nulls_equal",
+ "nulls_ignored", NullS};
+TYPELIB maria_stats_method_typelib= {
+ array_elements(maria_stats_method_names) - 1, "",
+ maria_stats_method_names, NULL};
+
+ /* Read options */
+
+static my_bool
+get_one_option(int optid,
+ const struct my_option *opt __attribute__((unused)),
+ char *argument)
+{
+ switch (optid) {
+#ifdef __NETWARE__
+ case OPT_AUTO_CLOSE:
+ setscreenmode(SCR_AUTOCLOSE_ON_EXIT);
+ break;
+#endif
+ case 'a':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_STATISTICS;
+ else
+ check_param.testflag|= T_STATISTICS;
+ break;
+ case 'A':
+ if (argument)
+ check_param.auto_increment_value= strtoull(argument, NULL, 0);
+ else
+ check_param.auto_increment_value= 0; /* Set to max used value */
+ check_param.testflag|= T_AUTO_INC;
+ break;
+ case 'b':
+ check_param.search_after_block= strtoul(argument, NULL, 10);
+ break;
+ case 'B':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_BACKUP_DATA;
+ else
+ check_param.testflag|= T_BACKUP_DATA;
+ break;
+ case 'c':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_CHECK;
+ else
+ check_param.testflag|= T_CHECK;
+ break;
+ case 'C':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~(T_CHECK | T_CHECK_ONLY_CHANGED);
+ else
+ check_param.testflag|= T_CHECK | T_CHECK_ONLY_CHANGED;
+ break;
+ case 'D':
+ check_param.max_data_file_length=strtoll(argument, NULL, 10);
+ break;
+ case 's': /* silent */
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~(T_SILENT | T_VERY_SILENT);
+ else
+ {
+ if (check_param.testflag & T_SILENT)
+ check_param.testflag|= T_VERY_SILENT;
+ check_param.testflag|= T_SILENT;
+ check_param.testflag&= ~T_WRITE_LOOP;
+ }
+ break;
+ case 'w':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_WAIT_FOREVER;
+ else
+ check_param.testflag|= T_WAIT_FOREVER;
+ break;
+ case 'd': /* description if isam-file */
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_DESCRIPT;
+ else
+ check_param.testflag|= T_DESCRIPT;
+ break;
+ case 'e': /* extend check */
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_EXTEND;
+ else
+ check_param.testflag|= T_EXTEND;
+ break;
+ case 'i':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_INFO;
+ else
+ check_param.testflag|= T_INFO;
+ break;
+ case 'f':
+ if (argument == disabled_my_option)
+ {
+ check_param.tmpfile_createflag= O_RDWR | O_TRUNC | O_EXCL;
+ check_param.testflag&= ~(T_FORCE_CREATE | T_UPDATE_STATE);
+ }
+ else
+ {
+ check_param.tmpfile_createflag= O_RDWR | O_TRUNC;
+ check_param.testflag|= T_FORCE_CREATE | T_UPDATE_STATE;
+ }
+ break;
+ case 'F':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_FAST;
+ else
+ check_param.testflag|= T_FAST;
+ break;
+ case 'k':
+ check_param.keys_in_use= (ulonglong) strtoll(argument, NULL, 10);
+ break;
+ case 'm':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_MEDIUM;
+ else
+ check_param.testflag|= T_MEDIUM; /* Medium check */
+ break;
+ case 'r': /* Repair table */
+ check_param.testflag&= ~T_REP_ANY;
+ if (argument != disabled_my_option)
+ check_param.testflag|= T_REP_BY_SORT;
+ break;
+ case 'p':
+ check_param.testflag&= ~T_REP_ANY;
+ if (argument != disabled_my_option)
+ check_param.testflag|= T_REP_PARALLEL;
+ break;
+ case 'o':
+ check_param.testflag&= ~T_REP_ANY;
+ check_param.force_sort= 0;
+ if (argument != disabled_my_option)
+ {
+ check_param.testflag|= T_REP;
+ my_disable_async_io= 1; /* More safety */
+ }
+ break;
+ case 'n':
+ check_param.testflag&= ~T_REP_ANY;
+ if (argument == disabled_my_option)
+ check_param.force_sort= 0;
+ else
+ {
+ check_param.testflag|= T_REP_BY_SORT;
+ check_param.force_sort= 1;
+ }
+ break;
+ case 'q':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~(T_QUICK | T_FORCE_UNIQUENESS);
+ else
+ check_param.testflag|=
+ (check_param.testflag & T_QUICK) ? T_FORCE_UNIQUENESS : T_QUICK;
+ break;
+ case 'u':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_UNPACK;
+ else
+ {
+ check_param.testflag|= T_UNPACK;
+ if (!(check_param.testflag & T_REP_ANY))
+ check_param.testflag|= T_REP_BY_SORT;
+ }
+ break;
+ case 'v': /* Verbose */
+ if (argument == disabled_my_option)
+ {
+ check_param.testflag&= ~T_VERBOSE;
+ check_param.verbose=0;
+ }
+ else
+ {
+ check_param.testflag|= T_VERBOSE;
+ check_param.verbose++;
+ }
+ break;
+ case 'R': /* Sort records */
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_SORT_RECORDS;
+ else
+ {
+ check_param.testflag|= T_SORT_RECORDS;
+ check_param.opt_sort_key= (uint) atoi(argument) - 1;
+ if (check_param.opt_sort_key >= MARIA_MAX_KEY)
+ {
+ fprintf(stderr,
+ "The value of the sort key is bigger than max key: %d.\n",
+ MARIA_MAX_KEY);
+ exit(1);
+ }
+ }
+ break;
+ case 'S': /* Sort index */
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_SORT_INDEX;
+ else
+ check_param.testflag|= T_SORT_INDEX;
+ break;
+ case 'T':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_READONLY;
+ else
+ check_param.testflag|= T_READONLY;
+ break;
+ case 'U':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_UPDATE_STATE;
+ else
+ check_param.testflag|= T_UPDATE_STATE;
+ break;
+ case '#':
+ DBUG_SET_INITIAL(argument ? argument : "d:t:o,/tmp/maria_chk.trace");
+ break;
+ case 'V':
+ print_version();
+ exit(0);
+ case OPT_CORRECT_CHECKSUM:
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_CALC_CHECKSUM;
+ else
+ check_param.testflag|= T_CALC_CHECKSUM;
+ break;
+ case OPT_STATS_METHOD:
+ {
+ int method;
+ enum_handler_stats_method method_conv;
+ LINT_INIT(method_conv);
+ maria_stats_method_str= argument;
+ if ((method=find_type(argument, &maria_stats_method_typelib, 2)) <= 0)
+ {
+ fprintf(stderr, "Invalid value of stats_method: %s.\n", argument);
+ exit(1);
+ }
+ switch (method-1) {
+ case 0:
+ method_conv= MI_STATS_METHOD_NULLS_EQUAL;
+ break;
+ case 1:
+ method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL;
+ break;
+ case 2:
+ method_conv= MI_STATS_METHOD_IGNORE_NULLS;
+ break;
+ default: assert(0); /* Impossible */
+ }
+ check_param.stats_method= method_conv;
+ break;
+ }
+#ifdef DEBUG /* Only useful if debugging */
+ case OPT_START_CHECK_POS:
+ check_param.start_check_pos= strtoull(argument, NULL, 0);
+ break;
+#endif
+ case 'H':
+ my_print_help(my_long_options);
+ exit(0);
+ case '?':
+ usage();
+ exit(0);
+ }
+ return 0;
+}
+
+
+static void get_options(register int *argc,register char ***argv)
+{
+ int ho_error;
+
+ load_defaults("my", load_default_groups, argc, argv);
+ default_argv= *argv;
+ if (isatty(fileno(stdout)))
+ check_param.testflag|=T_WRITE_LOOP;
+
+ if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
+ exit(ho_error);
+
+ /* If using repair, then update checksum if one uses --update-state */
+ if ((check_param.testflag & T_UPDATE_STATE) &&
+ (check_param.testflag & T_REP_ANY))
+ check_param.testflag|= T_CALC_CHECKSUM;
+
+ if (*argc == 0)
+ {
+ usage();
+ exit(-1);
+ }
+
+ if ((check_param.testflag & T_UNPACK) &&
+ (check_param.testflag & (T_QUICK | T_SORT_RECORDS)))
+ {
+ VOID(fprintf(stderr,
+ "%s: --unpack can't be used with --quick or --sort-records\n",
+ my_progname_short));
+ exit(1);
+ }
+ if ((check_param.testflag & T_READONLY) &&
+ (check_param.testflag &
+ (T_REP_ANY | T_STATISTICS | T_AUTO_INC |
+ T_SORT_RECORDS | T_SORT_INDEX | T_FORCE_CREATE)))
+ {
+ VOID(fprintf(stderr,
+ "%s: Can't use --readonly when repairing or sorting\n",
+ my_progname_short));
+ exit(1);
+ }
+
+ if (init_tmpdir(&maria_chk_tmpdir, opt_tmpdir))
+ exit(1);
+
+ check_param.tmpdir=&maria_chk_tmpdir;
+
+ if (set_collation_name)
+ if (!(set_collation= get_charset_by_name(set_collation_name,
+ MYF(MY_WME))))
+ exit(1);
+
+ return;
+} /* get options */
+
+
+ /* Check table */
+
+static int maria_chk(HA_CHECK *param, char *filename)
+{
+ int error,lock_type,recreate;
+ int rep_quick= param->testflag & (T_QUICK | T_FORCE_UNIQUENESS);
+ MARIA_HA *info;
+ File datafile;
+ char llbuff[22],llbuff2[22];
+ my_bool state_updated=0;
+ MARIA_SHARE *share;
+ DBUG_ENTER("maria_chk");
+
+ param->out_flag=error=param->warning_printed=param->error_printed=
+ recreate=0;
+ datafile=0;
+ param->isam_file_name=filename; /* For error messages */
+ if (!(info=maria_open(filename,
+ (param->testflag & (T_DESCRIPT | T_READONLY)) ?
+ O_RDONLY : O_RDWR,
+ HA_OPEN_FOR_REPAIR |
+ ((param->testflag & T_WAIT_FOREVER) ?
+ HA_OPEN_WAIT_IF_LOCKED :
+ (param->testflag & T_DESCRIPT) ?
+ HA_OPEN_IGNORE_IF_LOCKED : HA_OPEN_ABORT_IF_LOCKED))))
+ {
+ /* Avoid twice printing of isam file name */
+ param->error_printed=1;
+ switch (my_errno) {
+ case HA_ERR_CRASHED:
+ _ma_check_print_error(param,"'%s' doesn't have a correct index definition. You need to recreate it before you can do a repair",filename);
+ break;
+ case HA_ERR_NOT_A_TABLE:
+ _ma_check_print_error(param,"'%s' is not a MARIA-table",filename);
+ break;
+ case HA_ERR_CRASHED_ON_USAGE:
+ _ma_check_print_error(param,"'%s' is marked as crashed",filename);
+ break;
+ case HA_ERR_CRASHED_ON_REPAIR:
+ _ma_check_print_error(param,"'%s' is marked as crashed after last repair",filename);
+ break;
+ case HA_ERR_OLD_FILE:
+ _ma_check_print_error(param,"'%s' is a old type of MARIA-table", filename);
+ break;
+ case HA_ERR_NEW_FILE:
+ _ma_check_print_error(param,"'%s' uses new features not supported by this version of the MARIA library", filename);
+ break;
+ case HA_ERR_END_OF_FILE:
+ _ma_check_print_error(param,"Couldn't read complete header from '%s'", filename);
+ break;
+ case EAGAIN:
+ _ma_check_print_error(param,"'%s' is locked. Use -w to wait until unlocked",filename);
+ break;
+ case ENOENT:
+ _ma_check_print_error(param,"File '%s' doesn't exist",filename);
+ break;
+ case EACCES:
+ _ma_check_print_error(param,"You don't have permission to use '%s'",
+ filename);
+ break;
+ default:
+ _ma_check_print_error(param,"%d when opening MARIA-table '%s'",
+ my_errno,filename);
+ break;
+ }
+ DBUG_RETURN(1);
+ }
+ share= info->s;
+ share->tot_locks-= share->r_locks;
+ share->r_locks=0;
+ maria_block_size= share->base.block_size;
+
+ if (share->data_file_type == BLOCK_RECORD ||
+ ((param->testflag & T_UNPACK) &&
+ share->state.header.org_data_file_type == BLOCK_RECORD))
+ {
+ if (param->testflag & T_SORT_RECORDS)
+ {
+ _ma_check_print_error(param,
+ "Record format used by '%s' is is not yet supported with repair/check",
+ filename);
+ param->error_printed= 0;
+ error= 1;
+ goto end2;
+ }
+ /* We can't do parallell repair with BLOCK_RECORD yet */
+ if (param->testflag & T_REP_PARALLEL)
+ {
+ param->testflag&= ~T_REP_PARALLEL;
+ param->testflag|= T_REP_BY_SORT;
+ }
+ }
+
+ /*
+ Skip the checking of the file if:
+ We are using --fast and the table is closed properly
+ We are using --check-only-changed-tables and the table hasn't changed
+ */
+ if (param->testflag & (T_FAST | T_CHECK_ONLY_CHANGED))
+ {
+ my_bool need_to_check= (maria_is_crashed(info) ||
+ share->state.open_count != 0);
+
+ if ((param->testflag & (T_REP_ANY | T_SORT_RECORDS)) &&
+ ((share->state.changed & (STATE_CHANGED | STATE_CRASHED |
+ STATE_CRASHED_ON_REPAIR) ||
+ !(param->testflag & T_CHECK_ONLY_CHANGED))))
+ need_to_check=1;
+
+ if (info->s->base.keys && info->state->records)
+ {
+ if ((param->testflag & T_STATISTICS) &&
+ (share->state.changed & STATE_NOT_ANALYZED))
+ need_to_check=1;
+ if ((param->testflag & T_SORT_INDEX) &&
+ (share->state.changed & STATE_NOT_SORTED_PAGES))
+ need_to_check=1;
+ if ((param->testflag & T_REP_BY_SORT) &&
+ (share->state.changed & STATE_NOT_OPTIMIZED_KEYS))
+ need_to_check=1;
+ }
+ if ((param->testflag & T_CHECK_ONLY_CHANGED) &&
+ (share->state.changed & (STATE_CHANGED | STATE_CRASHED |
+ STATE_CRASHED_ON_REPAIR)))
+ need_to_check=1;
+ if (!need_to_check)
+ {
+ if (!(param->testflag & T_SILENT) || param->testflag & T_INFO)
+ printf("MARIA file: %s is already checked\n",filename);
+ if (maria_close(info))
+ {
+ _ma_check_print_error(param,"%d when closing MARIA-table '%s'",
+ my_errno,filename);
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+ }
+ }
+ if ((param->testflag & (T_REP_ANY | T_STATISTICS |
+ T_SORT_RECORDS | T_SORT_INDEX)) &&
+ (((param->testflag & T_UNPACK) &&
+ share->data_file_type == COMPRESSED_RECORD) ||
+ mi_uint2korr(share->state.header.state_info_length) !=
+ MARIA_STATE_INFO_SIZE ||
+ mi_uint2korr(share->state.header.base_info_length) !=
+ MARIA_BASE_INFO_SIZE ||
+ maria_is_any_intersect_keys_active(param->keys_in_use, share->base.keys,
+ ~share->state.key_map) ||
+ maria_test_if_almost_full(info) ||
+ info->s->state.header.file_version[3] != maria_file_magic[3] ||
+ (set_collation &&
+ set_collation->number != share->state.header.language)))
+ {
+ if (set_collation)
+ param->language= set_collation->number;
+ if (maria_recreate_table(param, &info,filename))
+ {
+ VOID(fprintf(stderr,
+ "MARIA-table '%s' is not fixed because of errors\n",
+ filename));
+ return(-1);
+ }
+ recreate=1;
+ if (!(param->testflag & T_REP_ANY))
+ {
+ param->testflag|=T_REP_BY_SORT; /* if only STATISTICS */
+ if (!(param->testflag & T_SILENT))
+ printf("- '%s' has old table-format. Recreating index\n",filename);
+ rep_quick|=T_QUICK;
+ }
+ share= info->s;
+ share->tot_locks-= share->r_locks;
+ share->r_locks=0;
+ }
+
+ if (param->testflag & T_DESCRIPT)
+ {
+ param->total_files++;
+ param->total_records+=info->state->records;
+ param->total_deleted+=info->state->del;
+ descript(param, info, filename);
+ maria_close(info); /* Should always succeed */
+ return(0);
+ }
+
+ if (!stopwords_inited++)
+ ft_init_stopwords();
+
+ if (!(param->testflag & T_READONLY))
+ lock_type = F_WRLCK; /* table is changed */
+ else
+ lock_type= F_RDLCK;
+ if (info->lock_type == F_RDLCK)
+ info->lock_type=F_UNLCK; /* Read only table */
+ if (_ma_readinfo(info,lock_type,0))
+ {
+ _ma_check_print_error(param,"Can't lock indexfile of '%s', error: %d",
+ filename,my_errno);
+ param->error_printed=0;
+ error= 1;
+ goto end2;
+ }
+ /*
+ _ma_readinfo() has locked the table.
+ We mark the table as locked (without doing file locks) to be able to
+ use functions that only works on locked tables (like row caching).
+ */
+ maria_lock_database(info, F_EXTRA_LCK);
+ datafile= info->dfile.file;
+ if (init_pagecache(maria_pagecache, param->use_buffers, 0, 0,
+ maria_block_size, MY_WME) == 0)
+ {
+ _ma_check_print_error(param, "Can't initialize page cache with %lu memory",
+ (ulong) param->use_buffers);
+ error= 1;
+ goto end2;
+ }
+
+ if (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX))
+ {
+ /* Mark table as not transactional to avoid logging */
+ maria_disable_logging(info);
+
+ if (param->testflag & T_REP_ANY)
+ {
+ ulonglong tmp=share->state.key_map;
+ maria_copy_keys_active(share->state.key_map, share->base.keys,
+ param->keys_in_use);
+ if (tmp != share->state.key_map)
+ info->update|=HA_STATE_CHANGED;
+ }
+ if (rep_quick &&
+ maria_chk_del(param, info, param->testflag & ~T_VERBOSE))
+ {
+ if (param->testflag & T_FORCE_CREATE)
+ {
+ rep_quick=0;
+ _ma_check_print_info(param,"Creating new data file\n");
+ }
+ else
+ {
+ error=1;
+ _ma_check_print_error(param,
+ "Quick-recover aborted; Run recovery without switch 'q'");
+ }
+ }
+ if (!error)
+ {
+ /*
+ Tell the server's Recovery to ignore old REDOs on this table; we don't
+ know what the log's end LSN is now, so we just let the server know
+ that it will have to find and store it.
+ This is the only case where create_rename_lsn can be a horizon and not
+ a LSN.
+ */
+ if (share->base.born_transactional)
+ share->state.create_rename_lsn= share->state.is_of_horizon=
+ LSN_REPAIRED_BY_MARIA_CHK;
+ if ((param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)) &&
+ (maria_is_any_key_active(share->state.key_map) ||
+ (rep_quick && !param->keys_in_use && !recreate)) &&
+ maria_test_if_sort_rep(info, info->state->records,
+ info->s->state.key_map,
+ param->force_sort))
+ {
+ if (param->testflag & T_REP_BY_SORT)
+ error=maria_repair_by_sort(param,info,filename,rep_quick);
+ else
+ error=maria_repair_parallel(param,info,filename,rep_quick);
+ state_updated=1;
+ }
+ else if (param->testflag & T_REP_ANY)
+ error=maria_repair(param, info,filename,rep_quick);
+ }
+ if (!error && param->testflag & T_SORT_RECORDS)
+ {
+ /*
+ The data file is nowadays reopened in the repair code so we should
+ soon remove the following reopen-code
+ */
+#ifndef TO_BE_REMOVED
+ if (param->out_flag & O_NEW_DATA)
+ { /* Change temp file to org file */
+ VOID(my_close(info->dfile.file, MYF(MY_WME))); /* Close new file */
+ error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT,
+ MYF(0));
+ if (_ma_open_datafile(info,info->s, -1))
+ error=1;
+ param->out_flag&= ~O_NEW_DATA; /* We are using new datafile */
+ param->read_cache.file= info->dfile.file;
+ }
+#endif
+ if (! error)
+ {
+ uint key;
+ /*
+ We can't update the index in maria_sort_records if we have a
+ prefix compressed or fulltext index
+ */
+ my_bool update_index=1;
+ for (key=0 ; key < share->base.keys; key++)
+ if (share->keyinfo[key].flag & (HA_BINARY_PACK_KEY|HA_FULLTEXT))
+ update_index=0;
+
+ error=maria_sort_records(param,info,filename,param->opt_sort_key,
+ /* what is the following parameter for ? */
+ (my_bool) !(param->testflag & T_REP),
+ update_index);
+ datafile= info->dfile.file; /* This is now locked */
+ if (!error && !update_index)
+ {
+ if (param->verbose)
+ puts("Table had a compressed index; We must now recreate the index");
+ error=maria_repair_by_sort(param,info,filename,1);
+ }
+ }
+ }
+ if (!error && param->testflag & T_SORT_INDEX)
+ error=maria_sort_index(param,info,filename);
+ if (!error)
+ share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED |
+ STATE_CRASHED_ON_REPAIR);
+ else
+ maria_mark_crashed(info);
+ }
+ else if ((param->testflag & T_CHECK) || !(param->testflag & T_AUTO_INC))
+ {
+ if (!(param->testflag & T_SILENT) || param->testflag & T_INFO)
+ printf("Checking MARIA file: %s\n",filename);
+ if (!(param->testflag & T_SILENT))
+ printf("Data records: %7s Deleted blocks: %7s\n",
+ llstr(info->state->records,llbuff),
+ llstr(info->state->del,llbuff2));
+ error =maria_chk_status(param,info);
+ maria_intersect_keys_active(share->state.key_map, param->keys_in_use);
+ error =maria_chk_size(param,info);
+ if (!error || !(param->testflag & (T_FAST | T_FORCE_CREATE)))
+ error|=maria_chk_del(param, info,param->testflag);
+ if ((!error || (!(param->testflag & (T_FAST | T_FORCE_CREATE)) &&
+ !param->start_check_pos)))
+ {
+ error|=maria_chk_key(param, info);
+ if (!error && (param->testflag & (T_STATISTICS | T_AUTO_INC)))
+ error=maria_update_state_info(param, info,
+ ((param->testflag & T_STATISTICS) ?
+ UPDATE_STAT : 0) |
+ ((param->testflag & T_AUTO_INC) ?
+ UPDATE_AUTO_INC : 0));
+ }
+ if ((!rep_quick && !error) ||
+ !(param->testflag & (T_FAST | T_FORCE_CREATE)))
+ {
+ VOID(init_io_cache(&param->read_cache,datafile,
+ (uint) param->read_buffer_length,
+ READ_CACHE,
+ (param->start_check_pos ?
+ param->start_check_pos :
+ share->pack.header_length),
+ 1,
+ MYF(MY_WME)));
+ maria_lock_memory(param);
+ if ((info->s->data_file_type != STATIC_RECORD) ||
+ (param->testflag & (T_EXTEND | T_MEDIUM)))
+ error|=maria_chk_data_link(param, info, param->testflag & T_EXTEND);
+ error|= _ma_flush_table_files_after_repair(param, info);
+ VOID(end_io_cache(&param->read_cache));
+ }
+ if (!error)
+ {
+ if ((share->state.changed & STATE_CHANGED) &&
+ (param->testflag & T_UPDATE_STATE))
+ info->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+ share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED |
+ STATE_CRASHED_ON_REPAIR);
+ }
+ else if (!maria_is_crashed(info) &&
+ (param->testflag & T_UPDATE_STATE))
+ { /* Mark crashed */
+ maria_mark_crashed(info);
+ info->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+ }
+ }
+
+ if ((param->testflag & T_AUTO_INC) ||
+ ((param->testflag & T_REP_ANY) && info->s->base.auto_key))
+ _ma_update_auto_increment_key(param, info,
+ (my_bool) !test(param->testflag & T_AUTO_INC));
+
+ if (info->update & HA_STATE_CHANGED && ! (param->testflag & T_READONLY))
+ error|=maria_update_state_info(param, info,
+ UPDATE_OPEN_COUNT |
+ (((param->testflag & T_REP_ANY) ?
+ UPDATE_TIME : 0) |
+ (state_updated ? UPDATE_STAT : 0) |
+ ((param->testflag & T_SORT_RECORDS) ?
+ UPDATE_SORT : 0)));
+ info->update&= ~HA_STATE_CHANGED;
+ maria_enable_logging(info);
+ maria_lock_database(info, F_UNLCK);
+
+end2:
+ end_pagecache(maria_pagecache, 1);
+ if (maria_close(info))
+ {
+ _ma_check_print_error(param,"%d when closing MARIA-table '%s'",
+ my_errno,filename);
+ DBUG_RETURN(1);
+ }
+ if (error == 0)
+ {
+ if (param->out_flag & O_NEW_DATA)
+ error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT,
+ ((param->testflag & T_BACKUP_DATA) ?
+ MYF(MY_REDEL_MAKE_BACKUP) : MYF(0)));
+ if (param->out_flag & O_NEW_INDEX)
+ error|=maria_change_to_newfile(filename,MARIA_NAME_IEXT,INDEX_TMP_EXT,
+ MYF(0));
+ }
+ VOID(fflush(stdout)); VOID(fflush(stderr));
+ if (param->error_printed)
+ {
+ if (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX))
+ {
+ VOID(fprintf(stderr,
+ "MARIA-table '%s' is not fixed because of errors\n",
+ filename));
+ if (param->testflag & T_REP_ANY)
+ VOID(fprintf(stderr,
+ "Try fixing it by using the --safe-recover (-o), the --force (-f) option or by not using the --quick (-q) flag\n"));
+ }
+ else if (!(param->error_printed & 2) &&
+ !(param->testflag & T_FORCE_CREATE))
+ VOID(fprintf(stderr,
+ "MARIA-table '%s' is corrupted\nFix it using switch \"-r\" or \"-o\"\n",
+ filename));
+ }
+ else if (param->warning_printed &&
+ ! (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX |
+ T_FORCE_CREATE)))
+ VOID(fprintf(stderr, "MARIA-table '%s' is usable but should be fixed\n",
+ filename));
+ VOID(fflush(stderr));
+ DBUG_RETURN(error);
+} /* maria_chk */
+
+
+/* Write info about table */
+
+static void descript(HA_CHECK *param, register MARIA_HA *info, char *name)
+{
+ uint key,keyseg_nr,field;
+ reg3 MARIA_KEYDEF *keyinfo;
+ reg2 HA_KEYSEG *keyseg;
+ reg4 const char *text;
+ char buff[160],length[10],*pos,*end;
+ enum en_fieldtype type;
+ MARIA_SHARE *share= info->s;
+ char llbuff[22],llbuff2[22];
+ DBUG_ENTER("describe");
+
+ if (param->testflag & T_VERY_SILENT)
+ {
+ longlong checksum= info->state->checksum;
+ if (!(share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)))
+ checksum= 0;
+ printf("%s %s %s\n", name, llstr(info->state->records,llbuff),
+ llstr(checksum, llbuff2));
+ DBUG_VOID_RETURN;
+ }
+
+ printf("MARIA file: %s\n",name);
+ printf("Record format: %s\n", record_formats[share->data_file_type]);
+ printf("Crashsafe: %s\n",
+ share->base.born_transactional ? "yes" : "no");
+ printf("Character set: %s (%d)\n",
+ get_charset_name(share->state.header.language),
+ share->state.header.language);
+
+ if (param->testflag & T_VERBOSE)
+ {
+ printf("File-version: %d\n",
+ (int) share->state.header.file_version[3]);
+ if (share->state.create_time)
+ {
+ get_date(buff,1,share->state.create_time);
+ printf("Creation time: %s\n",buff);
+ }
+ if (share->state.check_time)
+ {
+ get_date(buff,1,share->state.check_time);
+ printf("Recover time: %s\n",buff);
+ }
+ pos=buff;
+ if (share->state.changed & STATE_CRASHED)
+ strmov(buff,"crashed");
+ else
+ {
+ if (share->state.open_count)
+ pos=strmov(pos,"open,");
+ if (share->state.changed & STATE_CHANGED)
+ pos=strmov(pos,"changed,");
+ else
+ pos=strmov(pos,"checked,");
+ if (!(share->state.changed & STATE_NOT_ANALYZED))
+ pos=strmov(pos,"analyzed,");
+ if (!(share->state.changed & STATE_NOT_OPTIMIZED_KEYS))
+ pos=strmov(pos,"optimized keys,");
+ if (!(share->state.changed & STATE_NOT_SORTED_PAGES))
+ pos=strmov(pos,"sorted index pages,");
+ pos[-1]=0; /* Remove extra ',' */
+ }
+ printf("Status: %s\n",buff);
+ if (share->base.auto_key)
+ {
+ printf("Auto increment key: %16d Last value: %18s\n",
+ share->base.auto_key,
+ llstr(share->state.auto_increment,llbuff));
+ }
+ if (share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
+ printf("Checksum: %26s\n",llstr(info->state->checksum,llbuff));
+;
+ if (share->options & HA_OPTION_DELAY_KEY_WRITE)
+ printf("Keys are only flushed at close\n");
+
+ if (share->options & HA_OPTION_PAGE_CHECKSUM)
+ printf("Page checksums are used\n");
+ }
+ printf("Data records: %16s Deleted blocks: %18s\n",
+ llstr(info->state->records,llbuff),llstr(info->state->del,llbuff2));
+ if (param->testflag & T_SILENT)
+ DBUG_VOID_RETURN; /* This is enough */
+
+ if (param->testflag & T_VERBOSE)
+ {
+#ifdef USE_RELOC
+ printf("Init-relocation: %16s\n",llstr(share->base.reloc,llbuff));
+#endif
+ printf("Datafile parts: %16s Deleted data: %18s\n",
+ llstr(share->state.split,llbuff),
+ llstr(info->state->empty,llbuff2));
+ printf("Datafile pointer (bytes): %11d Keyfile pointer (bytes): %13d\n",
+ share->rec_reflength,share->base.key_reflength);
+ printf("Datafile length: %16s Keyfile length: %18s\n",
+ llstr(info->state->data_file_length,llbuff),
+ llstr(info->state->key_file_length,llbuff2));
+
+ if (info->s->base.reloc == 1L && info->s->base.records == 1L)
+ puts("This is a one-record table");
+ else
+ {
+ if (share->base.max_data_file_length != HA_OFFSET_ERROR ||
+ share->base.max_key_file_length != HA_OFFSET_ERROR)
+ printf("Max datafile length: %16s Max keyfile length: %18s\n",
+ llstr(share->base.max_data_file_length-1,llbuff),
+ llstr(share->base.max_key_file_length-1,llbuff2));
+ }
+ }
+ printf("Block_size: %16d\n",(int) share->block_size);
+ printf("Recordlength: %16d\n",(int) share->base.pack_reclength);
+ if (! maria_is_all_keys_active(share->state.key_map, share->base.keys))
+ {
+ longlong2str(share->state.key_map,buff,2);
+ printf("Using only keys '%s' of %d possibly keys\n",
+ buff, share->base.keys);
+ }
+ puts("\ntable description:");
+ printf("Key Start Len Index Type");
+ if (param->testflag & T_VERBOSE)
+ printf(" Rec/key Root Blocksize");
+ VOID(putchar('\n'));
+
+ for (key=keyseg_nr=0, keyinfo= &share->keyinfo[0] ;
+ key < share->base.keys;
+ key++,keyinfo++)
+ {
+ keyseg=keyinfo->seg;
+ if (keyinfo->flag & HA_NOSAME) text="unique ";
+ else if (keyinfo->flag & HA_FULLTEXT) text="fulltext ";
+ else text="multip.";
+
+ pos=buff;
+ if (keyseg->flag & HA_REVERSE_SORT)
+ *pos++ = '-';
+ pos=strmov(pos,type_names[keyseg->type]);
+ *pos++ = ' ';
+ *pos=0;
+ if (keyinfo->flag & HA_PACK_KEY)
+ pos=strmov(pos,prefix_packed_txt);
+ if (keyinfo->flag & HA_BINARY_PACK_KEY)
+ pos=strmov(pos,bin_packed_txt);
+ if (keyseg->flag & HA_SPACE_PACK)
+ pos=strmov(pos,diff_txt);
+ if (keyseg->flag & HA_BLOB_PART)
+ pos=strmov(pos,blob_txt);
+ if (keyseg->flag & HA_NULL_PART)
+ pos=strmov(pos,null_txt);
+ *pos=0;
+
+ printf("%-4d%-6ld%-3d %-8s%-21s",
+ key+1,(long) keyseg->start+1,keyseg->length,text,buff);
+ if (share->state.key_root[key] != HA_OFFSET_ERROR)
+ llstr(share->state.key_root[key],buff);
+ else
+ buff[0]=0;
+ if (param->testflag & T_VERBOSE)
+ printf("%11.0g %12s %10d",
+ share->state.rec_per_key_part[keyseg_nr++],
+ buff,keyinfo->block_length);
+ VOID(putchar('\n'));
+ while ((++keyseg)->type != HA_KEYTYPE_END)
+ {
+ pos=buff;
+ if (keyseg->flag & HA_REVERSE_SORT)
+ *pos++ = '-';
+ pos=strmov(pos,type_names[keyseg->type]);
+ *pos++= ' ';
+ if (keyseg->flag & HA_SPACE_PACK)
+ pos=strmov(pos,diff_txt);
+ if (keyseg->flag & HA_BLOB_PART)
+ pos=strmov(pos,blob_txt);
+ if (keyseg->flag & HA_NULL_PART)
+ pos=strmov(pos,null_txt);
+ *pos=0;
+ printf(" %-6ld%-3d %-21s",
+ (long) keyseg->start+1,keyseg->length,buff);
+ if (param->testflag & T_VERBOSE)
+ printf("%11.0g", share->state.rec_per_key_part[keyseg_nr++]);
+ VOID(putchar('\n'));
+ }
+ keyseg++;
+ }
+ if (share->state.header.uniques)
+ {
+ MARIA_UNIQUEDEF *uniqueinfo;
+ puts("\nUnique Key Start Len Nullpos Nullbit Type");
+ for (key=0,uniqueinfo= &share->uniqueinfo[0] ;
+ key < share->state.header.uniques; key++, uniqueinfo++)
+ {
+ my_bool new_row=0;
+ char null_bit[8],null_pos[8];
+ printf("%-8d%-5d",key+1,uniqueinfo->key+1);
+ for (keyseg=uniqueinfo->seg ; keyseg->type != HA_KEYTYPE_END ; keyseg++)
+ {
+ if (new_row)
+ fputs(" ",stdout);
+ null_bit[0]=null_pos[0]=0;
+ if (keyseg->null_bit)
+ {
+ sprintf(null_bit,"%d",keyseg->null_bit);
+ sprintf(null_pos,"%ld",(long) keyseg->null_pos+1);
+ }
+ printf("%-7ld%-5d%-9s%-10s%-30s\n",
+ (long) keyseg->start+1,keyseg->length,
+ null_pos,null_bit,
+ type_names[keyseg->type]);
+ new_row=1;
+ }
+ }
+ }
+ if (param->verbose > 1)
+ {
+ char null_bit[8],null_pos[8];
+ printf("\nField Start Length Nullpos Nullbit Type");
+ if (share->options & HA_OPTION_COMPRESS_RECORD)
+ printf(" Huff tree Bits");
+ VOID(putchar('\n'));
+
+ for (field=0 ; field < share->base.fields ; field++)
+ {
+ if (share->options & HA_OPTION_COMPRESS_RECORD)
+ type=share->columndef[field].base_type;
+ else
+ type=(enum en_fieldtype) share->columndef[field].type;
+ end=strmov(buff,field_pack[type]);
+ if (share->options & HA_OPTION_COMPRESS_RECORD)
+ {
+ if (share->columndef[field].pack_type & PACK_TYPE_SELECTED)
+ end=strmov(end,", not_always");
+ if (share->columndef[field].pack_type & PACK_TYPE_SPACE_FIELDS)
+ end=strmov(end,", no empty");
+ if (share->columndef[field].pack_type & PACK_TYPE_ZERO_FILL)
+ {
+ sprintf(end,", zerofill(%d)",share->columndef[field].space_length_bits);
+ end=strend(end);
+ }
+ }
+ if (buff[0] == ',')
+ strmov(buff,buff+2);
+ int10_to_str((long) share->columndef[field].length,length,10);
+ null_bit[0]=null_pos[0]=0;
+ if (share->columndef[field].null_bit)
+ {
+ sprintf(null_bit,"%d",share->columndef[field].null_bit);
+ sprintf(null_pos,"%d",share->columndef[field].null_pos+1);
+ }
+ printf("%-6d%-6u%-7s%-8s%-8s%-35s",field+1,
+ (uint) share->columndef[field].offset+1,
+ length, null_pos, null_bit, buff);
+ if (share->options & HA_OPTION_COMPRESS_RECORD)
+ {
+ if (share->columndef[field].huff_tree)
+ printf("%3d %2d",
+ (uint) (share->columndef[field].huff_tree-share->decode_trees)+1,
+ share->columndef[field].huff_tree->quick_table_bits);
+ }
+ VOID(putchar('\n'));
+ }
+ }
+ DBUG_VOID_RETURN;
+} /* describe */
+
+
+ /* Sort records according to one key */
+
+static int maria_sort_records(HA_CHECK *param,
+ register MARIA_HA *info, char *name,
+ uint sort_key,
+ my_bool write_info,
+ my_bool update_index)
+{
+ int got_error;
+ uint key;
+ MARIA_KEYDEF *keyinfo;
+ File new_file;
+ uchar *temp_buff;
+ ha_rows old_record_count;
+ MARIA_SHARE *share= info->s;
+ char llbuff[22],llbuff2[22];
+ MARIA_SORT_INFO sort_info;
+ MARIA_SORT_PARAM sort_param;
+ DBUG_ENTER("sort_records");
+
+ bzero((char*)&sort_info,sizeof(sort_info));
+ bzero((char*)&sort_param,sizeof(sort_param));
+ sort_param.sort_info=&sort_info;
+ sort_info.param=param;
+ keyinfo= &share->keyinfo[sort_key];
+ got_error=1;
+ temp_buff=0;
+ new_file= -1;
+
+ if (! maria_is_key_active(share->state.key_map, sort_key))
+ {
+ _ma_check_print_warning(param,
+ "Can't sort table '%s' on key %d; No such key",
+ name,sort_key+1);
+ param->error_printed=0;
+ DBUG_RETURN(0); /* Nothing to do */
+ }
+ if (keyinfo->flag & HA_FULLTEXT)
+ {
+ _ma_check_print_warning(param,"Can't sort table '%s' on FULLTEXT key %d",
+ name,sort_key+1);
+ param->error_printed=0;
+ DBUG_RETURN(0); /* Nothing to do */
+ }
+ if (share->data_file_type == COMPRESSED_RECORD)
+ {
+ _ma_check_print_warning(param,"Can't sort read-only table '%s'", name);
+ param->error_printed=0;
+ DBUG_RETURN(0); /* Nothing to do */
+ }
+ if (!(param->testflag & T_SILENT))
+ {
+ printf("- Sorting records for MARIA-table '%s'\n",name);
+ if (write_info)
+ printf("Data records: %9s Deleted: %9s\n",
+ llstr(info->state->records,llbuff),
+ llstr(info->state->del,llbuff2));
+ }
+ if (share->state.key_root[sort_key] == HA_OFFSET_ERROR)
+ DBUG_RETURN(0); /* Nothing to do */
+
+ if (init_io_cache(&info->rec_cache,-1,(uint) param->write_buffer_length,
+ WRITE_CACHE,share->pack.header_length,1,
+ MYF(MY_WME | MY_WAIT_IF_FULL)))
+ goto err;
+ info->opt_flag|=WRITE_CACHE_USED;
+
+ if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length)))
+ {
+ _ma_check_print_error(param,"Not enough memory for key block");
+ goto err;
+ }
+ if (!(sort_param.record=(uchar*) my_malloc((uint) share->base.pack_reclength,
+ MYF(0))))
+ {
+ _ma_check_print_error(param,"Not enough memory for record");
+ goto err;
+ }
+ fn_format(param->temp_filename,name,"", MARIA_NAME_DEXT,2+4+32);
+ new_file= my_create(fn_format(param->temp_filename,
+ param->temp_filename,"",
+ DATA_TMP_EXT,
+ MY_REPLACE_EXT | MY_UNPACK_FILENAME),
+ 0, param->tmpfile_createflag,
+ MYF(0));
+ if (new_file < 0)
+ {
+ _ma_check_print_error(param,"Can't create new tempfile: '%s'",
+ param->temp_filename);
+ goto err;
+ }
+ if (share->pack.header_length)
+ if (maria_filecopy(param, new_file, info->dfile.file, 0L,
+ share->pack.header_length,
+ "datafile-header"))
+ goto err;
+ info->rec_cache.file=new_file; /* Use this file for cacheing*/
+
+ maria_lock_memory(param);
+ for (key=0 ; key < share->base.keys ; key++)
+ share->keyinfo[key].flag|= HA_SORT_ALLOWS_SAME;
+
+ if (my_pread(share->kfile.file, temp_buff,
+ (uint) keyinfo->block_length,
+ share->state.key_root[sort_key],
+ MYF(MY_NABP+MY_WME)))
+ {
+ _ma_check_print_error(param, "Can't read indexpage from filepos: %s",
+ llstr(share->state.key_root[sort_key], llbuff));
+ goto err;
+ }
+
+ /* Setup param for _ma_sort_write_record */
+ sort_info.info=info;
+ sort_info.new_data_file_type=share->data_file_type;
+ sort_param.fix_datafile=1;
+ sort_param.master=1;
+ sort_param.filepos=share->pack.header_length;
+ old_record_count=info->state->records;
+ info->state->records=0;
+ if (sort_info.new_data_file_type != COMPRESSED_RECORD)
+ info->state->checksum=0;
+
+ if (sort_record_index(&sort_param,info,keyinfo,
+ share->state.key_root[sort_key],
+ temp_buff, sort_key,new_file,update_index) ||
+ maria_write_data_suffix(&sort_info,1) ||
+ flush_io_cache(&info->rec_cache))
+ goto err;
+
+ if (info->state->records != old_record_count)
+ {
+ _ma_check_print_error(param,"found %s of %s records",
+ llstr(info->state->records,llbuff),
+ llstr(old_record_count,llbuff2));
+ goto err;
+ }
+
+ VOID(my_close(info->dfile.file, MYF(MY_WME)));
+ param->out_flag|=O_NEW_DATA; /* Data in new file */
+ info->dfile.file= new_file; /* Use new datafile */
+ pagecache_file_init(info->dfile, &maria_page_crc_check_data,
+ (share->options & HA_OPTION_PAGE_CHECKSUM ?
+ &maria_page_crc_set_normal :
+ &maria_page_filler_set_normal),
+ &maria_page_write_failure, share);
+ info->state->del=0;
+ info->state->empty=0;
+ share->state.dellink= HA_OFFSET_ERROR;
+ info->state->data_file_length=sort_param.filepos;
+ share->state.split=info->state->records; /* Only hole records */
+ share->state.version=(ulong) time((time_t*) 0);
+
+ info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+
+ if (param->testflag & T_WRITE_LOOP)
+ {
+ VOID(fputs(" \r",stdout)); VOID(fflush(stdout));
+ }
+ got_error=0;
+
+err:
+ if (got_error && new_file >= 0)
+ {
+ VOID(end_io_cache(&info->rec_cache));
+ (void) my_close(new_file,MYF(MY_WME));
+ (void) my_delete(param->temp_filename, MYF(MY_WME));
+ }
+ if (temp_buff)
+ {
+ my_afree((uchar*) temp_buff);
+ }
+ my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR));
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ VOID(end_io_cache(&info->rec_cache));
+ my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR));
+ sort_info.buff=0;
+ share->state.sortkey=sort_key;
+ DBUG_RETURN(_ma_flush_table_files_after_repair(param, info) | got_error);
+} /* sort_records */
+
+
+/* Sort records recursive using one index */
+
+static int sort_record_index(MARIA_SORT_PARAM *sort_param,MARIA_HA *info,
+ MARIA_KEYDEF *keyinfo,
+ my_off_t page, uchar *buff, uint sort_key,
+ File new_file,my_bool update_index)
+{
+ MARIA_SHARE *share= info->s;
+ uint nod_flag,used_length,key_length;
+ uchar *temp_buff,*keypos,*endpos;
+ my_off_t next_page,rec_pos;
+ uchar lastkey[HA_MAX_KEY_BUFF];
+ char llbuff[22];
+ MARIA_SORT_INFO *sort_info= sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
+ DBUG_ENTER("sort_record_index");
+
+ nod_flag=_ma_test_if_nod(share, buff);
+ temp_buff=0;
+
+ if (nod_flag)
+ {
+ if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length)))
+ {
+ _ma_check_print_error(param,"Not Enough memory");
+ DBUG_RETURN(-1);
+ }
+ }
+ used_length= _ma_get_page_used(share, buff);
+ keypos= buff + share->keypage_header + nod_flag;
+ endpos= buff + used_length;
+ for ( ;; )
+ {
+ _sanity(__FILE__,__LINE__);
+ if (nod_flag)
+ {
+ next_page= _ma_kpos(nod_flag, keypos);
+ if (my_pread(share->kfile.file, (uchar*)temp_buff,
+ (uint) keyinfo->block_length, next_page,
+ MYF(MY_NABP+MY_WME)))
+ {
+ _ma_check_print_error(param,"Can't read keys from filepos: %s",
+ llstr(next_page,llbuff));
+ goto err;
+ }
+ if (sort_record_index(sort_param, info,keyinfo,next_page,temp_buff,
+ sort_key,
+ new_file, update_index))
+ goto err;
+ }
+ _sanity(__FILE__,__LINE__);
+ if (keypos >= endpos ||
+ (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey))
+ == 0)
+ break;
+ rec_pos= _ma_dpos(info,0,lastkey+key_length);
+
+ if ((*share->read_record)(info,sort_param->record,rec_pos))
+ {
+ _ma_check_print_error(param,"%d when reading datafile",my_errno);
+ goto err;
+ }
+ if (rec_pos != sort_param->filepos && update_index)
+ {
+ _ma_dpointer(info,keypos-nod_flag-share->rec_reflength,
+ sort_param->filepos);
+ if (maria_movepoint(info,sort_param->record,rec_pos,sort_param->filepos,
+ sort_key))
+ {
+ _ma_check_print_error(param,"%d when updating key-pointers",my_errno);
+ goto err;
+ }
+ }
+ if (_ma_sort_write_record(sort_param))
+ goto err;
+ }
+ /* Clear end of block to get better compression if the table is backuped */
+ bzero((uchar*) buff+used_length,keyinfo->block_length-used_length);
+ if (my_pwrite(share->kfile.file, (uchar*)buff, (uint)keyinfo->block_length,
+ page,param->myf_rw))
+ {
+ _ma_check_print_error(param,"%d when updating keyblock",my_errno);
+ goto err;
+ }
+ if (temp_buff)
+ my_afree((uchar*) temp_buff);
+ DBUG_RETURN(0);
+err:
+ if (temp_buff)
+ my_afree((uchar*) temp_buff);
+ DBUG_RETURN(1);
+} /* sort_record_index */
+
+
+#include "ma_check_standalone.h"
diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h
new file mode 100644
index 00000000000..0fc7e327781
--- /dev/null
+++ b/storage/maria/maria_def.h
@@ -0,0 +1,1095 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* This file is included by all internal maria files */
+
+#include "maria.h" /* Structs & some defines */
+#include <myisampack.h> /* packing of keys */
+#include <my_tree.h>
+#include <my_bitmap.h>
+#ifdef THREAD
+#include <my_pthread.h>
+#include <thr_lock.h>
+#else
+#include <my_no_pthread.h>
+#endif
+#include "ma_loghandler.h"
+#include "ma_control_file.h"
+
+/* For testing recovery */
+#ifndef DBUG_OFF
+#define IDENTICAL_PAGES_AFTER_RECOVERY 1
+#endif
+/* Do extra sanity checking */
+#define SANITY_CHECKS 1
+#ifdef EXTRA_DEBUG
+#define EXTRA_DEBUG_KEY_CHANGES
+#endif
+
+#define MAX_NONMAPPED_INSERTS 1000
+#define MARIA_MAX_TREE_LEVELS 32
+#define MARIA_MAX_CONTROL_FILE_LOCK_RETRY 30 /* Retry this many times */
+
+struct st_transaction;
+
+/* undef map from my_nosys; We need test-if-disk full */
+#undef my_write
+
+#define CRC_SIZE 4
+
+typedef struct st_maria_status_info
+{
+ ha_rows records; /* Rows in table */
+ ha_rows del; /* Removed rows */
+ my_off_t empty; /* lost space in datafile */
+ my_off_t key_empty; /* lost space in indexfile */
+ my_off_t key_file_length;
+ my_off_t data_file_length;
+ ha_checksum checksum;
+} MARIA_STATUS_INFO;
+
+typedef struct st_maria_state_info
+{
+ struct
+ { /* Fileheader */
+ uchar file_version[4];
+ uchar options[2];
+ uchar header_length[2];
+ uchar state_info_length[2];
+ uchar base_info_length[2];
+ uchar base_pos[2];
+ uchar key_parts[2]; /* Key parts */
+ uchar unique_key_parts[2]; /* Key parts + unique parts */
+ uchar keys; /* number of keys in file */
+ uchar uniques; /* number of UNIQUE definitions */
+ uchar language; /* Language for indexes */
+ uchar fulltext_keys;
+ uchar data_file_type;
+ /* Used by mariapack to store the original data_file_type */
+ uchar org_data_file_type;
+ } header;
+
+ MARIA_STATUS_INFO state;
+ ha_rows split; /* number of split blocks */
+ my_off_t dellink; /* Link to next removed block */
+ ulonglong first_bitmap_with_space;
+ ulonglong auto_increment;
+ ulong process; /* process that updated table last */
+ ulong unique; /* Unique number for this process */
+ ulong update_count; /* Updated for each write lock */
+ ulong status;
+ double *rec_per_key_part;
+ ulong *nulls_per_key_part;
+ ha_checksum checksum; /* Table checksum */
+ my_off_t *key_root; /* Start of key trees */
+ my_off_t key_del; /* delete links for index pages */
+ my_off_t records_at_analyze; /* Rows when calculating rec_per_key */
+
+ ulong sec_index_changed; /* Updated when new sec_index */
+ ulong sec_index_used; /* which extra index are in use */
+ ulonglong key_map; /* Which keys are in use */
+ ulong version; /* timestamp of create */
+ time_t create_time; /* Time when created database */
+ time_t recover_time; /* Time for last recover */
+ time_t check_time; /* Time for last check */
+ uint sortkey; /* sorted by this key (not used) */
+ uint open_count;
+ uint8 changed; /* Changed since mariachk */
+ LSN create_rename_lsn; /**< LSN when table was last created/renamed */
+ /** @brief Log horizon when state was last updated on disk */
+ TRANSLOG_ADDRESS is_of_horizon;
+
+ /* the following isn't saved on disk */
+ uint state_diff_length; /* Should be 0 */
+ uint state_length; /* Length of state header in file */
+ ulong *key_info;
+} MARIA_STATE_INFO;
+
+
+#define MARIA_STATE_INFO_SIZE \
+ (24 + LSN_STORE_SIZE*2 + 4 + 11*8 + 4*4 + 8 + 3*4 + 5*8)
+#define MARIA_STATE_KEY_SIZE (8 + 4)
+#define MARIA_STATE_KEYBLOCK_SIZE 8
+#define MARIA_STATE_KEYSEG_SIZE 12
+#define MARIA_STATE_EXTRA_SIZE (MARIA_MAX_KEY*MARIA_STATE_KEY_SIZE + MARIA_MAX_KEY*HA_MAX_KEY_SEG*MARIA_STATE_KEYSEG_SIZE)
+#define MARIA_KEYDEF_SIZE (2+ 5*2)
+#define MARIA_UNIQUEDEF_SIZE (2+1+1)
+#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2)
+#define MARIA_COLUMNDEF_SIZE (2*7+1+1+4)
+#define MARIA_BASE_INFO_SIZE (MY_UUID_SIZE + 5*8 + 6*4 + 11*2 + 6 + 5*2 + 1 + 16)
+#define MARIA_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */
+/* Internal management bytes needed to store 2 keys on an index page */
+#define MARIA_INDEX_OVERHEAD_SIZE (TRANSID_SIZE * 2)
+#define MARIA_DELETE_KEY_NR 255 /* keynr for deleted blocks */
+
+/*
+ Basic information of the Maria table. This is stored on disk
+ and not changed (unless we do DLL changes).
+*/
+
+typedef struct st_ma_base_info
+{
+ my_off_t keystart; /* Start of keys */
+ my_off_t max_data_file_length;
+ my_off_t max_key_file_length;
+ my_off_t margin_key_file_length;
+ ha_rows records, reloc; /* Create information */
+ ulong mean_row_length; /* Create information */
+ ulong reclength; /* length of unpacked record */
+ ulong pack_reclength; /* Length of full packed rec */
+ ulong min_pack_length;
+ ulong max_pack_length; /* Max possibly length of packed rec */
+ ulong min_block_length;
+ uint fields; /* fields in table */
+ uint fixed_not_null_fields;
+ uint fixed_not_null_fields_length;
+ uint max_field_lengths;
+ uint pack_fields; /* packed fields in table */
+ uint varlength_fields; /* char/varchar/blobs */
+ /* Number of bytes in the index used to refer to a row (2-8) */
+ uint rec_reflength;
+ /* Number of bytes in the index used to refer to another index page (2-8) */
+ uint key_reflength; /* = 2-8 */
+ uint keys; /* same as in state.header */
+ uint auto_key; /* Which key-1 is a auto key */
+ uint blobs; /* Number of blobs */
+ /* Length of packed bits (when table was created first time) */
+ uint pack_bytes;
+ /* Length of null bits (when table was created first time) */
+ uint original_null_bytes;
+ uint null_bytes; /* Null bytes in record */
+ uint field_offsets; /* Number of field offsets */
+ uint max_key_block_length; /* Max block length */
+ uint max_key_length; /* Max key length */
+ /* Extra allocation when using dynamic record format */
+ uint extra_alloc_bytes;
+ uint extra_alloc_procent;
+ uint is_nulls_extended; /* 1 if new null bytes */
+ uint default_row_flag; /* 0 or ROW_FLAG_NULLS_EXTENDED */
+ uint block_size;
+ /* Size of initial record buffer */
+ uint default_rec_buff_size;
+ /* Extra number of bytes the row format require in the record buffer */
+ uint extra_rec_buff_size;
+ /* Tuning flags that can be ignored by older Maria versions */
+ uint extra_options;
+
+ /* The following are from the header */
+ uint key_parts, all_key_parts;
+ uchar uuid[MY_UUID_SIZE];
+ /**
+ @brief If false, we disable logging, versioning, transaction etc. Observe
+ difference with MARIA_SHARE::now_transactional
+ */
+ my_bool born_transactional;
+} MARIA_BASE_INFO;
+
+
+/* Structs used intern in database */
+
+typedef struct st_maria_blob /* Info of record */
+{
+ ulong offset; /* Offset to blob in record */
+ uint pack_length; /* Type of packed length */
+ ulong length; /* Calc:ed for each record */
+} MARIA_BLOB;
+
+
+typedef struct st_maria_pack
+{
+ ulong header_length;
+ uint ref_length;
+ uchar version;
+} MARIA_PACK;
+
+typedef struct st_maria_file_bitmap
+{
+ uchar *map;
+ ulonglong page; /* Page number for current bitmap */
+ uint used_size; /* Size of bitmap head that is not 0 */
+ my_bool changed; /* 1 if page needs to be flushed */
+ my_bool flush_all_requested; /**< If _ma_bitmap_flush_all waiting */
+ uint non_flushable; /**< 0 if bitmap and log are in sync */
+ PAGECACHE_FILE file; /* datafile where bitmap is stored */
+
+#ifdef THREAD
+ pthread_mutex_t bitmap_lock;
+ pthread_cond_t bitmap_cond; /**< When bitmap becomes flushable */
+#endif
+ /* Constants, allocated when initiating bitmaps */
+ uint sizes[8]; /* Size per bit combination */
+ uint total_size; /* Total usable size of bitmap page */
+ uint block_size; /* Block size of file */
+ ulong pages_covered; /* Pages covered by bitmap + 1 */
+ DYNAMIC_ARRAY pinned_pages; /**< not-yet-flushable bitmap pages */
+} MARIA_FILE_BITMAP;
+
+#define MARIA_CHECKPOINT_LOOKS_AT_ME 1
+#define MARIA_CHECKPOINT_SHOULD_FREE_ME 2
+#define MARIA_CHECKPOINT_SEEN_IN_LOOP 4
+
+typedef struct st_maria_share
+{ /* Shared between opens */
+ MARIA_STATE_INFO state;
+ MARIA_BASE_INFO base;
+ MARIA_KEYDEF ft2_keyinfo; /* Second-level ft-key
+ definition */
+ MARIA_KEYDEF *keyinfo; /* Key definitions */
+ MARIA_UNIQUEDEF *uniqueinfo; /* unique definitions */
+ HA_KEYSEG *keyparts; /* key part info */
+ MARIA_COLUMNDEF *columndef; /* Pointer to column information */
+ MARIA_PACK pack; /* Data about packed records */
+ MARIA_BLOB *blobs; /* Pointer to blobs */
+ uint16 *column_nr; /* Original column order */
+ char *unique_file_name; /* realpath() of index file */
+ char *data_file_name; /* Resolved path names from symlinks */
+ char *index_file_name;
+ char *open_file_name; /* parameter to open filename */
+ uchar *file_map; /* mem-map of file if possible */
+ PAGECACHE *pagecache; /* ref to the current key cache */
+ MARIA_DECODE_TREE *decode_trees;
+ uint16 *decode_tables;
+ uint16 id; /**< 2-byte id by which log records refer to the table */
+ /* Called the first time the table instance is opened */
+ my_bool (*once_init)(struct st_maria_share *, File);
+ /* Called when the last instance of the table is closed */
+ my_bool (*once_end)(struct st_maria_share *);
+ /* Is called for every open of the table */
+ my_bool (*init)(MARIA_HA *);
+ /* Is called for every close of the table */
+ void (*end)(MARIA_HA *);
+ /* Called when we want to read a record from a specific position */
+ int (*read_record)(MARIA_HA *, uchar *, MARIA_RECORD_POS);
+ /* Initialize a scan */
+ my_bool (*scan_init)(MARIA_HA *);
+ /* Read next record while scanning */
+ int (*scan)(MARIA_HA *, uchar *, MARIA_RECORD_POS, my_bool);
+ /* End scan */
+ void (*scan_end)(MARIA_HA *);
+ int (*scan_remember_pos)(MARIA_HA *, MARIA_RECORD_POS*);
+ void (*scan_restore_pos)(MARIA_HA *, MARIA_RECORD_POS);
+ /* Pre-write of row (some handlers may do the actual write here) */
+ MARIA_RECORD_POS (*write_record_init)(MARIA_HA *, const uchar *);
+ /* Write record (or accept write_record_init) */
+ my_bool (*write_record)(MARIA_HA *, const uchar *);
+ /* Called when write failed */
+ my_bool (*write_record_abort)(MARIA_HA *);
+ my_bool (*update_record)(MARIA_HA *, MARIA_RECORD_POS,
+ const uchar *, const uchar *);
+ my_bool (*delete_record)(MARIA_HA *, const uchar *record);
+ my_bool (*compare_record)(MARIA_HA *, const uchar *);
+ /* calculate checksum for a row */
+ ha_checksum(*calc_checksum)(MARIA_HA *, const uchar *);
+ /*
+ Calculate checksum for a row during write. May be 0 if we calculate
+ the checksum in write_record_init()
+ */
+ ha_checksum(*calc_write_checksum)(MARIA_HA *, const uchar *);
+ /* calculate checksum for a row during check table */
+ ha_checksum(*calc_check_checksum)(MARIA_HA *, const uchar *);
+ /* Compare a row in memory with a row on disk */
+ my_bool (*compare_unique)(MARIA_HA *, MARIA_UNIQUEDEF *,
+ const uchar *record, MARIA_RECORD_POS pos);
+ my_off_t (*keypos_to_recpos)(MARIA_HA *info, my_off_t pos);
+ my_off_t (*recpos_to_keypos)(MARIA_HA *info, my_off_t pos);
+
+ /* Mapings to read/write the data file */
+ size_t (*file_read)(MARIA_HA *, uchar *, size_t, my_off_t, myf);
+ size_t (*file_write)(MARIA_HA *, const uchar *, size_t, my_off_t, myf);
+ invalidator_by_filename invalidator; /* query cache invalidator */
+ my_off_t current_key_del; /* delete links for index pages */
+ ulong this_process; /* processid */
+ ulong last_process; /* For table-change-check */
+ ulong last_version; /* Version on start */
+ ulong options; /* Options used */
+ ulong min_pack_length; /* These are used by packed data */
+ ulong max_pack_length;
+ ulong state_diff_length;
+ uint rec_reflength; /* rec_reflength in use now */
+ uint unique_name_length;
+ uint keypage_header;
+ uint32 ftparsers; /* Number of distinct ftparsers
+ + 1 */
+ PAGECACHE_FILE kfile; /* Shared keyfile */
+ File data_file; /* Shared data file */
+ int mode; /* mode of file on open */
+ uint reopen; /* How many times reopened */
+ uint w_locks, r_locks, tot_locks; /* Number of read/write locks */
+ uint block_size; /* block_size of keyfile & data file*/
+ /* Fixed length part of a packed row in BLOCK_RECORD format */
+ uint base_length;
+ myf write_flag;
+ enum data_file_type data_file_type;
+ enum pagecache_page_type page_type; /* value depending transactional */
+ uint8 in_checkpoint; /**< if Checkpoint looking at table */
+ my_bool temporary;
+ /* Below flag is needed to make log tables work with concurrent insert */
+ my_bool is_log_table;
+
+ my_bool changed, /* If changed since lock */
+ global_changed, /* If changed since open */
+ not_flushed, concurrent_insert;
+ my_bool delay_key_write;
+ my_bool have_rtree;
+ /**
+ @brief if the table is transactional right now. It may have been created
+ transactional (base.born_transactional==TRUE) but with transactionality
+ (logging) temporarily disabled (now_transactional==FALSE). The opposite
+ (FALSE, TRUE) is impossible.
+ */
+ my_bool now_transactional;
+ my_bool used_key_del; /* != 0 if key_del is locked */
+#ifdef THREAD
+ THR_LOCK lock;
+ pthread_mutex_t intern_lock; /* Locking for use with _locking */
+ pthread_cond_t intern_cond;
+ rw_lock_t *key_root_lock;
+#endif
+ my_off_t mmaped_length;
+ uint nonmmaped_inserts; /* counter of writing in
+ non-mmaped area */
+ MARIA_FILE_BITMAP bitmap;
+ rw_lock_t mmap_lock;
+ LSN lsn_of_file_id; /**< LSN of its last LOGREC_FILE_ID */
+} MARIA_SHARE;
+
+
+typedef uchar MARIA_BITMAP_BUFFER;
+
+typedef struct st_maria_bitmap_block
+{
+ ulonglong page; /* Page number */
+ /* Number of continuous pages. TAIL_BIT is set if this is a tail page */
+ uint page_count;
+ uint empty_space; /* Set for head and tail pages */
+ /*
+ Number of BLOCKS for block-region (holds all non-blob-fields or one blob)
+ */
+ uint sub_blocks;
+ /* set to <> 0 in write_record() if this block was actually used */
+ uint8 used;
+ uint8 org_bitmap_value;
+} MARIA_BITMAP_BLOCK;
+
+
+typedef struct st_maria_bitmap_blocks
+{
+ MARIA_BITMAP_BLOCK *block;
+ uint count;
+ my_bool tail_page_skipped; /* If some tail pages was not used */
+ my_bool page_skipped; /* If some full pages was not used */
+} MARIA_BITMAP_BLOCKS;
+
+
+/* Data about the currently read row */
+typedef struct st_maria_row
+{
+ MARIA_BITMAP_BLOCKS insert_blocks;
+ MARIA_BITMAP_BUFFER *extents;
+ MARIA_RECORD_POS lastpos, nextpos;
+ MARIA_RECORD_POS *tail_positions;
+ ha_checksum checksum;
+ LSN orig_undo_lsn; /* Lsn at start of row insert */
+ uchar *empty_bits, *field_lengths;
+ uint *null_field_lengths; /* All null field lengths */
+ ulong *blob_lengths; /* Length for each blob */
+ ulong base_length, normal_length, char_length, varchar_length, blob_length;
+ ulong head_length, total_length;
+ size_t extents_buffer_length; /* Size of 'extents' buffer */
+ uint field_lengths_length; /* Length of data in field_lengths */
+ uint extents_count; /* number of extents in 'extents' */
+ uint full_page_count, tail_count; /* For maria_chk */
+ uint space_on_head_page;
+} MARIA_ROW;
+
+/* Data to scan row in blocked format */
+typedef struct st_maria_block_scan
+{
+ uchar *bitmap_buff, *bitmap_pos, *bitmap_end, *page_buff;
+ uchar *dir, *dir_end;
+ ulong bitmap_page;
+ ulonglong bits;
+ uint number_of_rows, bit_pos;
+ MARIA_RECORD_POS row_base_page;
+} MARIA_BLOCK_SCAN;
+
+
+struct st_maria_handler
+{
+ MARIA_SHARE *s; /* Shared between open:s */
+ struct st_transaction *trn; /* Pointer to active transaction */
+ MARIA_STATUS_INFO *state, save_state;
+ MARIA_ROW cur_row; /* The active row that we just read */
+ MARIA_ROW new_row; /* Storage for a row during update */
+ MARIA_BLOCK_SCAN scan, *scan_save;
+ MARIA_BLOB *blobs; /* Pointer to blobs */
+ MARIA_BIT_BUFF bit_buff;
+ DYNAMIC_ARRAY bitmap_blocks;
+ DYNAMIC_ARRAY pinned_pages;
+ /* accumulate indexfile changes between write's */
+ TREE *bulk_insert;
+ LEX_STRING *log_row_parts; /* For logging */
+ DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */
+ MEM_ROOT ft_memroot; /* used by the parser */
+ MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */
+ LSN *key_write_undo_lsn; /* Pointer to undo for each key */
+ LSN *key_delete_undo_lsn; /* Pointer to undo for each key */
+ uchar *buff; /* page buffer */
+ uchar *keyread_buff; /* Buffer for last key read */
+ uchar *lastkey, *lastkey2; /* Last used search key */
+ uchar *first_mbr_key; /* Searhed spatial key */
+ uchar *rec_buff; /* Temp buffer for recordpack */
+ uchar *int_keypos, /* Save position for next/previous */
+ *int_maxpos; /* -""- */
+ uchar *update_field_data; /* Used by update in rows-in-block */
+ uint int_nod_flag; /* -""- */
+ uint32 int_keytree_version; /* -""- */
+ int (*read_record)(MARIA_HA *, uchar*, MARIA_RECORD_POS);
+ invalidator_by_filename invalidator; /* query cache invalidator */
+ ulong this_unique; /* uniq filenumber or thread */
+ ulong last_unique; /* last unique number */
+ ulong this_loop; /* counter for this open */
+ ulong last_loop; /* last used counter */
+ MARIA_RECORD_POS save_lastpos;
+ MARIA_RECORD_POS dup_key_pos;
+ my_off_t pos; /* Intern variable */
+ my_off_t last_keypage; /* Last key page read */
+ my_off_t last_search_keypage; /* Last keypage when searching */
+
+ /*
+ QQ: the folloing two xxx_length fields should be removed,
+ as they are not compatible with parallel repair
+ */
+ ulong packed_length, blob_length; /* Length of found, packed record */
+ size_t rec_buff_size;
+ PAGECACHE_FILE dfile; /* The datafile */
+ IO_CACHE rec_cache; /* When cacheing records */
+ LIST open_list;
+ MY_BITMAP changed_fields;
+ uint opt_flag; /* Optim. for space/speed */
+ uint update; /* If file changed since open */
+ int lastinx; /* Last used index */
+ uint lastkey_length; /* Length of key in lastkey */
+ uint last_rkey_length; /* Last length in maria_rkey() */
+ enum ha_rkey_function last_key_func; /* CONTAIN, OVERLAP, etc */
+ uint save_lastkey_length;
+ uint pack_key_length; /* For MARIAMRG */
+ myf lock_wait; /* is 0 or MY_SHORT_WAIT */
+ int errkey; /* Got last error on this key */
+ int lock_type; /* How database was locked */
+ int tmp_lock_type; /* When locked by readinfo */
+ uint data_changed; /* Somebody has changed data */
+ uint save_update; /* When using KEY_READ */
+ int save_lastinx;
+ uint preload_buff_size; /* When preloading indexes */
+ uint16 last_used_keyseg; /* For MARIAMRG */
+ uint8 used_key_del; /* != 0 if key_del is used */
+ my_bool was_locked; /* Was locked in panic */
+ my_bool append_insert_at_end; /* Set if concurrent insert */
+ my_bool quick_mode;
+ /* Marker if key_del_changed */
+ /* If info->keyread_buff can't be used for rnext */
+ my_bool page_changed;
+ /* If info->keyread_buff has to be re-read for rnext */
+ my_bool keyread_buff_used;
+ my_bool once_flags; /* For MARIA_MRG */
+#ifdef __WIN__
+ my_bool owned_by_merge; /* This Maria table is part of a merge union */
+#endif
+#ifdef THREAD
+ THR_LOCK_DATA lock;
+#endif
+ uchar *maria_rtree_recursion_state; /* For RTREE */
+ uchar length_buff[5]; /* temp buff to store blob lengths */
+ int maria_rtree_recursion_depth;
+};
+
+/* Some defines used by maria-functions */
+
+#define USE_WHOLE_KEY 65535 /* Use whole key in _search() */
+#define F_EXTRA_LCK -1
+
+/* bits in opt_flag */
+#define MEMMAP_USED 32
+#define REMEMBER_OLD_POS 64
+
+#define WRITEINFO_UPDATE_KEYFILE 1
+#define WRITEINFO_NO_UNLOCK 2
+
+/* once_flags */
+#define USE_PACKED_KEYS 1
+#define RRND_PRESERVE_LASTINX 2
+
+/* bits in state.changed */
+
+#define STATE_CHANGED 1
+#define STATE_CRASHED 2
+#define STATE_CRASHED_ON_REPAIR 4
+#define STATE_NOT_ANALYZED 8
+#define STATE_NOT_OPTIMIZED_KEYS 16
+#define STATE_NOT_SORTED_PAGES 32
+#define STATE_NOT_OPTIMIZED_ROWS 64
+
+/* options to maria_read_cache */
+
+#define READING_NEXT 1
+#define READING_HEADER 2
+
+/* Number of bytes on key pages to indicate used size */
+#define KEYPAGE_USED_SIZE 2
+#define KEYPAGE_KEYID_SIZE 1
+#define KEYPAGE_FLAG_SIZE 1
+#define KEYPAGE_CHECKSUM_SIZE 4
+#define MAX_KEYPAGE_HEADER_SIZE (LSN_STORE_SIZE + KEYPAGE_USED_SIZE + \
+ KEYPAGE_KEYID_SIZE + KEYPAGE_FLAG_SIZE + \
+ TRANSID_SIZE)
+#define KEYPAGE_FLAG_ISNOD 1
+
+#define _ma_get_page_used(share,x) \
+ ((uint) mi_uint2korr((x) + (share)->keypage_header - KEYPAGE_USED_SIZE))
+#define _ma_store_page_used(share,x,y) \
+ mi_int2store((x) + (share)->keypage_header - KEYPAGE_USED_SIZE, (y))
+#define _ma_test_if_nod(share,x) \
+ ((_ma_get_keypage_flag(share,x) & KEYPAGE_FLAG_ISNOD) ? (share)->base.key_reflength : 0)
+
+#define _ma_get_used_and_nod(share,buff,length,nod) \
+{ \
+ nod= _ma_test_if_nod((share),(buff)); \
+ length= _ma_get_page_used((share),(buff)); \
+}
+#define _ma_store_keynr(share, x, nr) x[(share)->keypage_header - KEYPAGE_KEYID_SIZE - KEYPAGE_FLAG_SIZE - KEYPAGE_USED_SIZE]= (nr)
+#define _ma_get_keynr(share, x) ((uchar) x[(share)->keypage_header - KEYPAGE_KEYID_SIZE - KEYPAGE_FLAG_SIZE - KEYPAGE_USED_SIZE])
+#define _ma_store_transid(buff, transid) \
+ int6store((buff) + LSN_STORE_SIZE, (transid))
+#define _ma_korr_transid(buff) \
+ uint6korr((buff) + LSN_STORE_SIZE)
+#define _ma_get_keypage_flag(share,x) x[(share)->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_FLAG_SIZE]
+#define _ma_store_keypage_flag(share,x,flag) x[(share)->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_FLAG_SIZE]= (flag)
+
+
+/*
+ TODO: write int4store_aligned as *((uint32 *) (T))= (uint32) (A) for
+ architectures where it is possible
+*/
+#define int4store_aligned(A,B) int4store((A),(B))
+
+#define maria_mark_crashed(x) do{(x)->s->state.changed|= STATE_CRASHED; \
+ DBUG_PRINT("error", ("Marked table crashed")); \
+ }while(0)
+#define maria_mark_crashed_share(x) \
+ do{(x)->state.changed|= STATE_CRASHED; \
+ DBUG_PRINT("error", ("Marked table crashed")); \
+ }while(0)
+#define maria_mark_crashed_on_repair(x) do{(x)->s->state.changed|= \
+ STATE_CRASHED|STATE_CRASHED_ON_REPAIR; \
+ (x)->update|= HA_STATE_CHANGED; \
+ DBUG_PRINT("error", \
+ ("Marked table crashed")); \
+ }while(0)
+#define maria_is_crashed(x) ((x)->s->state.changed & STATE_CRASHED)
+#define maria_is_crashed_on_repair(x) ((x)->s->state.changed & STATE_CRASHED_ON_REPAIR)
+#ifdef EXTRA_DEBUG
+#define maria_print_error(SHARE, ERRNO) \
+ _ma_report_error((ERRNO), (SHARE)->index_file_name)
+#else
+#define maria_print_error(SHARE, ERRNO) while (0)
+#endif
+
+
+/* Functions to store length of space packed keys, VARCHAR or BLOB keys */
+
+#define store_key_length(key,length) \
+{ if ((length) < 255) \
+ { *(key)=(length); } \
+ else \
+ { *(key)=255; mi_int2store((key)+1,(length)); } \
+}
+
+#define get_key_full_length(length,key) \
+ { if (*(uchar*) (key) != 255) \
+ length= ((uint) *(uchar*) ((key)++))+1; \
+ else \
+ { length=mi_uint2korr((key)+1)+3; (key)+=3; } \
+}
+
+#define get_key_full_length_rdonly(length,key) \
+{ if (*(uchar*) (key) != 255) \
+ length= ((uint) *(uchar*) ((key)))+1; \
+ else \
+ { length=mi_uint2korr((key)+1)+3; } \
+}
+
+#define maria_max_key_length() ((maria_block_size - MAX_KEYPAGE_HEADER_SIZE)/2 - MARIA_INDEX_OVERHEAD_SIZE)
+#define get_pack_length(length) ((length) >= 255 ? 3 : 1)
+
+#define MARIA_MIN_BLOCK_LENGTH 20 /* Because of delete-link */
+/* Don't use to small record-blocks */
+#define MARIA_EXTEND_BLOCK_LENGTH 20
+#define MARIA_SPLIT_LENGTH ((MARIA_EXTEND_BLOCK_LENGTH+4)*2)
+ /* Max prefix of record-block */
+#define MARIA_MAX_DYN_BLOCK_HEADER 20
+#define MARIA_BLOCK_INFO_HEADER_LENGTH 20
+#define MARIA_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */
+#define MARIA_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L)
+#define MARIA_DYN_MAX_ROW_LENGTH (MARIA_DYN_MAX_BLOCK_LENGTH - MARIA_SPLIT_LENGTH)
+#define MARIA_DYN_ALIGN_SIZE 4 /* Align blocks on this */
+#define MARIA_MAX_DYN_HEADER_BYTE 13 /* max header uchar for dynamic rows */
+#define MARIA_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MARIA_DYN_ALIGN_SIZE-1)))
+#define MARIA_REC_BUFF_OFFSET ALIGN_SIZE(MARIA_DYN_DELETE_BLOCK_HEADER+sizeof(uint32))
+
+#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */
+
+#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */
+#define PACK_TYPE_SPACE_FIELDS 2
+#define PACK_TYPE_ZERO_FILL 4
+#define MARIA_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */
+
+#define MARIA_BLOCK_SIZE(key_length,data_pointer,key_pointer,block_size) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/(block_size)+1)*(block_size))
+#define MARIA_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */
+#define MARIA_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */
+
+#define MARIA_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */
+#define MARIA_MIN_ROWS_TO_USE_BULK_INSERT 100
+#define MARIA_MIN_ROWS_TO_DISABLE_INDEXES 100
+#define MARIA_MIN_ROWS_TO_USE_WRITE_CACHE 10
+
+/* Marker for impossible delete link */
+#define IMPOSSIBLE_PAGE_NO LL(0xFFFFFFFFFF)
+
+/* The UNIQUE check is done with a hashed long key */
+
+#define MARIA_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT
+#define maria_unique_store(A,B) mi_int4store((A),(B))
+
+#ifdef THREAD
+extern pthread_mutex_t THR_LOCK_maria;
+#endif
+#if !defined(THREAD) || defined(DONT_USE_RW_LOCKS)
+#define rw_wrlock(A) {}
+#define rw_rdlock(A) {}
+#define rw_unlock(A) {}
+#endif
+
+
+/* Some extern variables */
+extern LIST *maria_open_list;
+extern uchar maria_file_magic[], maria_pack_file_magic[];
+extern uchar maria_uuid[MY_UUID_SIZE];
+extern uint maria_read_vec[], maria_readnext_vec[];
+extern uint maria_quick_table_bits;
+extern const char *maria_data_root;
+extern uchar maria_zero_string[];
+extern my_bool maria_inited;
+
+
+/* This is used by _ma_calc_xxx_key_length och _ma_store_key */
+typedef struct st_maria_s_param
+{
+ const uchar *key;
+ uchar *prev_key, *next_key_pos;
+ uchar *key_pos; /* For balance page */
+ uint ref_length, key_length, n_ref_length;
+ uint n_length, totlength, part_of_prev_key, prev_length, pack_marker;
+ uint changed_length;
+ int move_length; /* For balance_page */
+ bool store_not_null;
+} MARIA_KEY_PARAM;
+
+
+/* Used to store reference to pinned page */
+typedef struct st_pinned_page
+{
+ PAGECACHE_BLOCK_LINK *link;
+ enum pagecache_page_lock unlock, write_lock;
+ my_bool changed;
+} MARIA_PINNED_PAGE;
+
+
+/* Prototypes for intern functions */
+extern int _ma_read_dynamic_record(MARIA_HA *, uchar *, MARIA_RECORD_POS);
+extern int _ma_read_rnd_dynamic_record(MARIA_HA *, uchar *, MARIA_RECORD_POS,
+ my_bool);
+extern my_bool _ma_write_dynamic_record(MARIA_HA *, const uchar *);
+extern my_bool _ma_update_dynamic_record(MARIA_HA *, MARIA_RECORD_POS,
+ const uchar *, const uchar *);
+extern my_bool _ma_delete_dynamic_record(MARIA_HA *info, const uchar *record);
+extern my_bool _ma_cmp_dynamic_record(MARIA_HA *info, const uchar *record);
+extern my_bool _ma_write_blob_record(MARIA_HA *, const uchar *);
+extern my_bool _ma_update_blob_record(MARIA_HA *, MARIA_RECORD_POS,
+ const uchar *, const uchar *);
+extern int _ma_read_static_record(MARIA_HA *info, uchar *, MARIA_RECORD_POS);
+extern int _ma_read_rnd_static_record(MARIA_HA *, uchar *, MARIA_RECORD_POS,
+ my_bool);
+extern my_bool _ma_write_static_record(MARIA_HA *, const uchar *);
+extern my_bool _ma_update_static_record(MARIA_HA *, MARIA_RECORD_POS,
+ const uchar *, const uchar *);
+extern my_bool _ma_delete_static_record(MARIA_HA *info, const uchar *record);
+extern my_bool _ma_cmp_static_record(MARIA_HA *info, const uchar *record);
+extern int _ma_ck_write(MARIA_HA *info, uint keynr, uchar *key,
+ uint length);
+extern int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ const uchar *key, MARIA_RECORD_POS *root);
+extern int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uchar *key, uchar *anc_buff, uchar *key_pos,
+ my_off_t anc_page, uchar *key_buff, my_off_t father_page,
+ uchar *father_buff, MARIA_PINNED_PAGE *father_page_link,
+ uchar *father_key_pos, my_bool insert_last);
+extern int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key, uint key_length,
+ MARIA_RECORD_POS *root, uint comp_flag);
+extern int _ma_split_page(register MARIA_HA *info,
+ register MARIA_KEYDEF *keyinfo,
+ uchar *key, my_off_t split_page, uchar *split_buff,
+ uint org_split_length,
+ uchar *inserted_key_pos, uint changed_length,
+ int move_length,
+ uchar *key_buff, my_bool insert_last_key);
+extern uchar *_ma_find_half_pos(MARIA_HA *info, uint nod_flag,
+ MARIA_KEYDEF *keyinfo,
+ uchar *page, uchar *key,
+ uint *return_key_length,
+ uchar ** after_key);
+extern int _ma_calc_static_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ uchar *key_pos, uchar *org_key,
+ uchar *key_buff, const uchar *key,
+ MARIA_KEY_PARAM *s_temp);
+extern int _ma_calc_var_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ uchar *key_pos, uchar *org_key,
+ uchar *key_buff, const uchar *key,
+ MARIA_KEY_PARAM *s_temp);
+extern int _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo,
+ uint nod_flag, uchar *key_pos,
+ uchar *org_key, uchar *prev_key,
+ const uchar *key,
+ MARIA_KEY_PARAM *s_temp);
+extern int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo,
+ uint nod_flag, uchar *key_pos,
+ uchar *org_key, uchar *prev_key,
+ const uchar *key,
+ MARIA_KEY_PARAM *s_temp);
+extern void _ma_store_static_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
+ MARIA_KEY_PARAM *s_temp);
+extern void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
+ MARIA_KEY_PARAM *s_temp);
+#ifdef NOT_USED
+extern void _ma_store_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
+ MARIA_KEY_PARAM *s_temp);
+#endif
+extern void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
+ MARIA_KEY_PARAM *s_temp);
+
+extern int _ma_ck_delete(MARIA_HA *info, uint keynr, uchar *key,
+ uint key_length);
+extern int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key, uint key_length,
+ my_off_t *root);
+extern int _ma_readinfo(MARIA_HA *info, int lock_flag, int check_keybuffer);
+extern int _ma_writeinfo(MARIA_HA *info, uint options);
+extern int _ma_test_if_changed(MARIA_HA *info);
+extern int _ma_mark_file_changed(MARIA_HA *info);
+extern int _ma_decrement_open_count(MARIA_HA *info);
+extern int _ma_check_index(MARIA_HA *info, int inx);
+extern int _ma_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key,
+ uint key_len, uint nextflag, my_off_t pos);
+extern int _ma_bin_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *page, const uchar *key, uint key_len,
+ uint comp_flag, uchar **ret_pos, uchar *buff,
+ my_bool *was_last_key);
+extern int _ma_seq_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *page, const uchar *key, uint key_len,
+ uint comp_flag, uchar ** ret_pos, uchar *buff,
+ my_bool *was_last_key);
+extern int _ma_prefix_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *page, const uchar *key, uint key_len,
+ uint comp_flag, uchar ** ret_pos, uchar *buff,
+ my_bool *was_last_key);
+extern my_off_t _ma_kpos(uint nod_flag, uchar *after_key);
+extern void _ma_kpointer(MARIA_HA *info, uchar *buff, my_off_t pos);
+extern MARIA_RECORD_POS _ma_dpos(MARIA_HA *info, uint nod_flag,
+ const uchar *after_key);
+extern MARIA_RECORD_POS _ma_rec_pos(MARIA_HA *info, uchar *ptr);
+extern void _ma_dpointer(MARIA_HA *info, uchar *buff, MARIA_RECORD_POS pos);
+extern uint _ma_get_static_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ uchar **page, uchar *key);
+extern uint _ma_get_pack_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ uchar **page, uchar *key);
+extern uint _ma_get_binary_pack_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ uchar ** page_pos, uchar *key);
+extern uchar *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *keypos, uchar *lastkey,
+ uchar *endpos, uint *return_key_length);
+extern uchar *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *page, uchar *key, uchar *keypos,
+ uint *return_key_length);
+extern uint _ma_keylength(MARIA_KEYDEF *keyinfo, const uchar *key);
+extern uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register const uchar *key,
+ HA_KEYSEG *end);
+extern uchar *_ma_move_key(MARIA_KEYDEF *keyinfo, uchar *to, const uchar *from);
+extern int _ma_search_next(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key, uint key_length, uint nextflag,
+ my_off_t pos);
+extern int _ma_search_first(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ my_off_t pos);
+extern int _ma_search_last(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ my_off_t pos);
+extern my_off_t _ma_static_keypos_to_recpos(MARIA_HA *info, my_off_t pos);
+extern my_off_t _ma_static_recpos_to_keypos(MARIA_HA *info, my_off_t pos);
+extern my_off_t _ma_transparent_recpos(MARIA_HA *info, my_off_t pos);
+extern my_off_t _ma_transaction_keypos_to_recpos(MARIA_HA *info, my_off_t pos);
+extern my_off_t _ma_transaction_recpos_to_keypos(MARIA_HA *info, my_off_t pos);
+
+extern uchar *_ma_fetch_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ my_off_t page, enum pagecache_page_lock lock,
+ int level, uchar *buff, int return_buffer,
+ MARIA_PINNED_PAGE **page_link);
+extern int _ma_write_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ my_off_t page, enum pagecache_page_lock lock,
+ int level, uchar *buff);
+extern int _ma_dispose(MARIA_HA *info, my_off_t pos, my_bool page_not_read);
+extern my_off_t _ma_new(register MARIA_HA *info, int level,
+ MARIA_PINNED_PAGE **page_link);
+extern uint _ma_make_key(MARIA_HA *info, uint keynr, uchar *key,
+ const uchar *record, MARIA_RECORD_POS filepos);
+extern uint _ma_pack_key(MARIA_HA *info, uint keynr, uchar *key,
+ const uchar *old, key_part_map keypart_map,
+ HA_KEYSEG ** last_used_keyseg);
+extern int _ma_read_key_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS);
+extern int _ma_read_cache(IO_CACHE *info, uchar *buff, MARIA_RECORD_POS pos,
+ uint length, int re_read_if_possibly);
+extern ulonglong ma_retrieve_auto_increment(const uchar *key, uint8 key_type);
+extern my_bool _ma_alloc_buffer(uchar **old_addr, size_t *old_size,
+ size_t new_size);
+extern ulong _ma_rec_unpack(MARIA_HA *info, uchar *to, uchar *from,
+ ulong reclength);
+extern my_bool _ma_rec_check(MARIA_HA *info, const uchar *record,
+ uchar *packpos, ulong packed_length,
+ my_bool with_checkum, ha_checksum checksum);
+extern int _ma_write_part_record(MARIA_HA *info, my_off_t filepos,
+ ulong length, my_off_t next_filepos,
+ uchar ** record, ulong *reclength,
+ int *flag);
+extern void _ma_print_key(FILE *stream, HA_KEYSEG *keyseg,
+ const uchar *key, uint length);
+extern my_bool _ma_once_init_pack_row(MARIA_SHARE *share, File dfile);
+extern my_bool _ma_once_end_pack_row(MARIA_SHARE *share);
+extern int _ma_read_pack_record(MARIA_HA *info, uchar *buf,
+ MARIA_RECORD_POS filepos);
+extern int _ma_read_rnd_pack_record(MARIA_HA *, uchar *, MARIA_RECORD_POS,
+ my_bool);
+extern int _ma_pack_rec_unpack(MARIA_HA *info, MARIA_BIT_BUFF *bit_buff,
+ uchar *to, uchar *from, ulong reclength);
+extern ulonglong _ma_safe_mul(ulonglong a, ulonglong b);
+extern int _ma_ft_update(MARIA_HA *info, uint keynr, uchar *keybuf,
+ const uchar *oldrec, const uchar *newrec,
+ my_off_t pos);
+
+/*
+ Parameter to _ma_get_block_info
+ The dynamic row header is read into this struct. For an explanation of
+ the fields, look at the function _ma_get_block_info().
+*/
+
+typedef struct st_maria_block_info
+{
+ uchar header[MARIA_BLOCK_INFO_HEADER_LENGTH];
+ ulong rec_len;
+ ulong data_len;
+ ulong block_len;
+ ulong blob_len;
+ MARIA_RECORD_POS filepos;
+ MARIA_RECORD_POS next_filepos;
+ MARIA_RECORD_POS prev_filepos;
+ uint second_read;
+ uint offset;
+} MARIA_BLOCK_INFO;
+
+
+/* bits in return from _ma_get_block_info */
+
+#define BLOCK_FIRST 1
+#define BLOCK_LAST 2
+#define BLOCK_DELETED 4
+#define BLOCK_ERROR 8 /* Wrong data */
+#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */
+#define BLOCK_FATAL_ERROR 32 /* hardware-error */
+
+#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Nead for recursion */
+#define MAXERR 20
+#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */
+#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE
+#define INDEX_TMP_EXT ".TMM"
+#define DATA_TMP_EXT ".TMD"
+
+#define UPDATE_TIME 1
+#define UPDATE_STAT 2
+#define UPDATE_SORT 4
+#define UPDATE_AUTO_INC 8
+#define UPDATE_OPEN_COUNT 16
+
+#define USE_BUFFER_INIT (((1024L*1024L*10-MALLOC_OVERHEAD)/8192)*8192)
+#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD)
+#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD)
+#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD)
+
+#define fast_ma_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _ma_writeinfo((INFO),0)
+#define fast_ma_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _ma_readinfo((INFO),F_RDLCK,1)
+
+extern uint _ma_get_block_info(MARIA_BLOCK_INFO *, File, my_off_t);
+extern uint _ma_rec_pack(MARIA_HA *info, uchar *to, const uchar *from);
+extern uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff,
+ MARIA_BLOCK_INFO *info, uchar **rec_buff_p,
+ size_t *rec_buff_size,
+ File file, my_off_t filepos);
+extern void _ma_store_blob_length(uchar *pos, uint pack_length, uint length);
+extern void _ma_report_error(int errcode, const char *file_name);
+extern my_bool _ma_memmap_file(MARIA_HA *info);
+extern void _ma_unmap_file(MARIA_HA *info);
+extern uint _ma_save_pack_length(uint version, uchar * block_buff,
+ ulong length);
+extern uint _ma_calc_pack_length(uint version, ulong length);
+extern ulong _ma_calc_blob_length(uint length, const uchar *pos);
+extern size_t _ma_mmap_pread(MARIA_HA *info, uchar *Buffer,
+ size_t Count, my_off_t offset, myf MyFlags);
+extern size_t _ma_mmap_pwrite(MARIA_HA *info, const uchar *Buffer,
+ size_t Count, my_off_t offset, myf MyFlags);
+extern size_t _ma_nommap_pread(MARIA_HA *info, uchar *Buffer,
+ size_t Count, my_off_t offset, myf MyFlags);
+extern size_t _ma_nommap_pwrite(MARIA_HA *info, const uchar *Buffer,
+ size_t Count, my_off_t offset, myf MyFlags);
+
+uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite);
+uint _ma_state_info_write_sub(File file, MARIA_STATE_INFO *state, uint pWrite);
+uint _ma_state_info_read_dsk(File file, MARIA_STATE_INFO *state);
+uint _ma_base_info_write(File file, MARIA_BASE_INFO *base);
+my_bool _ma_keyseg_write(File file, const HA_KEYSEG *keyseg);
+uchar *_ma_keyseg_read(uchar *ptr, HA_KEYSEG *keyseg);
+my_bool _ma_keydef_write(File file, MARIA_KEYDEF *keydef);
+uchar *_ma_keydef_read(uchar *ptr, MARIA_KEYDEF *keydef);
+my_bool _ma_uniquedef_write(File file, MARIA_UNIQUEDEF *keydef);
+uchar *_ma_uniquedef_read(uchar *ptr, MARIA_UNIQUEDEF *keydef);
+my_bool _ma_columndef_write(File file, MARIA_COLUMNDEF *columndef);
+uchar *_ma_columndef_read(uchar *ptr, MARIA_COLUMNDEF *columndef);
+my_bool _ma_column_nr_write(File file, uint16 *offsets, uint columns);
+uchar *_ma_column_nr_read(uchar *ptr, uint16 *offsets, uint columns);
+ulong _ma_calc_total_blob_length(MARIA_HA *info, const uchar *record);
+ha_checksum _ma_checksum(MARIA_HA *info, const uchar *buf);
+ha_checksum _ma_static_checksum(MARIA_HA *info, const uchar *buf);
+my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ uchar *record, ha_checksum unique_hash,
+ MARIA_RECORD_POS pos);
+ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const uchar *buf);
+my_bool _ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ const uchar *record, MARIA_RECORD_POS pos);
+my_bool _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ const uchar *record, MARIA_RECORD_POS pos);
+my_bool _ma_unique_comp(MARIA_UNIQUEDEF *def, const uchar *a, const uchar *b,
+ my_bool null_are_equal);
+void _ma_get_status(void *param, int concurrent_insert);
+void _ma_update_status(void *param);
+void _ma_restore_status(void *param);
+void _ma_copy_status(void *to, void *from);
+my_bool _ma_check_status(void *param);
+void _ma_reset_status(MARIA_HA *maria);
+int _ma_def_scan_remember_pos(MARIA_HA *info, MARIA_RECORD_POS *lastpos);
+void _ma_def_scan_restore_pos(MARIA_HA *info, MARIA_RECORD_POS lastpos);
+
+#include "ma_commit.h"
+
+extern MARIA_HA *_ma_test_if_reopen(const char *filename);
+my_bool _ma_check_table_is_closed(const char *name, const char *where);
+int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, File file_to_dup);
+int _ma_open_keyfile(MARIA_SHARE *share);
+void _ma_setup_functions(register MARIA_SHARE *share);
+my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size);
+void _ma_remap_file(MARIA_HA *info, my_off_t size);
+
+MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, const uchar *record);
+my_bool _ma_write_abort_default(MARIA_HA *info);
+
+C_MODE_START
+#define MARIA_FLUSH_DATA 1
+#define MARIA_FLUSH_INDEX 2
+int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index,
+ enum flush_type flush_type_for_data,
+ enum flush_type flush_type_for_index);
+/*
+ Functions needed by _ma_check (are overridden in MySQL/ha_maria.cc).
+ See ma_check_standalone.h .
+*/
+volatile int *_ma_killed_ptr(HA_CHECK *param);
+void _ma_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...))
+ ATTRIBUTE_FORMAT(printf, 2, 3);
+void _ma_check_print_warning _VARARGS((HA_CHECK *param, const char *fmt, ...))
+ ATTRIBUTE_FORMAT(printf, 2, 3);
+void _ma_check_print_info _VARARGS((HA_CHECK *param, const char *fmt, ...))
+ ATTRIBUTE_FORMAT(printf, 2, 3);
+C_MODE_END
+
+int _ma_flush_pending_blocks(MARIA_SORT_PARAM *param);
+int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param);
+int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param);
+#ifdef THREAD
+pthread_handler_t _ma_thr_find_all_keys(void *arg);
+#endif
+int _ma_flush_table_files_after_repair(HA_CHECK *param, MARIA_HA *info);
+
+int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param);
+int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages,
+ size_t);
+int _ma_sync_table_files(const MARIA_HA *info);
+int _ma_initialize_data_file(MARIA_SHARE *share, File dfile);
+int _ma_update_create_rename_lsn(MARIA_SHARE *share,
+ LSN lsn, my_bool do_sync);
+int _ma_update_create_rename_lsn_sub(MARIA_SHARE *share,
+ LSN lsn, my_bool do_sync);
+
+void _ma_tmp_disable_logging_for_table(MARIA_HA *info,
+ my_bool log_incomplete);
+#define _ma_reenable_logging_for_table(S) \
+ { if (((S)->now_transactional= (S)->base.born_transactional)) \
+ (S)->page_type= PAGECACHE_LSN_PAGE; }
+
+#define MARIA_NO_CRC_NORMAL_PAGE 0xffffffff
+#define MARIA_NO_CRC_BITMAP_PAGE 0xfffffffe
+extern my_bool maria_page_crc_set_index(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar *data_ptr);
+extern my_bool maria_page_crc_set_normal(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar *data_ptr);
+extern my_bool maria_page_crc_check_bitmap(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar *data_ptr);
+extern my_bool maria_page_crc_check_data(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar *data_ptr);
+extern my_bool maria_page_crc_check_index(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar *data_ptr);
+extern my_bool maria_page_crc_check_none(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar *data_ptr);
+extern my_bool maria_page_filler_set_bitmap(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar *data_ptr);
+extern my_bool maria_page_filler_set_normal(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar *data_ptr);
+extern my_bool maria_page_filler_set_none(uchar *page,
+ pgcache_page_no_t page_no,
+ uchar *data_ptr);
+extern void maria_page_write_failure(uchar* data_ptr);
+
+extern PAGECACHE *maria_log_pagecache;
diff --git a/storage/maria/maria_ftdump.c b/storage/maria/maria_ftdump.c
new file mode 100644
index 00000000000..d691cbe0c3b
--- /dev/null
+++ b/storage/maria/maria_ftdump.c
@@ -0,0 +1,279 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code
+ added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
+
+#include "ma_ftdefs.h"
+#include <my_getopt.h>
+
+static void usage();
+static void complain(int val);
+static my_bool get_one_option(int, const struct my_option *, char *);
+
+static int count=0, stats=0, dump=0, lstats=0;
+static my_bool verbose;
+static char *query=NULL;
+static uint lengths[256];
+
+#define MAX_LEN (HA_FT_MAXBYTELEN+10)
+#define HOW_OFTEN_TO_WRITE 10000
+
+static struct my_option my_long_options[] =
+{
+ {"help", 'h', "Display help and exit.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"help", '?', "Synonym for -h.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"count", 'c', "Calculate per-word stats (counts and global weights).",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"dump", 'd', "Dump index (incl. data offsets and word weights).",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"length", 'l', "Report length distribution.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"stats", 's', "Report global stats.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"verbose", 'v', "Be verbose.",
+ (uchar**) &verbose, (uchar**) &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+
+int main(int argc,char *argv[])
+{
+ int error=0, subkeys;
+ uint keylen, keylen2=0, inx, doc_cnt=0;
+ float weight= 1.0;
+ double gws, min_gws=0, avg_gws=0;
+ MARIA_HA *info;
+ char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN];
+ ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0;
+ struct { MARIA_HA *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */
+
+ MY_INIT(argv[0]);
+ if ((error= handle_options(&argc, &argv, my_long_options, get_one_option)))
+ exit(error);
+ maria_init();
+ if (count || dump)
+ verbose=0;
+ if (!count && !dump && !lstats && !query)
+ stats=1;
+
+ if (verbose)
+ setbuf(stdout,NULL);
+
+ if (argc < 2)
+ usage();
+
+ {
+ char *end;
+ inx= (uint) strtoll(argv[1], &end, 10);
+ if (*end)
+ usage();
+ }
+
+ init_pagecache(maria_pagecache, USE_BUFFER_INIT, 0, 0,
+ MARIA_KEY_BLOCK_LENGTH, MY_WME);
+
+ if (!(info=maria_open(argv[0], O_RDONLY,
+ HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER)))
+ {
+ error=my_errno;
+ goto err;
+ }
+
+ *buf2=0;
+ aio->info=info;
+
+ if ((inx >= info->s->base.keys) ||
+ !(info->s->keyinfo[inx].flag & HA_FULLTEXT))
+ {
+ printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->s->open_file_name);
+ goto err;
+ }
+
+ maria_lock_database(info, F_EXTRA_LCK);
+
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ info->update|= HA_STATE_PREV_FOUND;
+
+ while (!(error=maria_rnext(info,NULL,inx)))
+ {
+ keylen=*(info->lastkey);
+
+ subkeys=ft_sintXkorr(info->lastkey+keylen+1);
+ if (subkeys >= 0)
+ weight=*(float*)&subkeys;
+
+#ifdef HAVE_SNPRINTF
+ snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1);
+#else
+ sprintf(buf,"%.*s",(int) keylen,info->lastkey+1);
+#endif
+ my_casedn_str(default_charset_info,buf);
+ total++;
+ lengths[keylen]++;
+
+ if (count || stats)
+ {
+ if (strcmp(buf, buf2))
+ {
+ if (*buf2)
+ {
+ uniq++;
+ avg_gws+=gws=GWS_IN_USE;
+ if (count)
+ printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
+ if (maxlen<keylen2)
+ {
+ maxlen=keylen2;
+ strmov(buf_maxlen, buf2);
+ }
+ if (max_doc_cnt < doc_cnt)
+ {
+ max_doc_cnt=doc_cnt;
+ strmov(buf_min_gws, buf2);
+ min_gws=gws;
+ }
+ }
+ strmov(buf2, buf);
+ keylen2=keylen;
+ doc_cnt=0;
+ }
+ doc_cnt+= (subkeys >= 0 ? 1 : -subkeys);
+ }
+ if (dump)
+ {
+ if (subkeys>=0)
+ printf("%9lx %20.7f %s\n", (long) info->cur_row.lastpos,weight,buf);
+ else
+ printf("%9lx => %17d %s\n",(long) info->cur_row.lastpos,-subkeys,buf);
+ }
+ if (verbose && (total%HOW_OFTEN_TO_WRITE)==0)
+ printf("%10ld\r",total);
+ }
+ maria_lock_database(info, F_UNLCK);
+
+ if (count || stats)
+ {
+ if (*buf2)
+ {
+ uniq++;
+ avg_gws+=gws=GWS_IN_USE;
+ if (count)
+ printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
+ if (maxlen<keylen2)
+ {
+ maxlen=keylen2;
+ strmov(buf_maxlen, buf2);
+ }
+ if (max_doc_cnt < doc_cnt)
+ {
+ max_doc_cnt=doc_cnt;
+ strmov(buf_min_gws, buf2);
+ min_gws=gws;
+ }
+ }
+ }
+
+ if (stats)
+ {
+ count=0;
+ for (inx=0;inx<256;inx++)
+ {
+ count+=lengths[inx];
+ if ((ulong) count >= total/2)
+ break;
+ }
+ printf("Total rows: %lu\nTotal words: %lu\n"
+ "Unique words: %lu\nLongest word: %lu chars (%s)\n"
+ "Median length: %u\n"
+ "Average global weight: %f\n"
+ "Most common word: %lu times, weight: %f (%s)\n",
+ (long) info->state->records, total, uniq, maxlen, buf_maxlen,
+ inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws);
+ }
+ if (lstats)
+ {
+ count=0;
+ for (inx=0; inx<256; inx++)
+ {
+ count+=lengths[inx];
+ if (count && lengths[inx])
+ printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx,
+ (ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count,
+ 100.0*count/total);
+ }
+ }
+
+err:
+ if (error && error != HA_ERR_END_OF_FILE)
+ printf("got error %d\n",my_errno);
+ if (info)
+ maria_close(info);
+ maria_end();
+ return 0;
+}
+
+
+static my_bool
+get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
+ char *argument __attribute__((unused)))
+{
+ switch(optid) {
+ case 'd':
+ dump=1;
+ complain(count || query);
+ break;
+ case 's':
+ stats=1;
+ complain(query!=0);
+ break;
+ case 'c':
+ count= 1;
+ complain(dump || query);
+ break;
+ case 'l':
+ lstats=1;
+ complain(query!=0);
+ break;
+ case '?':
+ case 'h':
+ usage();
+ }
+ return 0;
+}
+
+#include <help_start.h>
+
+static void usage()
+{
+ printf("Use: maria_ft_dump <table_name> <index_num>\n");
+ my_print_help(my_long_options);
+ my_print_variables(my_long_options);
+ NETWARE_SET_SCREEN_MODE(1);
+ exit(1);
+}
+
+#include <help_end.h>
+
+static void complain(int val) /* Kinda assert :-) */
+{
+ if (val)
+ {
+ printf("You cannot use these options together!\n");
+ exit(1);
+ }
+}
diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c
new file mode 100644
index 00000000000..dc0afdd4b64
--- /dev/null
+++ b/storage/maria/maria_pack.c
@@ -0,0 +1,3230 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Pack MARIA file */
+
+#ifndef USE_MY_FUNC
+#define USE_MY_FUNC /* We need at least my_malloc */
+#endif
+
+#include "maria_def.h"
+#include <queues.h>
+#include <my_tree.h>
+#include "mysys_err.h"
+#ifdef MSDOS
+#include <io.h>
+#endif
+#ifndef __GNU_LIBRARY__
+#define __GNU_LIBRARY__ /* Skip warnings in getopt.h */
+#endif
+#include <my_getopt.h>
+#include <assert.h>
+
+#if SIZEOF_LONG_LONG > 4
+#define BITS_SAVED 64
+#else
+#define BITS_SAVED 32
+#endif
+
+#define IS_OFFSET ((uint) 32768) /* Bit if offset or char in tree */
+#define HEAD_LENGTH 32
+#define ALLOWED_JOIN_DIFF 256 /* Diff allowed to join trees */
+
+#define DATA_TMP_EXT ".TMD"
+#define OLD_EXT ".OLD"
+#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE
+
+struct st_file_buffer {
+ File file;
+ uchar *buffer,*pos,*end;
+ my_off_t pos_in_file;
+ int bits;
+ ulonglong bitbucket;
+};
+
+struct st_huff_tree;
+struct st_huff_element;
+
+typedef struct st_huff_counts {
+ uint field_length,max_zero_fill;
+ uint pack_type;
+ uint max_end_space,max_pre_space,length_bits,min_space;
+ ulong max_length;
+ enum en_fieldtype field_type;
+ struct st_huff_tree *tree; /* Tree for field */
+ my_off_t counts[256];
+ my_off_t end_space[8];
+ my_off_t pre_space[8];
+ my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed;
+ TREE int_tree; /* Tree for detecting distinct column values. */
+ uchar *tree_buff; /* Column values, 'field_length' each. */
+ uchar *tree_pos; /* Points to end of column values in 'tree_buff'. */
+} HUFF_COUNTS;
+
+typedef struct st_huff_element HUFF_ELEMENT;
+
+/*
+ WARNING: It is crucial for the optimizations in calc_packed_length()
+ that 'count' is the first element of 'HUFF_ELEMENT'.
+*/
+struct st_huff_element {
+ my_off_t count;
+ union un_element {
+ struct st_nod {
+ HUFF_ELEMENT *left,*right;
+ } nod;
+ struct st_leaf {
+ HUFF_ELEMENT *null;
+ uint element_nr; /* Number of element */
+ } leaf;
+ } a;
+};
+
+
+typedef struct st_huff_tree {
+ HUFF_ELEMENT *root,*element_buffer;
+ HUFF_COUNTS *counts;
+ uint tree_number;
+ uint elements;
+ my_off_t bytes_packed;
+ uint tree_pack_length;
+ uint min_chr,max_chr,char_bits,offset_bits,max_offset,height;
+ ulonglong *code;
+ uchar *code_len;
+} HUFF_TREE;
+
+
+typedef struct st_isam_mrg {
+ MARIA_HA **file,**current,**end;
+ uint free_file;
+ uint count;
+ uint min_pack_length; /* Theese is used by packed data */
+ uint max_pack_length;
+ uint ref_length;
+ uint max_blob_length;
+ my_off_t records;
+ /* true if at least one source file has at least one disabled index */
+ my_bool src_file_has_indexes_disabled;
+} PACK_MRG_INFO;
+
+
+extern int main(int argc,char * *argv);
+static void get_options(int *argc,char ***argv);
+static MARIA_HA *open_isam_file(char *name,int mode);
+static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count);
+static int compress(PACK_MRG_INFO *file,char *join_name);
+static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records);
+static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees,
+ uint trees,
+ HUFF_COUNTS *huff_counts,
+ uint fields);
+static int compare_tree(void* cmp_arg __attribute__((unused)),
+ const uchar *s,const uchar *t);
+static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts);
+static void check_counts(HUFF_COUNTS *huff_counts,uint trees,
+ my_off_t records);
+static int test_space_compress(HUFF_COUNTS *huff_counts,my_off_t records,
+ uint max_space_length,my_off_t *space_counts,
+ my_off_t tot_space_count,
+ enum en_fieldtype field_type);
+static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts,uint trees);
+static int make_huff_tree(HUFF_TREE *tree,HUFF_COUNTS *huff_counts);
+static int compare_huff_elements(void *not_used, uchar *a,uchar *b);
+static int save_counts_in_queue(uchar *key,element_count count,
+ HUFF_TREE *tree);
+static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,uint flag);
+static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees);
+static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees);
+static void make_traverse_code_tree(HUFF_TREE *huff_tree,
+ HUFF_ELEMENT *element,uint size,
+ ulonglong code);
+static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees,
+ my_off_t tot_elements,my_off_t filelength);
+static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees);
+static my_off_t write_huff_tree(HUFF_TREE *huff_tree,uint trees);
+static uint *make_offset_code_tree(HUFF_TREE *huff_tree,
+ HUFF_ELEMENT *element,
+ uint *offset);
+static uint max_bit(uint value);
+static int compress_isam_file(PACK_MRG_INFO *file,HUFF_COUNTS *huff_counts);
+static char *make_new_name(char *new_name,char *old_name);
+static char *make_old_name(char *new_name,char *old_name);
+static void init_file_buffer(File file,pbool read_buffer);
+static int flush_buffer(ulong neaded_length);
+static void end_file_buffer(void);
+static void write_bits(ulonglong value, uint bits);
+static void flush_bits(void);
+static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length,
+ ha_checksum crc);
+static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,my_off_t new_length,
+ ha_checksum crc);
+static int mrg_close(PACK_MRG_INFO *mrg);
+static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf);
+static void mrg_reset(PACK_MRG_INFO *mrg);
+#if !defined(DBUG_OFF)
+static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count);
+static int fakecmp(my_off_t **count1, my_off_t **count2);
+#endif
+
+
+static int error_on_write=0,test_only=0,verbose=0,silent=0,
+ write_loop=0,force_pack=0, isamchk_neaded=0;
+static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
+static my_bool backup, opt_wait;
+/*
+ tree_buff_length is somewhat arbitrary. The bigger it is the better
+ the chance to win in terms of compression factor. On the other hand,
+ this table becomes part of the compressed file header. And its length
+ is coded with 16 bits in the header. Hence the limit is 2**16 - 1.
+*/
+static uint tree_buff_length= 65536 - MALLOC_OVERHEAD;
+static char tmp_dir[FN_REFLEN]={0},*join_table;
+static my_off_t intervall_length;
+static ha_checksum glob_crc;
+static struct st_file_buffer file_buffer;
+static QUEUE queue;
+static HUFF_COUNTS *global_count;
+static char zero_string[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+static const char *load_default_groups[]= { "mariapack",0 };
+
+ /* The main program */
+
+int main(int argc, char **argv)
+{
+ int error,ok;
+ PACK_MRG_INFO merge;
+ char **default_argv;
+ MY_INIT(argv[0]);
+
+ load_defaults("my",load_default_groups,&argc,&argv);
+ default_argv= argv;
+ get_options(&argc,&argv);
+ maria_init();
+
+ error=ok=isamchk_neaded=0;
+ if (join_table)
+ { /* Join files into one */
+ if (open_isam_files(&merge,argv,(uint) argc) ||
+ compress(&merge,join_table))
+ error=1;
+ }
+ else while (argc--)
+ {
+ MARIA_HA *isam_file;
+ if (!(isam_file=open_isam_file(*argv++,O_RDWR)))
+ error=1;
+ else
+ {
+ merge.file= &isam_file;
+ merge.current=0;
+ merge.free_file=0;
+ merge.count=1;
+ if (compress(&merge,0))
+ error=1;
+ else
+ ok=1;
+ }
+ }
+ if (ok && isamchk_neaded && !silent)
+ puts("Remember to run maria_chk -rq on compressed tables");
+ VOID(fflush(stdout));
+ VOID(fflush(stderr));
+ free_defaults(default_argv);
+ maria_end();
+ my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
+ exit(error ? 2 : 0);
+#ifndef _lint
+ return 0; /* No compiler warning */
+#endif
+}
+
+enum options_mp {OPT_CHARSETS_DIR_MP=256, OPT_AUTO_CLOSE};
+
+static struct my_option my_long_options[] =
+{
+#ifdef __NETWARE__
+ {"autoclose", OPT_AUTO_CLOSE, "Auto close the screen on exit for Netware.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+ {"backup", 'b', "Make a backup of the table as table_name.OLD.",
+ (uchar**) &backup, (uchar**) &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"character-sets-dir", OPT_CHARSETS_DIR_MP,
+ "Directory where character sets are.", (uchar**) &charsets_dir,
+ (uchar**) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.",
+ 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
+ {"force", 'f',
+ "Force packing of table even if it gets bigger or if tempfile exists.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"join", 'j',
+ "Join all given tables into 'new_table_name'. All tables MUST have identical layouts.",
+ (uchar**) &join_table, (uchar**) &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0,
+ 0, 0, 0},
+ {"help", '?', "Display this help and exit.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"silent", 's', "Be more silent.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"tmpdir", 'T', "Use temporary directory to store temporary table.",
+ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"test", 't', "Don't pack table, only test packing it.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"version", 'V', "Output version information and exit.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"wait", 'w', "Wait and retry if table is in use.", (uchar**) &opt_wait,
+ (uchar**) &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+#include <help_start.h>
+
+static void print_version(void)
+{
+ VOID(printf("%s Ver 1.0 for %s on %s\n",
+ my_progname, SYSTEM_TYPE, MACHINE_TYPE));
+ NETWARE_SET_SCREEN_MODE(1);
+}
+
+
+static void usage(void)
+{
+ print_version();
+ puts("Copyright (C) 2002 MySQL AB");
+ puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,");
+ puts("and you are welcome to modify and redistribute it under the GPL license\n");
+
+ puts("Pack a MARIA-table to take much less space.");
+ puts("Keys are not updated, you must run maria_chk -rq on the index (.MAI) file");
+ puts("afterwards to update the keys.");
+ puts("You should give the .MAI file as the filename argument.");
+ puts("To unpack a packed table, run maria_chk -u on the table");
+
+ VOID(printf("\nUsage: %s [OPTIONS] filename...\n", my_progname));
+ my_print_help(my_long_options);
+ print_defaults("my", load_default_groups);
+ my_print_variables(my_long_options);
+}
+
+#include <help_end.h>
+
+static my_bool
+get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
+ char *argument)
+{
+ uint length;
+
+ switch(optid) {
+#ifdef __NETWARE__
+ case OPT_AUTO_CLOSE:
+ setscreenmode(SCR_AUTOCLOSE_ON_EXIT);
+ break;
+#endif
+ case 'f':
+ force_pack= 1;
+ tmpfile_createflag= O_RDWR | O_TRUNC;
+ break;
+ case 's':
+ write_loop= verbose= 0;
+ silent= 1;
+ break;
+ case 't':
+ test_only= 1;
+ /* Avoid to reset 'verbose' if it was already set > 1. */
+ if (! verbose)
+ verbose= 1;
+ break;
+ case 'T':
+ length= (uint) (strmov(tmp_dir, argument) - tmp_dir);
+ if (length != dirname_length(tmp_dir))
+ {
+ tmp_dir[length]=FN_LIBCHAR;
+ tmp_dir[length+1]=0;
+ }
+ break;
+ case 'v':
+ verbose++; /* Allow for selecting the level of verbosity. */
+ silent= 0;
+ break;
+ case '#':
+ DBUG_PUSH(argument ? argument : "d:t:o,/tmp/maria_pack.trace");
+ break;
+ case 'V':
+ print_version();
+ exit(0);
+ case 'I':
+ case '?':
+ usage();
+ exit(0);
+ }
+ return 0;
+}
+
+ /* reads options */
+ /* Initiates DEBUG - but no debugging here ! */
+
+static void get_options(int *argc,char ***argv)
+{
+ int ho_error;
+
+ my_progname= argv[0][0];
+ if (isatty(fileno(stdout)))
+ write_loop=1;
+
+ if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
+ exit(ho_error);
+
+ if (!*argc)
+ {
+ usage();
+ exit(1);
+ }
+ if (join_table)
+ {
+ backup=0; /* Not needed */
+ tmp_dir[0]=0;
+ }
+ return;
+}
+
+
+static MARIA_HA *open_isam_file(char *name,int mode)
+{
+ MARIA_HA *isam_file;
+ MARIA_SHARE *share;
+ DBUG_ENTER("open_isam_file");
+
+ if (!(isam_file=maria_open(name,mode,
+ (opt_wait ? HA_OPEN_WAIT_IF_LOCKED :
+ HA_OPEN_ABORT_IF_LOCKED))))
+ {
+ VOID(fprintf(stderr, "%s gave error %d on open\n", name, my_errno));
+ DBUG_RETURN(0);
+ }
+ share=isam_file->s;
+ if (share->options & HA_OPTION_COMPRESS_RECORD && !join_table)
+ {
+ if (!force_pack)
+ {
+ VOID(fprintf(stderr, "%s is already compressed\n", name));
+ VOID(maria_close(isam_file));
+ DBUG_RETURN(0);
+ }
+ if (verbose)
+ puts("Recompressing already compressed table");
+ share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifing it */
+ }
+ if (! force_pack && share->state.state.records != 0 &&
+ (share->state.state.records <= 1 ||
+ share->state.state.data_file_length < 1024))
+ {
+ VOID(fprintf(stderr, "%s is too small to compress\n", name));
+ VOID(maria_close(isam_file));
+ DBUG_RETURN(0);
+ }
+ VOID(maria_lock_database(isam_file,F_WRLCK));
+ DBUG_RETURN(isam_file);
+}
+
+
+static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count)
+{
+ uint i,j;
+ mrg->count=0;
+ mrg->current=0;
+ mrg->file=(MARIA_HA**) my_malloc(sizeof(MARIA_HA*)*count,MYF(MY_FAE));
+ mrg->free_file=1;
+ mrg->src_file_has_indexes_disabled= 0;
+ for (i=0; i < count ; i++)
+ {
+ if (!(mrg->file[i]=open_isam_file(names[i],O_RDONLY)))
+ goto error;
+
+ mrg->src_file_has_indexes_disabled|=
+ ! maria_is_all_keys_active(mrg->file[i]->s->state.key_map,
+ mrg->file[i]->s->base.keys);
+ }
+ /* Check that files are identical */
+ for (j=0 ; j < count-1 ; j++)
+ {
+ MARIA_COLUMNDEF *m1,*m2,*end;
+ if (mrg->file[j]->s->base.reclength != mrg->file[j+1]->s->base.reclength ||
+ mrg->file[j]->s->base.fields != mrg->file[j+1]->s->base.fields)
+ goto diff_file;
+ m1=mrg->file[j]->s->columndef;
+ end=m1+mrg->file[j]->s->base.fields;
+ m2=mrg->file[j+1]->s->columndef;
+ for ( ; m1 != end ; m1++,m2++)
+ {
+ if (m1->type != m2->type || m1->length != m2->length)
+ goto diff_file;
+ }
+ }
+ mrg->count=count;
+ return 0;
+
+ diff_file:
+ VOID(fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n",
+ my_progname, names[j], names[j+1]));
+ error:
+ while (i--)
+ maria_close(mrg->file[i]);
+ my_free((uchar*) mrg->file,MYF(0));
+ return 1;
+}
+
+
+static int compress(PACK_MRG_INFO *mrg,char *result_table)
+{
+ int error;
+ File new_file,join_isam_file;
+ MARIA_HA *isam_file;
+ MARIA_SHARE *share;
+ char org_name[FN_REFLEN],new_name[FN_REFLEN],temp_name[FN_REFLEN];
+ uint i,header_length,fields,trees,used_trees;
+ my_off_t old_length,new_length,tot_elements;
+ HUFF_COUNTS *huff_counts;
+ HUFF_TREE *huff_trees;
+ DBUG_ENTER("compress");
+
+ isam_file=mrg->file[0]; /* Take this as an example */
+ share=isam_file->s;
+ new_file=join_isam_file= -1;
+ trees=fields=0;
+ huff_trees=0;
+ huff_counts=0;
+ maria_block_size= isam_file->s->block_size;
+
+ /* Create temporary or join file */
+ if (backup)
+ VOID(fn_format(org_name,isam_file->s->open_file_name,"",MARIA_NAME_DEXT,
+ 2));
+ else
+ VOID(fn_format(org_name,isam_file->s->open_file_name,"",MARIA_NAME_DEXT,
+ 2+4+16));
+
+ if (init_pagecache(maria_pagecache, MARIA_MIN_PAGE_CACHE_SIZE, 0, 0,
+ maria_block_size, MY_WME) == 0)
+ {
+ fprintf(stderr, "Can't initialize page cache\n");
+ goto err;
+ }
+
+ if (!test_only && result_table)
+ {
+ /* Make a new indexfile based on first file in list */
+ uint length;
+ uchar *buff;
+ strmov(org_name,result_table); /* Fix error messages */
+ VOID(fn_format(new_name,result_table,"",MARIA_NAME_IEXT,2));
+ if ((join_isam_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME)))
+ < 0)
+ goto err;
+ length=(uint) share->base.keystart;
+ if (!(buff= (uchar*) my_malloc(length,MYF(MY_WME))))
+ goto err;
+ if (my_pread(share->kfile.file, buff, length, 0L, MYF(MY_WME | MY_NABP)) ||
+ my_write(join_isam_file,buff,length,
+ MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
+ {
+ my_free(buff,MYF(0));
+ goto err;
+ }
+ my_free(buff,MYF(0));
+ VOID(fn_format(new_name,result_table,"",MARIA_NAME_DEXT,2));
+ }
+ else if (!tmp_dir[0])
+ VOID(make_new_name(new_name,org_name));
+ else
+ VOID(fn_format(new_name,org_name,tmp_dir,DATA_TMP_EXT,1+2+4));
+ if (!test_only &&
+ (new_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME))) < 0)
+ goto err;
+
+ /* Start calculating statistics */
+
+ mrg->records=0;
+ for (i=0 ; i < mrg->count ; i++)
+ mrg->records+=mrg->file[i]->s->state.state.records;
+
+ DBUG_PRINT("info", ("Compressing %s: (%lu records)",
+ result_table ? new_name : org_name,
+ (ulong) mrg->records));
+ if (write_loop || verbose)
+ {
+ VOID(printf("Compressing %s: (%lu records)\n",
+ result_table ? new_name : org_name, (ulong) mrg->records));
+ }
+ trees=fields=share->base.fields;
+ huff_counts=init_huff_count(isam_file,mrg->records);
+ QUICK_SAFEMALLOC;
+
+ /*
+ Read the whole data file(s) for statistics.
+ */
+ DBUG_PRINT("info", ("- Calculating statistics"));
+ if (write_loop || verbose)
+ VOID(printf("- Calculating statistics\n"));
+ if (get_statistic(mrg,huff_counts))
+ goto err;
+ NORMAL_SAFEMALLOC;
+ old_length=0;
+ for (i=0; i < mrg->count ; i++)
+ old_length+= (mrg->file[i]->s->state.state.data_file_length -
+ mrg->file[i]->s->state.state.empty);
+
+ /*
+ Create a global priority queue in preparation for making
+ temporary Huffman trees.
+ */
+ if (init_queue(&queue,256,0,0,compare_huff_elements,0))
+ goto err;
+
+ /*
+ Check each column if we should use pre-space-compress, end-space-
+ compress, empty-field-compress or zero-field-compress.
+ */
+ check_counts(huff_counts,fields,mrg->records);
+
+ /*
+ Build a Huffman tree for each column.
+ */
+ huff_trees=make_huff_trees(huff_counts,trees);
+
+ /*
+ If the packed lengths of combined columns is less then the sum of
+ the non-combined columns, then create common Huffman trees for them.
+ We do this only for uchar compressed columns, not for distinct values
+ compressed columns.
+ */
+ if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0)
+ goto err;
+
+ /*
+ Assign codes to all uchar or column values.
+ */
+ if (make_huff_decode_table(huff_trees,fields))
+ goto err;
+
+ /* Prepare a file buffer. */
+ init_file_buffer(new_file,0);
+
+ /*
+ Reserve space in the target file for the fixed compressed file header.
+ */
+ file_buffer.pos_in_file=HEAD_LENGTH;
+ if (! test_only)
+ VOID(my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0)));
+
+ /*
+ Write field infos: field type, pack type, length bits, tree number.
+ */
+ write_field_info(huff_counts,fields,used_trees);
+
+ /*
+ Write decode trees.
+ */
+ if (!(tot_elements=write_huff_tree(huff_trees,trees)))
+ goto err;
+
+ /*
+ Calculate the total length of the compression info header.
+ This includes the fixed compressed file header, the column compression
+ type descriptions, and the decode trees.
+ */
+ header_length=(uint) file_buffer.pos_in_file+
+ (uint) (file_buffer.pos-file_buffer.buffer);
+
+ /*
+ Compress the source file into the target file.
+ */
+ DBUG_PRINT("info", ("- Compressing file"));
+ if (write_loop || verbose)
+ VOID(printf("- Compressing file\n"));
+ error=compress_isam_file(mrg,huff_counts);
+ new_length=file_buffer.pos_in_file;
+ if (!error && !test_only)
+ {
+ uchar buff[MEMMAP_EXTRA_MARGIN]; /* End marginal for memmap */
+ bzero(buff,sizeof(buff));
+ error=my_write(file_buffer.file,buff,sizeof(buff),
+ MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
+ }
+
+ /*
+ Write the fixed compressed file header.
+ */
+ if (!error)
+ error=write_header(mrg,header_length,used_trees,tot_elements,
+ new_length);
+
+ /* Flush the file buffer. */
+ end_file_buffer();
+
+ /* Display statistics. */
+ DBUG_PRINT("info", ("Min record length: %6d Max length: %6d "
+ "Mean total length: %6ld",
+ mrg->min_pack_length, mrg->max_pack_length,
+ (ulong) (mrg->records ? (new_length/mrg->records) : 0)));
+ if (verbose && mrg->records)
+ VOID(printf("Min record length: %6d Max length: %6d "
+ "Mean total length: %6ld\n", mrg->min_pack_length,
+ mrg->max_pack_length, (ulong) (new_length/mrg->records)));
+
+ /* Close source and target file. */
+ if (!test_only)
+ {
+ error|=my_close(new_file,MYF(MY_WME));
+ if (!result_table)
+ {
+ error|=my_close(isam_file->dfile.file, MYF(MY_WME));
+ isam_file->dfile.file= -1; /* Tell maria_close file is closed */
+ isam_file->s->bitmap.file.file= -1;
+ }
+ }
+
+ /* Cleanup. */
+ free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
+ if (! test_only && ! error)
+ {
+ if (result_table)
+ {
+ error=save_state_mrg(join_isam_file,mrg,new_length,glob_crc);
+ }
+ else
+ {
+ if (backup)
+ {
+ if (my_rename(org_name,make_old_name(temp_name,
+ isam_file->s->open_file_name),
+ MYF(MY_WME)))
+ error=1;
+ else
+ {
+ if (tmp_dir[0])
+ error=my_copy(new_name,org_name,MYF(MY_WME));
+ else
+ error=my_rename(new_name,org_name,MYF(MY_WME));
+ if (!error)
+ {
+ VOID(my_copystat(temp_name,org_name,MYF(MY_COPYTIME)));
+ if (tmp_dir[0])
+ VOID(my_delete(new_name,MYF(MY_WME)));
+ }
+ }
+ }
+ else
+ {
+ if (tmp_dir[0])
+ {
+ error=my_copy(new_name,org_name,
+ MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_COPYTIME));
+ if (!error)
+ VOID(my_delete(new_name,MYF(MY_WME)));
+ }
+ else
+ error=my_redel(org_name,new_name,MYF(MY_WME | MY_COPYTIME));
+ }
+ if (! error)
+ error=save_state(isam_file,mrg,new_length,glob_crc);
+ }
+ }
+ error|=mrg_close(mrg);
+ if (join_isam_file >= 0)
+ error|=my_close(join_isam_file,MYF(MY_WME));
+ if (error)
+ {
+ VOID(fprintf(stderr, "Aborting: %s is not compressed\n", org_name));
+ VOID(my_delete(new_name,MYF(MY_WME)));
+ DBUG_RETURN(-1);
+ }
+ if (write_loop || verbose)
+ {
+ if (old_length)
+ VOID(printf("%.4g%% \n",
+ (((longlong) (old_length - new_length)) * 100.0 /
+ (longlong) old_length)));
+ else
+ puts("Empty file saved in compressed format");
+ }
+ DBUG_RETURN(0);
+
+ err:
+ end_pagecache(maria_pagecache, 1);
+ free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
+ if (new_file >= 0)
+ VOID(my_close(new_file,MYF(0)));
+ if (join_isam_file >= 0)
+ VOID(my_close(join_isam_file,MYF(0)));
+ mrg_close(mrg);
+ VOID(fprintf(stderr, "Aborted: %s is not compressed\n", org_name));
+ DBUG_RETURN(-1);
+}
+
+ /* Init a huff_count-struct for each field and init it */
+
+static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records)
+{
+ reg2 uint i;
+ reg1 HUFF_COUNTS *count;
+ if ((count = (HUFF_COUNTS*) my_malloc(info->s->base.fields*
+ sizeof(HUFF_COUNTS),
+ MYF(MY_ZEROFILL | MY_WME))))
+ {
+ for (i=0 ; i < info->s->base.fields ; i++)
+ {
+ enum en_fieldtype type;
+ count[i].field_length=info->s->columndef[i].length;
+ type= count[i].field_type= (enum en_fieldtype) info->s->columndef[i].type;
+ if (type == FIELD_INTERVALL ||
+ type == FIELD_CONSTANT ||
+ type == FIELD_ZERO)
+ type = FIELD_NORMAL;
+ if (count[i].field_length <= 8 &&
+ (type == FIELD_NORMAL ||
+ type == FIELD_SKIP_ZERO))
+ count[i].max_zero_fill= count[i].field_length;
+ /*
+ For every column initialize a tree, which is used to detect distinct
+ column values. 'int_tree' works together with 'tree_buff' and
+ 'tree_pos'. It's keys are implemented by pointers into 'tree_buff'.
+ This is accomplished by '-1' as the element size.
+ */
+ init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree,0, NULL,
+ NULL);
+ if (records && type != FIELD_BLOB && type != FIELD_VARCHAR)
+ count[i].tree_pos=count[i].tree_buff =
+ my_malloc(count[i].field_length > 1 ? tree_buff_length : 2,
+ MYF(MY_WME));
+ }
+ }
+ return count;
+}
+
+
+ /* Free memory used by counts and trees */
+
+static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, uint trees,
+ HUFF_COUNTS *huff_counts,
+ uint fields)
+{
+ register uint i;
+
+ if (huff_trees)
+ {
+ for (i=0 ; i < trees ; i++)
+ {
+ if (huff_trees[i].element_buffer)
+ my_free((uchar*) huff_trees[i].element_buffer,MYF(0));
+ if (huff_trees[i].code)
+ my_free((uchar*) huff_trees[i].code,MYF(0));
+ }
+ my_free((uchar*) huff_trees,MYF(0));
+ }
+ if (huff_counts)
+ {
+ for (i=0 ; i < fields ; i++)
+ {
+ if (huff_counts[i].tree_buff)
+ {
+ my_free((uchar*) huff_counts[i].tree_buff,MYF(0));
+ delete_tree(&huff_counts[i].int_tree);
+ }
+ }
+ my_free((uchar*) huff_counts,MYF(0));
+ }
+ delete_queue(&queue); /* This is safe to free */
+ return;
+}
+
+ /* Read through old file and gather some statistics */
+
+static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
+{
+ int error;
+ uint length, null_bytes;
+ ulong reclength,max_blob_length;
+ uchar *record,*pos,*next_pos,*end_pos,*start_pos;
+ ha_rows record_count;
+ HUFF_COUNTS *count,*end_count;
+ TREE_ELEMENT *element;
+ ha_checksum(*calc_checksum)(MARIA_HA *, const uchar *);
+ DBUG_ENTER("get_statistic");
+
+ reclength= mrg->file[0]->s->base.reclength;
+ null_bytes= mrg->file[0]->s->base.null_bytes;
+ record=(uchar*) my_alloca(reclength);
+ end_count=huff_counts+mrg->file[0]->s->base.fields;
+ record_count=0; glob_crc=0;
+ max_blob_length=0;
+
+ /* Check how to calculate checksum */
+ if (mrg->file[0]->s->data_file_type == STATIC_RECORD)
+ calc_checksum= _ma_static_checksum;
+ else
+ calc_checksum= _ma_checksum;
+
+ mrg_reset(mrg);
+ while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
+ {
+ ulong tot_blob_length=0;
+ if (! error)
+ {
+ /* glob_crc is a checksum over all bytes of all records. */
+ glob_crc+= (*calc_checksum)(mrg->file[0],record);
+
+ /* Count the incidence of values separately for every column. */
+ for (pos=record + null_bytes, count=huff_counts ;
+ count < end_count ;
+ count++,
+ pos=next_pos)
+ {
+ next_pos=end_pos=(start_pos=pos)+count->field_length;
+
+ /*
+ Put the whole column value in a tree if there is room for it.
+ 'int_tree' is used to quickly check for duplicate values.
+ 'tree_buff' collects as many distinct column values as
+ possible. If the field length is > 1, it is tree_buff_length,
+ else 2 bytes. Each value is 'field_length' bytes big. If there
+ are more distinct column values than fit into the buffer, we
+ give up with this tree. BLOBs and VARCHARs do not have a
+ tree_buff as it can only be used with fixed length columns.
+ For the special case of field length == 1, we handle only the
+ case that there is only one distinct value in the table(s).
+ Otherwise, we can have a maximum of 256 distinct values. This
+ is then handled by the normal Huffman tree build.
+
+ Another limit for collecting distinct column values is the
+ number of values itself. Since we would need to build a
+ Huffman tree for the values, we are limited by the 'IS_OFFSET'
+ constant. This constant expresses a bit which is used to
+ determine if a tree element holds a final value or an offset
+ to a child element. Hence, all values and offsets need to be
+ smaller than 'IS_OFFSET'. A tree element is implemented with
+ two integer values, one for the left branch and one for the
+ right branch. For the extreme case that the first element
+ points to the last element, the number of integers in the tree
+ must be less or equal to IS_OFFSET. So the number of elements
+ must be less or equal to IS_OFFSET / 2.
+
+ WARNING: At first, we insert a pointer into the record buffer
+ as the key for the tree. If we got a new distinct value, which
+ is really inserted into the tree, instead of being counted
+ only, we will copy the column value from the record buffer to
+ 'tree_buff' and adjust the key pointer of the tree accordingly.
+ */
+ if (count->tree_buff)
+ {
+ global_count=count;
+ if (!(element=tree_insert(&count->int_tree,pos, 0,
+ count->int_tree.custom_arg)) ||
+ (element->count == 1 &&
+ (count->tree_buff + tree_buff_length <
+ count->tree_pos + count->field_length)) ||
+ (count->int_tree.elements_in_tree > IS_OFFSET / 2) ||
+ (count->field_length == 1 &&
+ count->int_tree.elements_in_tree > 1))
+ {
+ delete_tree(&count->int_tree);
+ my_free(count->tree_buff,MYF(0));
+ count->tree_buff=0;
+ }
+ else
+ {
+ /*
+ If tree_insert() succeeds, it either creates a new element
+ or increments the counter of an existing element.
+ */
+ if (element->count == 1)
+ {
+ /* Copy the new column value into 'tree_buff'. */
+ memcpy(count->tree_pos,pos,(size_t) count->field_length);
+ /* Adjust the key pointer in the tree. */
+ tree_set_pointer(element,count->tree_pos);
+ /* Point behind the last column value so far. */
+ count->tree_pos+=count->field_length;
+ }
+ }
+ }
+
+ /* Save character counters and space-counts and zero-field-counts */
+ if (count->field_type == FIELD_NORMAL ||
+ count->field_type == FIELD_SKIP_ENDSPACE)
+ {
+ /* Ignore trailing space. */
+ for ( ; end_pos > pos ; end_pos--)
+ if (end_pos[-1] != ' ')
+ break;
+ /* Empty fields are just counted. Go to the next record. */
+ if (end_pos == pos)
+ {
+ count->empty_fields++;
+ count->max_zero_fill=0;
+ continue;
+ }
+ /*
+ Count the total of all trailing spaces and the number of
+ short trailing spaces. Remember the longest trailing space.
+ */
+ length= (uint) (next_pos-end_pos);
+ count->tot_end_space+=length;
+ if (length < 8)
+ count->end_space[length]++;
+ if (count->max_end_space < length)
+ count->max_end_space = length;
+ }
+
+ if (count->field_type == FIELD_NORMAL ||
+ count->field_type == FIELD_SKIP_PRESPACE)
+ {
+ /* Ignore leading space. */
+ for (pos=start_pos; pos < end_pos ; pos++)
+ if (pos[0] != ' ')
+ break;
+ /* Empty fields are just counted. Go to the next record. */
+ if (end_pos == pos)
+ {
+ count->empty_fields++;
+ count->max_zero_fill=0;
+ continue;
+ }
+ /*
+ Count the total of all leading spaces and the number of
+ short leading spaces. Remember the longest leading space.
+ */
+ length= (uint) (pos-start_pos);
+ count->tot_pre_space+=length;
+ if (length < 8)
+ count->pre_space[length]++;
+ if (count->max_pre_space < length)
+ count->max_pre_space = length;
+ }
+
+ /* Calculate pos, end_pos, and max_length for variable length fields. */
+ if (count->field_type == FIELD_BLOB)
+ {
+ uint field_length=count->field_length -portable_sizeof_char_ptr;
+ ulong blob_length= _ma_calc_blob_length(field_length, start_pos);
+ memcpy_fixed((char*) &pos, start_pos+field_length,sizeof(char*));
+ end_pos=pos+blob_length;
+ tot_blob_length+=blob_length;
+ set_if_bigger(count->max_length,blob_length);
+ }
+ else if (count->field_type == FIELD_VARCHAR)
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
+ length= (pack_length == 1 ? (uint) *(uchar*) start_pos :
+ uint2korr(start_pos));
+ pos= start_pos+pack_length;
+ end_pos= pos+length;
+ set_if_bigger(count->max_length,length);
+ }
+
+ /* Evaluate 'max_zero_fill' for short fields. */
+ if (count->field_length <= 8 &&
+ (count->field_type == FIELD_NORMAL ||
+ count->field_type == FIELD_SKIP_ZERO))
+ {
+ uint i;
+ /* Zero fields are just counted. Go to the next record. */
+ if (!memcmp((uchar*) start_pos,zero_string,count->field_length))
+ {
+ count->zero_fields++;
+ continue;
+ }
+ /*
+ max_zero_fill starts with field_length. It is decreased every
+ time a shorter "zero trailer" is found. It is set to zero when
+ an empty field is found (see above). This suggests that the
+ variable should be called 'min_zero_fill'.
+ */
+ for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ;
+ i++) ;
+ if (i < count->max_zero_fill)
+ count->max_zero_fill=i;
+ }
+
+ /* Ignore zero fields and check fields. */
+ if (count->field_type == FIELD_ZERO ||
+ count->field_type == FIELD_CHECK)
+ continue;
+
+ /*
+ Count the incidence of every uchar value in the
+ significant field value.
+ */
+ for ( ; pos < end_pos ; pos++)
+ count->counts[(uchar) *pos]++;
+
+ /* Step to next field. */
+ }
+
+ if (tot_blob_length > max_blob_length)
+ max_blob_length=tot_blob_length;
+ record_count++;
+ if (write_loop && record_count % WRITE_COUNT == 0)
+ {
+ VOID(printf("%lu\r", (ulong) record_count));
+ VOID(fflush(stdout));
+ }
+ }
+ else if (error != HA_ERR_RECORD_DELETED)
+ {
+ VOID(fprintf(stderr, "Got error %d while reading rows", error));
+ break;
+ }
+
+ /* Step to next record. */
+ }
+ if (write_loop)
+ {
+ VOID(printf(" \r"));
+ VOID(fflush(stdout));
+ }
+
+ /*
+ If --debug=d,fakebigcodes is set, fake the counts to get big Huffman
+ codes.
+ */
+ DBUG_EXECUTE_IF("fakebigcodes", fakebigcodes(huff_counts, end_count););
+
+ DBUG_PRINT("info", ("Found the following number of incidents "
+ "of the uchar codes:"));
+ if (verbose >= 2)
+ VOID(printf("Found the following number of incidents "
+ "of the uchar codes:\n"));
+ for (count= huff_counts ; count < end_count; count++)
+ {
+ uint idx;
+ my_off_t total_count;
+ char llbuf[32];
+
+ DBUG_PRINT("info", ("column: %3u", (uint) (count - huff_counts + 1)));
+ if (verbose >= 2)
+ VOID(printf("column: %3u\n", (uint) (count - huff_counts + 1)));
+ if (count->tree_buff)
+ {
+ DBUG_PRINT("info", ("number of distinct values: %u",
+ (uint) ((count->tree_pos - count->tree_buff) /
+ count->field_length)));
+ if (verbose >= 2)
+ VOID(printf("number of distinct values: %u\n",
+ (uint) ((count->tree_pos - count->tree_buff) /
+ count->field_length)));
+ }
+ total_count= 0;
+ for (idx= 0; idx < 256; idx++)
+ {
+ if (count->counts[idx])
+ {
+ total_count+= count->counts[idx];
+ DBUG_PRINT("info", ("counts[0x%02x]: %12s", idx,
+ llstr((longlong) count->counts[idx], llbuf)));
+ if (verbose >= 2)
+ VOID(printf("counts[0x%02x]: %12s\n", idx,
+ llstr((longlong) count->counts[idx], llbuf)));
+ }
+ }
+ DBUG_PRINT("info", ("total: %12s", llstr((longlong) total_count,
+ llbuf)));
+ if ((verbose >= 2) && total_count)
+ {
+ VOID(printf("total: %12s\n",
+ llstr((longlong) total_count, llbuf)));
+ }
+ }
+
+ mrg->records=record_count;
+ mrg->max_blob_length=max_blob_length;
+ my_afree((uchar*) record);
+ DBUG_RETURN(error != HA_ERR_END_OF_FILE);
+}
+
+static int compare_huff_elements(void *not_used __attribute__((unused)),
+ uchar *a, uchar *b)
+{
+ return *((my_off_t*) a) < *((my_off_t*) b) ? -1 :
+ (*((my_off_t*) a) == *((my_off_t*) b) ? 0 : 1);
+}
+
+ /* Check each tree if we should use pre-space-compress, end-space-
+ compress, empty-field-compress or zero-field-compress */
+
+static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
+ my_off_t records)
+{
+ uint space_fields,fill_zero_fields,field_count[(int) FIELD_enum_val_count];
+ my_off_t old_length,new_length,length;
+ DBUG_ENTER("check_counts");
+
+ bzero((uchar*) field_count,sizeof(field_count));
+ space_fields=fill_zero_fields=0;
+
+ for (; trees-- ; huff_counts++)
+ {
+ if (huff_counts->field_type == FIELD_BLOB)
+ {
+ huff_counts->length_bits=max_bit(huff_counts->max_length);
+ goto found_pack;
+ }
+ else if (huff_counts->field_type == FIELD_VARCHAR)
+ {
+ huff_counts->length_bits=max_bit(huff_counts->max_length);
+ goto found_pack;
+ }
+ else if (huff_counts->field_type == FIELD_CHECK)
+ {
+ huff_counts->bytes_packed=0;
+ huff_counts->counts[0]=0;
+ goto found_pack;
+ }
+
+ huff_counts->field_type=FIELD_NORMAL;
+ huff_counts->pack_type=0;
+
+ /* Check for zero-filled records (in this column), or zero records. */
+ if (huff_counts->zero_fields || ! records)
+ {
+ my_off_t old_space_count;
+ /*
+ If there are only zero filled records (in this column),
+ or no records at all, we are done.
+ */
+ if (huff_counts->zero_fields == records)
+ {
+ huff_counts->field_type= FIELD_ZERO;
+ huff_counts->bytes_packed=0;
+ huff_counts->counts[0]=0;
+ goto found_pack;
+ }
+ /* Remeber the number of significant spaces. */
+ old_space_count=huff_counts->counts[' '];
+ /* Add all leading and trailing spaces. */
+ huff_counts->counts[' ']+= (huff_counts->tot_end_space +
+ huff_counts->tot_pre_space +
+ huff_counts->empty_fields *
+ huff_counts->field_length);
+ /* Check, what the compressed length of this would be. */
+ old_length=calc_packed_length(huff_counts,0)+records/8;
+ /* Get the number of zero bytes. */
+ length=huff_counts->zero_fields*huff_counts->field_length;
+ /* Add it to the counts. */
+ huff_counts->counts[0]+=length;
+ /* Check, what the compressed length of this would be. */
+ new_length=calc_packed_length(huff_counts,0);
+ /* If the compression without the zeroes would be shorter, we are done. */
+ if (old_length < new_length && huff_counts->field_length > 1)
+ {
+ huff_counts->field_type=FIELD_SKIP_ZERO;
+ huff_counts->counts[0]-=length;
+ huff_counts->bytes_packed=old_length- records/8;
+ goto found_pack;
+ }
+ /* Remove the insignificant spaces, but keep the zeroes. */
+ huff_counts->counts[' ']=old_space_count;
+ }
+ /* Check, what the compressed length of this column would be. */
+ huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
+
+ /*
+ If there are enough empty records (in this column),
+ treating them specially may pay off.
+ */
+ if (huff_counts->empty_fields)
+ {
+ if (huff_counts->field_length > 2 &&
+ huff_counts->empty_fields + (records - huff_counts->empty_fields)*
+ (1+max_bit(max(huff_counts->max_pre_space,
+ huff_counts->max_end_space))) <
+ records * max_bit(huff_counts->field_length))
+ {
+ huff_counts->pack_type |= PACK_TYPE_SPACE_FIELDS;
+ }
+ else
+ {
+ length=huff_counts->empty_fields*huff_counts->field_length;
+ if (huff_counts->tot_end_space || ! huff_counts->tot_pre_space)
+ {
+ huff_counts->tot_end_space+=length;
+ huff_counts->max_end_space=huff_counts->field_length;
+ if (huff_counts->field_length < 8)
+ huff_counts->end_space[huff_counts->field_length]+=
+ huff_counts->empty_fields;
+ }
+ if (huff_counts->tot_pre_space)
+ {
+ huff_counts->tot_pre_space+=length;
+ huff_counts->max_pre_space=huff_counts->field_length;
+ if (huff_counts->field_length < 8)
+ huff_counts->pre_space[huff_counts->field_length]+=
+ huff_counts->empty_fields;
+ }
+ }
+ }
+
+ /*
+ If there are enough trailing spaces (in this column),
+ treating them specially may pay off.
+ */
+ if (huff_counts->tot_end_space)
+ {
+ huff_counts->counts[' ']+=huff_counts->tot_pre_space;
+ if (test_space_compress(huff_counts,records,huff_counts->max_end_space,
+ huff_counts->end_space,
+ huff_counts->tot_end_space,FIELD_SKIP_ENDSPACE))
+ goto found_pack;
+ huff_counts->counts[' ']-=huff_counts->tot_pre_space;
+ }
+
+ /*
+ If there are enough leading spaces (in this column),
+ treating them specially may pay off.
+ */
+ if (huff_counts->tot_pre_space)
+ {
+ if (test_space_compress(huff_counts,records,huff_counts->max_pre_space,
+ huff_counts->pre_space,
+ huff_counts->tot_pre_space,FIELD_SKIP_PRESPACE))
+ goto found_pack;
+ }
+
+ found_pack: /* Found field-packing */
+
+ /* Test if we can use zero-fill */
+
+ if (huff_counts->max_zero_fill &&
+ (huff_counts->field_type == FIELD_NORMAL ||
+ huff_counts->field_type == FIELD_SKIP_ZERO))
+ {
+ huff_counts->counts[0]-=huff_counts->max_zero_fill*
+ (huff_counts->field_type == FIELD_SKIP_ZERO ?
+ records - huff_counts->zero_fields : records);
+ huff_counts->pack_type|=PACK_TYPE_ZERO_FILL;
+ huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
+ }
+
+ /* Test if intervall-field is better */
+
+ if (huff_counts->tree_buff)
+ {
+ HUFF_TREE tree;
+
+ DBUG_EXECUTE_IF("forceintervall",
+ huff_counts->bytes_packed= ~ (my_off_t) 0;);
+ tree.element_buffer=0;
+ if (!make_huff_tree(&tree,huff_counts) &&
+ tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed)
+ {
+ if (tree.elements == 1)
+ huff_counts->field_type=FIELD_CONSTANT;
+ else
+ huff_counts->field_type=FIELD_INTERVALL;
+ huff_counts->pack_type=0;
+ }
+ else
+ {
+ my_free((uchar*) huff_counts->tree_buff,MYF(0));
+ delete_tree(&huff_counts->int_tree);
+ huff_counts->tree_buff=0;
+ }
+ if (tree.element_buffer)
+ my_free((uchar*) tree.element_buffer,MYF(0));
+ }
+ if (huff_counts->pack_type & PACK_TYPE_SPACE_FIELDS)
+ space_fields++;
+ if (huff_counts->pack_type & PACK_TYPE_ZERO_FILL)
+ fill_zero_fields++;
+ field_count[huff_counts->field_type]++;
+ }
+ DBUG_PRINT("info", ("normal: %3d empty-space: %3d "
+ "empty-zero: %3d empty-fill: %3d",
+ field_count[FIELD_NORMAL],space_fields,
+ field_count[FIELD_SKIP_ZERO],fill_zero_fields));
+ DBUG_PRINT("info", ("pre-space: %3d end-space: %3d "
+ "intervall-fields: %3d zero: %3d",
+ field_count[FIELD_SKIP_PRESPACE],
+ field_count[FIELD_SKIP_ENDSPACE],
+ field_count[FIELD_INTERVALL],
+ field_count[FIELD_ZERO]));
+ if (verbose)
+ VOID(printf("\nnormal: %3d empty-space: %3d "
+ "empty-zero: %3d empty-fill: %3d\n"
+ "pre-space: %3d end-space: %3d "
+ "intervall-fields: %3d zero: %3d\n",
+ field_count[FIELD_NORMAL],space_fields,
+ field_count[FIELD_SKIP_ZERO],fill_zero_fields,
+ field_count[FIELD_SKIP_PRESPACE],
+ field_count[FIELD_SKIP_ENDSPACE],
+ field_count[FIELD_INTERVALL],
+ field_count[FIELD_ZERO]));
+ DBUG_VOID_RETURN;
+}
+
+
+/* Test if we can use space-compression and empty-field-compression */
+
+static int
+test_space_compress(HUFF_COUNTS *huff_counts, my_off_t records,
+ uint max_space_length, my_off_t *space_counts,
+ my_off_t tot_space_count, enum en_fieldtype field_type)
+{
+ int min_pos;
+ uint length_bits,i;
+ my_off_t space_count,min_space_count,min_pack,new_length,skip;
+
+ length_bits=max_bit(max_space_length);
+
+ /* Default no end_space-packing */
+ space_count=huff_counts->counts[(uint) ' '];
+ min_space_count= (huff_counts->counts[(uint) ' ']+= tot_space_count);
+ min_pack=calc_packed_length(huff_counts,0);
+ min_pos= -2;
+ huff_counts->counts[(uint) ' ']=space_count;
+
+ /* Test with allways space-count */
+ new_length=huff_counts->bytes_packed+length_bits*records/8;
+ if (new_length+1 < min_pack)
+ {
+ min_pos= -1;
+ min_pack=new_length;
+ min_space_count=space_count;
+ }
+ /* Test with length-flag */
+ for (skip=0L, i=0 ; i < 8 ; i++)
+ {
+ if (space_counts[i])
+ {
+ if (i)
+ huff_counts->counts[(uint) ' ']+=space_counts[i];
+ skip+=huff_counts->pre_space[i];
+ new_length=calc_packed_length(huff_counts,0)+
+ (records+(records-skip)*(1+length_bits))/8;
+ if (new_length < min_pack)
+ {
+ min_pos=(int) i;
+ min_pack=new_length;
+ min_space_count=huff_counts->counts[(uint) ' '];
+ }
+ }
+ }
+
+ huff_counts->counts[(uint) ' ']=min_space_count;
+ huff_counts->bytes_packed=min_pack;
+ switch (min_pos) {
+ case -2:
+ return(0); /* No space-compress */
+ case -1: /* Always space-count */
+ huff_counts->field_type=field_type;
+ huff_counts->min_space=0;
+ huff_counts->length_bits=max_bit(max_space_length);
+ break;
+ default:
+ huff_counts->field_type=field_type;
+ huff_counts->min_space=(uint) min_pos;
+ huff_counts->pack_type|=PACK_TYPE_SELECTED;
+ huff_counts->length_bits=max_bit(max_space_length);
+ break;
+ }
+ return(1); /* Using space-compress */
+}
+
+
+ /* Make a huff_tree of each huff_count */
+
+static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees)
+{
+ uint tree;
+ HUFF_TREE *huff_tree;
+ DBUG_ENTER("make_huff_trees");
+
+ if (!(huff_tree=(HUFF_TREE*) my_malloc(trees*sizeof(HUFF_TREE),
+ MYF(MY_WME | MY_ZEROFILL))))
+ DBUG_RETURN(0);
+
+ for (tree=0 ; tree < trees ; tree++)
+ {
+ if (make_huff_tree(huff_tree+tree,huff_counts+tree))
+ {
+ while (tree--)
+ my_free((uchar*) huff_tree[tree].element_buffer,MYF(0));
+ my_free((uchar*) huff_tree,MYF(0));
+ DBUG_RETURN(0);
+ }
+ }
+ DBUG_RETURN(huff_tree);
+}
+
+/*
+ Build a Huffman tree.
+
+ SYNOPSIS
+ make_huff_tree()
+ huff_tree The Huffman tree.
+ huff_counts The counts.
+
+ DESCRIPTION
+ Build a Huffman tree according to huff_counts->counts or
+ huff_counts->tree_buff. tree_buff, if non-NULL contains up to
+ tree_buff_length of distinct column values. In that case, whole
+ values can be Huffman encoded instead of single bytes.
+
+ RETURN
+ 0 OK
+ != 0 Error
+*/
+
+static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
+{
+ uint i,found,bits_packed,first,last;
+ my_off_t bytes_packed;
+ HUFF_ELEMENT *a,*b,*new_huff_el;
+
+ first=last=0;
+ if (huff_counts->tree_buff)
+ {
+ /* Calculate the number of distinct values in tree_buff. */
+ found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) /
+ huff_counts->field_length;
+ first=0; last=found-1;
+ }
+ else
+ {
+ /* Count the number of uchar codes found in the column. */
+ for (i=found=0 ; i < 256 ; i++)
+ {
+ if (huff_counts->counts[i])
+ {
+ if (! found++)
+ first=i;
+ last=i;
+ }
+ }
+ if (found < 2)
+ found=2;
+ }
+
+ /* When using 'tree_buff' we can have more that 256 values. */
+ if (queue.max_elements < found)
+ {
+ delete_queue(&queue);
+ if (init_queue(&queue,found,0,0,compare_huff_elements,0))
+ return -1;
+ }
+
+ /* Allocate or reallocate an element buffer for the Huffman tree. */
+ if (!huff_tree->element_buffer)
+ {
+ if (!(huff_tree->element_buffer=
+ (HUFF_ELEMENT*) my_malloc(found*2*sizeof(HUFF_ELEMENT),MYF(MY_WME))))
+ return 1;
+ }
+ else
+ {
+ HUFF_ELEMENT *temp;
+ if (!(temp=
+ (HUFF_ELEMENT*) my_realloc((uchar*) huff_tree->element_buffer,
+ found*2*sizeof(HUFF_ELEMENT),
+ MYF(MY_WME))))
+ return 1;
+ huff_tree->element_buffer=temp;
+ }
+
+ huff_counts->tree=huff_tree;
+ huff_tree->counts=huff_counts;
+ huff_tree->min_chr=first;
+ huff_tree->max_chr=last;
+ huff_tree->char_bits=max_bit(last-first);
+ huff_tree->offset_bits=max_bit(found-1)+1;
+
+ if (huff_counts->tree_buff)
+ {
+ huff_tree->elements=0;
+ huff_tree->tree_pack_length=(1+15+16+5+5+
+ (huff_tree->char_bits+1)*found+
+ (huff_tree->offset_bits+1)*
+ (found-2)+7)/8 +
+ (uint) (huff_tree->counts->tree_pos-
+ huff_tree->counts->tree_buff);
+ /*
+ Put a HUFF_ELEMENT into the queue for every distinct column value.
+
+ tree_walk() calls save_counts_in_queue() for every element in
+ 'int_tree'. This takes elements from the target trees element
+ buffer and places references to them into the buffer of the
+ priority queue. We insert in column value order, but the order is
+ in fact irrelevant here. We will establish the correct order
+ later.
+ */
+ tree_walk(&huff_counts->int_tree,
+ (int (*)(void*, element_count,void*)) save_counts_in_queue,
+ (uchar*) huff_tree, left_root_right);
+ }
+ else
+ {
+ huff_tree->elements=found;
+ huff_tree->tree_pack_length=(9+9+5+5+
+ (huff_tree->char_bits+1)*found+
+ (huff_tree->offset_bits+1)*
+ (found-2)+7)/8;
+ /*
+ Put a HUFF_ELEMENT into the queue for every uchar code found in the column.
+
+ The elements are taken from the target trees element buffer.
+ Instead of using queue_insert(), we just place references to the
+ elements into the buffer of the priority queue. We insert in byte
+ value order, but the order is in fact irrelevant here. We will
+ establish the correct order later.
+ */
+ for (i=first, found=0 ; i <= last ; i++)
+ {
+ if (huff_counts->counts[i])
+ {
+ new_huff_el=huff_tree->element_buffer+(found++);
+ new_huff_el->count=huff_counts->counts[i];
+ new_huff_el->a.leaf.null=0;
+ new_huff_el->a.leaf.element_nr=i;
+ queue.root[found]=(uchar*) new_huff_el;
+ }
+ }
+ /*
+ If there is only a single uchar value in this field in all records,
+ add a second element with zero incidence. This is required to enter
+ the loop, which builds the Huffman tree.
+ */
+ while (found < 2)
+ {
+ new_huff_el=huff_tree->element_buffer+(found++);
+ new_huff_el->count=0;
+ new_huff_el->a.leaf.null=0;
+ if (last)
+ new_huff_el->a.leaf.element_nr=huff_tree->min_chr=last-1;
+ else
+ new_huff_el->a.leaf.element_nr=huff_tree->max_chr=last+1;
+ queue.root[found]=(uchar*) new_huff_el;
+ }
+ }
+
+ /* Make a queue from the queue buffer. */
+ queue.elements=found;
+
+ /*
+ Make a priority queue from the queue. Construct its index so that we
+ have a partially ordered tree.
+ */
+ for (i=found/2 ; i > 0 ; i--)
+ _downheap(&queue,i);
+
+ /* The Huffman algorithm. */
+ bytes_packed=0; bits_packed=0;
+ for (i=1 ; i < found ; i++)
+ {
+ /*
+ Pop the top element from the queue (the one with the least incidence).
+ Popping from a priority queue includes a re-ordering of the queue,
+ to get the next least incidence element to the top.
+ */
+ a=(HUFF_ELEMENT*) queue_remove(&queue,0);
+ /*
+ Copy the next least incidence element. The queue implementation
+ reserves root[0] for temporary purposes. root[1] is the top.
+ */
+ b=(HUFF_ELEMENT*) queue.root[1];
+ /* Get a new element from the element buffer. */
+ new_huff_el=huff_tree->element_buffer+found+i;
+ /* The new element gets the sum of the two least incidence elements. */
+ new_huff_el->count=a->count+b->count;
+ /*
+ The Huffman algorithm assigns another bit to the code for a byte
+ every time that bytes incidence is combined (directly or indirectly)
+ to a new element as one of the two least incidence elements.
+ This means that one more bit per incidence of that uchar is required
+ in the resulting file. So we add the new combined incidence as the
+ number of bits by which the result grows.
+ */
+ bits_packed+=(uint) (new_huff_el->count & 7);
+ bytes_packed+=new_huff_el->count/8;
+ /* The new element points to its children, lesser in left. */
+ new_huff_el->a.nod.left=a;
+ new_huff_el->a.nod.right=b;
+ /*
+ Replace the copied top element by the new element and re-order the
+ queue.
+ */
+ queue.root[1]=(uchar*) new_huff_el;
+ queue_replaced(&queue);
+ }
+ huff_tree->root=(HUFF_ELEMENT*) queue.root[1];
+ huff_tree->bytes_packed=bytes_packed+(bits_packed+7)/8;
+ return 0;
+}
+
+static int compare_tree(void* cmp_arg __attribute__((unused)),
+ register const uchar *s, register const uchar *t)
+{
+ uint length;
+ for (length=global_count->field_length; length-- ;)
+ if (*s++ != *t++)
+ return (int) s[-1] - (int) t[-1];
+ return 0;
+}
+
+/*
+ Organize distinct column values and their incidences into a priority queue.
+
+ SYNOPSIS
+ save_counts_in_queue()
+ key The column value.
+ count The incidence of this value.
+ tree The Huffman tree to be built later.
+
+ DESCRIPTION
+ We use the element buffer of the targeted tree. The distinct column
+ values are organized in a priority queue first. The Huffman
+ algorithm will later organize the elements into a Huffman tree. For
+ the time being, we just place references to the elements into the
+ queue buffer. The buffer will later be organized into a priority
+ queue.
+
+ RETURN
+ 0
+ */
+
+static int save_counts_in_queue(uchar *key, element_count count,
+ HUFF_TREE *tree)
+{
+ HUFF_ELEMENT *new_huff_el;
+
+ new_huff_el=tree->element_buffer+(tree->elements++);
+ new_huff_el->count=count;
+ new_huff_el->a.leaf.null=0;
+ new_huff_el->a.leaf.element_nr= (uint) (key- tree->counts->tree_buff) /
+ tree->counts->field_length;
+ queue.root[tree->elements]=(uchar*) new_huff_el;
+ return 0;
+}
+
+
+/*
+ Calculate length of file if given counts should be used.
+
+ SYNOPSIS
+ calc_packed_length()
+ huff_counts The counts for a column of the table(s).
+ add_tree_lenght If the decode tree length should be added.
+
+ DESCRIPTION
+ We need to follow the Huffman algorithm until we know, how many bits
+ are required for each uchar code. But we do not need the resulting
+ Huffman tree. Hence, we can leave out some steps which are essential
+ in make_huff_tree().
+
+ RETURN
+ Number of bytes required to compress this table column.
+*/
+
+static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
+ uint add_tree_lenght)
+{
+ uint i,found,bits_packed,first,last;
+ my_off_t bytes_packed;
+ HUFF_ELEMENT element_buffer[256];
+ DBUG_ENTER("calc_packed_length");
+
+ /*
+ WARNING: We use a small hack for efficiency: Instead of placing
+ references to HUFF_ELEMENTs into the queue, we just insert
+ references to the counts of the uchar codes which appeared in this
+ table column. During the Huffman algorithm they are successively
+ replaced by references to HUFF_ELEMENTs. This works, because
+ HUFF_ELEMENTs have the incidence count at their beginning.
+ Regardless, wether the queue array contains references to counts of
+ type my_off_t or references to HUFF_ELEMENTs which have the count of
+ type my_off_t at their beginning, it always points to a count of the
+ same type.
+
+ Instead of using queue_insert(), we just copy the references into
+ the buffer of the priority queue. We insert in uchar value order, but
+ the order is in fact irrelevant here. We will establish the correct
+ order later.
+ */
+ first=last=0;
+ for (i=found=0 ; i < 256 ; i++)
+ {
+ if (huff_counts->counts[i])
+ {
+ if (! found++)
+ first=i;
+ last=i;
+ /* We start with root[1], which is the queues top element. */
+ queue.root[found]=(uchar*) &huff_counts->counts[i];
+ }
+ }
+ if (!found)
+ DBUG_RETURN(0); /* Empty tree */
+ /*
+ If there is only a single uchar value in this field in all records,
+ add a second element with zero incidence. This is required to enter
+ the loop, which follows the Huffman algorithm.
+ */
+ if (found < 2)
+ queue.root[++found]=(uchar*) &huff_counts->counts[last ? 0 : 1];
+
+ /* Make a queue from the queue buffer. */
+ queue.elements=found;
+
+ bytes_packed=0; bits_packed=0;
+ /* Add the length of the coding table, which would become part of the file. */
+ if (add_tree_lenght)
+ bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+
+ (max_bit(found-1)+1+1)*(found-2) +7)/8;
+
+ /*
+ Make a priority queue from the queue. Construct its index so that we
+ have a partially ordered tree.
+ */
+ for (i=(found+1)/2 ; i > 0 ; i--)
+ _downheap(&queue,i);
+
+ /* The Huffman algorithm. */
+ for (i=0 ; i < found-1 ; i++)
+ {
+ my_off_t *a;
+ my_off_t *b;
+ HUFF_ELEMENT *new_huff_el;
+
+ /*
+ Pop the top element from the queue (the one with the least
+ incidence). Popping from a priority queue includes a re-ordering
+ of the queue, to get the next least incidence element to the top.
+ */
+ a= (my_off_t*) queue_remove(&queue, 0);
+ /*
+ Copy the next least incidence element. The queue implementation
+ reserves root[0] for temporary purposes. root[1] is the top.
+ */
+ b= (my_off_t*) queue.root[1];
+ /* Create a new element in a local (automatic) buffer. */
+ new_huff_el= element_buffer + i;
+ /* The new element gets the sum of the two least incidence elements. */
+ new_huff_el->count= *a + *b;
+ /*
+ The Huffman algorithm assigns another bit to the code for a byte
+ every time that bytes incidence is combined (directly or indirectly)
+ to a new element as one of the two least incidence elements.
+ This means that one more bit per incidence of that uchar is required
+ in the resulting file. So we add the new combined incidence as the
+ number of bits by which the result grows.
+ */
+ bits_packed+=(uint) (new_huff_el->count & 7);
+ bytes_packed+=new_huff_el->count/8;
+ /*
+ Replace the copied top element by the new element and re-order the
+ queue. This successively replaces the references to counts by
+ references to HUFF_ELEMENTs.
+ */
+ queue.root[1]=(uchar*) new_huff_el;
+ queue_replaced(&queue);
+ }
+ DBUG_RETURN(bytes_packed+(bits_packed+7)/8);
+}
+
+
+ /* Remove trees that don't give any compression */
+
+static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees)
+{
+ uint k,tree_number;
+ HUFF_COUNTS count,*i,*j,*last_count;
+
+ last_count=huff_counts+trees;
+ for (tree_number=0, i=huff_counts ; i < last_count ; i++)
+ {
+ if (!i->tree->tree_number)
+ {
+ i->tree->tree_number= ++tree_number;
+ if (i->tree_buff)
+ continue; /* Don't join intervall */
+ for (j=i+1 ; j < last_count ; j++)
+ {
+ if (! j->tree->tree_number && ! j->tree_buff)
+ {
+ for (k=0 ; k < 256 ; k++)
+ count.counts[k]=i->counts[k]+j->counts[k];
+ if (calc_packed_length(&count,1) <=
+ i->tree->bytes_packed + j->tree->bytes_packed+
+ i->tree->tree_pack_length+j->tree->tree_pack_length+
+ ALLOWED_JOIN_DIFF)
+ {
+ memcpy_fixed((uchar*) i->counts,(uchar*) count.counts,
+ sizeof(count.counts[0])*256);
+ my_free((uchar*) j->tree->element_buffer,MYF(0));
+ j->tree->element_buffer=0;
+ j->tree=i->tree;
+ bmove((uchar*) i->counts,(uchar*) count.counts,
+ sizeof(count.counts[0])*256);
+ if (make_huff_tree(i->tree,i))
+ return (uint) -1;
+ }
+ }
+ }
+ }
+ }
+ DBUG_PRINT("info", ("Original trees: %d After join: %d",
+ trees, tree_number));
+ if (verbose)
+ VOID(printf("Original trees: %d After join: %d\n", trees, tree_number));
+ return tree_number; /* Return trees left */
+}
+
+
+/*
+ Fill in huff_tree encode tables.
+
+ SYNOPSIS
+ make_huff_decode_table()
+ huff_tree An array of HUFF_TREE which are to be encoded.
+ trees The number of HUFF_TREE in the array.
+
+ RETURN
+ 0 success
+ != 0 error
+*/
+
+static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees)
+{
+ uint elements;
+ for ( ; trees-- ; huff_tree++)
+ {
+ if (huff_tree->tree_number > 0)
+ {
+ elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256;
+ if (!(huff_tree->code =
+ (ulonglong*) my_malloc(elements*
+ (sizeof(ulonglong) + sizeof(uchar)),
+ MYF(MY_WME | MY_ZEROFILL))))
+ return 1;
+ huff_tree->code_len=(uchar*) (huff_tree->code+elements);
+ make_traverse_code_tree(huff_tree, huff_tree->root,
+ 8 * sizeof(ulonglong), LL(0));
+ }
+ }
+ return 0;
+}
+
+
+static void make_traverse_code_tree(HUFF_TREE *huff_tree,
+ HUFF_ELEMENT *element,
+ uint size, ulonglong code)
+{
+ uint chr;
+ if (!element->a.leaf.null)
+ {
+ chr=element->a.leaf.element_nr;
+ huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size);
+ huff_tree->code[chr]= (code >> size);
+ if (huff_tree->height < 8 * sizeof(ulonglong) - size)
+ huff_tree->height= 8 * sizeof(ulonglong) - size;
+ }
+ else
+ {
+ size--;
+ make_traverse_code_tree(huff_tree,element->a.nod.left,size,code);
+ make_traverse_code_tree(huff_tree, element->a.nod.right, size,
+ code + (((ulonglong) 1) << size));
+ }
+ return;
+}
+
+
+/*
+ Convert a value into binary digits.
+
+ SYNOPSIS
+ bindigits()
+ value The value.
+ length The number of low order bits to convert.
+
+ NOTE
+ The result string is in static storage. It is reused on every call.
+ So you cannot use it twice in one expression.
+
+ RETURN
+ A pointer to a static NUL-terminated string.
+ */
+
+static char *bindigits(ulonglong value, uint bits)
+{
+ static char digits[72];
+ char *ptr= digits;
+ uint idx= bits;
+
+ DBUG_ASSERT(idx < sizeof(digits));
+ while (idx)
+ *(ptr++)= '0' + ((char) (value >> (--idx)) & (char) 1);
+ *ptr= '\0';
+ return digits;
+}
+
+
+/*
+ Convert a value into hexadecimal digits.
+
+ SYNOPSIS
+ hexdigits()
+ value The value.
+
+ NOTE
+ The result string is in static storage. It is reused on every call.
+ So you cannot use it twice in one expression.
+
+ RETURN
+ A pointer to a static NUL-terminated string.
+ */
+
+static char *hexdigits(ulonglong value)
+{
+ static char digits[20];
+ char *ptr= digits;
+ uint idx= 2 * sizeof(value); /* Two hex digits per byte. */
+
+ DBUG_ASSERT(idx < sizeof(digits));
+ while (idx)
+ {
+ if ((*(ptr++)= '0' + ((char) (value >> (4 * (--idx))) & (char) 0xf)) > '9')
+ *(ptr - 1)+= 'a' - '9' - 1;
+ }
+ *ptr= '\0';
+ return digits;
+}
+
+
+ /* Write header to new packed data file */
+
+static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees,
+ my_off_t tot_elements,my_off_t filelength)
+{
+ uchar *buff= (uchar*) file_buffer.pos;
+
+ bzero(buff,HEAD_LENGTH);
+ memcpy_fixed(buff,maria_pack_file_magic,4);
+ int4store(buff+4,head_length);
+ int4store(buff+8, mrg->min_pack_length);
+ int4store(buff+12,mrg->max_pack_length);
+ int4store(buff+16,tot_elements);
+ int4store(buff+20,intervall_length);
+ int2store(buff+24,trees);
+ buff[26]=(char) mrg->ref_length;
+ /* Save record pointer length */
+ buff[27]= (uchar) maria_get_pointer_length((ulonglong) filelength,2);
+ if (test_only)
+ return 0;
+ VOID(my_seek(file_buffer.file,0L,MY_SEEK_SET,MYF(0)));
+ return my_write(file_buffer.file,(const uchar *) file_buffer.pos,HEAD_LENGTH,
+ MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
+}
+
+ /* Write fieldinfo to new packed file */
+
+static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees)
+{
+ reg1 uint i;
+ uint huff_tree_bits;
+ huff_tree_bits=max_bit(trees ? trees-1 : 0);
+
+ DBUG_PRINT("info", (" "));
+ DBUG_PRINT("info", ("column types:"));
+ DBUG_PRINT("info", ("FIELD_NORMAL 0"));
+ DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE 1"));
+ DBUG_PRINT("info", ("FIELD_SKIP_PRESPACE 2"));
+ DBUG_PRINT("info", ("FIELD_SKIP_ZERO 3"));
+ DBUG_PRINT("info", ("FIELD_BLOB 4"));
+ DBUG_PRINT("info", ("FIELD_CONSTANT 5"));
+ DBUG_PRINT("info", ("FIELD_INTERVALL 6"));
+ DBUG_PRINT("info", ("FIELD_ZERO 7"));
+ DBUG_PRINT("info", ("FIELD_VARCHAR 8"));
+ DBUG_PRINT("info", ("FIELD_CHECK 9"));
+ DBUG_PRINT("info", (" "));
+ DBUG_PRINT("info", ("pack type as a set of flags:"));
+ DBUG_PRINT("info", ("PACK_TYPE_SELECTED 1"));
+ DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS 2"));
+ DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL 4"));
+ DBUG_PRINT("info", (" "));
+ if (verbose >= 2)
+ {
+ VOID(printf("\n"));
+ VOID(printf("column types:\n"));
+ VOID(printf("FIELD_NORMAL 0\n"));
+ VOID(printf("FIELD_SKIP_ENDSPACE 1\n"));
+ VOID(printf("FIELD_SKIP_PRESPACE 2\n"));
+ VOID(printf("FIELD_SKIP_ZERO 3\n"));
+ VOID(printf("FIELD_BLOB 4\n"));
+ VOID(printf("FIELD_CONSTANT 5\n"));
+ VOID(printf("FIELD_INTERVALL 6\n"));
+ VOID(printf("FIELD_ZERO 7\n"));
+ VOID(printf("FIELD_VARCHAR 8\n"));
+ VOID(printf("FIELD_CHECK 9\n"));
+ VOID(printf("\n"));
+ VOID(printf("pack type as a set of flags:\n"));
+ VOID(printf("PACK_TYPE_SELECTED 1\n"));
+ VOID(printf("PACK_TYPE_SPACE_FIELDS 2\n"));
+ VOID(printf("PACK_TYPE_ZERO_FILL 4\n"));
+ VOID(printf("\n"));
+ }
+ for (i=0 ; i++ < fields ; counts++)
+ {
+ write_bits((ulonglong) (int) counts->field_type, 5);
+ write_bits(counts->pack_type,6);
+ if (counts->pack_type & PACK_TYPE_ZERO_FILL)
+ write_bits(counts->max_zero_fill,5);
+ else
+ write_bits(counts->length_bits,5);
+ write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits);
+ DBUG_PRINT("info", ("column: %3u type: %2u pack: %2u zero: %4u "
+ "lbits: %2u tree: %2u length: %4u",
+ i , counts->field_type, counts->pack_type,
+ counts->max_zero_fill, counts->length_bits,
+ counts->tree->tree_number, counts->field_length));
+ if (verbose >= 2)
+ VOID(printf("column: %3u type: %2u pack: %2u zero: %4u lbits: %2u "
+ "tree: %2u length: %4u\n", i , counts->field_type,
+ counts->pack_type, counts->max_zero_fill, counts->length_bits,
+ counts->tree->tree_number, counts->field_length));
+ }
+ flush_bits();
+ return;
+}
+
+ /* Write all huff_trees to new datafile. Return tot count of
+ elements in all trees
+ Returns 0 on error */
+
+static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
+{
+ uint i,int_length;
+ uint tree_no;
+ uint codes;
+ uint errors= 0;
+ uint *packed_tree,*offset,length;
+ my_off_t elements;
+
+ /* Find the highest number of elements in the trees. */
+ for (i=length=0 ; i < trees ; i++)
+ if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length)
+ length=huff_tree[i].elements;
+ /*
+ Allocate a buffer for packing a decode tree. Two numbers per element
+ (left child and right child).
+ */
+ if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2)))
+ {
+ my_error(EE_OUTOFMEMORY,MYF(ME_BELL),sizeof(uint)*length*2);
+ return 0;
+ }
+
+ DBUG_PRINT("info", (" "));
+ if (verbose >= 2)
+ VOID(printf("\n"));
+ tree_no= 0;
+ intervall_length=0;
+ for (elements=0; trees-- ; huff_tree++)
+ {
+ /* Skip columns that have been joined with other columns. */
+ if (huff_tree->tree_number == 0)
+ continue; /* Deleted tree */
+ tree_no++;
+ DBUG_PRINT("info", (" "));
+ if (verbose >= 3)
+ VOID(printf("\n"));
+ /* Count the total number of elements (byte codes or column values). */
+ elements+=huff_tree->elements;
+ huff_tree->max_offset=2;
+ /* Build a tree of offsets and codes for decoding in 'packed_tree'. */
+ if (huff_tree->elements <= 1)
+ offset=packed_tree;
+ else
+ offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree);
+
+ /* This should be the same as 'length' above. */
+ huff_tree->offset_bits=max_bit(huff_tree->max_offset);
+
+ /*
+ Since we check this during collecting the distinct column values,
+ this should never happen.
+ */
+ if (huff_tree->max_offset >= IS_OFFSET)
+ { /* This should be impossible */
+ VOID(fprintf(stderr, "Tree offset got too big: %d, aborted\n",
+ huff_tree->max_offset));
+ my_afree((uchar*) packed_tree);
+ return 0;
+ }
+
+ DBUG_PRINT("info", ("pos: %lu elements: %u tree-elements: %lu "
+ "char_bits: %u\n",
+ (ulong) (file_buffer.pos - file_buffer.buffer),
+ huff_tree->elements, (ulong) (offset - packed_tree),
+ huff_tree->char_bits));
+ if (!huff_tree->counts->tree_buff)
+ {
+ /* We do a uchar compression on this column. Mark with bit 0. */
+ write_bits(0,1);
+ write_bits(huff_tree->min_chr,8);
+ write_bits(huff_tree->elements,9);
+ write_bits(huff_tree->char_bits,5);
+ write_bits(huff_tree->offset_bits,5);
+ int_length=0;
+ }
+ else
+ {
+ int_length=(uint) (huff_tree->counts->tree_pos -
+ huff_tree->counts->tree_buff);
+ /* We have distinct column values for this column. Mark with bit 1. */
+ write_bits(1,1);
+ write_bits(huff_tree->elements,15);
+ write_bits(int_length,16);
+ write_bits(huff_tree->char_bits,5);
+ write_bits(huff_tree->offset_bits,5);
+ intervall_length+=int_length;
+ }
+ DBUG_PRINT("info", ("tree: %2u elements: %4u char_bits: %2u "
+ "offset_bits: %2u %s: %5u codelen: %2u",
+ tree_no, huff_tree->elements, huff_tree->char_bits,
+ huff_tree->offset_bits, huff_tree->counts->tree_buff ?
+ "bufflen" : "min_chr", huff_tree->counts->tree_buff ?
+ int_length : huff_tree->min_chr, huff_tree->height));
+ if (verbose >= 2)
+ VOID(printf("tree: %2u elements: %4u char_bits: %2u offset_bits: %2u "
+ "%s: %5u codelen: %2u\n", tree_no, huff_tree->elements,
+ huff_tree->char_bits, huff_tree->offset_bits,
+ huff_tree->counts->tree_buff ? "bufflen" : "min_chr",
+ huff_tree->counts->tree_buff ? int_length :
+ huff_tree->min_chr, huff_tree->height));
+
+ /* Check that the code tree length matches the element count. */
+ length=(uint) (offset-packed_tree);
+ if (length != huff_tree->elements*2-2)
+ {
+ VOID(fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n",
+ length, huff_tree->elements * 2 - 2));
+ errors++;
+ break;
+ }
+
+ for (i=0 ; i < length ; i++)
+ {
+ if (packed_tree[i] & IS_OFFSET)
+ write_bits(packed_tree[i] - IS_OFFSET+ (1 << huff_tree->offset_bits),
+ huff_tree->offset_bits+1);
+ else
+ write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1);
+ DBUG_PRINT("info", ("tree[0x%04x]: %s0x%04x",
+ i, (packed_tree[i] & IS_OFFSET) ?
+ " -> " : "", (packed_tree[i] & IS_OFFSET) ?
+ packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
+ if (verbose >= 3)
+ VOID(printf("tree[0x%04x]: %s0x%04x\n",
+ i, (packed_tree[i] & IS_OFFSET) ? " -> " : "",
+ (packed_tree[i] & IS_OFFSET) ?
+ packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
+ }
+ flush_bits();
+
+ /*
+ Display coding tables and check their correctness.
+ */
+ codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256;
+ for (i= 0; i < codes; i++)
+ {
+ ulonglong code;
+ uint bits;
+ uint len;
+ uint idx;
+
+ if (! (len= huff_tree->code_len[i]))
+ continue;
+ DBUG_PRINT("info", ("code[0x%04x]: 0x%s bits: %2u bin: %s", i,
+ hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
+ bindigits(huff_tree->code[i],
+ huff_tree->code_len[i])));
+ if (verbose >= 3)
+ VOID(printf("code[0x%04x]: 0x%s bits: %2u bin: %s\n", i,
+ hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
+ bindigits(huff_tree->code[i], huff_tree->code_len[i])));
+
+ /* Check that the encode table decodes correctly. */
+ code= 0;
+ bits= 0;
+ idx= 0;
+ DBUG_EXECUTE_IF("forcechkerr1", len--;);
+ DBUG_EXECUTE_IF("forcechkerr2", bits= 8 * sizeof(code););
+ DBUG_EXECUTE_IF("forcechkerr3", idx= length;);
+ for (;;)
+ {
+ if (! len)
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: code 0x%s with %u bits not found\n",
+ hexdigits(huff_tree->code[i]), huff_tree->code_len[i]));
+ errors++;
+ break;
+ }
+ code<<= 1;
+ code|= (huff_tree->code[i] >> (--len)) & 1;
+ bits++;
+ if (bits > 8 * sizeof(code))
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: Huffman code too long: %u/%u\n",
+ bits, (uint) (8 * sizeof(code))));
+ errors++;
+ break;
+ }
+ idx+= (uint) code & 1;
+ if (idx >= length)
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: illegal tree offset: %u/%u\n",
+ idx, length));
+ errors++;
+ break;
+ }
+ if (packed_tree[idx] & IS_OFFSET)
+ idx+= packed_tree[idx] & ~IS_OFFSET;
+ else
+ break; /* Hit a leaf. This contains the result value. */
+ }
+ if (errors)
+ break;
+
+ DBUG_EXECUTE_IF("forcechkerr4", packed_tree[idx]++;);
+ if (packed_tree[idx] != i)
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: decoded value 0x%04x should be: 0x%04x\n",
+ packed_tree[idx], i));
+ errors++;
+ break;
+ }
+ } /*end for (codes)*/
+ if (errors)
+ break;
+
+ /* Write column values in case of distinct column value compression. */
+ if (huff_tree->counts->tree_buff)
+ {
+ for (i=0 ; i < int_length ; i++)
+ {
+ write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8);
+ DBUG_PRINT("info", ("column_values[0x%04x]: 0x%02x",
+ i, (uchar) huff_tree->counts->tree_buff[i]));
+ if (verbose >= 3)
+ VOID(printf("column_values[0x%04x]: 0x%02x\n",
+ i, (uchar) huff_tree->counts->tree_buff[i]));
+ }
+ }
+ flush_bits();
+ }
+ DBUG_PRINT("info", (" "));
+ if (verbose >= 2)
+ VOID(printf("\n"));
+ my_afree((uchar*) packed_tree);
+ if (errors)
+ {
+ VOID(fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n"));
+ return 0;
+ }
+ return elements;
+}
+
+
+static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element,
+ uint *offset)
+{
+ uint *prev_offset;
+
+ prev_offset= offset;
+ /*
+ 'a.leaf.null' takes the same place as 'a.nod.left'. If this is null,
+ then there is no left child and, hence no right child either. This
+ is a property of a binary tree. An element is either a node with two
+ childs, or a leaf without childs.
+
+ The current element is always a node with two childs. Go left first.
+ */
+ if (!element->a.nod.left->a.leaf.null)
+ {
+ /* Store the uchar code or the index of the column value. */
+ prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr;
+ offset+=2;
+ }
+ else
+ {
+ /*
+ Recursively traverse the tree to the left. Mark it as an offset to
+ another tree node (in contrast to a uchar code or column value index).
+ */
+ prev_offset[0]= IS_OFFSET+2;
+ offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2);
+ }
+
+ /* Now, check the right child. */
+ if (!element->a.nod.right->a.leaf.null)
+ {
+ /* Store the uchar code or the index of the column value. */
+ prev_offset[1]=element->a.nod.right->a.leaf.element_nr;
+ return offset;
+ }
+ else
+ {
+ /*
+ Recursively traverse the tree to the right. Mark it as an offset to
+ another tree node (in contrast to a uchar code or column value index).
+ */
+ uint temp=(uint) (offset-prev_offset-1);
+ prev_offset[1]= IS_OFFSET+ temp;
+ if (huff_tree->max_offset < temp)
+ huff_tree->max_offset = temp;
+ return make_offset_code_tree(huff_tree,element->a.nod.right,offset);
+ }
+}
+
+ /* Get number of bits neaded to represent value */
+
+static uint max_bit(register uint value)
+{
+ reg2 uint power=1;
+
+ while ((value>>=1))
+ power++;
+ return (power);
+}
+
+
+static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
+{
+ int error;
+ uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length;
+ uint intervall,field_length,max_pack_length,pack_blob_length, null_bytes;
+ my_off_t record_count;
+ char llbuf[32];
+ ulong length,pack_length;
+ uchar *record,*pos,*end_pos,*record_pos,*start_pos;
+ HUFF_COUNTS *count,*end_count;
+ HUFF_TREE *tree;
+ MARIA_HA *isam_file=mrg->file[0];
+ uint pack_version= (uint) isam_file->s->pack.version;
+ DBUG_ENTER("compress_isam_file");
+
+ /* Allocate a buffer for the records (excluding blobs). */
+ if (!(record=(uchar*) my_alloca(isam_file->s->base.reclength)))
+ return -1;
+
+ end_count=huff_counts+isam_file->s->base.fields;
+ min_record_length= (uint) ~0;
+ max_record_length=0;
+ null_bytes= isam_file->s->base.null_bytes;
+
+ /*
+ Calculate the maximum number of bits required to pack the records.
+ Remember to understand 'max_zero_fill' as 'min_zero_fill'.
+ The tree height determines the maximum number of bits per value.
+ Some fields skip leading or trailing spaces or zeroes. The skipped
+ number of bytes is encoded by 'length_bits' bits.
+ Empty blobs and varchar are encoded with a single 1 bit. Other blobs
+ and varchar get a leading 0 bit.
+ */
+ max_calc_length= null_bytes;
+ for (i= 0 ; i < isam_file->s->base.fields ; i++)
+ {
+ if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL))
+ huff_counts[i].max_zero_fill=0;
+ if (huff_counts[i].field_type == FIELD_CONSTANT ||
+ huff_counts[i].field_type == FIELD_ZERO ||
+ huff_counts[i].field_type == FIELD_CHECK)
+ continue;
+ if (huff_counts[i].field_type == FIELD_INTERVALL)
+ max_calc_length+=huff_counts[i].tree->height;
+ else if (huff_counts[i].field_type == FIELD_BLOB ||
+ huff_counts[i].field_type == FIELD_VARCHAR)
+ max_calc_length+=huff_counts[i].tree->height*huff_counts[i].max_length + huff_counts[i].length_bits +1;
+ else
+ max_calc_length+=
+ (huff_counts[i].field_length - huff_counts[i].max_zero_fill)*
+ huff_counts[i].tree->height+huff_counts[i].length_bits;
+ }
+ max_calc_length= (max_calc_length + 7) / 8;
+ pack_ref_length= _ma_calc_pack_length(pack_version, max_calc_length);
+ record_count=0;
+ /* 'max_blob_length' is the max length of all blobs of a record. */
+ pack_blob_length= isam_file->s->base.blobs ?
+ _ma_calc_pack_length(pack_version, mrg->max_blob_length) : 0;
+ max_pack_length=pack_ref_length+pack_blob_length;
+
+ DBUG_PRINT("fields", ("==="));
+ mrg_reset(mrg);
+ while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
+ {
+ ulong tot_blob_length=0;
+ if (! error)
+ {
+ if (flush_buffer((ulong) max_calc_length + (ulong) max_pack_length))
+ break;
+ record_pos= (uchar*) file_buffer.pos;
+ file_buffer.pos+= max_pack_length;
+ if (null_bytes)
+ {
+ /* Copy null bits 'as is' */
+ memcpy(file_buffer.pos, record, null_bytes);
+ file_buffer.pos+= null_bytes;
+ }
+ for (start_pos=record+null_bytes, count= huff_counts;
+ count < end_count ;
+ count++)
+ {
+ end_pos=start_pos+(field_length=count->field_length);
+ tree=count->tree;
+
+ DBUG_PRINT("fields", ("column: %3lu type: %2u pack: %2u zero: %4u "
+ "lbits: %2u tree: %2u length: %4u",
+ (ulong) (count - huff_counts + 1),
+ count->field_type,
+ count->pack_type, count->max_zero_fill,
+ count->length_bits, count->tree->tree_number,
+ count->field_length));
+
+ /* Check if the column contains spaces only. */
+ if (count->pack_type & PACK_TYPE_SPACE_FIELDS)
+ {
+ for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ;
+ if (pos == end_pos)
+ {
+ DBUG_PRINT("fields",
+ ("PACK_TYPE_SPACE_FIELDS spaces only, bits: 1"));
+ DBUG_PRINT("fields", ("---"));
+ write_bits(1,1);
+ start_pos=end_pos;
+ continue;
+ }
+ DBUG_PRINT("fields",
+ ("PACK_TYPE_SPACE_FIELDS not only spaces, bits: 1"));
+ write_bits(0,1);
+ }
+ end_pos-=count->max_zero_fill;
+ field_length-=count->max_zero_fill;
+
+ switch (count->field_type) {
+ case FIELD_SKIP_ZERO:
+ if (!memcmp((uchar*) start_pos,zero_string,field_length))
+ {
+ DBUG_PRINT("fields", ("FIELD_SKIP_ZERO zeroes only, bits: 1"));
+ write_bits(1,1);
+ start_pos=end_pos;
+ break;
+ }
+ DBUG_PRINT("fields", ("FIELD_SKIP_ZERO not only zeroes, bits: 1"));
+ write_bits(0,1);
+ /* Fall through */
+ case FIELD_NORMAL:
+ DBUG_PRINT("fields", ("FIELD_NORMAL %lu bytes",
+ (ulong) (end_pos - start_pos)));
+ for ( ; start_pos < end_pos ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos])));
+ write_bits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos]);
+ }
+ break;
+ case FIELD_SKIP_ENDSPACE:
+ for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ;
+ length= (ulong) (end_pos - pos);
+ if (count->pack_type & PACK_TYPE_SELECTED)
+ {
+ if (length > count->min_space)
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE more than min_space, bits: 1"));
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
+ write_bits(1,1);
+ write_bits(length,count->length_bits);
+ }
+ else
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE not more than min_space, "
+ "bits: 1"));
+ write_bits(0,1);
+ pos=end_pos;
+ }
+ }
+ else
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
+ write_bits(length,count->length_bits);
+ }
+ /* Encode all significant bytes. */
+ DBUG_PRINT("fields", ("FIELD_SKIP_ENDSPACE %lu bytes",
+ (ulong) (pos - start_pos)));
+ for ( ; start_pos < pos ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos])));
+ write_bits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos]);
+ }
+ start_pos=end_pos;
+ break;
+ case FIELD_SKIP_PRESPACE:
+ for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ;
+ length= (ulong) (pos - start_pos);
+ if (count->pack_type & PACK_TYPE_SELECTED)
+ {
+ if (length > count->min_space)
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE more than min_space, bits: 1"));
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
+ write_bits(1,1);
+ write_bits(length,count->length_bits);
+ }
+ else
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE not more than min_space, "
+ "bits: 1"));
+ pos=start_pos;
+ write_bits(0,1);
+ }
+ }
+ else
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
+ write_bits(length,count->length_bits);
+ }
+ /* Encode all significant bytes. */
+ DBUG_PRINT("fields", ("FIELD_SKIP_PRESPACE %lu bytes",
+ (ulong) (end_pos - start_pos)));
+ for (start_pos=pos ; start_pos < end_pos ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos])));
+ write_bits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos]);
+ }
+ break;
+ case FIELD_CONSTANT:
+ case FIELD_ZERO:
+ case FIELD_CHECK:
+ DBUG_PRINT("fields", ("FIELD_CONSTANT/ZERO/CHECK"));
+ start_pos=end_pos;
+ break;
+ case FIELD_INTERVALL:
+ global_count=count;
+ pos=(uchar*) tree_search(&count->int_tree, start_pos,
+ count->int_tree.custom_arg);
+ intervall=(uint) (pos - count->tree_buff)/field_length;
+ DBUG_PRINT("fields", ("FIELD_INTERVALL"));
+ DBUG_PRINT("fields", ("index: %4u code: 0x%s bits: %2u",
+ intervall, hexdigits(tree->code[intervall]),
+ (uint) tree->code_len[intervall]));
+ write_bits(tree->code[intervall],(uint) tree->code_len[intervall]);
+ start_pos=end_pos;
+ break;
+ case FIELD_BLOB:
+ {
+ ulong blob_length= _ma_calc_blob_length(field_length-
+ portable_sizeof_char_ptr,
+ start_pos);
+ /* Empty blobs are encoded with a single 1 bit. */
+ if (!blob_length)
+ {
+ DBUG_PRINT("fields", ("FIELD_BLOB empty, bits: 1"));
+ write_bits(1,1);
+ }
+ else
+ {
+ uchar *blob,*blob_end;
+ DBUG_PRINT("fields", ("FIELD_BLOB not empty, bits: 1"));
+ write_bits(0,1);
+ /* Write the blob length. */
+ DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u",
+ blob_length, count->length_bits));
+ write_bits(blob_length,count->length_bits);
+ memcpy_fixed(&blob,end_pos-portable_sizeof_char_ptr,
+ sizeof(char*));
+ blob_end=blob+blob_length;
+ /* Encode the blob bytes. */
+ for ( ; blob < blob_end ; blob++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *blob, hexdigits(tree->code[(uchar) *blob]),
+ (uint) tree->code_len[(uchar) *blob],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint)tree->code_len[(uchar) *start_pos])));
+ write_bits(tree->code[(uchar) *blob],
+ (uint) tree->code_len[(uchar) *blob]);
+ }
+ tot_blob_length+=blob_length;
+ }
+ start_pos= end_pos;
+ break;
+ }
+ case FIELD_VARCHAR:
+ {
+ uint var_pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
+ ulong col_length= (var_pack_length == 1 ?
+ (uint) *(uchar*) start_pos :
+ uint2korr(start_pos));
+ /* Empty varchar are encoded with a single 1 bit. */
+ if (!col_length)
+ {
+ DBUG_PRINT("fields", ("FIELD_VARCHAR empty, bits: 1"));
+ write_bits(1,1); /* Empty varchar */
+ }
+ else
+ {
+ uchar *end= start_pos + var_pack_length + col_length;
+ DBUG_PRINT("fields", ("FIELD_VARCHAR not empty, bits: 1"));
+ write_bits(0,1);
+ /* Write the varchar length. */
+ DBUG_PRINT("fields", ("FIELD_VARCHAR %lu bytes, bits: %2u",
+ col_length, count->length_bits));
+ write_bits(col_length,count->length_bits);
+ /* Encode the varchar bytes. */
+ for (start_pos+= var_pack_length ; start_pos < end ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint)tree->code_len[(uchar) *start_pos])));
+ write_bits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos]);
+ }
+ }
+ start_pos= end_pos;
+ break;
+ }
+ case FIELD_LAST:
+ case FIELD_enum_val_count:
+ abort(); /* Impossible */
+ }
+ start_pos+=count->max_zero_fill;
+ DBUG_PRINT("fields", ("---"));
+ }
+ flush_bits();
+ length=(ulong) ((uchar*) file_buffer.pos - record_pos) - max_pack_length;
+ pack_length= _ma_save_pack_length(pack_version, record_pos, length);
+ if (pack_blob_length)
+ pack_length+= _ma_save_pack_length(pack_version,
+ record_pos + pack_length,
+ tot_blob_length);
+ DBUG_PRINT("fields", ("record: %lu length: %lu blob-length: %lu "
+ "length-bytes: %lu", (ulong) record_count, length,
+ tot_blob_length, pack_length));
+ DBUG_PRINT("fields", ("==="));
+
+ /* Correct file buffer if the header was smaller */
+ if (pack_length != max_pack_length)
+ {
+ bmove(record_pos+pack_length,record_pos+max_pack_length,length);
+ file_buffer.pos-= (max_pack_length-pack_length);
+ }
+ if (length < (ulong) min_record_length)
+ min_record_length=(uint) length;
+ if (length > (ulong) max_record_length)
+ max_record_length=(uint) length;
+ record_count++;
+ if (write_loop && record_count % WRITE_COUNT == 0)
+ {
+ VOID(printf("%lu\r", (ulong) record_count));
+ VOID(fflush(stdout));
+ }
+ }
+ else if (error != HA_ERR_RECORD_DELETED)
+ break;
+ }
+ if (error == HA_ERR_END_OF_FILE)
+ error=0;
+ else
+ {
+ VOID(fprintf(stderr, "%s: Got error %d reading records\n",
+ my_progname, error));
+ }
+ if (verbose >= 2)
+ VOID(printf("wrote %s records.\n", llstr((longlong) record_count, llbuf)));
+
+ my_afree((uchar*) record);
+ mrg->ref_length=max_pack_length;
+ mrg->min_pack_length=max_record_length ? min_record_length : 0;
+ mrg->max_pack_length=max_record_length;
+ DBUG_RETURN(error || error_on_write || flush_buffer(~(ulong) 0));
+}
+
+
+static char *make_new_name(char *new_name, char *old_name)
+{
+ return fn_format(new_name,old_name,"",DATA_TMP_EXT,2+4);
+}
+
+static char *make_old_name(char *new_name, char *old_name)
+{
+ return fn_format(new_name,old_name,"",OLD_EXT,2+4);
+}
+
+ /* rutines for bit writing buffer */
+
+static void init_file_buffer(File file, pbool read_buffer)
+{
+ file_buffer.file=file;
+ file_buffer.buffer= (uchar*) my_malloc(ALIGN_SIZE(RECORD_CACHE_SIZE),
+ MYF(MY_WME));
+ file_buffer.end=file_buffer.buffer+ALIGN_SIZE(RECORD_CACHE_SIZE)-8;
+ file_buffer.pos_in_file=0;
+ error_on_write=0;
+ if (read_buffer)
+ {
+
+ file_buffer.pos=file_buffer.end;
+ file_buffer.bits=0;
+ }
+ else
+ {
+ file_buffer.pos=file_buffer.buffer;
+ file_buffer.bits=BITS_SAVED;
+ }
+ file_buffer.bitbucket= 0;
+}
+
+
+static int flush_buffer(ulong neaded_length)
+{
+ ulong length;
+
+ /*
+ file_buffer.end is 8 bytes lower than the real end of the buffer.
+ This is done so that the end-of-buffer condition does not need to be
+ checked for every uchar (see write_bits()). Consequently,
+ file_buffer.pos can become greater than file_buffer.end. The
+ algorithms in the other functions ensure that there will never be
+ more than 8 bytes written to the buffer without an end-of-buffer
+ check. So the buffer cannot be overrun. But we need to check for the
+ near-to-buffer-end condition to avoid a negative result, which is
+ casted to unsigned and thus becomes giant.
+ */
+ if ((file_buffer.pos < file_buffer.end) &&
+ ((ulong) (file_buffer.end - file_buffer.pos) > neaded_length))
+ return 0;
+ length=(ulong) (file_buffer.pos-file_buffer.buffer);
+ file_buffer.pos=file_buffer.buffer;
+ file_buffer.pos_in_file+=length;
+ if (test_only)
+ return 0;
+ if (error_on_write|| my_write(file_buffer.file,
+ (const uchar*) file_buffer.buffer,
+ length,
+ MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
+ {
+ error_on_write=1;
+ return 1;
+ }
+
+ if (neaded_length != ~(ulong) 0 &&
+ (ulong) (file_buffer.end-file_buffer.buffer) < neaded_length)
+ {
+ char *tmp;
+ neaded_length+=256; /* some margin */
+ tmp= my_realloc((char*) file_buffer.buffer, neaded_length,MYF(MY_WME));
+ if (!tmp)
+ return 1;
+ file_buffer.pos= ((uchar*) tmp +
+ (ulong) (file_buffer.pos - file_buffer.buffer));
+ file_buffer.buffer= (uchar*) tmp;
+ file_buffer.end= (uchar*) (tmp+neaded_length-8);
+ }
+ return 0;
+}
+
+
+static void end_file_buffer(void)
+{
+ my_free((uchar*) file_buffer.buffer,MYF(0));
+}
+
+ /* output `bits` low bits of `value' */
+
+static void write_bits(register ulonglong value, register uint bits)
+{
+ DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) ||
+ (bits == 8 * sizeof(value)));
+
+ if ((file_buffer.bits-= (int) bits) >= 0)
+ {
+ file_buffer.bitbucket|= value << file_buffer.bits;
+ }
+ else
+ {
+ reg3 ulonglong bit_buffer;
+ bits= (uint) -file_buffer.bits;
+ bit_buffer= (file_buffer.bitbucket |
+ ((bits != 8 * sizeof(value)) ? (value >> bits) : 0));
+#if BITS_SAVED == 64
+ *file_buffer.pos++= (uchar) (bit_buffer >> 56);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 48);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 40);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 32);
+#endif
+ *file_buffer.pos++= (uchar) (bit_buffer >> 24);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 16);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 8);
+ *file_buffer.pos++= (uchar) (bit_buffer);
+
+ if (bits != 8 * sizeof(value))
+ value&= (((ulonglong) 1) << bits) - 1;
+ if (file_buffer.pos >= file_buffer.end)
+ VOID(flush_buffer(~ (ulong) 0));
+ file_buffer.bits=(int) (BITS_SAVED - bits);
+ file_buffer.bitbucket= value << (BITS_SAVED - bits);
+ }
+ return;
+}
+
+ /* Flush bits in bit_buffer to buffer */
+
+static void flush_bits(void)
+{
+ int bits;
+ ulonglong bit_buffer;
+
+ bits= file_buffer.bits & ~7;
+ bit_buffer= file_buffer.bitbucket >> bits;
+ bits= BITS_SAVED - bits;
+ while (bits > 0)
+ {
+ bits-= 8;
+ *file_buffer.pos++= (uchar) (bit_buffer >> bits);
+ }
+ if (file_buffer.pos >= file_buffer.end)
+ VOID(flush_buffer(~ (ulong) 0));
+ file_buffer.bits= BITS_SAVED;
+ file_buffer.bitbucket= 0;
+}
+
+
+/****************************************************************************
+** functions to handle the joined files
+****************************************************************************/
+
+static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,
+ my_off_t new_length,
+ ha_checksum crc)
+{
+ MARIA_SHARE *share=isam_file->s;
+ uint options=mi_uint2korr(share->state.header.options);
+ uint key;
+ DBUG_ENTER("save_state");
+
+ options|= HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA;
+ mi_int2store(share->state.header.options,options);
+ /* Save the original file type of we have to undo the packing later */
+ share->state.header.org_data_file_type= share->state.header.data_file_type;
+ share->state.header.data_file_type= COMPRESSED_RECORD;
+
+ share->state.state.data_file_length=new_length;
+ share->state.state.del=0;
+ share->state.state.empty=0;
+ share->state.dellink= HA_OFFSET_ERROR;
+ share->state.split=(ha_rows) mrg->records;
+ share->state.version=(ulong) time((time_t*) 0);
+ if (share->base.born_transactional)
+ share->state.create_rename_lsn= share->state.is_of_horizon=
+ LSN_REPAIRED_BY_MARIA_CHK;
+ if (! maria_is_all_keys_active(share->state.key_map, share->base.keys))
+ {
+ /*
+ Some indexes are disabled, cannot use current key_file_length value
+ as an estimate of upper bound of index file size. Use packed data file
+ size instead.
+ */
+ share->state.state.key_file_length= new_length;
+ }
+ /*
+ If there are no disabled indexes, keep key_file_length value from
+ original file so "maria_chk -rq" can use this value (this is necessary
+ because index size cannot be easily calculated for fulltext keys)
+ */
+ maria_clear_all_keys_active(share->state.key_map);
+ for (key=0 ; key < share->base.keys ; key++)
+ share->state.key_root[key]= HA_OFFSET_ERROR;
+ share->state.key_del= HA_OFFSET_ERROR;
+ isam_file->state->checksum=crc; /* Save crc here */
+ share->changed=1; /* Force write of header */
+ share->state.open_count=0;
+ share->global_changed=0;
+ VOID(my_chsize(share->kfile.file, share->base.keystart, 0, MYF(0)));
+ if (share->base.keys)
+ isamchk_neaded=1;
+ DBUG_RETURN(_ma_state_info_write_sub(share->kfile.file,
+ &share->state, (1 + 2)));
+}
+
+
+static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length,
+ ha_checksum crc)
+{
+ MARIA_STATE_INFO state;
+ MARIA_HA *isam_file=mrg->file[0];
+ uint options;
+ DBUG_ENTER("save_state_mrg");
+
+ state= isam_file->s->state;
+ options= (mi_uint2korr(state.header.options) | HA_OPTION_COMPRESS_RECORD |
+ HA_OPTION_READ_ONLY_DATA);
+ mi_int2store(state.header.options,options);
+ state.state.data_file_length=new_length;
+ state.state.del=0;
+ state.state.empty=0;
+ state.state.records=state.split=(ha_rows) mrg->records;
+ /* See comment above in save_state about key_file_length handling. */
+ if (mrg->src_file_has_indexes_disabled)
+ {
+ isam_file->s->state.state.key_file_length=
+ max(isam_file->s->state.state.key_file_length, new_length);
+ }
+ state.dellink= HA_OFFSET_ERROR;
+ state.version=(ulong) time((time_t*) 0);
+ maria_clear_all_keys_active(state.key_map);
+ state.state.checksum=crc;
+ if (isam_file->s->base.keys)
+ isamchk_neaded=1;
+ state.changed=STATE_CHANGED | STATE_NOT_ANALYZED; /* Force check of table */
+ DBUG_RETURN (_ma_state_info_write_sub(file,&state,1+2));
+}
+
+
+/* reset for mrg_rrnd */
+
+static void mrg_reset(PACK_MRG_INFO *mrg)
+{
+ if (mrg->current)
+ {
+ maria_extra(*mrg->current, HA_EXTRA_NO_CACHE, 0);
+ mrg->current=0;
+ }
+}
+
+static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf)
+{
+ int error;
+ MARIA_HA *isam_info;
+ my_off_t filepos;
+
+ if (!info->current)
+ {
+ isam_info= *(info->current=info->file);
+ info->end=info->current+info->count;
+ maria_reset(isam_info);
+ maria_extra(isam_info, HA_EXTRA_CACHE, 0);
+ if ((error= maria_scan_init(isam_info)))
+ return(error);
+ }
+ else
+ isam_info= *info->current;
+
+ for (;;)
+ {
+ if (!(error= maria_scan(isam_info, buf)) ||
+ error != HA_ERR_END_OF_FILE)
+ return (error);
+ maria_scan_end(isam_info);
+ maria_extra(isam_info,HA_EXTRA_NO_CACHE, 0);
+ if (info->current+1 == info->end)
+ return(HA_ERR_END_OF_FILE);
+ info->current++;
+ isam_info= *info->current;
+ filepos=isam_info->s->pack.header_length;
+ maria_reset(isam_info);
+ maria_extra(isam_info,HA_EXTRA_CACHE, 0);
+ if ((error= maria_scan_init(isam_info)))
+ return(error);
+ }
+}
+
+
+static int mrg_close(PACK_MRG_INFO *mrg)
+{
+ uint i;
+ int error=0;
+ DBUG_ENTER("mrg_close");
+
+ for (i=0 ; i < mrg->count ; i++)
+ error|=maria_close(mrg->file[i]);
+ if (mrg->free_file)
+ my_free((uchar*) mrg->file,MYF(0));
+ DBUG_RETURN(error);
+}
+
+
+#if !defined(DBUG_OFF)
+/*
+ Fake the counts to get big Huffman codes.
+
+ SYNOPSIS
+ fakebigcodes()
+ huff_counts A pointer to the counts array.
+ end_count A pointer past the counts array.
+
+ DESCRIPTION
+
+ Huffman coding works by removing the two least frequent values from
+ the list of values and add a new value with the sum of their
+ incidences in a loop until only one value is left. Every time a
+ value is reused for a new value, it gets one more bit for its
+ encoding. Hence, the least frequent values get the longest codes.
+
+ To get a maximum code length for a value, two of the values must
+ have an incidence of 1. As their sum is 2, the next infrequent value
+ must have at least an incidence of 2, then 4, 8, 16 and so on. This
+ means that one needs 2**n bytes (values) for a code length of n
+ bits. However, using more distinct values forces the use of longer
+ codes, or reaching the code length with less total bytes (values).
+
+ To get 64(32)-bit codes, I sort the counts by decreasing incidence.
+ I assign counts of 1 to the two most frequent values, a count of 2
+ for the next one, then 4, 8, and so on until 2**64-1(2**30-1). All
+ the remaining values get 1. That way every possible uchar has an
+ assigned code, though not all codes are used if not all uchar values
+ are present in the column.
+
+ This strategy would work with distinct column values too, but
+ requires that at least 64(32) values are present. To make things
+ easier here, I cancel all distinct column values and force byte
+ compression for all columns.
+
+ RETURN
+ void
+*/
+
+static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count)
+{
+ HUFF_COUNTS *count;
+ my_off_t *cur_count_p;
+ my_off_t *end_count_p;
+ my_off_t **cur_sort_p;
+ my_off_t **end_sort_p;
+ my_off_t *sort_counts[256];
+ my_off_t total;
+ DBUG_ENTER("fakebigcodes");
+
+ for (count= huff_counts; count < end_count; count++)
+ {
+ /*
+ Remove distinct column values.
+ */
+ if (huff_counts->tree_buff)
+ {
+ my_free((uchar*) huff_counts->tree_buff, MYF(0));
+ delete_tree(&huff_counts->int_tree);
+ huff_counts->tree_buff= NULL;
+ DBUG_PRINT("fakebigcodes", ("freed distinct column values"));
+ }
+
+ /*
+ Sort counts by decreasing incidence.
+ */
+ cur_count_p= count->counts;
+ end_count_p= cur_count_p + 256;
+ cur_sort_p= sort_counts;
+ while (cur_count_p < end_count_p)
+ *(cur_sort_p++)= cur_count_p++;
+ (void) qsort(sort_counts, 256, sizeof(my_off_t*), (qsort_cmp) fakecmp);
+
+ /*
+ Assign faked counts.
+ */
+ cur_sort_p= sort_counts;
+#if SIZEOF_LONG_LONG > 4
+ end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 1;
+#else
+ end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 2;
+#endif
+ /* Most frequent value gets a faked count of 1. */
+ **(cur_sort_p++)= 1;
+ total= 1;
+ while (cur_sort_p < end_sort_p)
+ {
+ **(cur_sort_p++)= total;
+ total<<= 1;
+ }
+ /* Set the last value. */
+ **(cur_sort_p++)= --total;
+ /*
+ Set the remaining counts.
+ */
+ end_sort_p= sort_counts + 256;
+ while (cur_sort_p < end_sort_p)
+ **(cur_sort_p++)= 1;
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Compare two counts for reverse sorting.
+
+ SYNOPSIS
+ fakecmp()
+ count1 One count.
+ count2 Another count.
+
+ RETURN
+ 1 count1 < count2
+ 0 count1 == count2
+ -1 count1 > count2
+*/
+
+static int fakecmp(my_off_t **count1, my_off_t **count2)
+{
+ return ((**count1 < **count2) ? 1 :
+ (**count1 > **count2) ? -1 : 0);
+}
+#endif
diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c
new file mode 100644
index 00000000000..ff7be5b533b
--- /dev/null
+++ b/storage/maria/maria_read_log.c
@@ -0,0 +1,268 @@
+/* Copyright (C) 2007 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+#include "ma_recovery.h"
+#include <my_getopt.h>
+
+#define LOG_FLAGS 0
+
+static const char *load_default_groups[]= { "maria_read_log",0 };
+static void get_options(int *argc,char * * *argv);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+const char *default_dbug_option= "d:t:i:O,\\maria_read_log.trace";
+#else
+const char *default_dbug_option= "d:t:i:o,/tmp/maria_read_log.trace";
+#endif
+#endif /* DBUG_OFF */
+static my_bool opt_display_only, opt_apply, opt_apply_undo, opt_silent;
+static my_bool opt_check;
+static const char *opt_tmpdir;
+static ulong opt_page_buffer_size;
+static ulonglong opt_start_from_lsn;
+static MY_TMPDIR maria_chk_tmpdir;
+
+
+int main(int argc, char **argv)
+{
+ LSN lsn;
+ char **default_argv;
+ uint warnings_count;
+ MY_INIT(argv[0]);
+
+ load_defaults("my", load_default_groups, &argc, &argv);
+ default_argv= argv;
+ get_options(&argc, &argv);
+
+ maria_data_root= ".";
+ maria_in_recovery= TRUE;
+
+ if (maria_init())
+ {
+ fprintf(stderr, "Can't init Maria engine (%d)\n", errno);
+ goto err;
+ }
+ /* we don't want to create a control file, it MUST exist */
+ if (ma_control_file_create_or_open())
+ {
+ fprintf(stderr, "Can't open control file (%d)\n", errno);
+ goto err;
+ }
+ if (last_logno == FILENO_IMPOSSIBLE)
+ {
+ fprintf(stderr, "Can't find any log\n");
+ goto err;
+ }
+ if (init_pagecache(maria_pagecache, opt_page_buffer_size, 0, 0,
+ TRANSLOG_PAGE_SIZE, MY_WME) == 0)
+ {
+ fprintf(stderr, "Got error in init_pagecache() (errno: %d)\n", errno);
+ goto err;
+ }
+ /*
+ If log handler does not find the "last_logno" log it will return error,
+ which is good.
+ But if it finds a log and this log was crashed, it will create a new log,
+ which is useless. TODO: start log handler in read-only mode.
+ */
+ if (init_pagecache(maria_log_pagecache,
+ TRANSLOG_PAGECACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE, MY_WME) == 0 ||
+ translog_init(maria_data_root, TRANSLOG_FILE_SIZE,
+ 0, 0, maria_log_pagecache, TRANSLOG_DEFAULT_FLAGS,
+ opt_display_only))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ goto err;
+ }
+
+ if (opt_display_only)
+ printf("You are using --display-only, NOTHING will be written to disk\n");
+
+ /* LSN could be also --start-from-lsn=# */
+ lsn= translog_first_lsn_in_log();
+ if (lsn == LSN_ERROR)
+ {
+ fprintf(stderr, "Opening transaction log failed\n");
+ goto end;
+ }
+ if (lsn == LSN_IMPOSSIBLE)
+ {
+ fprintf(stdout, "The transaction log is empty\n");
+ }
+ fprintf(stdout, "The transaction log starts from lsn (%lu,0x%lx)\n",
+ LSN_IN_PARTS(lsn));
+
+ if (opt_start_from_lsn)
+ {
+ if (opt_start_from_lsn < (ulonglong) lsn)
+ {
+ fprintf(stderr, "start_from_lsn is too small. Aborting\n");
+ maria_end();
+ goto err;
+ }
+ lsn= (LSN) opt_start_from_lsn;
+ fprintf(stdout, "Starting reading log from lsn (%lu,0x%lx)\n",
+ LSN_IN_PARTS(lsn));
+ }
+
+ fprintf(stdout, "TRACE of the last maria_read_log\n");
+ if (maria_apply_log(lsn, opt_apply ? MARIA_LOG_APPLY :
+ (opt_check ? MARIA_LOG_CHECK :
+ MARIA_LOG_DISPLAY_HEADER), opt_silent ? NULL : stdout,
+ opt_apply_undo, FALSE, FALSE, &warnings_count))
+ goto err;
+ if (warnings_count == 0)
+ fprintf(stdout, "%s: SUCCESS\n", my_progname_short);
+ else
+ fprintf(stdout, "%s: DOUBTFUL (%u warnings, check previous output)\n",
+ my_progname_short, warnings_count);
+
+end:
+ maria_end();
+ free_tmpdir(&maria_chk_tmpdir);
+ free_defaults(default_argv);
+ my_end(0);
+ exit(0);
+ return 0; /* No compiler warning */
+
+err:
+ /* don't touch anything more, in case we hit a bug */
+ fprintf(stderr, "%s: FAILED\n", my_progname_short);
+ free_tmpdir(&maria_chk_tmpdir);
+ free_defaults(default_argv);
+ exit(1);
+}
+
+
+#include "ma_check_standalone.h"
+
+
+static struct my_option my_long_options[] =
+{
+ {"apply", 'a',
+ "Apply log to tables: modifies tables! you should make a backup first! "
+ " Displays a lot of information if not run with --silent",
+ (uchar **) &opt_apply, (uchar **) &opt_apply, 0,
+ GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"check", 'c',
+ "if --display-only, check if record is fully readable (for debugging)",
+ (uchar **) &opt_check, (uchar **) &opt_check, 0,
+ GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+#ifndef DBUG_OFF
+ {"debug", '#', "Output debug log. Often the argument is 'd:t:o,filename'.",
+ 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+ {"help", '?', "Display this help and exit.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"display-only", 'd', "display brief info read from records' header",
+ (uchar **) &opt_display_only, (uchar **) &opt_display_only, 0, GET_BOOL,
+ NO_ARG,0, 0, 0, 0, 0, 0},
+ { "page_buffer_size", 'P', "",
+ (uchar**) &opt_page_buffer_size, (uchar**) &opt_page_buffer_size, 0,
+ GET_ULONG, REQUIRED_ARG, (long) USE_BUFFER_INIT,
+ (long) USE_BUFFER_INIT, (long) ~(ulong) 0, (long) MALLOC_OVERHEAD,
+ (long) IO_SIZE, 0},
+ { "start_from_lsn", 'o', "Start reading log from this lsn",
+ (uchar**) &opt_start_from_lsn, (uchar**) &opt_start_from_lsn,
+ 0, GET_ULL, REQUIRED_ARG, 0, 0, ~(longlong) 0, 0, 0, 0 },
+ {"silent", 's', "Print less information during apply/undo phase",
+ (uchar **) &opt_silent, (uchar **) &opt_silent, 0,
+ GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"tmpdir", 't', "Path for temporary files. Multiple paths can be specified, "
+ "separated by "
+#if defined( __WIN__) || defined(__NETWARE__)
+ "semicolon (;)"
+#else
+ "colon (:)"
+#endif
+ , (uchar**) &opt_tmpdir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"undo", 'u', "Apply UNDO records to tables. (disable with --disable-undo)",
+ (uchar **) &opt_apply_undo, (uchar **) &opt_apply_undo, 0,
+ GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0},
+ {"version", 'V', "Print version and exit.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+#include <help_start.h>
+
+static void print_version(void)
+{
+ VOID(printf("%s Ver 1.2 for %s on %s\n",
+ my_progname_short, SYSTEM_TYPE, MACHINE_TYPE));
+ NETWARE_SET_SCREEN_MODE(1);
+}
+
+
+static void usage(void)
+{
+ print_version();
+ puts("Copyright (C) 2007 MySQL AB");
+ puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,");
+ puts("and you are welcome to modify and redistribute it under the GPL license\n");
+
+ puts("Display and apply log records from a MARIA transaction log");
+ puts("found in the current directory (for now)");
+ VOID(printf("\nUsage: %s OPTIONS\n", my_progname_short));
+ puts("You need to use one of -o or -a");
+ my_print_help(my_long_options);
+ print_defaults("my", load_default_groups);
+ my_print_variables(my_long_options);
+}
+
+#include <help_end.h>
+
+static my_bool
+get_one_option(int optid __attribute__((unused)),
+ const struct my_option *opt __attribute__((unused)),
+ char *argument __attribute__((unused)))
+{
+ switch (optid) {
+ case '?':
+ usage();
+ exit(0);
+ case 'V':
+ print_version();
+ exit(0);
+#ifndef DBUG_OFF
+ case '#':
+ DBUG_SET_INITIAL(argument ? argument : default_dbug_option);
+ break;
+#endif
+ }
+ return 0;
+}
+
+static void get_options(int *argc,char ***argv)
+{
+ int ho_error;
+
+ if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
+ exit(ho_error);
+
+ if (!opt_apply)
+ opt_apply_undo= FALSE;
+
+ if ((opt_display_only + opt_apply) != 1)
+ {
+ usage();
+ exit(1);
+ }
+ if (init_tmpdir(&maria_chk_tmpdir, opt_tmpdir))
+ exit(1);
+ maria_tmpdir= &maria_chk_tmpdir;
+}
diff --git a/storage/maria/maria_rename.sh b/storage/maria/maria_rename.sh
new file mode 100755
index 00000000000..fb20e47e635
--- /dev/null
+++ b/storage/maria/maria_rename.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+replace myisam maria MYISAM MARIA MyISAM MARIA -- mysql-test/t/*maria*test mysql-test/r/*maria*result
+
+FILES=`echo sql/ha_maria.{cc,h} include/maria*h storage/maria/*.{c,h}`
+
+replace myisam maria MYISAM MARIA MyISAM MARIA myisam.h maria.h myisamdef.h maria_def.h mi_ maria_ ft_ maria_ft_ "Copyright (C) 2000" "Copyright (C) 2006" MI_ISAMINFO MARIA_INFO MI_CREATE_INFO MARIA_CREATE_INFO maria_isam_ maria_ MI_INFO MARIA_HA MI_ MARIA_ MARIACHK MARIA_CHK rt_index.h ma_rt_index.h rtree_ maria_rtree rt_key.h ma_rt_key.h rt_mbr.h ma_rt_mbr.h -- $FILES
+
+replace check_table_is_closed _ma_check_table_is_closed test_if_reopen _ma_test_if_reopen my_n_base_info_read maria_n_base_info_read update_auto_increment _ma_update_auto_increment save_pack_length _ma_save_packlength calc_pack_length _ma_calc_pack_length -- $FILES
+
+replace mi_ ma_ ft_ ma_ft_ rt_ ma_rt_ myisam maria myisamchk maria_chk myisampack maria_pack myisamlog maria_log -- storage/maria/Makefile.am
+
+#
+# Restore wrong replaces
+#
+
+replace maria_sint1korr mi_sint1korr maria_uint1korr mi_uint1korr maria_sint2korr mi_sint2korr maria_sint3korr mi_sint3korr maria_sint4korr mi_sint4korr maria_sint8korr mi_sint8korr maria_uint2korr mi_uint2korr maria_uint3korr mi_uint3korr maria_uint4korr mi_uint4korr maria_uint5korr mi_uint5korr maria_uint6korr mi_uint6korr maria_uint7korr mi_uint7korr maria_uint8korr mi_uint8korr maria_int1store mi_int1store maria_int2store mi_int2store maria_int3store mi_int3store maria_int4store mi_int4store maria_int5store mi_int5store maria_int6store mi_int6store maria_int7store mi_int7store maria_int8store mi_int8store maria_float4store mi_float4store maria_float4get mi_float4get maria_float8store mi_float8store maria_float8get mi_float8get maria_rowstore mi_rowstore maria_rowkorr mi_rowkorr maria_sizestore mi_sizestore maria_sizekorr mi_sizekorr _maria_maria_ _maria MARIA_MAX_POSSIBLE_KEY HA_MAX_POSSIBLE_KEY MARIA_MAX_KEY_BUFF HA_MAX_KEY_BUFF MARIA_MAX_KEY_SEG HA_MAX_KEY_SEG maria_ft_sintXkorr ft_sintXkorr maria_ft_intXstore ft_intXstore maria_ft_boolean_syntax ft_boolean_syntax maria_ft_min_word_len ft_min_word_len maria_ft_max_word_len ft_max_word_len -- $FILES
diff --git a/storage/maria/plug.in b/storage/maria/plug.in
new file mode 100644
index 00000000000..1ce64f6e2bb
--- /dev/null
+++ b/storage/maria/plug.in
@@ -0,0 +1,8 @@
+MYSQL_STORAGE_ENGINE(maria,, [Maria Storage Engine],
+ [Traditional transactional MySQL tables], [max,max-no-ndb])
+MYSQL_PLUGIN_DIRECTORY(maria, [storage/maria])
+MYSQL_PLUGIN_ACTIONS(maria, [AC_CONFIG_FILES(storage/maria/unittest/Makefile)])
+MYSQL_PLUGIN_STATIC(maria, [libmaria.a])
+# Maria will probably go first into max builds, not all builds,
+# so we don't declare it mandatory.
+MYSQL_PLUGIN_DEPENDS_ON_MYSQL_INTERNALS(maria, [ha_maria.cc])
diff --git a/storage/maria/tablockman.c b/storage/maria/tablockman.c
new file mode 100644
index 00000000000..1fd9ed6ec72
--- /dev/null
+++ b/storage/maria/tablockman.c
@@ -0,0 +1,676 @@
+/* QQ: TODO - allocate everything from dynarrays !!! (benchmark) */
+/* QQ: automatically place S instead of LS if possible */
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <my_base.h>
+#include <hash.h>
+#include "tablockman.h"
+
+/*
+ Lock Manager for Table Locks
+
+ The code below handles locks on resources - but it is optimized for a
+ case when a number of resources is not very large, and there are many of
+ locks per resource - that is a resource is likely to be a table or a
+ database, but hardly a row in a table.
+
+ Locks belong to "lock owners". A Lock Owner is uniquely identified by a
+ 16-bit number - loid (lock owner identifier). A function loid_to_tlo must
+ be provided by the application that takes such a number as an argument
+ and returns a TABLE_LOCK_OWNER structure.
+
+ Lock levels are completely defined by three tables. Lock compatibility
+ matrix specifies which locks can be held at the same time on a resource.
+ Lock combining matrix specifies what lock level has the same behaviour as
+ a pair of two locks of given levels. getlock_result matrix simplifies
+ intention locking and lock escalation for an application, basically it
+ defines which locks are intention locks and which locks are "loose"
+ locks. It is only used to provide better diagnostics for the
+ application, lock manager itself does not differentiate between normal,
+ intention, and loose locks.
+
+ The assumptions are: few distinct resources, many locks are held at the
+ same time on one resource. Thus: a lock structure _per resource_ can be
+ rather large; a lock structure _per lock_ does not need to be very small
+ either; we need to optimize for _speed_. Operations we need are: place a
+ lock, check if a particular transaction already has a lock on this
+ resource, check if a conflicting lock exists, if yes - find who owns it.
+
+ Solution: every resource has a structure with
+ 1. Hash of latest (see the lock upgrade section below) granted locks with
+ loid as a key. Thus, checking if a given transaction has a lock on
+ this resource is O(1) operation.
+ 2. Doubly-linked lists of all granted locks - one list for every lock
+ type. Thus, checking if a conflicting lock exists is a check whether
+ an appropriate list head pointer is not null, also O(1).
+ 3. Every lock has a loid of the owner, thus checking who owns a
+ conflicting lock is also O(1).
+ 4. Deque of waiting locks. It's a deque (double-ended queue) not a fifo,
+ because for lock upgrades requests are added to the queue head, not
+ tail. This is a single place where there it gets O(N) on number
+ of locks - when a transaction wakes up from waiting on a condition,
+ it may need to scan the queue backward to the beginning to find
+ a conflicting lock. It is guaranteed though that "all transactions
+ before it" received the same - or earlier - signal. In other words a
+ transaction needs to scan all transactions before it that received the
+ signal but didn't have a chance to resume the execution yet, so
+ practically OS scheduler won't let the scan to be O(N).
+
+ Waiting: if there is a conflicting lock or if wait queue is not empty, a
+ requested lock cannot be granted at once. It is added to the end of the
+ wait queue. If a queue was empty and there is a conflicting lock - the
+ "blocker" transaction is the owner of this lock. If a queue is not empty,
+ an owner of the previous lock in the queue is the "blocker". But if the
+ previous lock is compatible with the request, then the "blocker" is the
+ transaction that the owner of the lock at the end of the queue is waiting
+ for (in other words, our lock is added to the end of the wait queue, and
+ our blocker is the same as of the lock right before us).
+
+ Lock upgrades: when a thread that has a lock on a given resource,
+ requests a new lock on the same resource and the old lock is not enough
+ to satisfy new lock requirements (which is defined by
+ lock_combining_matrix[old_lock][new_lock] != old_lock), a new lock
+ (defined by lock_combining_matrix as above) is placed. Depending on
+ other granted locks it is immediately granted or it has to wait. Here the
+ lock is added to the start of the waiting queue, not to the end. Old
+ lock, is removed from the hash, but not from the doubly-linked lists.
+ (indeed, a transaction checks "do I have a lock on this resource ?" by
+ looking in a hash, and it should find a latest lock, so old locks must be
+ removed; but a transaction checks "are there conflicting locks ?" by
+ checking doubly-linked lists, it doesn't matter if it will find an old
+ lock - if it would be removed, a new lock would be also a conflict).
+ So, a hash contains only "latest" locks - there can be only one latest
+ lock per resource per transaction. But doubly-linked lists contain all
+ locks, even "obsolete" ones, because it doesnt't hurt. Note that old
+ locks can not be freed early, in particular they stay in the
+ 'active_locks' list of a lock owner, because they may be "re-enabled"
+ on a savepoint rollback.
+
+ To better support table-row relations where one needs to lock the table
+ with an intention lock before locking the row, extended diagnostics is
+ provided. When an intention lock (presumably on a table) is granted,
+ lockman_getlock() returns one of GOT_THE_LOCK (no need to lock the row,
+ perhaps the thread already has a normal lock on this table),
+ GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE (need to lock the row, as usual),
+ GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE (only need to check
+ whether it's possible to lock the row, but no need to lock it - perhaps
+ the thread has a loose lock on this table). This is defined by
+ getlock_result[] table.
+
+ Instant duration locks are not supported. Though they're trivial to add,
+ they are normally only used on rows, not on tables. So, presumably,
+ they are not needed here.
+
+ Mutexes: there're table mutexes (LOCKED_TABLE::mutex), lock owner mutexes
+ (TABLE_LOCK_OWNER::mutex), and a pool mutex (TABLOCKMAN::pool_mutex).
+ table mutex protects operations on the table lock structures, and lock
+ owner pointers waiting_for and waiting_for_loid.
+ lock owner mutex is only used to wait on lock owner condition
+ (TABLE_LOCK_OWNER::cond), there's no need to protect owner's lock
+ structures, and only lock owner itself may access them.
+ The pool mutex protects a pool of unused locks. Note the locking order:
+ first the table mutex, then the owner mutex or a pool mutex.
+ Table mutex lock cannot be attempted when owner or pool mutex are locked.
+ No mutex lock can be attempted if owner or pool mutex are locked.
+*/
+
+/*
+ Lock compatibility matrix.
+
+ It's asymmetric. Read it as "Somebody has the lock <value in the row
+ label>, can I set the lock <value in the column label> ?"
+
+ ') Though you can take LS lock while somebody has S lock, it makes no
+ sense - it's simpler to take S lock too.
+
+ 1 - compatible
+ 0 - incompatible
+ -1 - "impossible", so that we can assert the impossibility.
+*/
+static const int lock_compatibility_matrix[10][10]=
+{ /* N S X IS IX SIX LS LX SLX LSIX */
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, /* N */
+ { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* S */
+ { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* X */
+ { -1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, /* IS */
+ { -1, 0, 0, 1, 1, 0, 1, 1, 0, 1 }, /* IX */
+ { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, /* SIX */
+ { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* LS */
+ { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* LX */
+ { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* SLX */
+ { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 } /* LSIX */
+};
+
+/*
+ Lock combining matrix.
+
+ It's symmetric. Read it as "what lock level L is identical to the
+ set of two locks A and B"
+
+ One should never get N from it, we assert the impossibility
+*/
+static const enum lockman_lock_type lock_combining_matrix[10][10]=
+{/* N S X IS IX SIX LS LX SLX LSIX */
+ { N, N, N, N, N, N, N, N, N, N}, /* N */
+ { N, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */
+ { N, X, X, X, X, X, X, X, X, X}, /* X */
+ { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */
+ { N, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */
+ { N, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */
+ { N, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */
+ { N, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */
+ { N, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */
+ { N, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */
+};
+
+/*
+ the return codes for lockman_getlock
+
+ It's asymmetric. Read it as "I have the lock <value in the row label>,
+ what value should be returned for <value in the column label> ?"
+
+ 0 means impossible combination (assert!)
+
+ Defines below help to preserve the table structure.
+ I/L/A values are self explanatory
+ x means the combination is possible (assert should not crash)
+ but it cannot happen in row locks, only in table locks (S,X),
+ or lock escalations (LS,LX)
+*/
+#define I GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE
+#define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE
+#define A GOT_THE_LOCK
+#define x GOT_THE_LOCK
+static const enum lockman_getlock_result getlock_result[10][10]=
+{/* N S X IS IX SIX LS LX SLX LSIX */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* N */
+ { 0, x, 0, A, 0, 0, x, 0, 0, 0}, /* S */
+ { 0, x, x, A, A, 0, x, x, 0, 0}, /* X */
+ { 0, 0, 0, I, 0, 0, 0, 0, 0, 0}, /* IS */
+ { 0, 0, 0, I, I, 0, 0, 0, 0, 0}, /* IX */
+ { 0, x, 0, A, I, 0, x, 0, 0, 0}, /* SIX */
+ { 0, 0, 0, L, 0, 0, x, 0, 0, 0}, /* LS */
+ { 0, 0, 0, L, L, 0, x, x, 0, 0}, /* LX */
+ { 0, x, 0, A, L, 0, x, x, 0, 0}, /* SLX */
+ { 0, 0, 0, L, I, 0, x, 0, 0, 0} /* LSIX */
+};
+#undef I
+#undef L
+#undef A
+#undef x
+
+/*
+ this structure is optimized for a case when there're many locks
+ on the same resource - e.g. a table
+*/
+
+struct st_table_lock {
+ /* QQ: do we need upgraded_from ? */
+ struct st_table_lock *next_in_lo, *upgraded_from, *next, *prev;
+ struct st_locked_table *table;
+ uint16 loid;
+ uchar lock_type;
+};
+
+#define hash_insert my_hash_insert /* for consistency :) */
+
+static inline
+TABLE_LOCK *find_by_loid(LOCKED_TABLE *table, uint16 loid)
+{
+ return (TABLE_LOCK *)hash_search(& table->latest_locks,
+ (uchar *)& loid, sizeof(loid));
+}
+
+static inline
+void remove_from_wait_queue(TABLE_LOCK *lock, LOCKED_TABLE *table)
+{
+ DBUG_ASSERT(table == lock->table);
+ if (lock->prev)
+ {
+ DBUG_ASSERT(table->wait_queue_out != lock);
+ lock->prev->next= lock->next;
+ }
+ else
+ {
+ DBUG_ASSERT(table->wait_queue_out == lock);
+ table->wait_queue_out= lock->next;
+ }
+ if (lock->next)
+ {
+ DBUG_ASSERT(table->wait_queue_in != lock);
+ lock->next->prev= lock->prev;
+ }
+ else
+ {
+ DBUG_ASSERT(table->wait_queue_in == lock);
+ table->wait_queue_in= lock->prev;
+ }
+}
+
+/*
+ DESCRIPTION
+ tries to lock a resource 'table' with a lock level 'lock'.
+
+ RETURN
+ see enum lockman_getlock_result
+*/
+enum lockman_getlock_result
+tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
+ LOCKED_TABLE *table, enum lockman_lock_type lock)
+{
+ TABLE_LOCK *old, *new, *blocker, *blocker2;
+ TABLE_LOCK_OWNER *wait_for;
+ ulonglong deadline;
+ struct timespec timeout;
+ enum lockman_lock_type new_lock;
+ enum lockman_getlock_result res;
+ int i;
+
+ DBUG_ASSERT(lo->waiting_lock == 0);
+ DBUG_ASSERT(lo->waiting_for == 0);
+ DBUG_ASSERT(lo->waiting_for_loid == 0);
+
+ pthread_mutex_lock(& table->mutex);
+ /* do we already have a lock on this resource ? */
+ old= find_by_loid(table, lo->loid);
+
+ /* calculate the level of the upgraded lock, if yes */
+ new_lock= old ? lock_combining_matrix[old->lock_type][lock] : lock;
+
+ /* and check if old lock is enough to satisfy the new request */
+ if (old && new_lock == old->lock_type)
+ {
+ /* yes */
+ res= getlock_result[old->lock_type][lock];
+ goto ret;
+ }
+
+ /* no, placing a new lock. first - take a free lock structure from the pool */
+ pthread_mutex_lock(& lm->pool_mutex);
+ new= lm->pool;
+ if (new)
+ {
+ lm->pool= new->next;
+ pthread_mutex_unlock(& lm->pool_mutex);
+ }
+ else
+ {
+ pthread_mutex_unlock(& lm->pool_mutex);
+ new= (TABLE_LOCK *)my_malloc(sizeof(*new), MYF(MY_WME));
+ if (unlikely(!new))
+ {
+ res= NO_MEMORY_FOR_LOCK;
+ goto ret;
+ }
+ }
+
+ new->loid= lo->loid;
+ new->lock_type= new_lock;
+ new->table= table;
+
+ /* and try to place it */
+ for (new->prev= table->wait_queue_in;;)
+ {
+ wait_for= 0;
+ if (!old)
+ {
+ /* not upgrading - a lock must be added to the _end_ of the wait queue */
+ for (blocker= new->prev; blocker && !wait_for; blocker= blocker->prev)
+ {
+ TABLE_LOCK_OWNER *tmp= lm->loid_to_tlo(blocker->loid);
+
+ /* find a blocking lock */
+ DBUG_ASSERT(table->wait_queue_out);
+ DBUG_ASSERT(table->wait_queue_in);
+ if (!lock_compatibility_matrix[blocker->lock_type][lock])
+ {
+ /* found! */
+ wait_for= tmp;
+ break;
+ }
+
+ /*
+ hmm, the lock before doesn't block us, let's look one step further.
+ the condition below means:
+
+ if we never waited on a condition yet
+ OR
+ the lock before ours (blocker) waits on a lock (blocker2) that is
+ present in the hash AND and conflicts with 'blocker'
+
+ the condition after OR may fail if 'blocker2' was removed from
+ the hash, its signal woke us up, but 'blocker' itself didn't see
+ the signal yet.
+ */
+ if (!lo->waiting_lock ||
+ ((blocker2= find_by_loid(table, tmp->waiting_for_loid)) &&
+ !lock_compatibility_matrix[blocker2->lock_type]
+ [blocker->lock_type]))
+ {
+ /* but it's waiting for a real lock. we'll wait for the same lock */
+ wait_for= tmp->waiting_for;
+ /*
+ We don't really need tmp->waiting_for, as tmp->waiting_for_loid
+ is enough. waiting_for is just a local cache to avoid calling
+ loid_to_tlo().
+ But it's essensial that tmp->waiting_for pointer can ONLY
+ be dereferenced if find_by_loid() above returns a non-null
+ pointer, because a TABLE_LOCK_OWNER object that it points to
+ may've been freed when we come here after a signal.
+ In particular tmp->waiting_for_loid cannot be replaced
+ with tmp->waiting_for->loid.
+ */
+ DBUG_ASSERT(wait_for == lm->loid_to_tlo(tmp->waiting_for_loid));
+ break;
+ }
+
+ /*
+ otherwise - a lock it's waiting for doesn't exist.
+ We've no choice but to scan the wait queue backwards, looking
+ for a conflicting lock or a lock waiting for a real lock.
+ QQ is there a way to avoid this scanning ?
+ */
+ }
+ }
+
+ if (wait_for == 0)
+ {
+ /* checking for compatibility with existing locks */
+ for (blocker= 0, i= 0; i < LOCK_TYPES; i++)
+ {
+ if (table->active_locks[i] && !lock_compatibility_matrix[i+1][lock])
+ {
+ blocker= table->active_locks[i];
+ /* if the first lock in the list is our own - skip it */
+ if (blocker->loid == lo->loid)
+ blocker= blocker->next;
+ if (blocker) /* found a conflicting lock, need to wait */
+ break;
+ }
+ }
+ if (!blocker) /* free to go */
+ break;
+ wait_for= lm->loid_to_tlo(blocker->loid);
+ }
+
+ /* ok, we're here - the wait is inevitable */
+ lo->waiting_for= wait_for;
+ lo->waiting_for_loid= wait_for->loid;
+ if (!lo->waiting_lock) /* first iteration of the for() loop */
+ {
+ /* lock upgrade or new lock request ? */
+ if (old)
+ {
+ /* upgrade - add the lock to the _start_ of the wait queue */
+ new->prev= 0;
+ if ((new->next= table->wait_queue_out))
+ new->next->prev= new;
+ table->wait_queue_out= new;
+ if (!table->wait_queue_in)
+ table->wait_queue_in= table->wait_queue_out;
+ }
+ else
+ {
+ /* new lock - add the lock to the _end_ of the wait queue */
+ new->next= 0;
+ if ((new->prev= table->wait_queue_in))
+ new->prev->next= new;
+ table->wait_queue_in= new;
+ if (!table->wait_queue_out)
+ table->wait_queue_out= table->wait_queue_in;
+ }
+ lo->waiting_lock= new;
+
+ deadline= my_getsystime() + lm->lock_timeout * 10000;
+ timeout.tv_sec= deadline/10000000;
+ timeout.tv_nsec= (deadline % 10000000) * 100;
+ }
+
+ /*
+ prepare to wait.
+ we must lock blocker's mutex to wait on blocker's cond.
+ and we must release table's mutex.
+ note that blocker's mutex is locked _before_ table's mutex is released
+ */
+ pthread_mutex_lock(wait_for->mutex);
+ pthread_mutex_unlock(& table->mutex);
+
+ /* now really wait */
+ i= pthread_cond_timedwait(wait_for->cond, wait_for->mutex, & timeout);
+
+ pthread_mutex_unlock(wait_for->mutex);
+
+ if (i == ETIMEDOUT || i == ETIME)
+ {
+ /* we rely on the caller to rollback and release all locks */
+ res= LOCK_TIMEOUT;
+ goto ret2;
+ }
+
+ pthread_mutex_lock(& table->mutex);
+
+ /* ... and repeat from the beginning */
+ }
+ /* yeah! we can place the lock now */
+
+ /* remove the lock from the wait queue, if it was there */
+ if (lo->waiting_lock)
+ {
+ remove_from_wait_queue(new, table);
+ lo->waiting_lock= 0;
+ lo->waiting_for= 0;
+ lo->waiting_for_loid= 0;
+ }
+
+ /* add it to the list of all locks of this lock owner */
+ new->next_in_lo= lo->active_locks;
+ lo->active_locks= new;
+
+ /* and to the list of active locks of this lock type */
+ new->prev= 0;
+ if ((new->next= table->active_locks[new_lock-1]))
+ new->next->prev= new;
+ table->active_locks[new_lock-1]= new;
+
+ /* update the latest_locks hash */
+ if (old)
+ hash_delete(& table->latest_locks, (uchar *)old);
+ hash_insert(& table->latest_locks, (uchar *)new);
+
+ new->upgraded_from= old;
+
+ res= getlock_result[lock][lock];
+
+ret:
+ pthread_mutex_unlock(& table->mutex);
+ret2:
+ DBUG_ASSERT(res);
+ return res;
+}
+
+/*
+ DESCRIPTION
+ release all locks belonging to a transaction.
+ signal waiters to continue
+*/
+void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo)
+{
+ TABLE_LOCK *lock, *local_pool= 0, *local_pool_end;
+
+ /*
+ instead of adding released locks to a pool one by one, we'll link
+ them in a list and add to a pool in one short action (under a mutex)
+ */
+ local_pool_end= lo->waiting_lock ? lo->waiting_lock : lo->active_locks;
+ if (!local_pool_end)
+ return;
+
+ /* release a waiting lock, if any */
+ if ((lock= lo->waiting_lock))
+ {
+ DBUG_ASSERT(lock->loid == lo->loid);
+ pthread_mutex_lock(& lock->table->mutex);
+ remove_from_wait_queue(lock, lock->table);
+
+ /*
+ a special case: if this lock was not the last in the wait queue
+ and it's compatible with the next lock, than the next lock
+ is waiting for our blocker though really it waits for us, indirectly.
+ Signal our blocker to release this next lock (after we removed our
+ lock from the wait queue, of course).
+ */
+ /*
+ An example to clarify the above:
+ trn1> S-lock the table. Granted.
+ trn2> IX-lock the table. Added to the wait queue. trn2 waits on trn1
+ trn3> IS-lock the table. The queue is not empty, so IS-lock is added
+ to the queue. It's compatible with the waiting IX-lock, so trn3
+ waits for trn2->waiting_for, that is trn1.
+ if trn1 releases the lock it signals trn1->cond and both waiting
+ transactions are awaken. But if trn2 times out, trn3 must be notified
+ too (as IS and S locks are compatible). So trn2 must signal trn1->cond.
+ */
+ if (lock->next &&
+ lock_compatibility_matrix[lock->next->lock_type][lock->lock_type])
+ {
+ pthread_mutex_lock(lo->waiting_for->mutex);
+ pthread_cond_broadcast(lo->waiting_for->cond);
+ pthread_mutex_unlock(lo->waiting_for->mutex);
+ }
+ lo->waiting_for= 0;
+ lo->waiting_for_loid= 0;
+ pthread_mutex_unlock(& lock->table->mutex);
+
+ lock->next= local_pool;
+ local_pool= lock;
+ }
+
+ /* now release granted locks */
+ lock= lo->active_locks;
+ while (lock)
+ {
+ TABLE_LOCK *cur= lock;
+ pthread_mutex_t *mutex= & lock->table->mutex;
+ DBUG_ASSERT(cur->loid == lo->loid);
+
+ DBUG_ASSERT(lock != lock->next_in_lo);
+ lock= lock->next_in_lo;
+
+ /* TODO ? group locks by table to reduce the number of mutex locks */
+ pthread_mutex_lock(mutex);
+ hash_delete(& cur->table->latest_locks, (uchar *)cur);
+
+ if (cur->prev)
+ cur->prev->next= cur->next;
+ if (cur->next)
+ cur->next->prev= cur->prev;
+ if (cur->table->active_locks[cur->lock_type-1] == cur)
+ cur->table->active_locks[cur->lock_type-1]= cur->next;
+
+ cur->next= local_pool;
+ local_pool= cur;
+
+ pthread_mutex_unlock(mutex);
+ }
+
+ lo->waiting_lock= lo->active_locks= 0;
+
+ /*
+ okay, all locks released. now signal that we're leaving,
+ in case somebody's waiting for it
+ */
+ pthread_mutex_lock(lo->mutex);
+ pthread_cond_broadcast(lo->cond);
+ pthread_mutex_unlock(lo->mutex);
+
+ /* and push all freed locks to the lockman's pool */
+ pthread_mutex_lock(& lm->pool_mutex);
+ local_pool_end->next= lm->pool;
+ lm->pool= local_pool;
+ pthread_mutex_unlock(& lm->pool_mutex);
+}
+
+void tablockman_init(TABLOCKMAN *lm, loid_to_tlo_func *func, uint timeout)
+{
+ lm->pool= 0;
+ lm->loid_to_tlo= func;
+ lm->lock_timeout= timeout;
+ pthread_mutex_init(& lm->pool_mutex, MY_MUTEX_INIT_FAST);
+ my_getsystime(); /* ensure that my_getsystime() is initialized */
+}
+
+void tablockman_destroy(TABLOCKMAN *lm)
+{
+ while (lm->pool)
+ {
+ TABLE_LOCK *tmp= lm->pool;
+ lm->pool= tmp->next;
+ my_free((void *)tmp, MYF(0));
+ }
+ pthread_mutex_destroy(& lm->pool_mutex);
+}
+
+/*
+ initialize a LOCKED_TABLE structure
+
+ SYNOPSYS
+ lt a LOCKED_TABLE to initialize
+ initial_hash_size initial size for 'latest_locks' hash
+*/
+void tablockman_init_locked_table(LOCKED_TABLE *lt, int initial_hash_size)
+{
+ bzero(lt, sizeof(*lt));
+ pthread_mutex_init(& lt->mutex, MY_MUTEX_INIT_FAST);
+ hash_init(& lt->latest_locks, & my_charset_bin, initial_hash_size,
+ offsetof(TABLE_LOCK, loid),
+ sizeof(((TABLE_LOCK*)0)->loid), 0, 0, 0);
+}
+
+void tablockman_destroy_locked_table(LOCKED_TABLE *lt)
+{
+ int i;
+
+ DBUG_ASSERT(lt->wait_queue_out == 0);
+ DBUG_ASSERT(lt->wait_queue_in == 0);
+ DBUG_ASSERT(lt->latest_locks.records == 0);
+ for (i= 0; i<LOCK_TYPES; i++)
+ DBUG_ASSERT(lt->active_locks[i] == 0);
+
+ hash_free(& lt->latest_locks);
+ pthread_mutex_destroy(& lt->mutex);
+}
+
+#ifdef EXTRA_DEBUG
+static const char *lock2str[LOCK_TYPES+1]= {"N", "S", "X", "IS", "IX", "SIX",
+ "LS", "LX", "SLX", "LSIX"};
+
+void tablockman_print_tlo(TABLE_LOCK_OWNER *lo)
+{
+ TABLE_LOCK *lock;
+
+ printf("lo%d>", lo->loid);
+ if ((lock= lo->waiting_lock))
+ printf(" (%s.0x%lx)", lock2str[lock->lock_type], (ulong)lock->table);
+ for (lock= lo->active_locks;
+ lock && lock != lock->next_in_lo;
+ lock= lock->next_in_lo)
+ printf(" %s.0x%lx", lock2str[lock->lock_type], (ulong)lock->table);
+ if (lock && lock == lock->next_in_lo)
+ printf("!");
+ printf("\n");
+}
+#endif
+
diff --git a/storage/maria/tablockman.h b/storage/maria/tablockman.h
new file mode 100644
index 00000000000..e33d1aa44e8
--- /dev/null
+++ b/storage/maria/tablockman.h
@@ -0,0 +1,87 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _tablockman_h
+#define _tablockman_h
+
+/*
+ Lock levels:
+ ^^^^^^^^^^^
+
+ N - "no lock", not a lock, used sometimes internally to simplify the code
+ S - Shared
+ X - eXclusive
+ IS - Intention Shared
+ IX - Intention eXclusive
+ SIX - Shared + Intention eXclusive
+ LS - Loose Shared
+ LX - Loose eXclusive
+ SLX - Shared + Loose eXclusive
+ LSIX - Loose Shared + Intention eXclusive
+*/
+#ifndef _lockman_h
+/* QQ: TODO remove N-locks */
+enum lockman_lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST };
+enum lockman_getlock_result {
+ NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT,
+ GOT_THE_LOCK,
+ GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE,
+ GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE
+};
+#endif
+
+#define LOCK_TYPES (LOCK_TYPE_LAST-1)
+
+typedef struct st_table_lock TABLE_LOCK;
+
+typedef struct st_table_lock_owner {
+ TABLE_LOCK *active_locks; /* list of active locks */
+ TABLE_LOCK *waiting_lock; /* waiting lock (one lock only) */
+ struct st_table_lock_owner *waiting_for; /* transaction we're waiting for */
+ pthread_cond_t *cond; /* transactions waiting for us, wait on 'cond' */
+ pthread_mutex_t *mutex; /* mutex is required to use 'cond' */
+ uint16 loid, waiting_for_loid; /* Lock Owner IDentifier */
+} TABLE_LOCK_OWNER;
+
+typedef struct st_locked_table {
+ pthread_mutex_t mutex; /* mutex for everything below */
+ HASH latest_locks; /* latest locks in a hash */
+ TABLE_LOCK *active_locks[LOCK_TYPES]; /* dl-list of locks per type */
+ TABLE_LOCK *wait_queue_in, *wait_queue_out; /* wait deque (double-end queue)*/
+} LOCKED_TABLE;
+
+typedef TABLE_LOCK_OWNER *loid_to_tlo_func(uint16);
+
+typedef struct {
+ pthread_mutex_t pool_mutex;
+ TABLE_LOCK *pool; /* lifo pool of free locks */
+ uint lock_timeout; /* lock timeout in milliseconds */
+ loid_to_tlo_func *loid_to_tlo; /* for mapping loid to TABLE_LOCK_OWNER */
+} TABLOCKMAN;
+
+void tablockman_init(TABLOCKMAN *, loid_to_tlo_func *, uint);
+void tablockman_destroy(TABLOCKMAN *);
+enum lockman_getlock_result tablockman_getlock(TABLOCKMAN *, TABLE_LOCK_OWNER *,
+ LOCKED_TABLE *, enum lockman_lock_type);
+void tablockman_release_locks(TABLOCKMAN *, TABLE_LOCK_OWNER *);
+void tablockman_init_locked_table(LOCKED_TABLE *, int);
+void tablockman_destroy_locked_table(LOCKED_TABLE *);
+
+#ifdef EXTRA_DEBUG
+void tablockman_print_tlo(TABLE_LOCK_OWNER *);
+#endif
+
+#endif
+
diff --git a/storage/maria/test_pack b/storage/maria/test_pack
new file mode 100755
index 00000000000..689645b1661
--- /dev/null
+++ b/storage/maria/test_pack
@@ -0,0 +1,10 @@
+silent="-s"
+suffix=""
+
+ma_test1$suffix -s ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -us test1 ; maria_chk$suffix -es test1
+ma_test1$suffix -s -S ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1 ;maria_chk$suffix -us test1 ; maria_chk$suffix -es test1
+ma_test1$suffix -s -b ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1
+ma_test1$suffix -s -w ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -ros test1 ; maria_chk$suffix -es test1
+
+ma_test2$suffix -s -t4 ; maria_pack$suffix --force -s test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -ros test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -s -u test2 ; maria_chk$suffix -sm test2
+ma_test2$suffix -s -t4 -b ; maria_pack$suffix --force -s test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -ros test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -s -u test2 ; maria_chk$suffix -sm test2
diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c
new file mode 100644
index 00000000000..147675456aa
--- /dev/null
+++ b/storage/maria/trnman.c
@@ -0,0 +1,746 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <m_string.h>
+#include "trnman.h"
+#include "ma_checkpoint.h"
+#include "ma_control_file.h"
+
+/*
+ status variables:
+ how many trns in the active list currently,
+ in the committed list currently, allocated since startup.
+*/
+uint trnman_active_transactions, trnman_committed_transactions,
+ trnman_allocated_transactions;
+
+/* list of active transactions in the trid order */
+static TRN active_list_min, active_list_max;
+/* list of committed transactions in the trid order */
+static TRN committed_list_min, committed_list_max;
+
+/* a counter, used to generate transaction ids */
+static TrID global_trid_generator;
+
+/* the mutex for everything above */
+static pthread_mutex_t LOCK_trn_list;
+
+/* LIFO pool of unused TRN structured for reuse */
+static TRN *pool;
+
+/* a hash for committed transactions that maps trid to a TRN structure */
+static LF_HASH trid_to_committed_trn;
+
+/* an array that maps short_trid of an active transaction to a TRN structure */
+static TRN **short_trid_to_active_trn;
+
+/* locks for short_trid_to_active_trn and pool */
+static my_atomic_rwlock_t LOCK_short_trid_to_trn, LOCK_pool;
+
+/*
+ Simple interface functions
+ QQ: if they stay so simple, should we make them inline?
+*/
+
+uint trnman_increment_locked_tables(TRN *trn)
+{
+ return trn->locked_tables++;
+}
+
+my_bool trnman_has_locked_tables(TRN *trn)
+{
+ return trn->locked_tables != 0;
+}
+
+uint trnman_decrement_locked_tables(TRN *trn)
+{
+ return --trn->locked_tables;
+}
+
+void trnman_reset_locked_tables(TRN *trn)
+{
+ trn->locked_tables= 0;
+}
+
+
+/*
+ NOTE
+ Just as short_id doubles as loid, this function doubles as
+ short_trid_to_LOCK_OWNER. See the compile-time assert below.
+*/
+
+#ifdef NOT_USED
+static TRN *short_trid_to_TRN(uint16 short_trid)
+{
+ TRN *trn;
+ compile_time_assert(offsetof(TRN, locks) == 0);
+ my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn);
+ trn= my_atomic_loadptr((void **)&short_trid_to_active_trn[short_trid]);
+ my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn);
+ return (TRN *)trn;
+}
+#endif
+
+static uchar *trn_get_hash_key(const uchar *trn, size_t *len,
+ my_bool unused __attribute__ ((unused)))
+{
+ *len= sizeof(TrID);
+ return (uchar *) & ((*((TRN **)trn))->trid);
+}
+
+
+/**
+ @brief Initializes transaction manager.
+
+ @param initial_trid Generated TrIDs will start from initial_trid+1.
+
+ @return Operation status
+ @retval 0 OK
+ @retval !=0 Error
+*/
+
+int trnman_init(TrID initial_trid)
+{
+ DBUG_ENTER("trnman_init");
+
+ short_trid_to_active_trn= (TRN **)my_malloc(SHORT_TRID_MAX*sizeof(TRN*),
+ MYF(MY_WME|MY_ZEROFILL));
+ if (unlikely(!short_trid_to_active_trn))
+ DBUG_RETURN(1);
+ short_trid_to_active_trn--; /* min short_trid is 1 */
+
+ /*
+ Initialize lists.
+ active_list_max.min_read_from must be larger than any trid,
+ so that when an active list is empty we would could free
+ all committed list.
+ And committed_list_max itself can not be freed so
+ committed_list_max.commit_trid must not be smaller that
+ active_list_max.min_read_from
+ */
+
+ active_list_max.trid= active_list_min.trid= 0;
+ active_list_max.min_read_from= ~(ulong) 0;
+ active_list_max.next= active_list_min.prev= 0;
+ active_list_max.prev= &active_list_min;
+ active_list_min.next= &active_list_max;
+
+ committed_list_max.commit_trid= ~(ulong) 0;
+ committed_list_max.next= committed_list_min.prev= 0;
+ committed_list_max.prev= &committed_list_min;
+ committed_list_min.next= &committed_list_max;
+
+ trnman_active_transactions= 0;
+ trnman_committed_transactions= 0;
+ trnman_allocated_transactions= 0;
+
+ pool= 0;
+ global_trid_generator= initial_trid;
+ lf_hash_init(&trid_to_committed_trn, sizeof(TRN*), LF_HASH_UNIQUE,
+ 0, 0, trn_get_hash_key, 0);
+ DBUG_PRINT("info", ("pthread_mutex_init LOCK_trn_list"));
+ pthread_mutex_init(&LOCK_trn_list, MY_MUTEX_INIT_FAST);
+ my_atomic_rwlock_init(&LOCK_short_trid_to_trn);
+ my_atomic_rwlock_init(&LOCK_pool);
+
+#ifdef NOT_USED
+ lockman_init(&maria_lockman, (loid_to_lo_func *)&short_trid_to_TRN, 10000);
+#endif
+
+ DBUG_RETURN(0);
+}
+
+/*
+ NOTE
+ this could only be called in the "idle" state - no transaction can be
+ running. See asserts below.
+*/
+void trnman_destroy()
+{
+ DBUG_ENTER("trnman_destroy");
+
+ if (short_trid_to_active_trn == NULL) /* trnman already destroyed */
+ DBUG_VOID_RETURN;
+ DBUG_ASSERT(trid_to_committed_trn.count == 0);
+ DBUG_ASSERT(trnman_active_transactions == 0);
+ DBUG_ASSERT(trnman_committed_transactions == 0);
+ DBUG_ASSERT(active_list_max.prev == &active_list_min);
+ DBUG_ASSERT(active_list_min.next == &active_list_max);
+ DBUG_ASSERT(committed_list_max.prev == &committed_list_min);
+ DBUG_ASSERT(committed_list_min.next == &committed_list_max);
+ while (pool)
+ {
+ TRN *trn= pool;
+ pool= pool->next;
+ DBUG_ASSERT(trn->locks.mutex == 0);
+ DBUG_ASSERT(trn->locks.cond == 0);
+ my_free((void *)trn, MYF(0));
+ }
+ lf_hash_destroy(&trid_to_committed_trn);
+ DBUG_PRINT("info", ("pthread_mutex_destroy LOCK_trn_list"));
+ pthread_mutex_destroy(&LOCK_trn_list);
+ my_atomic_rwlock_destroy(&LOCK_short_trid_to_trn);
+ my_atomic_rwlock_destroy(&LOCK_pool);
+ my_free((void *)(short_trid_to_active_trn+1), MYF(0));
+ short_trid_to_active_trn= NULL;
+#ifdef NOT_USED
+ lockman_destroy(&maria_lockman);
+#endif
+ DBUG_VOID_RETURN;
+}
+
+/*
+ NOTE
+ TrID is limited to 6 bytes. Initial value of the generator
+ is set by the recovery code - being read from the last checkpoint
+ (or 1 on a first run).
+*/
+static TrID new_trid()
+{
+ DBUG_ENTER("new_trid");
+ DBUG_ASSERT(global_trid_generator < 0xffffffffffffLL);
+ DBUG_PRINT("info", ("safe_mutex_assert_owner LOCK_trn_list"));
+ safe_mutex_assert_owner(&LOCK_trn_list);
+ DBUG_RETURN(++global_trid_generator);
+}
+
+static void set_short_trid(TRN *trn)
+{
+ int i= (global_trid_generator + (intptr)trn) * 312089 % SHORT_TRID_MAX + 1;
+ for ( ; !trn->short_id ; i= 1)
+ {
+ my_atomic_rwlock_wrlock(&LOCK_short_trid_to_trn);
+ for ( ; i <= SHORT_TRID_MAX; i++) /* the range is [1..SHORT_TRID_MAX] */
+ {
+ void *tmp= NULL;
+ if (short_trid_to_active_trn[i] == NULL &&
+ my_atomic_casptr((void **)&short_trid_to_active_trn[i], &tmp, trn))
+ {
+ trn->short_id= i;
+ break;
+ }
+ }
+ my_atomic_rwlock_wrunlock(&LOCK_short_trid_to_trn);
+ }
+}
+
+/*
+ DESCRIPTION
+ start a new transaction, allocate and initialize transaction object
+ mutex and cond will be used for lock waits
+*/
+
+TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond,
+ void *stack_end)
+{
+ TRN *trn;
+ DBUG_ENTER("trnman_new_trn");
+
+ /*
+ we have a mutex, to do simple things under it - allocate a TRN,
+ increment trnman_active_transactions, set trn->min_read_from.
+
+ Note that all the above is fast. generating short_trid may be slow,
+ as it involves scanning a large array - so it's done outside of the
+ mutex.
+ */
+
+ DBUG_PRINT("info", ("pthread_mutex_lock LOCK_trn_list"));
+ pthread_mutex_lock(&LOCK_trn_list);
+
+ /* Allocating a new TRN structure */
+ trn= pool;
+ /*
+ Popping an unused TRN from the pool
+ (ABA isn't possible, we're behind a mutex
+ */
+ my_atomic_rwlock_wrlock(&LOCK_pool);
+ while (trn && !my_atomic_casptr((void **)&pool, (void **)&trn,
+ (void *)trn->next))
+ /* no-op */;
+ my_atomic_rwlock_wrunlock(&LOCK_pool);
+
+ /* Nothing in the pool ? Allocate a new one */
+ if (!trn)
+ {
+ /*
+ trn should be completely initalized at create time to allow
+ one to keep a known state on it.
+ (Like redo_lns, which is assumed to be 0 at start of row handling
+ and reset to zero before end of row handling)
+ */
+ trn= (TRN *)my_malloc(sizeof(TRN), MYF(MY_WME | MY_ZEROFILL));
+ if (unlikely(!trn))
+ {
+ DBUG_PRINT("info", ("pthread_mutex_unlock LOCK_trn_list"));
+ pthread_mutex_unlock(&LOCK_trn_list);
+ return 0;
+ }
+ trnman_allocated_transactions++;
+ }
+ trn->pins= lf_hash_get_pins(&trid_to_committed_trn, stack_end);
+ if (!trn->pins)
+ {
+ trnman_free_trn(trn);
+ return 0;
+ }
+
+ trnman_active_transactions++;
+
+ trn->min_read_from= active_list_min.next->trid;
+
+ trn->trid= new_trid();
+ trn->short_id= 0;
+
+ trn->next= &active_list_max;
+ trn->prev= active_list_max.prev;
+ active_list_max.prev= trn->prev->next= trn;
+ DBUG_PRINT("info", ("pthread_mutex_unlock LOCK_trn_list"));
+ pthread_mutex_unlock(&LOCK_trn_list);
+
+ if (unlikely(!trn->min_read_from))
+ trn->min_read_from= trn->trid;
+
+ trn->commit_trid= 0;
+ trn->rec_lsn= trn->undo_lsn= trn->first_undo_lsn= 0;
+
+ trn->locks.mutex= mutex;
+ trn->locks.cond= cond;
+ trn->locks.waiting_for= 0;
+ trn->locks.all_locks= 0;
+#ifdef NOT_USED
+ trn->locks.pins= lf_alloc_get_pins(&maria_lockman.alloc);
+#endif
+
+ trn->locked_tables= 0;
+
+ /*
+ only after the following function TRN is considered initialized,
+ so it must be done the last
+ */
+ set_short_trid(trn);
+
+ DBUG_RETURN(trn);
+}
+
+/*
+ remove a trn from the active list.
+ if necessary - move to committed list and set commit_trid
+
+ NOTE
+ Locks are released at the end. In particular, after placing the
+ transaction in commit list, and after setting commit_trid. It's
+ important, as commit_trid affects visibility. Locks don't affect
+ anything they simply delay execution of other threads - they could be
+ released arbitrarily late. In other words, when locks are released it
+ serves as a start banner for other threads, they start to run. So
+ everything they may need must be ready at that point.
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+int trnman_end_trn(TRN *trn, my_bool commit)
+{
+ int res= 1;
+ TRN *free_me= 0;
+ LF_PINS *pins= trn->pins;
+ DBUG_ENTER("trnman_end_trn");
+
+ DBUG_ASSERT(trn->rec_lsn == 0);
+ /* if a rollback, all UNDO records should have been executed */
+ DBUG_ASSERT(commit || trn->undo_lsn == 0);
+ DBUG_PRINT("info", ("pthread_mutex_lock LOCK_trn_list"));
+ pthread_mutex_lock(&LOCK_trn_list);
+
+ /* remove from active list */
+ trn->next->prev= trn->prev;
+ trn->prev->next= trn->next;
+
+ /*
+ if trn was the oldest active transaction, now that it goes away there
+ may be committed transactions in the list which no active transaction
+ needs to bother about - clean up the committed list
+ */
+ if (trn->prev == &active_list_min)
+ {
+ uint free_me_count;
+ TRN *t;
+ for (t= committed_list_min.next, free_me_count= 0;
+ t->commit_trid < active_list_min.next->min_read_from;
+ t= t->next, free_me_count++) /* no-op */;
+
+ DBUG_ASSERT((t != committed_list_min.next && free_me_count > 0) ||
+ (t == committed_list_min.next && free_me_count == 0));
+ /* found transactions committed before the oldest active one */
+ if (t != committed_list_min.next)
+ {
+ free_me= committed_list_min.next;
+ committed_list_min.next= t;
+ t->prev->next= 0;
+ t->prev= &committed_list_min;
+ trnman_committed_transactions-= free_me_count;
+ }
+ }
+
+ /*
+ if transaction is committed and it was not the only active transaction -
+ add it to the committed list (which is used for read-from relation)
+ */
+ if (commit && active_list_min.next != &active_list_max)
+ {
+ trn->commit_trid= global_trid_generator;
+ trn->next= &committed_list_max;
+ trn->prev= committed_list_max.prev;
+ trnman_committed_transactions++;
+
+ res= lf_hash_insert(&trid_to_committed_trn, pins, &trn);
+ /*
+ By going on with life is res<0, we let other threads block on
+ our rows (because they will never see us committed in
+ trid_to_committed_trn) until they timeout. Though correct, this is not a
+ good situation:
+ - if connection reconnects and wants to check if its rows have been
+ committed, it will not be able to do that (it will just lock on them) so
+ connection stays permanently in doubt
+ - internal structures trid_to_committed_trn and committed_list are
+ desynchronized.
+ So we should take Maria down immediately, the two problems being
+ automatically solved at restart.
+ */
+ DBUG_ASSERT(res <= 0);
+ }
+ if (res)
+ {
+ /*
+ res == 1 means the condition in the if() above
+ was false.
+ res == -1 means lf_hash_insert failed
+ */
+ trn->next= free_me;
+ free_me= trn;
+ }
+ else
+ {
+ committed_list_max.prev= trn->prev->next= trn;
+ }
+ trnman_active_transactions--;
+ DBUG_PRINT("info", ("pthread_mutex_unlock LOCK_trn_list"));
+ pthread_mutex_unlock(&LOCK_trn_list);
+
+ /* the rest is done outside of a critical section */
+#ifdef NOT_USED
+ lockman_release_locks(&maria_lockman, &trn->locks);
+#endif
+ trn->locks.mutex= 0;
+ trn->locks.cond= 0;
+ my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn);
+ my_atomic_storeptr((void **)&short_trid_to_active_trn[trn->short_id], 0);
+ my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn);
+
+ /*
+ we, under the mutex, removed going-in-free_me transactions from the
+ active and committed lists, thus nobody else may see them when it scans
+ those lists, and thus nobody may want to free them. Now we don't
+ need a mutex to access free_me list
+ */
+ /* QQ: send them to the purge thread */
+ while (free_me)
+ {
+ TRN *t= free_me;
+ free_me= free_me->next;
+
+ /*
+ ignore OOM here. it's harmless, and there's nothing we could do, anyway
+ */
+ (void)lf_hash_delete(&trid_to_committed_trn, pins, &t->trid, sizeof(TrID));
+
+ trnman_free_trn(t);
+ }
+
+ lf_hash_put_pins(pins);
+#ifdef NOT_USED
+ lf_pinbox_put_pins(trn->locks.pins);
+#endif
+
+ DBUG_RETURN(res < 0);
+}
+
+/*
+ free a trn (add to the pool, that is)
+ note - we can never really free() a TRN if there's at least one other
+ running transaction - see, e.g., how lock waits are implemented in
+ lockman.c
+ The same is true for other lock-free data structures too. We may need some
+ kind of FLUSH command to reset them all - ensuring that no transactions are
+ running. It may even be called automatically on checkpoints if no
+ transactions are running.
+*/
+void trnman_free_trn(TRN *trn)
+{
+ TRN *tmp= pool;
+
+ my_atomic_rwlock_wrlock(&LOCK_pool);
+ do
+ {
+ /*
+ without this volatile cast gcc-3.4.4 moved the assignment
+ down after the loop at -O2
+ */
+ *(TRN * volatile *)&(trn->next)= tmp;
+ } while (!my_atomic_casptr((void **)&pool, (void **)&tmp, trn));
+ my_atomic_rwlock_wrunlock(&LOCK_pool);
+}
+
+/*
+ NOTE
+ here we access the hash in a lock-free manner.
+ It's safe, a 'found' TRN can never be freed/reused before we access it.
+ In fact, it cannot be freed before 'trn' ends, because a 'found' TRN
+ can only be removed from the hash when:
+ found->commit_trid < ALL (trn->min_read_from)
+ that is, at least
+ found->commit_trid < trn->min_read_from
+ but
+ found->trid >= trn->min_read_from
+ and
+ found->commit_trid > found->trid
+
+ RETURN
+ 1 can
+ 0 cannot
+ -1 error (OOM)
+*/
+int trnman_can_read_from(TRN *trn, TrID trid)
+{
+ TRN **found;
+ my_bool can;
+ LF_REQUIRE_PINS(3);
+
+ if (trid < trn->min_read_from)
+ return 1; /* can read */
+ if (trid > trn->trid)
+ return 0; /* cannot read */
+
+ found= lf_hash_search(&trid_to_committed_trn, trn->pins, &trid, sizeof(trid));
+ if (found == NULL)
+ return 0; /* not in the hash of committed transactions = cannot read */
+ if (found == MY_ERRPTR)
+ return -1;
+
+ can= (*found)->commit_trid < trn->trid;
+ lf_hash_search_unpin(trn->pins);
+ return can;
+}
+
+/* TODO: the stubs below are waiting for savepoints to be implemented */
+
+void trnman_new_statement(TRN *trn __attribute__ ((unused)))
+{
+}
+
+void trnman_rollback_statement(TRN *trn __attribute__ ((unused)))
+{
+}
+
+
+/**
+ @brief Allocates buffers and stores in them some info about transactions
+
+ Does the allocation because the caller cannot know the size itself.
+ Memory freeing is to be done by the caller (if the "str" member of the
+ LEX_STRING is not NULL).
+ The caller has the intention of doing checkpoints.
+
+ @param[out] str_act pointer to where the allocated buffer,
+ and its size, will be put; buffer will be filled
+ with info about active transactions
+ @param[out] str_com pointer to where the allocated buffer,
+ and its size, will be put; buffer will be filled
+ with info about committed transactions
+ @param[out] min_first_undo_lsn pointer to where the minimum
+ first_undo_lsn of all transactions will be put
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com,
+ LSN *min_rec_lsn, LSN *min_first_undo_lsn)
+{
+ my_bool error;
+ TRN *trn;
+ char *ptr;
+ uint stored_transactions= 0;
+ LSN minimum_rec_lsn= LSN_MAX, minimum_first_undo_lsn= LSN_MAX;
+ DBUG_ENTER("trnman_collect_transactions");
+
+ DBUG_ASSERT((NULL == str_act->str) && (NULL == str_com->str));
+
+ /* validate the use of read_non_atomic() in general: */
+ compile_time_assert((sizeof(LSN) == 8) && (sizeof(LSN_WITH_FLAGS) == 8));
+ pthread_mutex_lock(&LOCK_trn_list);
+ str_act->length= 2 + /* number of active transactions */
+ LSN_STORE_SIZE + /* minimum of their rec_lsn */
+ TRANSID_SIZE + /* current TrID generator value */
+ (2 + /* short id */
+ 6 + /* long id */
+ LSN_STORE_SIZE + /* undo_lsn */
+#ifdef MARIA_VERSIONING /* not enabled yet */
+ LSN_STORE_SIZE + /* undo_purge_lsn */
+#endif
+ LSN_STORE_SIZE /* first_undo_lsn */
+ ) * trnman_active_transactions;
+ str_com->length= 4 + /* number of committed transactions */
+ (6 + /* long id */
+#ifdef MARIA_VERSIONING /* not enabled yet */
+ LSN_STORE_SIZE + /* undo_purge_lsn */
+#endif
+ LSN_STORE_SIZE /* first_undo_lsn */
+ ) * trnman_committed_transactions;
+ if ((NULL == (str_act->str= my_malloc(str_act->length, MYF(MY_WME)))) ||
+ (NULL == (str_com->str= my_malloc(str_com->length, MYF(MY_WME)))))
+ goto err;
+ /* First, the active transactions */
+ ptr= str_act->str + 2 + LSN_STORE_SIZE;
+ transid_store(ptr, global_trid_generator);
+ ptr+= TRANSID_SIZE;
+ for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next)
+ {
+ /*
+ trns with a short trid of 0 are not even initialized, we can ignore
+ them. trns with undo_lsn==0 have done no writes, we can ignore them
+ too. XID not needed now.
+ */
+ uint sid;
+ LSN rec_lsn, undo_lsn, first_undo_lsn;
+ if ((sid= trn->short_id) == 0)
+ {
+ /*
+ Not even inited, has done nothing. Or it is the
+ dummy_transaction_object, which does only non-transactional
+ immediate-sync operations (CREATE/DROP/RENAME/REPAIR TABLE), and so
+ can be forgotten for Checkpoint.
+ */
+ continue;
+ }
+ /* needed for low-water mark calculation */
+ if (((rec_lsn= lsn_read_non_atomic(trn->rec_lsn)) > 0) &&
+ (cmp_translog_addr(rec_lsn, minimum_rec_lsn) < 0))
+ minimum_rec_lsn= rec_lsn;
+ /*
+ trn may have logged REDOs but not yet UNDO, that's why we read rec_lsn
+ before deciding to ignore if undo_lsn==0.
+ */
+ if ((undo_lsn= trn->undo_lsn) == 0) /* trn can be forgotten */
+ continue;
+ stored_transactions++;
+ int2store(ptr, sid);
+ ptr+= 2;
+ int6store(ptr, trn->trid);
+ ptr+= 6;
+ lsn_store(ptr, undo_lsn); /* needed for rollback */
+ ptr+= LSN_STORE_SIZE;
+ /* needed for low-water mark calculation */
+ if (((first_undo_lsn= lsn_read_non_atomic(trn->first_undo_lsn)) > 0) &&
+ (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0))
+ minimum_first_undo_lsn= first_undo_lsn;
+ lsn_store(ptr, first_undo_lsn);
+ ptr+= LSN_STORE_SIZE;
+#ifdef MARIA_VERSIONING /* not enabled yet */
+ /* to know where purging should start (last delete of this trn) */
+ lsn_store(ptr, trn->undo_purge_lsn);
+ ptr+= LSN_STORE_SIZE;
+#endif
+ /**
+ @todo RECOVERY: add a comment explaining why we can dirtily read some
+ vars, inspired by the text of "assumption 8" in WL#3072
+ */
+ }
+ str_act->length= ptr - str_act->str; /* as we maybe over-estimated */
+ ptr= str_act->str;
+ DBUG_PRINT("info",("collected %u active transactions",
+ (uint)stored_transactions));
+ int2store(ptr, stored_transactions);
+ ptr+= 2;
+ /* this LSN influences how REDOs for any page can be ignored by Recovery */
+ lsn_store(ptr, minimum_rec_lsn);
+ /* one day there will also be a list of prepared transactions */
+ /* do the same for committed ones */
+ ptr= str_com->str;
+ int4store(ptr, trnman_committed_transactions);
+ ptr+= 4;
+ DBUG_PRINT("info",("collected %u committed transactions",
+ (uint)trnman_committed_transactions));
+ for (trn= committed_list_min.next; trn != &committed_list_max;
+ trn= trn->next)
+ {
+ LSN first_undo_lsn;
+ int6store(ptr, trn->trid);
+ ptr+= 6;
+#ifdef MARIA_VERSIONING /* not enabled yet */
+ lsn_store(ptr, trn->undo_purge_lsn);
+ ptr+= LSN_STORE_SIZE;
+#endif
+ first_undo_lsn= LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn);
+ if (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0)
+ minimum_first_undo_lsn= first_undo_lsn;
+ lsn_store(ptr, first_undo_lsn);
+ ptr+= LSN_STORE_SIZE;
+ }
+ /*
+ TODO: if we see there exists no transaction (active and committed) we can
+ tell the lock-free structures to do some freeing (my_free()).
+ */
+ error= 0;
+ *min_rec_lsn= minimum_rec_lsn;
+ *min_first_undo_lsn= minimum_first_undo_lsn;
+ goto end;
+err:
+ error= 1;
+end:
+ pthread_mutex_unlock(&LOCK_trn_list);
+ DBUG_RETURN(error);
+}
+
+
+TRN *trnman_recreate_trn_from_recovery(uint16 shortid, TrID longid)
+{
+ TrID old_trid_generator= global_trid_generator;
+ TRN *trn;
+ DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
+ if (unlikely((trn= trnman_new_trn(NULL, NULL, NULL)) == NULL))
+ return NULL;
+ /* deallocate excessive allocations of trnman_new_trn() */
+ global_trid_generator= old_trid_generator;
+ set_if_bigger(global_trid_generator, longid);
+ short_trid_to_active_trn[trn->short_id]= 0;
+ DBUG_ASSERT(short_trid_to_active_trn[shortid] == NULL);
+ short_trid_to_active_trn[shortid]= trn;
+ trn->trid= longid;
+ trn->short_id= shortid;
+ return trn;
+}
+
+
+TRN *trnman_get_any_trn()
+{
+ TRN *trn= active_list_min.next;
+ return (trn != &active_list_max) ? trn : NULL;
+}
diff --git a/storage/maria/trnman.h b/storage/maria/trnman.h
new file mode 100644
index 00000000000..fce02d9ab89
--- /dev/null
+++ b/storage/maria/trnman.h
@@ -0,0 +1,59 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _trnman_h
+#define _trnman_h
+
+C_MODE_START
+
+#include <lf.h>
+#include "lockman.h"
+#include "trnman_public.h"
+#include "ma_loghandler_lsn.h"
+
+/*
+ trid - 6 uchar transaction identifier. Assigned when a transaction
+ is created. Transaction can always be identified by its trid,
+ even after transaction has ended.
+
+ short_trid - 2-byte transaction identifier, identifies a running
+ transaction, is reassigned when transaction ends.
+*/
+
+/*
+ short transaction id is at the same time its identifier
+ for a lock manager - its lock owner identifier (loid)
+*/
+
+#define short_id locks.loid
+
+struct st_transaction
+{
+ LOCK_OWNER locks; /* must be the first! see short_trid_to_TRN() */
+ LF_PINS *pins;
+ TrID trid, min_read_from, commit_trid;
+ TRN *next, *prev;
+ LSN rec_lsn, undo_lsn;
+ LSN_WITH_FLAGS first_undo_lsn;
+ uint locked_tables;
+ /* Note! if locks.loid is 0, trn is NOT initialized */
+};
+
+#define TRANSACTION_LOGGED_LONG_ID ULL(0x8000000000000000)
+
+C_MODE_END
+
+#endif
+
diff --git a/storage/maria/trnman_public.h b/storage/maria/trnman_public.h
new file mode 100644
index 00000000000..b47bb18e662
--- /dev/null
+++ b/storage/maria/trnman_public.h
@@ -0,0 +1,62 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+
+/*
+ External definitions for trnman.h
+ We need to split this into two files as gcc 4.1.2 gives error if it tries
+ to include my_atomic.h in C++ code.
+*/
+
+#ifndef _trnman_public_h
+#define _trnman_public_h
+
+#include "ma_loghandler_lsn.h"
+
+C_MODE_START
+typedef uint64 TrID; /* our TrID is 6 bytes */
+typedef struct st_transaction TRN;
+
+#define SHORT_TRID_MAX 65535
+
+extern uint trnman_active_transactions, trnman_allocated_transactions;
+extern TRN dummy_transaction_object;
+
+int trnman_init(TrID);
+void trnman_destroy(void);
+TRN *trnman_new_trn(pthread_mutex_t *, pthread_cond_t *, void *);
+int trnman_end_trn(TRN *trn, my_bool commit);
+#define trnman_commit_trn(T) trnman_end_trn(T, TRUE)
+#define trnman_abort_trn(T) trnman_end_trn(T, FALSE)
+#define trnman_rollback_trn(T) trnman_end_trn(T, FALSE)
+void trnman_free_trn(TRN *trn);
+int trnman_can_read_from(TRN *trn, TrID trid);
+void trnman_new_statement(TRN *trn);
+void trnman_rollback_statement(TRN *trn);
+my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com,
+ LSN *min_rec_lsn,
+ LSN *min_first_undo_lsn);
+
+uint trnman_increment_locked_tables(TRN *trn);
+uint trnman_decrement_locked_tables(TRN *trn);
+my_bool trnman_has_locked_tables(TRN *trn);
+void trnman_reset_locked_tables(TRN *trn);
+TRN *trnman_recreate_trn_from_recovery(uint16 shortid, TrID longid);
+TRN *trnman_get_any_trn();
+#define TRANSID_SIZE 6
+#define transid_store(dst, id) int6store(dst,id)
+#define transid_korr(P) uint6korr(P)
+C_MODE_END
+#endif
diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am
new file mode 100644
index 00000000000..41be27bd014
--- /dev/null
+++ b/storage/maria/unittest/Makefile.am
@@ -0,0 +1,102 @@
+# Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include \
+ -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
+INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \
+ -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
+
+# Only reason to link with libmyisam.a here is that it's where some fulltext
+# pieces are (but soon we'll remove fulltext dependencies from Maria).
+LDADD= $(top_builddir)/unittest/mytap/libmytap.a \
+ $(top_builddir)/storage/maria/libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+noinst_PROGRAMS = ma_control_file-t trnman-t lockman2-t \
+ ma_pagecache_single_1k-t ma_pagecache_single_8k-t \
+ ma_pagecache_single_64k-t-big \
+ ma_pagecache_consist_1k-t-big \
+ ma_pagecache_consist_64k-t-big \
+ ma_pagecache_consist_1kHC-t-big \
+ ma_pagecache_consist_64kHC-t-big \
+ ma_pagecache_consist_1kRD-t-big \
+ ma_pagecache_consist_64kRD-t-big \
+ ma_pagecache_consist_1kWR-t-big \
+ ma_pagecache_consist_64kWR-t-big \
+ ma_test_loghandler-t \
+ ma_test_loghandler_multigroup-t \
+ ma_test_loghandler_multithread-t \
+ ma_test_loghandler_pagecache-t \
+ ma_test_loghandler_long-t-big \
+ ma_test_loghandler_noflush-t \
+ ma_test_loghandler_first_lsn-t \
+ ma_test_loghandler_max_lsn-t \
+ ma_test_loghandler_purge-t \
+ ma_test_loghandler_readonly-t\
+ ma_test_loghandler_nologs-t
+
+ma_test_loghandler_t_SOURCES = ma_test_loghandler-t.c ma_maria_log_cleanup.c ma_loghandler_examples.c
+ma_test_loghandler_multigroup_t_SOURCES = ma_test_loghandler_multigroup-t.c ma_maria_log_cleanup.c ma_loghandler_examples.c
+ma_test_loghandler_multithread_t_SOURCES = ma_test_loghandler_multithread-t.c ma_maria_log_cleanup.c ma_loghandler_examples.c
+ma_test_loghandler_pagecache_t_SOURCES = ma_test_loghandler_pagecache-t.c ma_maria_log_cleanup.c ma_loghandler_examples.c
+ma_test_loghandler_long_t_big_SOURCES = ma_test_loghandler-t.c ma_maria_log_cleanup.c ma_loghandler_examples.c
+ma_test_loghandler_long_t_big_CPPFLAGS = -DLONG_LOG_TEST
+ma_test_loghandler_noflush_t_SOURCES = ma_test_loghandler_noflush-t.c ma_maria_log_cleanup.c ma_loghandler_examples.c
+ma_test_loghandler_first_lsn_t_SOURCES = ma_test_loghandler_first_lsn-t.c ma_maria_log_cleanup.c ma_loghandler_examples.c
+ma_test_loghandler_max_lsn_t_SOURCES = ma_test_loghandler_max_lsn-t.c ma_maria_log_cleanup.c ma_loghandler_examples.c
+ma_test_loghandler_purge_t_SOURCES = ma_test_loghandler_purge-t.c ma_maria_log_cleanup.c ma_loghandler_examples.c
+ma_test_loghandler_readonly_t_SOURCES = ma_test_loghandler_multigroup-t.c ma_maria_log_cleanup.c ma_loghandler_examples.c
+ma_test_loghandler_readonly_t_CPPFLAGS = -DREADONLY_TEST
+ma_test_loghandler_nologs_t_SOURCES = ma_test_loghandler_nologs-t.c ma_maria_log_cleanup.c ma_loghandler_examples.c
+
+ma_pagecache_single_src = ma_pagecache_single.c test_file.c test_file.h
+ma_pagecache_consist_src = ma_pagecache_consist.c test_file.c test_file.h
+ma_pagecache_common_cppflags = -DEXTRA_DEBUG -DPAGECACHE_DEBUG -DMAIN
+
+ma_pagecache_single_1k_t_SOURCES = $(ma_pagecache_single_src)
+ma_pagecache_single_8k_t_SOURCES = $(ma_pagecache_single_src)
+ma_pagecache_single_64k_t_big_SOURCES = $(ma_pagecache_single_src)
+ma_pagecache_single_1k_t_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=1024
+ma_pagecache_single_8k_t_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=8192
+ma_pagecache_single_64k_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=65536
+
+ma_pagecache_consist_1k_t_big_SOURCES = $(ma_pagecache_consist_src)
+ma_pagecache_consist_1k_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=1024
+ma_pagecache_consist_64k_t_big_SOURCES = $(ma_pagecache_consist_src)
+ma_pagecache_consist_64k_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=65536
+
+ma_pagecache_consist_1kHC_t_big_SOURCES = $(ma_pagecache_consist_src)
+ma_pagecache_consist_1kHC_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_HIGH_CONCURENCY
+ma_pagecache_consist_64kHC_t_big_SOURCES = $(ma_pagecache_consist_src)
+ma_pagecache_consist_64kHC_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_HIGH_CONCURENCY
+
+ma_pagecache_consist_1kRD_t_big_SOURCES = $(ma_pagecache_consist_src)
+ma_pagecache_consist_1kRD_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_READERS
+ma_pagecache_consist_64kRD_t_big_SOURCES = $(ma_pagecache_consist_src)
+ma_pagecache_consist_64kRD_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_READERS
+
+ma_pagecache_consist_1kWR_t_big_SOURCES = $(ma_pagecache_consist_src)
+ma_pagecache_consist_1kWR_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_WRITERS
+ma_pagecache_consist_64kWR_t_big_SOURCES = $(ma_pagecache_consist_src)
+ma_pagecache_consist_64kWR_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_WRITERS
+
+# the generic lock manager may not be used in the end and lockman1-t crashes,
+# so we don't build lockman-t and lockman1-t
+CLEANFILES = maria_log_control page_cache_test_file_1 \
+ maria_log.????????
+
diff --git a/storage/maria/unittest/lockman-t.c b/storage/maria/unittest/lockman-t.c
new file mode 100644
index 00000000000..b36cc2926c5
--- /dev/null
+++ b/storage/maria/unittest/lockman-t.c
@@ -0,0 +1,309 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ lockman for row and table locks
+*/
+
+/* #define EXTRA_VERBOSE */
+
+#include <tap.h>
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <my_atomic.h>
+#include <lf.h>
+#include "../lockman.h"
+
+#define Nlos 100
+LOCK_OWNER loarray[Nlos];
+pthread_mutex_t mutexes[Nlos];
+pthread_cond_t conds[Nlos];
+LOCKMAN lockman;
+
+#ifndef EXTRA_VERBOSE
+#define print_lockhash(X) /* no-op */
+#define DIAG(X) /* no-op */
+#else
+#define DIAG(X) diag X
+#endif
+
+LOCK_OWNER *loid2lo(uint16 loid)
+{
+ return loarray+loid-1;
+}
+
+#define unlock_all(O) diag("lo" #O "> release all locks"); \
+ lockman_release_locks(&lockman, loid2lo(O));print_lockhash(&lockman)
+#define test_lock(O, R, L, S, RES) \
+ ok(lockman_getlock(&lockman, loid2lo(O), R, L) == RES, \
+ "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \
+ print_lockhash(&lockman)
+#define lock_ok_a(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK)
+#define lock_ok_i(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE)
+#define lock_ok_l(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE)
+#define lock_conflict(O, R, L) \
+ test_lock(O, R, L, "cannot ", DIDNT_GET_THE_LOCK);
+
+void test_lockman_simple()
+{
+ /* simple */
+ lock_ok_a(1, 1, S);
+ lock_ok_i(2, 2, IS);
+ lock_ok_i(1, 2, IX);
+ /* lock escalation */
+ lock_ok_a(1, 1, X);
+ lock_ok_i(2, 2, IX);
+ /* failures */
+ lock_conflict(2, 1, X);
+ unlock_all(2);
+ lock_ok_a(1, 2, S);
+ lock_ok_a(1, 2, IS);
+ lock_ok_a(1, 2, LS);
+ lock_ok_i(1, 3, IX);
+ lock_ok_a(2, 3, LS);
+ lock_ok_i(1, 3, IX);
+ lock_ok_l(2, 3, IS);
+ unlock_all(1);
+ unlock_all(2);
+
+ lock_ok_i(1, 1, IX);
+ lock_conflict(2, 1, S);
+ lock_ok_a(1, 1, LS);
+ unlock_all(1);
+ unlock_all(2);
+
+ lock_ok_i(1, 1, IX);
+ lock_ok_a(2, 1, LS);
+ lock_ok_a(1, 1, LS);
+ lock_ok_i(1, 1, IX);
+ lock_ok_i(3, 1, IS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+
+ lock_ok_i(1, 4, IS);
+ lock_ok_i(2, 4, IS);
+ lock_ok_i(3, 4, IS);
+ lock_ok_a(3, 4, LS);
+ lock_ok_i(4, 4, IS);
+ lock_conflict(4, 4, IX);
+ lock_conflict(2, 4, IX);
+ lock_ok_a(1, 4, LS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+ unlock_all(4);
+
+ lock_ok_i(1, 1, IX);
+ lock_ok_i(2, 1, IX);
+ lock_conflict(1, 1, S);
+ lock_conflict(2, 1, X);
+ unlock_all(1);
+ unlock_all(2);
+}
+
+int rt_num_threads;
+int litmus;
+int thread_number= 0, timeouts= 0;
+void run_test(const char *test, pthread_handler handler, int n, int m)
+{
+ pthread_t *threads;
+ ulonglong now= my_getsystime();
+ int i;
+
+ thread_number= timeouts= 0;
+ litmus= 0;
+
+ threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0));
+ if (!threads)
+ {
+ diag("Out of memory");
+ abort();
+ }
+
+ diag("Running %s with %d threads, %d iterations... ", test, n, m);
+ rt_num_threads= n;
+ for (i= 0; i < n ; i++)
+ if (pthread_create(threads+i, 0, handler, &m))
+ {
+ diag("Could not create thread");
+ abort();
+ }
+ for (i= 0 ; i < n ; i++)
+ pthread_join(threads[i], 0);
+ now= my_getsystime()-now;
+ ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus);
+ my_free((void*)threads, MYF(0));
+}
+
+pthread_mutex_t rt_mutex;
+int Nrows= 100;
+int Ntables= 10;
+int table_lock_ratio= 10;
+enum lockman_lock_type lock_array[6]= {S, X, LS, LX, IS, IX};
+char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"};
+char *res2str[4]= {
+ "DIDN'T GET THE LOCK",
+ "GOT THE LOCK",
+ "GOT THE LOCK NEED TO LOCK A SUBRESOURCE",
+ "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"};
+pthread_handler_t test_lockman(void *arg)
+{
+ int m= (*(int *)arg);
+ uint x, loid, row, table, res, locklevel, timeout= 0;
+ LOCK_OWNER *lo;
+
+ pthread_mutex_lock(&rt_mutex);
+ loid= ++thread_number;
+ pthread_mutex_unlock(&rt_mutex);
+ lo= loid2lo(loid);
+
+ for (x= ((int)(intptr)(&m)); m > 0; m--)
+ {
+ x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */
+ row= x % Nrows + Ntables;
+ table= row % Ntables;
+ locklevel= (x/Nrows) & 3;
+ if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0)
+ { /* table lock */
+ res= lockman_getlock(&lockman, lo, table, lock_array[locklevel]);
+ DIAG(("loid %2d, table %d, lock %s, res %s", loid, table,
+ lock2str[locklevel], res2str[res]));
+ if (res == DIDNT_GET_THE_LOCK)
+ {
+ lockman_release_locks(&lockman, lo);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ DBUG_ASSERT(res == GOT_THE_LOCK);
+ }
+ else
+ { /* row lock */
+ locklevel&= 1;
+ res= lockman_getlock(&lockman, lo, table, lock_array[locklevel + 4]);
+ DIAG(("loid %2d, row %d, lock %s, res %s", loid, row,
+ lock2str[locklevel+4], res2str[res]));
+ switch (res)
+ {
+ case DIDNT_GET_THE_LOCK:
+ lockman_release_locks(&lockman, lo);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ case GOT_THE_LOCK:
+ continue;
+ case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE:
+ /* not implemented, so take a regular lock */
+ case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE:
+ res= lockman_getlock(&lockman, lo, row, lock_array[locklevel]);
+ DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row,
+ lock2str[locklevel], res2str[res]));
+ if (res == DIDNT_GET_THE_LOCK)
+ {
+ lockman_release_locks(&lockman, lo);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ DBUG_ASSERT(res == GOT_THE_LOCK);
+ continue;
+ default:
+ DBUG_ASSERT(0);
+ }
+ }
+ }
+
+ lockman_release_locks(&lockman, lo);
+
+ pthread_mutex_lock(&rt_mutex);
+ rt_num_threads--;
+ timeouts+= timeout;
+ if (!rt_num_threads)
+ diag("number of timeouts: %d", timeouts);
+ pthread_mutex_unlock(&rt_mutex);
+
+ return 0;
+}
+
+int main()
+{
+ int i;
+
+ my_init();
+ pthread_mutex_init(&rt_mutex, 0);
+
+ plan(35);
+
+ if (my_atomic_initialize())
+ return exit_status();
+
+
+ lockman_init(&lockman, &loid2lo, 50);
+
+ for (i= 0; i < Nlos; i++)
+ {
+ loarray[i].pins= lf_alloc_get_pins(&lockman.alloc);
+ loarray[i].all_locks= 0;
+ loarray[i].waiting_for= 0;
+ pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST);
+ pthread_cond_init (&conds[i], 0);
+ loarray[i].mutex= &mutexes[i];
+ loarray[i].cond= &conds[i];
+ loarray[i].loid= i+1;
+ }
+
+ test_lockman_simple();
+
+#define CYCLES 10000
+#define THREADS Nlos /* don't change this line */
+
+ /* mixed load, stress-test with random locks */
+ Nrows= 100;
+ Ntables= 10;
+ table_lock_ratio= 10;
+ run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES);
+
+ /* "real-life" simulation - many rows, no table locks */
+ Nrows= 1000000;
+ Ntables= 10;
+ table_lock_ratio= 0;
+ run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10);
+
+ for (i= 0; i < Nlos; i++)
+ {
+ lockman_release_locks(&lockman, &loarray[i]);
+ pthread_mutex_destroy(loarray[i].mutex);
+ pthread_cond_destroy(loarray[i].cond);
+ lf_pinbox_put_pins(loarray[i].pins);
+ }
+
+ {
+ ulonglong now= my_getsystime();
+ lockman_destroy(&lockman);
+ now= my_getsystime()-now;
+ diag("lockman_destroy: %g secs", ((double)now)/1e7);
+ }
+
+ pthread_mutex_destroy(&rt_mutex);
+ my_end(0);
+ return exit_status();
+}
+
diff --git a/storage/maria/unittest/lockman1-t.c b/storage/maria/unittest/lockman1-t.c
new file mode 100644
index 00000000000..7f6196af9ff
--- /dev/null
+++ b/storage/maria/unittest/lockman1-t.c
@@ -0,0 +1,335 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ lockman for row locks, tablockman for table locks
+*/
+
+/* #define EXTRA_VERBOSE */
+
+#include <tap.h>
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <my_atomic.h>
+#include <lf.h>
+#include "../lockman.h"
+#include "../tablockman.h"
+
+#define Nlos 100
+#define Ntbls 10
+LOCK_OWNER loarray[Nlos];
+TABLE_LOCK_OWNER loarray1[Nlos];
+pthread_mutex_t mutexes[Nlos];
+pthread_cond_t conds[Nlos];
+LOCKED_TABLE ltarray[Ntbls];
+LOCKMAN lockman;
+TABLOCKMAN tablockman;
+
+#ifndef EXTRA_VERBOSE
+#define print_lo1(X) /* no-op */
+#define DIAG(X) /* no-op */
+#else
+#define DIAG(X) diag X
+#endif
+
+LOCK_OWNER *loid2lo(uint16 loid)
+{
+ return loarray+loid-1;
+}
+TABLE_LOCK_OWNER *loid2lo1(uint16 loid)
+{
+ return loarray1+loid-1;
+}
+
+#define unlock_all(O) diag("lo" #O "> release all locks"); \
+ tablockman_release_locks(&tablockman, loid2lo1(O));
+#define test_lock(O, R, L, S, RES) \
+ ok(tablockman_getlock(&tablockman, loid2lo1(O), &ltarray[R], L) == RES, \
+ "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \
+ print_lo1(loid2lo1(O));
+#define lock_ok_a(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK)
+#define lock_ok_i(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE)
+#define lock_ok_l(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE)
+#define lock_conflict(O, R, L) \
+ test_lock(O, R, L, "cannot ", LOCK_TIMEOUT);
+
+void test_tablockman_simple()
+{
+ /* simple */
+ lock_ok_a(1, 1, S);
+ lock_ok_i(2, 2, IS);
+ lock_ok_i(1, 2, IX);
+ /* lock escalation */
+ lock_ok_a(1, 1, X);
+ lock_ok_i(2, 2, IX);
+ /* failures */
+ lock_conflict(2, 1, X);
+ unlock_all(2);
+ lock_ok_a(1, 2, S);
+ lock_ok_a(1, 2, IS);
+ lock_ok_a(1, 2, LS);
+ lock_ok_i(1, 3, IX);
+ lock_ok_a(2, 3, LS);
+ lock_ok_i(1, 3, IX);
+ lock_ok_l(2, 3, IS);
+ unlock_all(1);
+ unlock_all(2);
+
+ lock_ok_i(1, 1, IX);
+ lock_conflict(2, 1, S);
+ lock_ok_a(1, 1, LS);
+ unlock_all(1);
+ unlock_all(2);
+
+ lock_ok_i(1, 1, IX);
+ lock_ok_a(2, 1, LS);
+ lock_ok_a(1, 1, LS);
+ lock_ok_i(1, 1, IX);
+ lock_ok_i(3, 1, IS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+
+ lock_ok_i(1, 4, IS);
+ lock_ok_i(2, 4, IS);
+ lock_ok_i(3, 4, IS);
+ lock_ok_a(3, 4, LS);
+ lock_ok_i(4, 4, IS);
+ lock_conflict(4, 4, IX);
+ lock_conflict(2, 4, IX);
+ lock_ok_a(1, 4, LS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+ unlock_all(4);
+
+ lock_ok_i(1, 1, IX);
+ lock_ok_i(2, 1, IX);
+ lock_conflict(1, 1, S);
+ lock_conflict(2, 1, X);
+ unlock_all(1);
+ unlock_all(2);
+}
+
+int rt_num_threads;
+int litmus;
+int thread_number= 0, timeouts= 0;
+void run_test(const char *test, pthread_handler handler, int n, int m)
+{
+ pthread_t *threads;
+ ulonglong now= my_getsystime();
+ int i;
+
+ thread_number= timeouts= 0;
+ litmus= 0;
+
+ threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0));
+ if (!threads)
+ {
+ diag("Out of memory");
+ abort();
+ }
+
+ diag("Running %s with %d threads, %d iterations... ", test, n, m);
+ rt_num_threads= n;
+ for (i= 0; i < n ; i++)
+ if (pthread_create(threads+i, 0, handler, &m))
+ {
+ diag("Could not create thread");
+ abort();
+ }
+ for (i= 0 ; i < n ; i++)
+ pthread_join(threads[i], 0);
+ now= my_getsystime()-now;
+ ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus);
+ my_free((void*)threads, MYF(0));
+}
+
+pthread_mutex_t rt_mutex;
+int Nrows= 100;
+int Ntables= 10;
+int table_lock_ratio= 10;
+enum lockman_lock_type lock_array[6]= {S, X, LS, LX, IS, IX};
+char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"};
+char *res2str[]= {
+ "DIDN'T GET THE LOCK",
+ "OUT OF MEMORY",
+ "DEADLOCK",
+ "LOCK TIMEOUT",
+ "GOT THE LOCK",
+ "GOT THE LOCK NEED TO LOCK A SUBRESOURCE",
+ "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"};
+pthread_handler_t test_lockman(void *arg)
+{
+ int m= (*(int *)arg);
+ uint x, loid, row, table, res, locklevel, timeout= 0;
+ LOCK_OWNER *lo; TABLE_LOCK_OWNER *lo1; DBUG_ASSERT(Ntables <= Ntbls);
+
+ pthread_mutex_lock(&rt_mutex);
+ loid= ++thread_number;
+ pthread_mutex_unlock(&rt_mutex);
+ lo= loid2lo(loid); lo1= loid2lo1(loid);
+
+ for (x= ((int)(intptr)(&m)); m > 0; m--)
+ {
+ x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */
+ row= x % Nrows + Ntables;
+ table= row % Ntables;
+ locklevel= (x/Nrows) & 3;
+ if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0)
+ { /* table lock */
+ res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]);
+ DIAG(("loid %2d, table %d, lock %s, res %s", loid, table,
+ lock2str[locklevel], res2str[res]));
+ if (res < GOT_THE_LOCK)
+ {
+ lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ DBUG_ASSERT(res == GOT_THE_LOCK);
+ }
+ else
+ { /* row lock */
+ locklevel&= 1;
+ res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel + 4]);
+ DIAG(("loid %2d, row %d, lock %s, res %s", loid, row,
+ lock2str[locklevel+4], res2str[res]));
+ switch (res)
+ {
+ case GOT_THE_LOCK:
+ continue;
+ case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE:
+ /* not implemented, so take a regular lock */
+ case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE:
+ res= lockman_getlock(&lockman, lo, row, lock_array[locklevel]);
+ DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row,
+ lock2str[locklevel], res2str[res]));
+ if (res == DIDNT_GET_THE_LOCK)
+ {
+ lockman_release_locks(&lockman, lo);
+ tablockman_release_locks(&tablockman, lo1);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ DBUG_ASSERT(res == GOT_THE_LOCK);
+ continue;
+ default:
+ lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ }
+ }
+
+ lockman_release_locks(&lockman, lo);
+ tablockman_release_locks(&tablockman, lo1);
+
+ pthread_mutex_lock(&rt_mutex);
+ rt_num_threads--;
+ timeouts+= timeout;
+ if (!rt_num_threads)
+ diag("number of timeouts: %d", timeouts);
+ pthread_mutex_unlock(&rt_mutex);
+
+ return 0;
+}
+
+int main()
+{
+ int i;
+
+ my_init();
+ pthread_mutex_init(&rt_mutex, 0);
+
+ plan(35);
+
+ if (my_atomic_initialize())
+ return exit_status();
+
+
+ lockman_init(&lockman, &loid2lo, 50);
+ tablockman_init(&tablockman, &loid2lo1, 50);
+
+ for (i= 0; i < Nlos; i++)
+ {
+ pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST);
+ pthread_cond_init (&conds[i], 0);
+
+ loarray[i].pins= lf_alloc_get_pins(&lockman.alloc);
+ loarray[i].all_locks= 0;
+ loarray[i].waiting_for= 0;
+ loarray[i].mutex= &mutexes[i];
+ loarray[i].cond= &conds[i];
+ loarray[i].loid= i+1;
+
+ loarray1[i].active_locks= 0;
+ loarray1[i].waiting_lock= 0;
+ loarray1[i].waiting_for= 0;
+ loarray1[i].mutex= &mutexes[i];
+ loarray1[i].cond= &conds[i];
+ loarray1[i].loid= i+1;
+ }
+
+ for (i= 0; i < Ntbls; i++)
+ {
+ tablockman_init_locked_table(ltarray+i, Nlos);
+ }
+
+ test_tablockman_simple();
+
+#define CYCLES 10000
+#define THREADS Nlos /* don't change this line */
+
+ /* mixed load, stress-test with random locks */
+ Nrows= 100;
+ Ntables= 10;
+ table_lock_ratio= 10;
+ run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES);
+
+ /* "real-life" simulation - many rows, no table locks */
+ Nrows= 1000000;
+ Ntables= 10;
+ table_lock_ratio= 0;
+ run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10);
+
+ for (i= 0; i < Nlos; i++)
+ {
+ lockman_release_locks(&lockman, &loarray[i]);
+ pthread_mutex_destroy(loarray[i].mutex);
+ pthread_cond_destroy(loarray[i].cond);
+ lf_pinbox_put_pins(loarray[i].pins);
+ }
+
+ {
+ ulonglong now= my_getsystime();
+ lockman_destroy(&lockman);
+ now= my_getsystime()-now;
+ diag("lockman_destroy: %g secs", ((double)now)/1e7);
+ }
+
+ pthread_mutex_destroy(&rt_mutex);
+ my_end(0);
+ return exit_status();
+}
+
diff --git a/storage/maria/unittest/lockman2-t.c b/storage/maria/unittest/lockman2-t.c
new file mode 100644
index 00000000000..584c63b4537
--- /dev/null
+++ b/storage/maria/unittest/lockman2-t.c
@@ -0,0 +1,362 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ tablockman for row and table locks
+*/
+
+/* #define EXTRA_VERBOSE */
+
+#include <tap.h>
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <my_atomic.h>
+#include <lf.h>
+#include "../tablockman.h"
+
+#define Nlos 100
+#define Ntbls 110
+TABLE_LOCK_OWNER loarray1[Nlos];
+pthread_mutex_t mutexes[Nlos];
+pthread_cond_t conds[Nlos];
+LOCKED_TABLE ltarray[Ntbls];
+TABLOCKMAN tablockman;
+
+#ifndef EXTRA_VERBOSE
+#define print_lo1(X) /* no-op */
+#define DIAG(X) /* no-op */
+#else
+#define DIAG(X) diag X
+#endif
+
+TABLE_LOCK_OWNER *loid2lo1(uint16 loid)
+{
+ return loarray1+loid-1;
+}
+
+#define unlock_all(O) diag("lo" #O "> release all locks"); \
+ tablockman_release_locks(&tablockman, loid2lo1(O));
+#define test_lock(O, R, L, S, RES) \
+ ok(tablockman_getlock(&tablockman, loid2lo1(O), &ltarray[R], L) == RES, \
+ "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \
+ print_lo1(loid2lo1(O));
+#define lock_ok_a(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK)
+#define lock_ok_i(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE)
+#define lock_ok_l(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE)
+#define lock_conflict(O, R, L) \
+ test_lock(O, R, L, "cannot ", LOCK_TIMEOUT);
+
+void test_tablockman_simple()
+{
+ /* simple */
+ lock_ok_a(1, 1, S);
+ lock_ok_i(2, 2, IS);
+ lock_ok_i(1, 2, IX);
+ /* lock escalation */
+ lock_ok_a(1, 1, X);
+ lock_ok_i(2, 2, IX);
+ /* failures */
+ lock_conflict(2, 1, X);
+ unlock_all(2);
+ lock_ok_a(1, 2, S);
+ lock_ok_a(1, 2, IS);
+ lock_ok_a(1, 2, LS);
+ lock_ok_i(1, 3, IX);
+ lock_ok_a(2, 3, LS);
+ lock_ok_i(1, 3, IX);
+ lock_ok_l(2, 3, IS);
+ unlock_all(1);
+ unlock_all(2);
+
+ lock_ok_i(1, 1, IX);
+ lock_conflict(2, 1, S);
+ lock_ok_a(1, 1, LS);
+ unlock_all(1);
+ unlock_all(2);
+
+ lock_ok_i(1, 1, IX);
+ lock_ok_a(2, 1, LS);
+ lock_ok_a(1, 1, LS);
+ lock_ok_i(1, 1, IX);
+ lock_ok_i(3, 1, IS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+
+ lock_ok_i(1, 4, IS);
+ lock_ok_i(2, 4, IS);
+ lock_ok_i(3, 4, IS);
+ lock_ok_a(3, 4, LS);
+ lock_ok_i(4, 4, IS);
+ lock_conflict(4, 4, IX);
+ lock_conflict(2, 4, IX);
+ lock_ok_a(1, 4, LS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+ unlock_all(4);
+
+ lock_ok_i(1, 1, IX);
+ lock_ok_i(2, 1, IX);
+ lock_conflict(1, 1, S);
+ lock_conflict(2, 1, X);
+ unlock_all(1);
+ unlock_all(2);
+
+ lock_ok_i(1, 1, IS);
+ lock_conflict(2, 1, X);
+ lock_conflict(3, 1, IS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+
+ lock_ok_a(1, 1, S);
+ lock_conflict(2, 1, IX);
+ lock_conflict(3, 1, IS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+}
+
+int rt_num_threads;
+int litmus;
+int thread_number= 0, timeouts= 0;
+void run_test(const char *test, pthread_handler handler, int n, int m)
+{
+ pthread_t *threads;
+ ulonglong now= my_getsystime();
+ int i;
+
+ thread_number= timeouts= 0;
+ litmus= 0;
+
+ threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0));
+ if (!threads)
+ {
+ diag("Out of memory");
+ abort();
+ }
+
+ diag("Running %s with %d threads, %d iterations... ", test, n, m);
+ rt_num_threads= n;
+ for (i= 0; i < n ; i++)
+ if (pthread_create(threads+i, 0, handler, &m))
+ {
+ diag("Could not create thread");
+ abort();
+ }
+ for (i= 0 ; i < n ; i++)
+ pthread_join(threads[i], 0);
+ now= my_getsystime()-now;
+ ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus);
+ my_free((void*)threads, MYF(0));
+}
+
+static void reinit_tlo(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo)
+{
+#ifdef NOT_USED_YET
+ TABLE_LOCK_OWNER backup= *lo;
+#endif
+
+ tablockman_release_locks(lm, lo);
+#ifdef NOT_USED_YET
+ pthread_mutex_destroy(lo->mutex);
+ pthread_cond_destroy(lo->cond);
+ bzero(lo, sizeof(*lo));
+
+ lo->mutex= backup.mutex;
+ lo->cond= backup.cond;
+ lo->loid= backup.loid;
+ pthread_mutex_init(lo->mutex, MY_MUTEX_INIT_FAST);
+ pthread_cond_init(lo->cond, 0);
+#endif
+}
+
+pthread_mutex_t rt_mutex;
+int Nrows= 100;
+int Ntables= 10;
+int table_lock_ratio= 10;
+enum lockman_lock_type lock_array[6]= {S, X, LS, LX, IS, IX};
+const char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"};
+const char *res2str[]= {
+ 0,
+ "OUT OF MEMORY",
+ "DEADLOCK",
+ "LOCK TIMEOUT",
+ "GOT THE LOCK",
+ "GOT THE LOCK NEED TO LOCK A SUBRESOURCE",
+ "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"};
+
+pthread_handler_t test_lockman(void *arg)
+{
+ int m= (*(int *)arg);
+ uint x, loid, row, table, res, locklevel, timeout= 0;
+ TABLE_LOCK_OWNER *lo1;
+ DBUG_ASSERT(Ntables <= Ntbls);
+ DBUG_ASSERT(Nrows + Ntables <= Ntbls);
+
+ pthread_mutex_lock(&rt_mutex);
+ loid= ++thread_number;
+ pthread_mutex_unlock(&rt_mutex);
+ lo1= loid2lo1(loid);
+
+ for (x= ((int)(intptr)(&m)); m > 0; m--)
+ {
+ /* three prime numbers */
+ x= (uint) ((x*LL(3628273133) + LL(1500450271)) % LL(9576890767));
+ row= x % Nrows + Ntables;
+ table= row % Ntables;
+ locklevel= (x/Nrows) & 3;
+ if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0)
+ {
+ /* table lock */
+ res= tablockman_getlock(&tablockman, lo1, ltarray+table,
+ lock_array[locklevel]);
+ DIAG(("loid %2d, table %d, lock %s, res %s", loid, table,
+ lock2str[locklevel], res2str[res]));
+ if (res < GOT_THE_LOCK)
+ {
+ reinit_tlo(&tablockman, lo1);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ DBUG_ASSERT(res == GOT_THE_LOCK);
+ }
+ else
+ { /* row lock */
+ locklevel&= 1;
+ res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel + 4]);
+ DIAG(("loid %2d, row %d, lock %s, res %s", loid, row,
+ lock2str[locklevel+4], res2str[res]));
+ switch (res)
+ {
+ case GOT_THE_LOCK:
+ continue;
+ case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE:
+ /* not implemented, so take a regular lock */
+ case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE:
+ res= tablockman_getlock(&tablockman, lo1, ltarray+row, lock_array[locklevel]);
+ DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row,
+ lock2str[locklevel], res2str[res]));
+ if (res < GOT_THE_LOCK)
+ {
+ reinit_tlo(&tablockman, lo1);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ DBUG_ASSERT(res == GOT_THE_LOCK);
+ continue;
+ default:
+ reinit_tlo(&tablockman, lo1);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ }
+ }
+
+ reinit_tlo(&tablockman, lo1);
+
+ pthread_mutex_lock(&rt_mutex);
+ rt_num_threads--;
+ timeouts+= timeout;
+ if (!rt_num_threads)
+ diag("number of timeouts: %d", timeouts);
+ pthread_mutex_unlock(&rt_mutex);
+
+ return 0;
+}
+
+int main(int argc __attribute__((unused)), char **argv)
+{
+ int i;
+ MY_INIT(argv[0]);
+
+ my_init();
+ pthread_mutex_init(&rt_mutex, 0);
+
+ plan(40);
+
+ if (my_atomic_initialize())
+ return exit_status();
+
+
+ tablockman_init(&tablockman, &loid2lo1, 50);
+
+ for (i= 0; i < Nlos; i++)
+ {
+ pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST);
+ pthread_cond_init (&conds[i], 0);
+
+ loarray1[i].active_locks= 0;
+ loarray1[i].waiting_lock= 0;
+ loarray1[i].waiting_for= 0;
+ loarray1[i].mutex= &mutexes[i];
+ loarray1[i].cond= &conds[i];
+ loarray1[i].loid= i+1;
+ }
+
+ for (i= 0; i < Ntbls; i++)
+ {
+ tablockman_init_locked_table(ltarray+i, Nlos);
+ }
+
+ test_tablockman_simple();
+
+#define CYCLES 10000
+#define THREADS Nlos /* don't change this line */
+
+ /* mixed load, stress-test with random locks */
+ Nrows= 100;
+ Ntables= 10;
+ table_lock_ratio= 10;
+ run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES);
+#if 0
+ /* "real-life" simulation - many rows, no table locks */
+ Nrows= 1000000;
+ Ntables= 10;
+ table_lock_ratio= 0;
+ run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10);
+#endif
+ for (i= 0; i < Nlos; i++)
+ {
+ tablockman_release_locks(&tablockman, &loarray1[i]);
+ pthread_mutex_destroy(loarray1[i].mutex);
+ pthread_cond_destroy(loarray1[i].cond);
+ }
+
+ {
+ ulonglong now= my_getsystime();
+ for (i= 0; i < Ntbls; i++)
+ {
+ tablockman_destroy_locked_table(ltarray+i);
+ }
+ tablockman_destroy(&tablockman);
+ now= my_getsystime()-now;
+ diag("lockman_destroy: %g secs", ((double)now)/1e7);
+ }
+
+ pthread_mutex_destroy(&rt_mutex);
+ my_end(0);
+ return exit_status();
+}
+
diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c
new file mode 100644
index 00000000000..472748e491a
--- /dev/null
+++ b/storage/maria/unittest/ma_control_file-t.c
@@ -0,0 +1,583 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Unit test of the control file module of the Maria engine WL#3234 */
+
+/*
+ Note that it is not possible to test the durability of the write (can't
+ pull the plug programmatically :)
+*/
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <tap.h>
+
+#ifndef WITH_MARIA_STORAGE_ENGINE
+/*
+ If Maria is not compiled in, normally we don't come to building this test.
+*/
+#error "Maria engine is not compiled in, test cannot be built"
+#endif
+
+#include "maria.h"
+#include "../../../storage/maria/maria_def.h"
+#include <my_getopt.h>
+
+#define EXTRACT_DEFINITIONS
+#include "../ma_control_file.c"
+#undef EXTRACT_DEFINITIONS
+
+char file_name[FN_REFLEN];
+
+/* The values we'll set and expect the control file module to return */
+LSN expect_checkpoint_lsn;
+uint32 expect_logno;
+
+static int delete_file(myf my_flags);
+/*
+ Those are test-specific wrappers around the module's API functions: after
+ calling the module's API functions they perform checks on the result.
+*/
+static int close_file(); /* wraps ma_control_file_end */
+static int create_or_open_file(); /* wraps ma_control_file_open_or_create */
+static int write_file(); /* wraps ma_control_file_write_and_force */
+
+/* Tests */
+static int test_one_log();
+static int test_five_logs();
+static int test_3_checkpoints_and_2_logs();
+static int test_binary_content();
+static int test_start_stop();
+static int test_2_open_and_2_close();
+static int test_bad_magic_string();
+static int test_bad_checksum();
+static int test_bad_hchecksum();
+static int test_future_size();
+static int test_bad_blocksize();
+static int test_bad_size();
+
+/* Utility */
+static int verify_module_values_match_expected();
+static int verify_module_values_are_impossible();
+static void usage();
+static void get_options(int argc, char *argv[]);
+
+/*
+ If "expr" is FALSE, this macro will make the function print a diagnostic
+ message and immediately return 1.
+ This is inspired from assert() but does not crash the binary (sometimes we
+ may want to see how other tests go even if one fails).
+ RET_ERR means "return error".
+*/
+
+#define RET_ERR_UNLESS(expr) \
+ {if (!(expr)) {diag("line %d: failure: '%s'", __LINE__, #expr); return 1;}}
+
+
+/* Used to ignore error messages from ma_control_file_create_or_open */
+
+static int my_ignore_message(uint error __attribute__((unused)),
+ const char *str __attribute__((unused)),
+ myf MyFlags __attribute__((unused)))
+{
+ DBUG_ENTER("my_message_no_curses");
+ DBUG_PRINT("enter",("message: %s",str));
+ DBUG_RETURN(0);
+}
+
+int (*default_error_handler_hook)(uint my_err, const char *str,
+ myf MyFlags) = 0;
+
+
+/* like ma_control_file_create_or_open(), but without error messages */
+
+static CONTROL_FILE_ERROR local_ma_control_file_create_or_open()
+{
+ CONTROL_FILE_ERROR error;
+ error_handler_hook= my_ignore_message;
+ error= ma_control_file_create_or_open();
+ error_handler_hook= default_error_handler_hook;
+ return error;
+}
+
+
+
+int main(int argc,char *argv[])
+{
+ MY_INIT(argv[0]);
+ my_init();
+
+ maria_data_root= ".";
+ default_error_handler_hook= error_handler_hook;
+
+ plan(12);
+
+ diag("Unit tests for control file");
+
+ get_options(argc,argv);
+
+ diag("Deleting control file at startup, if there is an old one");
+ RET_ERR_UNLESS(0 == delete_file(0)); /* if fails, can't continue */
+
+ diag("Tests of normal conditions");
+ ok(0 == test_one_log(), "test of creating one log");
+ ok(0 == test_five_logs(), "test of creating five logs");
+ ok(0 == test_3_checkpoints_and_2_logs(),
+ "test of creating three checkpoints and two logs");
+ ok(0 == test_binary_content(), "test of the binary content of the file");
+ ok(0 == test_start_stop(), "test of multiple starts and stops");
+ diag("Tests of abnormal conditions");
+ ok(0 == test_2_open_and_2_close(),
+ "test of two open and two close (strange call sequence)");
+ ok(0 == test_bad_magic_string(), "test of bad magic string");
+ ok(0 == test_bad_checksum(), "test of bad checksum");
+ ok(0 == test_bad_hchecksum(), "test of bad hchecksum");
+ ok(0 == test_future_size(), "test of ability to handlr future versions");
+ ok(0 == test_bad_blocksize(), "test of bad blocksize");
+ ok(0 == test_bad_size(), "test of too small/big file");
+
+ return exit_status();
+}
+
+
+static int delete_file(myf my_flags)
+{
+ RET_ERR_UNLESS(fn_format(file_name, CONTROL_FILE_BASE_NAME,
+ maria_data_root, "", MYF(MY_WME)) != NullS);
+ /*
+ Maybe file does not exist, ignore error.
+ The error will however be printed on stderr.
+ */
+ my_delete(file_name, my_flags);
+ expect_checkpoint_lsn= LSN_IMPOSSIBLE;
+ expect_logno= FILENO_IMPOSSIBLE;
+
+ return 0;
+}
+
+/*
+ Verifies that global values last_checkpoint_lsn and last_logno (belonging
+ to the module) match what we expect.
+*/
+static int verify_module_values_match_expected()
+{
+ RET_ERR_UNLESS(last_logno == expect_logno);
+ RET_ERR_UNLESS(last_checkpoint_lsn ==
+ expect_checkpoint_lsn);
+ return 0;
+}
+
+
+/*
+ Verifies that global values last_checkpoint_lsn and last_logno (belonging
+ to the module) are impossible (this is used when the file has been closed).
+*/
+static int verify_module_values_are_impossible()
+{
+ RET_ERR_UNLESS(last_logno == FILENO_IMPOSSIBLE);
+ RET_ERR_UNLESS(last_checkpoint_lsn ==
+ LSN_IMPOSSIBLE);
+ return 0;
+}
+
+
+static int close_file()
+{
+ /* Simulate shutdown */
+ ma_control_file_end();
+ /* Verify amnesia */
+ RET_ERR_UNLESS(verify_module_values_are_impossible() == 0);
+ return 0;
+}
+
+static int create_or_open_file()
+{
+ RET_ERR_UNLESS(local_ma_control_file_create_or_open(TRUE) == CONTROL_FILE_OK);
+ /* Check that the module reports expected information */
+ RET_ERR_UNLESS(verify_module_values_match_expected() == 0);
+ return 0;
+}
+
+static int write_file(const LSN checkpoint_lsn,
+ uint32 logno,
+ uint objs_to_write)
+{
+ RET_ERR_UNLESS(ma_control_file_write_and_force(checkpoint_lsn, logno,
+ objs_to_write) == 0);
+ /* Check that the module reports expected information */
+ RET_ERR_UNLESS(verify_module_values_match_expected() == 0);
+ return 0;
+}
+
+static int test_one_log()
+{
+ uint objs_to_write;
+
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO;
+ expect_logno= 123;
+ RET_ERR_UNLESS(write_file(LSN_IMPOSSIBLE,
+ expect_logno,
+ objs_to_write) == 0);
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
+
+static int test_five_logs()
+{
+ uint objs_to_write;
+ uint i;
+
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO;
+ expect_logno= 100;
+ for (i= 0; i<5; i++)
+ {
+ expect_logno*= 3;
+ RET_ERR_UNLESS(write_file(LSN_IMPOSSIBLE, expect_logno,
+ objs_to_write) == 0);
+ }
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
+
+static int test_3_checkpoints_and_2_logs()
+{
+ uint objs_to_write;
+ /*
+ Simulate one checkpoint, one log creation, two checkpoints, one
+ log creation.
+ */
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN;
+ expect_checkpoint_lsn= MAKE_LSN(5, 10000);
+ RET_ERR_UNLESS(write_file(expect_checkpoint_lsn,
+ expect_logno, objs_to_write) == 0);
+
+ objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO;
+ expect_logno= 17;
+ RET_ERR_UNLESS(write_file(expect_checkpoint_lsn,
+ expect_logno, objs_to_write) == 0);
+
+ objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN;
+ expect_checkpoint_lsn= MAKE_LSN(17, 20000);
+ RET_ERR_UNLESS(write_file(expect_checkpoint_lsn,
+ expect_logno, objs_to_write) == 0);
+
+ objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN;
+ expect_checkpoint_lsn= MAKE_LSN(17, 45000);
+ RET_ERR_UNLESS(write_file(expect_checkpoint_lsn,
+ expect_logno, objs_to_write) == 0);
+
+ objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO;
+ expect_logno= 19;
+ RET_ERR_UNLESS(write_file(expect_checkpoint_lsn,
+ expect_logno, objs_to_write) == 0);
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
+
+static int test_binary_content()
+{
+ uint i;
+ int fd;
+
+ /*
+ TEST4: actually check by ourselves the content of the file.
+ Note that constants (offsets) are hard-coded here, precisely to prevent
+ someone from changing them in the control file module and breaking
+ backward-compatibility.
+ TODO: when we reach the format-freeze state, we may even just do a
+ comparison with a raw binary string, to not depend on any uint4korr
+ future change/breakage.
+ */
+
+ char buffer[45];
+ RET_ERR_UNLESS((fd= my_open(file_name,
+ O_BINARY | O_RDWR,
+ MYF(MY_WME))) >= 0);
+ RET_ERR_UNLESS(my_read(fd, buffer, 45, MYF(MY_FNABP | MY_WME)) == 0);
+ RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ i= uint3korr(buffer + 34 );
+ RET_ERR_UNLESS(i == LSN_FILE_NO(last_checkpoint_lsn));
+ i= uint4korr(buffer + 37);
+ RET_ERR_UNLESS(i == LSN_OFFSET(last_checkpoint_lsn));
+ i= uint4korr(buffer + 41);
+ RET_ERR_UNLESS(i == last_logno);
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
+
+static int test_start_stop()
+{
+ /* TEST5: Simulate start/nothing/stop/start/nothing/stop/start */
+
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
+
+static int test_2_open_and_2_close()
+{
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
+
+
+static int test_bad_magic_string()
+{
+ char buffer[4];
+ int fd;
+
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+
+ /* Corrupt magic string */
+ RET_ERR_UNLESS((fd= my_open(file_name,
+ O_BINARY | O_RDWR,
+ MYF(MY_WME))) >= 0);
+ RET_ERR_UNLESS(my_pread(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) == 0);
+ RET_ERR_UNLESS(my_pwrite(fd, "papa", 4, 0, MYF(MY_FNABP | MY_WME)) == 0);
+
+ /* Check that control file module sees the problem */
+ RET_ERR_UNLESS(local_ma_control_file_create_or_open(TRUE) ==
+ CONTROL_FILE_BAD_MAGIC_STRING);
+ /* Restore magic string */
+ RET_ERR_UNLESS(my_pwrite(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) == 0);
+ RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
+
+static int test_bad_checksum()
+{
+ char buffer[4];
+ int fd;
+
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+
+ /* Corrupt checksum */
+ RET_ERR_UNLESS((fd= my_open(file_name,
+ O_BINARY | O_RDWR,
+ MYF(MY_WME))) >= 0);
+ RET_ERR_UNLESS(my_pread(fd, buffer, 1, 30, MYF(MY_FNABP | MY_WME)) == 0);
+ buffer[0]+= 3; /* mangle checksum */
+ RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 30, MYF(MY_FNABP | MY_WME)) == 0);
+ /* Check that control file module sees the problem */
+ RET_ERR_UNLESS(local_ma_control_file_create_or_open(TRUE) ==
+ CONTROL_FILE_BAD_CHECKSUM);
+ /* Restore checksum */
+ buffer[0]-= 3;
+ RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 30, MYF(MY_FNABP | MY_WME)) == 0);
+ RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);
+
+ return 0;
+}
+
+
+static int test_bad_blocksize()
+{
+ maria_block_size<<= 1;
+ /* Check that control file module sees the problem */
+ RET_ERR_UNLESS(local_ma_control_file_create_or_open(TRUE) ==
+ CONTROL_FILE_WRONG_BLOCKSIZE);
+ /* Restore blocksize */
+ maria_block_size>>= 1;
+
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
+
+
+static int test_future_size()
+{
+ /*
+ Here we check ability to add fields only so we can use
+ defined constants
+ */
+ uint32 sum;
+ int fd;
+ char buffer[CF_CREATE_TIME_TOTAL_SIZE + CF_CHANGEABLE_TOTAL_SIZE + 2];
+ RET_ERR_UNLESS((fd= my_open(file_name,
+ O_BINARY | O_RDWR,
+ MYF(MY_WME))) >= 0);
+ RET_ERR_UNLESS(my_read(fd, buffer,
+ CF_CREATE_TIME_TOTAL_SIZE + CF_CHANGEABLE_TOTAL_SIZE,
+ MYF(MY_FNABP | MY_WME)) == 0);
+ RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);
+ /* "add" new field of 1 byte (value 1) to header and variable part */
+ memmove(buffer + CF_CREATE_TIME_TOTAL_SIZE + 1,
+ buffer + CF_CREATE_TIME_TOTAL_SIZE,
+ CF_CHANGEABLE_TOTAL_SIZE);
+ buffer[CF_CREATE_TIME_TOTAL_SIZE - CF_CHECKSUM_SIZE]= '\1';
+ buffer[CF_CREATE_TIME_TOTAL_SIZE + CF_CHANGEABLE_TOTAL_SIZE + 1]= '\1';
+ /* fix lengths */
+ int2store(buffer + CF_CREATE_TIME_SIZE_OFFSET, CF_CREATE_TIME_TOTAL_SIZE + 1);
+ int2store(buffer + CF_CHANGEABLE_SIZE_OFFSET, CF_CHANGEABLE_TOTAL_SIZE + 1);
+ /* recalculete checksums */
+ sum= (uint32) my_checksum(0, buffer, CF_CREATE_TIME_TOTAL_SIZE -
+ CF_CHECKSUM_SIZE + 1);
+ int4store(buffer + CF_CREATE_TIME_TOTAL_SIZE - CF_CHECKSUM_SIZE + 1, sum);
+ sum= (uint32) my_checksum(0, buffer + CF_CREATE_TIME_TOTAL_SIZE + 1 +
+ CF_CHECKSUM_SIZE,
+ CF_CHANGEABLE_TOTAL_SIZE - CF_CHECKSUM_SIZE + 1);
+ int4store(buffer + CF_CREATE_TIME_TOTAL_SIZE + 1, sum);
+ /* write new file and check it */
+ RET_ERR_UNLESS((fd= my_open(file_name,
+ O_BINARY | O_RDWR,
+ MYF(MY_WME))) >= 0);
+ RET_ERR_UNLESS(my_pwrite(fd, buffer,
+ CF_CREATE_TIME_TOTAL_SIZE +
+ CF_CHANGEABLE_TOTAL_SIZE + 2,
+ 0, MYF(MY_FNABP | MY_WME)) == 0);
+ RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+
+ return(0);
+}
+
+static int test_bad_hchecksum()
+{
+ char buffer[4];
+ int fd;
+
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+
+ /* Corrupt checksum */
+ RET_ERR_UNLESS((fd= my_open(file_name,
+ O_BINARY | O_RDWR,
+ MYF(MY_WME))) >= 0);
+ RET_ERR_UNLESS(my_pread(fd, buffer, 1, 26, MYF(MY_FNABP | MY_WME)) == 0);
+ buffer[0]+= 3; /* mangle checksum */
+ RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 26, MYF(MY_FNABP | MY_WME)) == 0);
+ /* Check that control file module sees the problem */
+ RET_ERR_UNLESS(local_ma_control_file_create_or_open(TRUE) ==
+ CONTROL_FILE_BAD_HEAD_CHECKSUM);
+ /* Restore checksum */
+ buffer[0]-= 3;
+ RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 26, MYF(MY_FNABP | MY_WME)) == 0);
+ RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);
+
+ return 0;
+}
+
+
+static int test_bad_size()
+{
+ char buffer[]=
+ "123456789012345678901234567890123456789012345678901234567890123456";
+ int fd, i;
+
+ /* A too short file */
+ RET_ERR_UNLESS(delete_file(MYF(MY_WME)) == 0);
+ RET_ERR_UNLESS((fd= my_open(file_name,
+ O_BINARY | O_RDWR | O_CREAT,
+ MYF(MY_WME))) >= 0);
+ RET_ERR_UNLESS(my_write(fd, buffer, 10, MYF(MY_FNABP | MY_WME)) == 0);
+ /* Check that control file module sees the problem */
+ RET_ERR_UNLESS(local_ma_control_file_create_or_open(TRUE) ==
+ CONTROL_FILE_TOO_SMALL);
+ for (i= 0; i < 8; i++)
+ {
+ RET_ERR_UNLESS(my_write(fd, buffer, 66, MYF(MY_FNABP | MY_WME)) == 0);
+ }
+ /* Check that control file module sees the problem */
+ RET_ERR_UNLESS(local_ma_control_file_create_or_open(TRUE) ==
+ CONTROL_FILE_TOO_BIG);
+ RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);
+
+ /* Leave a correct control file */
+ RET_ERR_UNLESS(delete_file(MYF(MY_WME)) == 0);
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+
+ return 0;
+}
+
+
+static struct my_option my_long_options[] =
+{
+#ifndef DBUG_OFF
+ {"debug", '#', "Debug log.",
+ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+ {"help", '?', "Display help and exit",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"version", 'V', "Print version number and exit",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+
+static void version()
+{
+ printf("ma_control_file_test: unit test for the control file "
+ "module of the Maria storage engine. Ver 1.0 \n");
+}
+
+static my_bool
+get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
+ char *argument __attribute__((unused)))
+{
+ switch(optid) {
+ case 'V':
+ version();
+ exit(0);
+ case '#':
+ DBUG_PUSH (argument);
+ break;
+ case '?':
+ version();
+ usage();
+ exit(0);
+ }
+ return 0;
+}
+
+
+/* Read options */
+
+static void get_options(int argc, char *argv[])
+{
+ int ho_error;
+
+ if ((ho_error=handle_options(&argc, &argv, my_long_options,
+ get_one_option)))
+ exit(ho_error);
+
+ return;
+} /* get options */
+
+
+static void usage()
+{
+ printf("Usage: %s [options]\n\n", my_progname);
+ my_print_help(my_long_options);
+ my_print_variables(my_long_options);
+}
diff --git a/storage/maria/unittest/ma_loghandler_examples.c b/storage/maria/unittest/ma_loghandler_examples.c
new file mode 100644
index 00000000000..c35416258ea
--- /dev/null
+++ b/storage/maria/unittest/ma_loghandler_examples.c
@@ -0,0 +1,52 @@
+/* TODO: copyright */
+
+#include "../maria_def.h"
+
+static LOG_DESC INIT_LOGREC_FIXED_RECORD_0LSN_EXAMPLE=
+{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0,
+ "fixed0example", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0,
+"variable0example", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_FIXED_RECORD_1LSN_EXAMPLE=
+{LOGRECTYPE_PSEUDOFIXEDLENGTH, 7, 7, NULL, NULL, NULL, 1,
+"fixed1example", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, 12, NULL, NULL, NULL, 1,
+"variable1example", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_FIXED_RECORD_2LSN_EXAMPLE=
+{LOGRECTYPE_PSEUDOFIXEDLENGTH, 23, 23, NULL, NULL, NULL, 2,
+"fixed2example", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE=
+{LOGRECTYPE_VARIABLE_LENGTH, 0, 19, NULL, NULL, NULL, 2,
+"variable2example", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+
+void translog_example_table_init()
+{
+ int i;
+ log_record_type_descriptor[LOGREC_FIXED_RECORD_0LSN_EXAMPLE]=
+ INIT_LOGREC_FIXED_RECORD_0LSN_EXAMPLE;
+ log_record_type_descriptor[LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE]=
+ INIT_LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE;
+ log_record_type_descriptor[LOGREC_FIXED_RECORD_1LSN_EXAMPLE]=
+ INIT_LOGREC_FIXED_RECORD_1LSN_EXAMPLE;
+ log_record_type_descriptor[LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE]=
+ INIT_LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE;
+ log_record_type_descriptor[LOGREC_FIXED_RECORD_2LSN_EXAMPLE]=
+ INIT_LOGREC_FIXED_RECORD_2LSN_EXAMPLE;
+ log_record_type_descriptor[LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE]=
+ INIT_LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE;
+ for (i= LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE + 1;
+ i < LOGREC_NUMBER_OF_TYPES;
+ i++)
+ log_record_type_descriptor[i].rclass= LOGRECTYPE_NOT_ALLOWED;
+}
+
+
+
diff --git a/storage/maria/unittest/ma_maria_log_cleanup.c b/storage/maria/unittest/ma_maria_log_cleanup.c
new file mode 100644
index 00000000000..19e95361a71
--- /dev/null
+++ b/storage/maria/unittest/ma_maria_log_cleanup.c
@@ -0,0 +1,49 @@
+#include "../maria_def.h"
+#include <my_dir.h>
+
+my_bool maria_log_remove()
+{
+ MY_DIR *dirp;
+ uint i;
+ MY_STAT stat_buff;
+ char file_name[FN_REFLEN];
+
+ /* Removes control file */
+ if (fn_format(file_name, CONTROL_FILE_BASE_NAME,
+ maria_data_root, "", MYF(MY_WME)) == NullS)
+ return 1;
+ if (my_stat(file_name, &stat_buff, MYF(0)) &&
+ my_delete(file_name, MYF(MY_WME)) != 0)
+ return 1;
+
+ /* Finds and removes transaction log files */
+ if (!(dirp = my_dir(maria_data_root, MYF(MY_DONT_SORT))))
+ return 1;
+
+ for (i= 0; i < dirp->number_off_files; i++)
+ {
+ char *file= dirp->dir_entry[i].name;
+ if (strncmp(file, "maria_log.", 10) == 0 &&
+ file[10] >= '0' && file[10] <= '9' &&
+ file[11] >= '0' && file[11] <= '9' &&
+ file[12] >= '0' && file[12] <= '9' &&
+ file[13] >= '0' && file[13] <= '9' &&
+ file[14] >= '0' && file[14] <= '9' &&
+ file[15] >= '0' && file[15] <= '9' &&
+ file[16] >= '0' && file[16] <= '9' &&
+ file[17] >= '0' && file[17] <= '9' &&
+ file[18] == '\0')
+ {
+ if (fn_format(file_name, file,
+ maria_data_root, "", MYF(MY_WME)) == NullS ||
+ my_delete(file_name, MYF(MY_WME)) != 0)
+ {
+ my_dirend(dirp);
+ return 1;
+ }
+ }
+ }
+ my_dirend(dirp);
+ return 0;
+}
+
diff --git a/storage/maria/unittest/ma_pagecache_consist.c b/storage/maria/unittest/ma_pagecache_consist.c
new file mode 100644
index 00000000000..4ca06147ff4
--- /dev/null
+++ b/storage/maria/unittest/ma_pagecache_consist.c
@@ -0,0 +1,485 @@
+/*
+ TODO: use pthread_join instead of wait_for_thread_count_to_be_zero, like in
+ my_atomic-t.c (see BUG#22320).
+ Use diag() instead of fprintf(stderr). Use ok() and plan().
+*/
+
+#include <tap.h>
+#include <my_sys.h>
+#include <m_string.h>
+#include "test_file.h"
+#include <tap.h>
+
+#define PCACHE_SIZE (PAGE_SIZE*1024*8)
+
+#ifndef DBUG_OFF
+static const char* default_dbug_option;
+#endif
+
+static char *file1_name= (char*)"page_cache_test_file_1";
+static PAGECACHE_FILE file1;
+static pthread_cond_t COND_thread_count;
+static pthread_mutex_t LOCK_thread_count;
+static uint thread_count;
+static PAGECACHE pagecache;
+
+#ifdef TEST_HIGH_CONCURENCY
+static uint number_of_readers= 10;
+static uint number_of_writers= 20;
+static uint number_of_tests= 30000;
+static uint record_length_limit= PAGE_SIZE/200;
+static uint number_of_pages= 20;
+static uint flush_divider= 1000;
+#else /*TEST_HIGH_CONCURENCY*/
+#ifdef TEST_READERS
+static uint number_of_readers= 10;
+static uint number_of_writers= 1;
+static uint number_of_tests= 30000;
+static uint record_length_limit= PAGE_SIZE/200;
+static uint number_of_pages= 20;
+static uint flush_divider= 1000;
+#else /*TEST_READERS*/
+#ifdef TEST_WRITERS
+static uint number_of_readers= 0;
+static uint number_of_writers= 10;
+static uint number_of_tests= 30000;
+static uint record_length_limit= PAGE_SIZE/200;
+static uint number_of_pages= 20;
+static uint flush_divider= 1000;
+#else /*TEST_WRITERS*/
+static uint number_of_readers= 10;
+static uint number_of_writers= 10;
+static uint number_of_tests= 50000;
+static uint record_length_limit= PAGE_SIZE/200;
+static uint number_of_pages= 20000;
+static uint flush_divider= 1000;
+#endif /*TEST_WRITERS*/
+#endif /*TEST_READERS*/
+#endif /*TEST_HIGH_CONCURENCY*/
+
+
+/**
+ @brief Dummy pagecache callback.
+*/
+
+static my_bool
+dummy_callback(uchar *page __attribute__((unused)),
+ pgcache_page_no_t page_no __attribute__((unused)),
+ uchar* data_ptr __attribute__((unused)))
+{
+ return 0;
+}
+
+
+/**
+ @brief Dummy pagecache callback.
+*/
+
+static void
+dummy_fail_callback(uchar* data_ptr __attribute__((unused)))
+{
+ return;
+}
+
+
+/*
+ Get pseudo-random length of the field in (0;limit)
+
+ SYNOPSYS
+ get_len()
+ limit limit for generated value
+
+ RETURN
+ length where length >= 0 & length < limit
+*/
+
+static uint get_len(uint limit)
+{
+ uint32 rec_len;
+ do
+ {
+ rec_len= random() /
+ (RAND_MAX / limit);
+ } while (rec_len >= limit || rec_len == 0);
+ return rec_len;
+}
+
+
+/* check page consistency */
+uint check_page(uchar *buff, ulong offset, int page_locked, int page_no,
+ int tag)
+{
+ uint end= sizeof(uint);
+ uint num= *((uint *)buff);
+ uint i;
+ DBUG_ENTER("check_page");
+
+ for (i= 0; i < num; i++)
+ {
+ uint len= *((uint *)(buff + end));
+ uint j;
+ end+= sizeof(uint) + sizeof(uint);
+ if (len + end > PAGE_SIZE)
+ {
+ diag("incorrect field header #%u by offset %lu\n", i, offset + end);
+ goto err;
+ }
+ for(j= 0; j < len; j++)
+ {
+ if (buff[end + j] != (uchar)((i+1) % 256))
+ {
+ diag("incorrect %lu byte\n", offset + end + j);
+ goto err;
+ }
+ }
+ end+= len;
+ }
+ for(i= end; i < PAGE_SIZE; i++)
+ {
+ if (buff[i] != 0)
+ {
+ int h;
+ DBUG_PRINT("err",
+ ("byte %lu (%lu + %u), page %u (%s, end: %u, recs: %u, tag: %d) should be 0\n",
+ offset + i, offset, i, page_no,
+ (page_locked ? "locked" : "unlocked"),
+ end, num, tag));
+ diag("byte %lu (%lu + %u), page %u (%s, end: %u, recs: %u, tag: %d) should be 0\n",
+ offset + i, offset, i, page_no,
+ (page_locked ? "locked" : "unlocked"),
+ end, num, tag);
+ h= my_open("wrong_page", O_CREAT | O_TRUNC | O_RDWR, MYF(0));
+ my_pwrite(h, (uchar*) buff, PAGE_SIZE, 0, MYF(0));
+ my_close(h, MYF(0));
+ goto err;
+ }
+ }
+ DBUG_RETURN(end);
+err:
+ DBUG_PRINT("err", ("try to flush"));
+ if (page_locked)
+ {
+ pagecache_delete(&pagecache, &file1, page_no,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, 1);
+ }
+ else
+ {
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE);
+ }
+ exit(1);
+}
+
+void put_rec(uchar *buff, uint end, uint len, uint tag)
+{
+ uint i;
+ uint num= *((uint *)buff);
+ if (!len)
+ len= 1;
+ if (end + sizeof(uint)*2 + len > PAGE_SIZE)
+ return;
+ *((uint *)(buff + end))= len;
+ end+= sizeof(uint);
+ *((uint *)(buff + end))= tag;
+ end+= sizeof(uint);
+ num++;
+ *((uint *)buff)= num;
+ *((uint*)(buff + end))= len;
+ for (i= end; i < (len + end); i++)
+ {
+ buff[i]= (uchar) num % 256;
+ }
+}
+
+/*
+ Recreate and reopen a file for test
+
+ SYNOPSIS
+ reset_file()
+ file File to reset
+ file_name Path (and name) of file which should be reset
+*/
+
+void reset_file(PAGECACHE_FILE file, char *file_name)
+{
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE);
+ if (my_close(file1.file, MYF(0)) != 0)
+ {
+ diag("Got error during %s closing from close() (errno: %d)\n",
+ file_name, errno);
+ exit(1);
+ }
+ my_delete(file_name, MYF(0));
+ if ((file.file= my_open(file_name,
+ O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
+ {
+ diag("Got error during %s creation from open() (errno: %d)\n",
+ file_name, errno);
+ exit(1);
+ }
+}
+
+
+void reader(int num)
+{
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ uint i;
+
+ for (i= 0; i < number_of_tests; i++)
+ {
+ uint page= get_len(number_of_pages);
+ pagecache_read(&pagecache, &file1, page, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ 0);
+ check_page(buffr, page * PAGE_SIZE, 0, page, -num);
+ if (i % 500 == 0)
+ printf("reader%d: %d\n", num, i);
+
+ }
+ printf("reader%d: done\n", num);
+ free(buffr);
+}
+
+
+void writer(int num)
+{
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ uint i;
+
+ for (i= 0; i < number_of_tests; i++)
+ {
+ uint end;
+ uint page= get_len(number_of_pages);
+ pagecache_read(&pagecache, &file1, page, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE,
+ 0);
+ end= check_page(buffr, page * PAGE_SIZE, 1, page, num);
+ put_rec(buffr, end, get_len(record_length_limit), num);
+ pagecache_write(&pagecache, &file1, page, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN,
+ PAGECACHE_WRITE_DELAY,
+ 0, LSN_IMPOSSIBLE);
+
+ if (i % flush_divider == 0)
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ if (i % 500 == 0)
+ printf("writer%d: %d\n", num, i);
+ }
+ printf("writer%d: done\n", num);
+ free(buffr);
+}
+
+
+static void *test_thread_reader(void *arg)
+{
+ int param=*((int*) arg);
+
+ my_thread_init();
+ DBUG_ENTER("test_reader");
+ DBUG_PRINT("enter", ("param: %d", param));
+
+ reader(param);
+
+ DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name()));
+ pthread_mutex_lock(&LOCK_thread_count);
+ thread_count--;
+ VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */
+ pthread_mutex_unlock(&LOCK_thread_count);
+ free((uchar*) arg);
+ my_thread_end();
+ DBUG_RETURN(0);
+}
+
+static void *test_thread_writer(void *arg)
+{
+ int param=*((int*) arg);
+
+ my_thread_init();
+ DBUG_ENTER("test_writer");
+ DBUG_PRINT("enter", ("param: %d", param));
+
+ writer(param);
+
+ DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name()));
+ pthread_mutex_lock(&LOCK_thread_count);
+ thread_count--;
+ VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */
+ pthread_mutex_unlock(&LOCK_thread_count);
+ free((uchar*) arg);
+ my_thread_end();
+ DBUG_RETURN(0);
+}
+
+int main(int argc __attribute__((unused)),
+ char **argv __attribute__((unused)))
+{
+ pthread_t tid;
+ pthread_attr_t thr_attr;
+ int *param, error, pagen;
+
+ MY_INIT(argv[0]);
+
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\test_pagecache_consist.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/test_pagecache_consist.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+
+ DBUG_ENTER("main");
+ DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name()));
+ if ((file1.file= my_open(file1_name,
+ O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
+ {
+ fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ pagecache_file_init(file1, &dummy_callback, &dummy_callback,
+ &dummy_fail_callback, NULL);
+ DBUG_PRINT("info", ("file1: %d", file1.file));
+ if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0)
+ {
+ fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ my_pwrite(file1.file, "test file", 9, 0, MYF(0));
+
+ if ((error= pthread_cond_init(&COND_thread_count, NULL)))
+ {
+ fprintf(stderr, "COND_thread_count: %d from pthread_cond_init (errno: %d)\n",
+ error, errno);
+ exit(1);
+ }
+ if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST)))
+ {
+ fprintf(stderr, "LOCK_thread_count: %d from pthread_cond_init (errno: %d)\n",
+ error, errno);
+ exit(1);
+ }
+
+ if ((error= pthread_attr_init(&thr_attr)))
+ {
+ fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+ if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED)))
+ {
+ fprintf(stderr,
+ "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+
+#ifdef HAVE_THR_SETCONCURRENCY
+ VOID(thr_setconcurrency(2));
+#endif
+
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ PAGE_SIZE, 0)) == 0)
+ {
+ fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ DBUG_PRINT("info", ("Page cache %d pages", pagen));
+ {
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ uint i;
+ memset(buffr, '\0', PAGE_SIZE);
+ for (i= 0; i < number_of_pages; i++)
+ {
+ pagecache_write(&pagecache, &file1, i, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0, LSN_IMPOSSIBLE);
+ }
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ free(buffr);
+ }
+ if ((error= pthread_mutex_lock(&LOCK_thread_count)))
+ {
+ fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_lock (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+ while (number_of_readers != 0 || number_of_writers != 0)
+ {
+ if (number_of_readers != 0)
+ {
+ param=(int*) malloc(sizeof(int));
+ *param= number_of_readers;
+ if ((error= pthread_create(&tid, &thr_attr, test_thread_reader,
+ (void*) param)))
+ {
+ fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+ thread_count++;
+ number_of_readers--;
+ }
+ if (number_of_writers != 0)
+ {
+ param=(int*) malloc(sizeof(int));
+ *param= number_of_writers;
+ if ((error= pthread_create(&tid, &thr_attr, test_thread_writer,
+ (void*) param)))
+ {
+ fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+ thread_count++;
+ number_of_writers--;
+ }
+ }
+ DBUG_PRINT("info", ("Thread started"));
+ pthread_mutex_unlock(&LOCK_thread_count);
+
+ pthread_attr_destroy(&thr_attr);
+
+ /* wait finishing */
+ if ((error= pthread_mutex_lock(&LOCK_thread_count)))
+ fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_lock\n",error);
+ while (thread_count)
+ {
+ if ((error= pthread_cond_wait(&COND_thread_count,&LOCK_thread_count)))
+ fprintf(stderr,"COND_thread_count: %d from pthread_cond_wait\n",error);
+ }
+ if ((error= pthread_mutex_unlock(&LOCK_thread_count)))
+ fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_unlock\n",error);
+ DBUG_PRINT("info", ("thread ended"));
+
+ end_pagecache(&pagecache, 1);
+ DBUG_PRINT("info", ("Page cache ended"));
+
+ if (my_close(file1.file, MYF(0)) != 0)
+ {
+ fprintf(stderr, "Got error during file1 closing from close() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ /*my_delete(file1_name, MYF(0));*/
+ my_end(0);
+
+ DBUG_PRINT("info", ("file1 (%d) closed", file1.file));
+
+ DBUG_PRINT("info", ("Program end"));
+
+ DBUG_RETURN(exit_status());
+}
diff --git a/storage/maria/unittest/ma_pagecache_single.c b/storage/maria/unittest/ma_pagecache_single.c
new file mode 100644
index 00000000000..3f76bbdb863
--- /dev/null
+++ b/storage/maria/unittest/ma_pagecache_single.c
@@ -0,0 +1,635 @@
+/*
+ TODO: use pthread_join instead of wait_for_thread_count_to_be_zero, like in
+ my_atomic-t.c (see BUG#22320).
+ Use diag() instead of fprintf(stderr).
+*/
+#include <tap.h>
+#include <my_sys.h>
+#include <m_string.h>
+#include "test_file.h"
+#include <tap.h>
+
+#define PCACHE_SIZE (PAGE_SIZE*1024*10)
+
+#ifndef DBUG_OFF
+static const char* default_dbug_option;
+#endif
+
+static char *file1_name= (char*)"page_cache_test_file_1";
+static char *file2_name= (char*)"page_cache_test_file_2";
+static PAGECACHE_FILE file1;
+static pthread_cond_t COND_thread_count;
+static pthread_mutex_t LOCK_thread_count;
+static uint thread_count;
+static PAGECACHE pagecache;
+
+/*
+ File contance descriptors
+*/
+static struct file_desc simple_read_write_test_file[]=
+{
+ {PAGE_SIZE, '\1'},
+ { 0, 0}
+};
+static struct file_desc simple_read_change_write_read_test_file[]=
+{
+ {PAGE_SIZE/2, '\65'},
+ {PAGE_SIZE/2, '\1'},
+ { 0, 0}
+};
+static struct file_desc simple_pin_test_file1[]=
+{
+ {PAGE_SIZE*2, '\1'},
+ { 0, 0}
+};
+static struct file_desc simple_pin_test_file2[]=
+{
+ {PAGE_SIZE/2, '\1'},
+ {PAGE_SIZE/2, (unsigned char)129},
+ {PAGE_SIZE, '\1'},
+ { 0, 0}
+};
+static struct file_desc simple_delete_forget_test_file[]=
+{
+ {PAGE_SIZE, '\1'},
+ { 0, 0}
+};
+static struct file_desc simple_delete_flush_test_file[]=
+{
+ {PAGE_SIZE, '\2'},
+ { 0, 0}
+};
+
+
+/**
+ @brief Dummy pagecache callback.
+*/
+
+static my_bool
+dummy_callback(uchar *page __attribute__((unused)),
+ pgcache_page_no_t page_no __attribute__((unused)),
+ uchar* data_ptr __attribute__((unused)))
+{
+ return 0;
+}
+
+
+/**
+ @brief Dummy pagecache callback.
+*/
+
+static void
+dummy_fail_callback(uchar* data_ptr __attribute__((unused)))
+{
+ return;
+}
+
+
+/*
+ Recreate and reopen a file for test
+
+ SYNOPSIS
+ reset_file()
+ file File to reset
+ file_name Path (and name) of file which should be reset
+*/
+
+void reset_file(PAGECACHE_FILE *file, const char *file_name)
+{
+ flush_pagecache_blocks(&pagecache, file, FLUSH_RELEASE);
+ if (my_close(file->file, MYF(MY_WME)))
+ exit(1);
+ my_delete(file_name, MYF(MY_WME));
+ if ((file->file= my_open(file_name,
+ O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
+ {
+ diag("Got error during %s creation from open() (errno: %d)\n",
+ file_name, my_errno);
+ exit(1);
+ }
+}
+
+/*
+ Write then read page, check file on disk
+*/
+
+int simple_read_write_test()
+{
+ unsigned char *buffw= malloc(PAGE_SIZE);
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ int res;
+ DBUG_ENTER("simple_read_write_test");
+ bfill(buffw, PAGE_SIZE, '\1');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0, LSN_IMPOSSIBLE);
+ pagecache_read(&pagecache, &file1, 0, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ 0);
+ ok((res= test(memcmp(buffr, buffw, PAGE_SIZE) == 0)),
+ "Simple write-read page ");
+ if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE))
+ {
+ diag("Got error during flushing pagecache\n");
+ exit(1);
+ }
+ ok((res&= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE,
+ simple_read_write_test_file))),
+ "Simple write-read page file");
+ if (res)
+ reset_file(&file1, file1_name);
+ free(buffw);
+ free(buffr);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Prepare page, then read (and lock), change (write new value and unlock),
+ then check the page in the cache and on the disk
+*/
+int simple_read_change_write_read_test()
+{
+ unsigned char *buffw= malloc(PAGE_SIZE);
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ int res, res2;
+ DBUG_ENTER("simple_read_change_write_read_test");
+
+ /* prepare the file */
+ bfill(buffw, PAGE_SIZE, '\1');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0, LSN_IMPOSSIBLE);
+ if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE))
+ {
+ diag("Got error during flushing pagecache\n");
+ exit(1);
+ }
+ /* test */
+ pagecache_read(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE,
+ 0);
+ bfill(buffw, PAGE_SIZE/2, '\65');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN,
+ PAGECACHE_WRITE_DELAY,
+ 0, LSN_IMPOSSIBLE);
+
+ pagecache_read(&pagecache, &file1, 0, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ 0);
+ ok((res= test(memcmp(buffr, buffw, PAGE_SIZE) == 0)),
+ "Simple read-change-write-read page ");
+ DBUG_ASSERT(pagecache.blocks_changed == 1);
+ if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE))
+ {
+ diag("Got error during flushing pagecache\n");
+ exit(1);
+ }
+ DBUG_ASSERT(pagecache.blocks_changed == 0);
+ ok((res2= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE,
+ simple_read_change_write_read_test_file))),
+ "Simple read-change-write-read page file");
+ if (res && res2)
+ reset_file(&file1, file1_name);
+ free(buffw);
+ free(buffr);
+ DBUG_RETURN(res && res2);
+}
+
+
+/*
+ Prepare page, read page 0 (and pin) then write page 1 and page 0.
+ Flush the file (shold flush only page 1 and return 1 (page 0 is
+ still pinned).
+ Check file on the disk.
+ Unpin and flush.
+ Check file on the disk.
+*/
+int simple_pin_test()
+{
+ unsigned char *buffw= malloc(PAGE_SIZE);
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ int res;
+ DBUG_ENTER("simple_pin_test");
+ /* prepare the file */
+ bfill(buffw, PAGE_SIZE, '\1');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0, LSN_IMPOSSIBLE);
+ /* test */
+ if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE))
+ {
+ diag("Got error during flushing pagecache\n");
+ exit(1);
+ }
+ pagecache_read(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE,
+ 0);
+ pagecache_write(&pagecache, &file1, 1, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0, LSN_IMPOSSIBLE);
+ bfill(buffw + PAGE_SIZE/2, PAGE_SIZE/2, ((unsigned char) 129));
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE_TO_READ,
+ PAGECACHE_PIN_LEFT_PINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0, LSN_IMPOSSIBLE);
+ /*
+ We have to get error because one page of the file is pinned,
+ other page should be flushed
+ */
+ if (!flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE))
+ {
+ diag("Did not get error in flush_pagecache_blocks\n");
+ res= 0;
+ goto err;
+ }
+ ok((res= test(test_file(file1, file1_name, PAGE_SIZE*2, PAGE_SIZE*2,
+ simple_pin_test_file1))),
+ "Simple pin page file with pin");
+ pagecache_unlock(&pagecache,
+ &file1,
+ 0,
+ PAGECACHE_LOCK_READ_UNLOCK,
+ PAGECACHE_UNPIN,
+ 0, 0, 0);
+ if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE))
+ {
+ diag("Got error in flush_pagecache_blocks\n");
+ res= 0;
+ goto err;
+ }
+ ok((res&= test(test_file(file1, file1_name, PAGE_SIZE*2, PAGE_SIZE,
+ simple_pin_test_file2))),
+ "Simple pin page result file");
+ if (res)
+ reset_file(&file1, file1_name);
+err:
+ free(buffw);
+ free(buffr);
+ DBUG_RETURN(res);
+}
+
+/*
+ Prepare page, write new value, then delete page from cache without flush,
+ on the disk should be page with old content written during preparation
+*/
+
+int simple_delete_forget_test()
+{
+ unsigned char *buffw= malloc(PAGE_SIZE);
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ int res;
+ DBUG_ENTER("simple_delete_forget_test");
+ /* prepare the file */
+ bfill(buffw, PAGE_SIZE, '\1');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0, LSN_IMPOSSIBLE);
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ /* test */
+ bfill(buffw, PAGE_SIZE, '\2');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0, LSN_IMPOSSIBLE);
+ pagecache_delete(&pagecache, &file1, 0,
+ PAGECACHE_LOCK_WRITE, 0);
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ ok((res= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE,
+ simple_delete_forget_test_file))),
+ "Simple delete-forget page file");
+ if (res)
+ reset_file(&file1, file1_name);
+ free(buffw);
+ free(buffr);
+ DBUG_RETURN(res);
+}
+
+/*
+ Prepare page with locking, write new content to the page,
+ delete page with flush and on existing lock,
+ check that page on disk contain new value.
+*/
+
+int simple_delete_flush_test()
+{
+ unsigned char *buffw= malloc(PAGE_SIZE);
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ int res;
+ DBUG_ENTER("simple_delete_flush_test");
+ /* prepare the file */
+ bfill(buffw, PAGE_SIZE, '\1');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE,
+ PAGECACHE_PIN,
+ PAGECACHE_WRITE_DELAY,
+ 0, LSN_IMPOSSIBLE);
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ /* test */
+ bfill(buffw, PAGE_SIZE, '\2');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ PAGECACHE_PIN_LEFT_PINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0, LSN_IMPOSSIBLE);
+ pagecache_delete(&pagecache, &file1, 0,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, 1);
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ ok((res= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE,
+ simple_delete_flush_test_file))),
+ "Simple delete-forget page file");
+ if (res)
+ reset_file(&file1, file1_name);
+ free(buffw);
+ free(buffr);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ write then read file bigger then cache
+*/
+
+int simple_big_test()
+{
+ unsigned char *buffw= (unsigned char *) my_malloc(PAGE_SIZE, MYF(MY_WME));
+ unsigned char *buffr= (unsigned char *) my_malloc(PAGE_SIZE, MYF(MY_WME));
+ struct file_desc *desc= ((struct file_desc *)
+ my_malloc((PCACHE_SIZE/(PAGE_SIZE/2) + 1) *
+ sizeof(struct file_desc), MYF(MY_WME)));
+ int res, i;
+ DBUG_ENTER("simple_big_test");
+
+ /* prepare the file twice larger then cache */
+ for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++)
+ {
+ bfill(buffw, PAGE_SIZE, (unsigned char) (i & 0xff));
+ desc[i].length= PAGE_SIZE;
+ desc[i].content= (i & 0xff);
+ pagecache_write(&pagecache, &file1, i, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0, LSN_IMPOSSIBLE);
+ }
+ desc[i].length= 0;
+ desc[i].content= '\0';
+ ok(1, "Simple big file write");
+ /* check written pages sequentally read */
+ for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++)
+ {
+ int j;
+ pagecache_read(&pagecache, &file1, i, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ 0);
+ for(j= 0; j < PAGE_SIZE; j++)
+ {
+ if (buffr[j] != (i & 0xff))
+ {
+ diag("simple_big_test seq: page %u byte %u mismatch\n", i, j);
+ res= 0;
+ goto err;
+ }
+ }
+ }
+ ok(1, "Simple big file sequential read");
+ /* chack random reads */
+ for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE); i++)
+ {
+ int j, page;
+ page= rand() % (PCACHE_SIZE/(PAGE_SIZE/2));
+ pagecache_read(&pagecache, &file1, page, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ 0);
+ for(j= 0; j < PAGE_SIZE; j++)
+ {
+ if (buffr[j] != (page & 0xff))
+ {
+ diag("simple_big_test rnd: page %u byte %u mismatch\n", page, j);
+ res= 0;
+ goto err;
+ }
+ }
+ }
+ ok(1, "Simple big file random read");
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+
+ ok((res= test(test_file(file1, file1_name, PCACHE_SIZE*2, PAGE_SIZE,
+ desc))),
+ "Simple big file");
+ if (res)
+ reset_file(&file1, file1_name);
+
+err:
+ my_free(buffw, 0);
+ my_free(buffr, 0);
+ my_free(desc, 0);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Thread function
+*/
+
+static void *test_thread(void *arg)
+{
+#ifndef DBUG_OFF
+ int param= *((int*) arg);
+#endif
+
+ my_thread_init();
+ DBUG_ENTER("test_thread");
+ DBUG_PRINT("enter", ("param: %d", param));
+
+ if (!simple_read_write_test() ||
+ !simple_read_change_write_read_test() ||
+ !simple_pin_test() ||
+ !simple_delete_forget_test() ||
+ !simple_delete_flush_test())
+ exit(1);
+
+ SKIP_BIG_TESTS(4)
+ {
+ if (!simple_big_test())
+ exit(1);
+ }
+
+ DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name()));
+ pthread_mutex_lock(&LOCK_thread_count);
+ thread_count--;
+ VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */
+ pthread_mutex_unlock(&LOCK_thread_count);
+ free((uchar*) arg);
+ my_thread_end();
+ DBUG_RETURN(0);
+}
+
+
+int main(int argc __attribute__((unused)),
+ char **argv __attribute__((unused)))
+{
+ pthread_t tid;
+ pthread_attr_t thr_attr;
+ int *param, error, pagen;
+ File tmp_file;
+ MY_INIT(argv[0]);
+
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\test_pagecache_single.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/test_pagecache_single.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+ DBUG_ENTER("main");
+ DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name()));
+ if ((tmp_file= my_open(file2_name, O_CREAT | O_TRUNC | O_RDWR,
+ MYF(MY_WME))) < 0)
+ exit(1);
+
+ if ((file1.file= my_open(file1_name,
+ O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
+ {
+ fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ pagecache_file_init(file1, &dummy_callback, &dummy_callback,
+ &dummy_fail_callback, NULL);
+ my_close(tmp_file, MYF(0));
+ my_delete(file2_name, MYF(0));
+
+ DBUG_PRINT("info", ("file1: %d", file1.file));
+ if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0)
+ {
+ fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ my_pwrite(file1.file, "test file", 9, 0, MYF(0));
+
+ if ((error= pthread_cond_init(&COND_thread_count, NULL)))
+ {
+ fprintf(stderr, "Got error: %d from pthread_cond_init (errno: %d)\n",
+ error, errno);
+ exit(1);
+ }
+ if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST)))
+ {
+ fprintf(stderr, "Got error: %d from pthread_cond_init (errno: %d)\n",
+ error, errno);
+ exit(1);
+ }
+
+ if ((error= pthread_attr_init(&thr_attr)))
+ {
+ fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+ if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED)))
+ {
+ fprintf(stderr,
+ "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+
+#ifdef HAVE_THR_SETCONCURRENCY
+ VOID(thr_setconcurrency(2));
+#endif
+
+ plan(12);
+
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ PAGE_SIZE, MYF(MY_WME))) == 0)
+ {
+ fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ DBUG_PRINT("info", ("Page cache %d pages", pagen));
+
+ if ((error=pthread_mutex_lock(&LOCK_thread_count)))
+ {
+ fprintf(stderr,"Got error: %d from pthread_mutex_lock (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+ param=(int*) malloc(sizeof(int));
+ *param= 1;
+ if ((error= pthread_create(&tid, &thr_attr, test_thread, (void*) param)))
+ {
+ fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+ thread_count++;
+ DBUG_PRINT("info", ("Thread started"));
+ pthread_mutex_unlock(&LOCK_thread_count);
+
+ pthread_attr_destroy(&thr_attr);
+
+ if ((error= pthread_mutex_lock(&LOCK_thread_count)))
+ fprintf(stderr,"Got error: %d from pthread_mutex_lock\n",error);
+ while (thread_count)
+ {
+ if ((error= pthread_cond_wait(&COND_thread_count,&LOCK_thread_count)))
+ fprintf(stderr,"Got error: %d from pthread_cond_wait\n",error);
+ }
+ if ((error= pthread_mutex_unlock(&LOCK_thread_count)))
+ fprintf(stderr,"Got error: %d from pthread_mutex_unlock\n",error);
+ DBUG_PRINT("info", ("thread ended"));
+
+ end_pagecache(&pagecache, 1);
+ DBUG_PRINT("info", ("Page cache ended"));
+
+ if (my_close(file1.file, MYF(MY_WME)))
+ exit(1);
+
+ /*my_delete(file1_name, MYF(0));*/
+ my_end(0);
+
+ DBUG_PRINT("info", ("file1 (%d) closed", file1.file));
+
+ DBUG_PRINT("info", ("Program end"));
+
+ DBUG_RETURN(exit_status());
+}
diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c
new file mode 100644
index 00000000000..8d9ac59cae8
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler-t.c
@@ -0,0 +1,627 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+#include <tap.h>
+#include "../trnman.h"
+
+extern my_bool maria_log_remove();
+extern void example_loghandler_init();
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+static TRN *trn= &dummy_transaction_object;
+
+#define PCACHE_SIZE (1024*1024*10)
+
+#define LONG_BUFFER_SIZE (100 * 1024)
+
+#ifdef LONG_LOG_TEST
+#define LOG_FLAGS 0
+#define LOG_FILE_SIZE (1024L*1024L*8)
+#define ITERATIONS (1600*4)
+
+#else
+#define LOG_FLAGS (TRANSLOG_SECTOR_PROTECTION | TRANSLOG_PAGE_CRC)
+#define LOG_FILE_SIZE (1024L*1024L*8L)
+#define ITERATIONS 1600
+#endif
+
+/*
+#define LOG_FLAGS 0
+#define LOG_FILE_SIZE 1024L*1024L*1024L
+#define ITERATIONS 181000
+*/
+
+/*
+#define LOG_FLAGS 0
+#define LOG_FILE_SIZE 1024L*1024L*3L
+#define ITERATIONS 1600
+*/
+
+/*
+#define LOG_FLAGS 0
+#define LOG_FILE_SIZE 1024L*1024L*100L
+#define ITERATIONS 65000
+*/
+
+/*
+ Check that the buffer filled correctly
+
+ SYNOPSIS
+ check_content()
+ ptr Pointer to the buffer
+ length length of the buffer
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool check_content(uchar *ptr, ulong length)
+{
+ ulong i;
+ uchar buff[2];
+ for (i= 0; i < length; i++)
+ {
+ if (i % 2 == 0)
+ int2store(buff, i >> 1);
+ if (ptr[i] != buff[i % 2])
+ {
+ fprintf(stderr, "Byte # %lu is %x instead of %x",
+ i, (uint) ptr[i], (uint) buff[i % 2]);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
+/*
+ Report OK for read operation
+
+ SYNOPSIS
+ read_ok()
+ rec the record header
+*/
+
+void read_ok(TRANSLOG_HEADER_BUFFER *rec)
+{
+ char buff[80];
+ snprintf(buff, sizeof(buff), "read record type: %u LSN: (%lu,0x%lx)",
+ rec->type, LSN_IN_PARTS(rec->lsn));
+ ok(1, buff);
+}
+
+/*
+ Read whole record content, and check content (put with offset)
+
+ SYNOPSIS
+ read_and_check_content()
+ rec The record header buffer
+ buffer The buffer to read the record in
+ skip Skip this number of bytes ot the record content
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec,
+ uchar *buffer, uint skip)
+{
+ DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE * 2 + 7 * 2 + 2);
+ if (translog_read_record(rec->lsn, 0, rec->record_length, buffer, NULL) !=
+ rec->record_length)
+ return 1;
+ return check_content(buffer + skip, rec->record_length - skip);
+}
+
+
+int main(int argc __attribute__((unused)), char *argv[])
+{
+ uint32 i;
+ uint32 rec_len;
+ uint pagen;
+ uchar long_tr_id[6];
+ uchar lsn_buff[23]=
+ {
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA,
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA,
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55
+ };
+ uchar long_buffer[LONG_BUFFER_SIZE * 2 + LSN_STORE_SIZE * 2 + 2];
+ PAGECACHE pagecache;
+ LSN lsn, lsn_base, first_lsn;
+ TRANSLOG_HEADER_BUFFER rec;
+ LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 3];
+ struct st_translog_scanner_data scanner;
+ int rc;
+
+ MY_INIT(argv[0]);
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+ if (maria_log_remove())
+ exit(1);
+
+ for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i+= 2)
+ {
+ int2store(long_buffer + i, (i >> 1));
+ /* long_buffer[i]= (i & 0xFF); */
+ }
+
+ bzero(long_tr_id, 6);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ if (ma_control_file_create_or_open(TRUE))
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE, 0)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ LOG_FLAGS, 0, &translog_example_table_init))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ exit(1);
+ }
+ /* Suppressing of automatic record writing */
+ trn->first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID;
+
+ plan(((ITERATIONS - 1) * 4 + 1)*2 + ITERATIONS - 1 + 1);
+
+ srandom(122334817L);
+
+ long_tr_id[5]= 0xff;
+
+ int4store(long_tr_id, 0);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
+ trn->short_id= 0;
+ trn->first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ trn, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ ok(0, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE");
+ lsn_base= first_lsn= lsn;
+
+ for (i= 1; i < ITERATIONS; i++)
+ {
+ trn->short_id= i % 0xFFFF;
+ if (i % 2)
+ {
+ lsn_store(lsn_buff, lsn_base);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE;
+ /* check auto-count feature */
+ parts[TRANSLOG_INTERNAL_PARTS + 1].str= NULL;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].length= 0;
+ if (translog_write_record(&lsn, LOGREC_FIXED_RECORD_1LSN_EXAMPLE, trn,
+ NULL, LSN_STORE_SIZE, 0, parts, NULL, NULL))
+ {
+ fprintf(stderr, "1 Can't write reference defore record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_FIXED_RECORD_1LSN_EXAMPLE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_FIXED_RECORD_1LSN_EXAMPLE");
+ lsn_store(lsn_buff, lsn_base);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12)
+ rec_len= 12;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len;
+ /* check record length auto-counting */
+ if (translog_write_record(&lsn,
+ LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE,
+ trn, NULL, 0, TRANSLOG_INTERNAL_PARTS + 2,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "1 Can't write var reference defore record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE");
+ }
+ else
+ {
+ lsn_store(lsn_buff, lsn_base);
+ lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 23;
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_2LSN_EXAMPLE,
+ trn, NULL, 23, TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "0 Can't write reference defore record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_FIXED_RECORD_2LSN_EXAMPLE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_FIXED_RECORD_2LSN_EXAMPLE");
+ lsn_store(lsn_buff, lsn_base);
+ lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19)
+ rec_len= 19;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 14;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len;
+ if (translog_write_record(&lsn,
+ LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE,
+ trn, NULL, 14 + rec_len,
+ TRANSLOG_INTERNAL_PARTS + 2, parts, NULL,
+ NULL))
+ {
+ fprintf(stderr, "0 Can't write var reference defore record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE");
+ }
+ int4store(long_tr_id, i);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ trn, NULL, 6,
+ TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE");
+
+ lsn_base= lsn;
+
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 9)
+ rec_len= 9;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= rec_len;
+ if (translog_write_record(&lsn,
+ LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE,
+ trn, NULL, rec_len,
+ TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE");
+ if (translog_flush(lsn))
+ {
+ fprintf(stderr, "Can't flush #%lu\n", (ulong) i);
+ translog_destroy();
+ ok(0, "flush");
+ exit(1);
+ }
+ ok(1, "flush");
+ }
+
+ if (translog_flush(translog_get_horizon()))
+ {
+ fprintf(stderr, "Can't flush up to horizon\n");
+ translog_destroy();
+ ok(0, "flush");
+ exit(1);
+ }
+ ok(1, "flush");
+
+ srandom(122334817L);
+
+ rc= 1;
+
+ {
+ int len= translog_read_record_header(first_lsn, &rec);
+ if (len == RECHEADER_READ_ERROR)
+ {
+ fprintf(stderr, "translog_read_record_header failed (%d)\n", errno);
+ goto err;
+ }
+ if (rec.type !=LOGREC_FIXED_RECORD_0LSN_EXAMPLE || rec.short_trid != 0 ||
+ rec.record_length != 6 || uint4korr(rec.header) != 0 ||
+ ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF ||
+ first_lsn != rec.lsn)
+ {
+ fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_0LSN_EXAMPLE "
+ "data read(0)\n"
+ "type %u, strid %u, len %u, i: %u, 4: %u 5: %u, "
+ "lsn(%lu,0x%lx)\n",
+ (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length,
+ (uint) uint4korr(rec.header), (uint) rec.header[4],
+ (uint) rec.header[5],
+ LSN_IN_PARTS(rec.lsn));
+ goto err;
+ }
+ read_ok(&rec);
+ translog_free_record_header(&rec);
+ lsn= first_lsn;
+ if (translog_scanner_init(first_lsn, 1, &scanner, 0))
+ {
+ fprintf(stderr, "scanner init failed\n");
+ goto err;
+ }
+ for (i= 1;; i++)
+ {
+ len= translog_read_next_record_header(&scanner, &rec);
+ if (len == RECHEADER_READ_ERROR)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ goto err;
+ }
+ if (len == RECHEADER_READ_EOF)
+ {
+ if (i != ITERATIONS)
+ {
+ fprintf(stderr, "EOL met at iteration %u instead of %u\n",
+ i, ITERATIONS);
+ goto err;
+ }
+ break;
+ }
+ if (i % 2)
+ {
+ LSN ref;
+ ref= lsn_korr(rec.header);
+ if (rec.type != LOGREC_FIXED_RECORD_1LSN_EXAMPLE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 7 || ref != lsn)
+ {
+ fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_1LSN_EXAMPLE "
+ "data read(%d) "
+ "type: %u strid: %u len: %u"
+ "ref: (%lu,0x%lx) (%lu,0x%lx) "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ LSN_IN_PARTS(ref), LSN_IN_PARTS(lsn),
+ LSN_IN_PARTS(rec.lsn));
+ goto err;
+ }
+ }
+ else
+ {
+ LSN ref1, ref2;
+ ref1= lsn_korr(rec.header);
+ ref2= lsn_korr(rec.header + LSN_STORE_SIZE);
+ if (rec.type != LOGREC_FIXED_RECORD_2LSN_EXAMPLE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 23 ||
+ ref1 != lsn ||
+ ref2 != first_lsn ||
+ ((uchar)rec.header[22]) != 0x55 ||
+ ((uchar)rec.header[21]) != 0xAA ||
+ ((uchar)rec.header[20]) != 0x55 ||
+ ((uchar)rec.header[19]) != 0xAA ||
+ ((uchar)rec.header[18]) != 0x55 ||
+ ((uchar)rec.header[17]) != 0xAA ||
+ ((uchar)rec.header[16]) != 0x55 ||
+ ((uchar)rec.header[15]) != 0xAA ||
+ ((uchar)rec.header[14]) != 0x55)
+ {
+ fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_2LSN_EXAMPLE "
+ "data read(%d) "
+ "type %u, strid %u, len %u, ref1(%lu,0x%lx), "
+ "ref2(%lu,0x%lx) %x%x%x%x%x%x%x%x%x "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ LSN_IN_PARTS(ref1), LSN_IN_PARTS(ref2),
+ (uint) rec.header[14], (uint) rec.header[15],
+ (uint) rec.header[16], (uint) rec.header[17],
+ (uint) rec.header[18], (uint) rec.header[19],
+ (uint) rec.header[20], (uint) rec.header[21],
+ (uint) rec.header[22],
+ LSN_IN_PARTS(rec.lsn));
+ goto err;
+ }
+ }
+ read_ok(&rec);
+ translog_free_record_header(&rec);
+
+ len= translog_read_next_record_header(&scanner, &rec);
+ if (len == RECHEADER_READ_ERROR)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header (var) "
+ "failed (%d)\n", i, errno);
+ goto err;
+ }
+ if (len == RECHEADER_READ_EOF)
+ {
+ fprintf(stderr, "EOL met at the middle of iteration (first var) %u "
+ "instead of beginning of %u\n", i, ITERATIONS);
+ goto err;
+ }
+ if (i % 2)
+ {
+ LSN ref;
+ ref= lsn_korr(rec.header);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12)
+ rec_len= 12;
+ if (rec.type != LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len + LSN_STORE_SIZE ||
+ len != 12 || ref != lsn ||
+ check_content(rec.header + LSN_STORE_SIZE, len - LSN_STORE_SIZE))
+ {
+ fprintf(stderr, "Incorrect LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE "
+ "data read(%d)"
+ "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), "
+ "hdr len: %u (%d), "
+ "ref(%lu,0x%lx), lsn(%lu,0x%lx) (%d), content: %d\n",
+ i, (uint) rec.type,
+ rec.type != LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE,
+ (uint) rec.short_trid,
+ rec.short_trid != (i % 0xFFFF),
+ (ulong) rec.record_length, (ulong) rec_len,
+ rec.record_length != rec_len + LSN_STORE_SIZE,
+ (uint) len,
+ len != 12,
+ LSN_IN_PARTS(ref), LSN_IN_PARTS(rec.lsn),
+ (len != 12 || ref != lsn),
+ check_content(rec.header + LSN_STORE_SIZE,
+ len - LSN_STORE_SIZE));
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE "
+ "in whole rec read lsn(%lu,0x%lx)\n",
+ LSN_IN_PARTS(rec.lsn));
+ goto err;
+ }
+ }
+ else
+ {
+ LSN ref1, ref2;
+ ref1= lsn_korr(rec.header);
+ ref2= lsn_korr(rec.header + LSN_STORE_SIZE);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19)
+ rec_len= 19;
+ if (rec.type != LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len + LSN_STORE_SIZE * 2 ||
+ len != 19 ||
+ ref1 != lsn ||
+ ref2 != first_lsn ||
+ check_content(rec.header + LSN_STORE_SIZE * 2,
+ len - LSN_STORE_SIZE * 2))
+ {
+ fprintf(stderr, "Incorrect LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE "
+ "data read(%d) "
+ "type %u, strid %u, len %lu != %lu + 14, hdr len: %d, "
+ "ref1(%lu,0x%lx), ref2(%lu,0x%lx), "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (ulong) rec.record_length, (ulong) rec_len,
+ len, LSN_IN_PARTS(ref1), LSN_IN_PARTS(ref2),
+ LSN_IN_PARTS(rec.lsn));
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE * 2))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE "
+ "in whole rec read lsn(%lu,0x%lx)\n",
+ LSN_IN_PARTS(rec.lsn));
+ goto err;
+ }
+ }
+ read_ok(&rec);
+ translog_free_record_header(&rec);
+
+ len= translog_read_next_record_header(&scanner, &rec);
+ if (len == RECHEADER_READ_ERROR)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ goto err;
+ }
+ if (len == RECHEADER_READ_EOF)
+ {
+ fprintf(stderr, "EOL met at the middle of iteration %u "
+ "instead of beginning of %u\n", i, ITERATIONS);
+ goto err;
+ }
+ if (rec.type != LOGREC_FIXED_RECORD_0LSN_EXAMPLE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 6 || uint4korr(rec.header) != i ||
+ ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF)
+ {
+ fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_0LSN_EXAMPLE "
+ "data read(%d)\n"
+ "type %u, strid %u, len %u, i: %u, 4: %u 5: %u "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ (uint) uint4korr(rec.header), (uint) rec.header[4],
+ (uint) rec.header[5],
+ LSN_IN_PARTS(rec.lsn));
+ goto err;
+ }
+ lsn= rec.lsn;
+ read_ok(&rec);
+ translog_free_record_header(&rec);
+
+ len= translog_read_next_record_header(&scanner, &rec);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 9)
+ rec_len= 9;
+ if (rec.type != LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len ||
+ len != 9 || check_content(rec.header, (uint)len))
+ {
+ fprintf(stderr, "Incorrect LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE "
+ "data read(%d) "
+ "type %u, strid %u, len %lu != %lu, hdr len: %d, "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (ulong) rec.record_length, (ulong) rec_len,
+ len, LSN_IN_PARTS(rec.lsn));
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, 0))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE "
+ "in whole rec read lsn(%lu,0x%lx)\n",
+ LSN_IN_PARTS(rec.lsn));
+ goto err;
+ }
+ read_ok(&rec);
+ translog_free_record_header(&rec);
+ }
+ }
+
+ rc= 0;
+err:
+ if (rc)
+ ok(0, "read record");
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+
+ if (maria_log_remove())
+ exit(1);
+
+ return(test(exit_status()));
+}
diff --git a/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c b/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c
new file mode 100644
index 00000000000..d67d33e5cad
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c
@@ -0,0 +1,147 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+#include <tap.h>
+#include "../trnman.h"
+
+extern my_bool maria_log_remove();
+extern void translog_example_table_init();
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+#define PCACHE_PAGE TRANSLOG_PAGE_SIZE
+#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
+#define LOG_FLAGS 0
+
+static char *first_translog_file= (char*)"maria_log.00000001";
+
+int main(int argc __attribute__((unused)), char *argv[])
+{
+ uint pagen;
+ uchar long_tr_id[6];
+ PAGECACHE pagecache;
+ LSN lsn, first_lsn, theor_lsn;
+ MY_STAT st;
+ LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 1];
+
+ MY_INIT(argv[0]);
+
+ plan(2);
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+ if (maria_log_remove())
+ exit(1);
+ /* be sure that we have no logs in the directory*/
+ if (my_stat(CONTROL_FILE_BASE_NAME, &st, MYF(0)))
+ my_delete(CONTROL_FILE_BASE_NAME, MYF(0));
+ if (my_stat(first_translog_file, &st, MYF(0)))
+ my_delete(first_translog_file, MYF(0));
+
+ bzero(long_tr_id, 6);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ if (ma_control_file_create_or_open(TRUE))
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ PCACHE_PAGE, 0)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ LOG_FLAGS, 0, &translog_example_table_init))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ exit(1);
+ }
+ /* Suppressing of automatic record writing */
+ dummy_transaction_object.first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID;
+
+ theor_lsn= translog_first_theoretical_lsn();
+ if (theor_lsn == 1)
+ {
+ fprintf(stderr, "Error reading the first log file.");
+ translog_destroy();
+ exit(1);
+ }
+ if (theor_lsn == LSN_IMPOSSIBLE)
+ {
+ fprintf(stderr, "There is no first log file.");
+ translog_destroy();
+ exit(1);
+ }
+ first_lsn= translog_first_lsn_in_log();
+ if (first_lsn != LSN_IMPOSSIBLE)
+ {
+ fprintf(stderr, "Incorrect first lsn response (%lu,0x%lx).",
+ LSN_IN_PARTS(first_lsn));
+ translog_destroy();
+ exit(1);
+ }
+ ok(1, "Empty log response");
+
+
+ int4store(long_tr_id, 0);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ &dummy_transaction_object, NULL, 6,
+ TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ exit(1);
+ }
+
+ theor_lsn= translog_first_theoretical_lsn();
+ if (theor_lsn == 1)
+ {
+ fprintf(stderr, "Error reading the first log file\n");
+ translog_destroy();
+ exit(1);
+ }
+ if (theor_lsn == LSN_IMPOSSIBLE)
+ {
+ fprintf(stderr, "There is no first log file\n");
+ translog_destroy();
+ exit(1);
+ }
+ first_lsn= translog_first_lsn_in_log();
+ if (first_lsn != theor_lsn)
+ {
+ fprintf(stderr, "Incorrect first lsn: (%lu,0x%lx) "
+ " theoretical first: (%lu,0x%lx)\n",
+ LSN_IN_PARTS(first_lsn), LSN_IN_PARTS(theor_lsn));
+ translog_destroy();
+ exit(1);
+ }
+
+ ok(1, "Full log response");
+
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+ if (maria_log_remove())
+ exit(1);
+ exit(0);
+}
diff --git a/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c b/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c
new file mode 100644
index 00000000000..c971c23d878
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c
@@ -0,0 +1,140 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+#include <tap.h>
+#include "../trnman.h"
+
+extern my_bool maria_log_remove();
+extern void translog_example_table_init();
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+#define PCACHE_PAGE TRANSLOG_PAGE_SIZE
+#define LOG_FILE_SIZE (8*1024L*1024L)
+#define LOG_FLAGS 0
+
+
+int main(int argc __attribute__((unused)), char *argv[])
+{
+ ulong i;
+ uint pagen;
+ uchar long_tr_id[6];
+ PAGECACHE pagecache;
+ LSN lsn, max_lsn, last_lsn= LSN_IMPOSSIBLE;
+ LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 1];
+
+ MY_INIT(argv[0]);
+
+ plan(2);
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+ if (maria_log_remove())
+ exit(1);
+
+ bzero(long_tr_id, 6);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ if (ma_control_file_create_or_open(TRUE))
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ PCACHE_PAGE, 0)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ LOG_FLAGS, 0, &translog_example_table_init))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ exit(1);
+ }
+ /* Suppressing of automatic record writing */
+ dummy_transaction_object.first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID;
+
+ max_lsn= translog_get_file_max_lsn_stored(1);
+ if (max_lsn == 1)
+ {
+ fprintf(stderr, "Error reading the first log file.");
+ translog_destroy();
+ exit(1);
+ }
+ if (max_lsn != LSN_IMPOSSIBLE)
+ {
+ fprintf(stderr, "Incorrect first lsn response (%lu,0x%lx).",
+ LSN_IN_PARTS(max_lsn));
+ translog_destroy();
+ exit(1);
+ }
+ ok(1, "Empty log response");
+
+
+ /* write more then 1 file */
+ int4store(long_tr_id, 0);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
+ for(i= 0; i < LOG_FILE_SIZE/6; i++)
+ {
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ &dummy_transaction_object, NULL, 6,
+ TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ exit(1);
+ }
+ if (LSN_FILE_NO(lsn) == 1)
+ last_lsn= lsn;
+ }
+
+
+ max_lsn= translog_get_file_max_lsn_stored(1);
+ if (max_lsn == 1)
+ {
+ fprintf(stderr, "Error reading the first log file\n");
+ translog_destroy();
+ exit(1);
+ }
+ if (max_lsn == LSN_IMPOSSIBLE)
+ {
+ fprintf(stderr, "Isn't first file still finished?!!\n");
+ translog_destroy();
+ exit(1);
+ }
+ if (max_lsn != last_lsn)
+ {
+ fprintf(stderr, "Incorrect max lsn: (%lu,0x%lx) "
+ " last lsn on first file: (%lu,0x%lx)\n",
+ LSN_IN_PARTS(max_lsn), LSN_IN_PARTS(last_lsn));
+ translog_destroy();
+ exit(1);
+ }
+
+ ok(1, "First file max LSN");
+
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+ if (maria_log_remove())
+ exit(1);
+ exit(0);
+}
diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c
new file mode 100644
index 00000000000..f44b85598ae
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c
@@ -0,0 +1,650 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+#include <tap.h>
+#include "../trnman.h"
+
+extern my_bool maria_log_remove();
+extern void translog_example_table_init();
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+static TRN *trn= &dummy_transaction_object;
+
+
+#ifndef READONLY_TEST
+
+#define PCACHE_SIZE (1024*1024*10)
+#define LONG_BUFFER_SIZE ((1024L*1024L*1024L) + (1024L*1024L*512))
+#define MIN_REC_LENGTH (1024L*1024L + 1024L*512L + 1)
+#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
+#define ITERATIONS 2
+#define READONLY 0
+
+#else
+
+#define PCACHE_SIZE (1024*1024*10)
+#define LONG_BUFFER_SIZE (1024L*1024L)
+#define MIN_REC_LENGTH (1024L)
+#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
+#define ITERATIONS 2
+#define READONLY 1
+
+#endif /*READONLY_TEST*/
+
+
+/*
+#define LOG_FILE_SIZE 1024L*1024L*3L
+#define ITERATIONS 1600
+*/
+/*
+#define LOG_FILE_SIZE 1024L*1024L*100L
+#define ITERATIONS 65000
+*/
+
+
+/*
+ Check that the buffer filled correctly
+
+ SYNOPSIS
+ check_content()
+ ptr Pointer to the buffer
+ length length of the buffer
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool check_content(uchar *ptr, ulong length)
+{
+ ulong i;
+ uchar buff[4];
+ DBUG_ENTER("check_content");
+ for (i= 0; i < length; i++)
+ {
+ if (i % 4 == 0)
+ int4store(buff, (i >> 2));
+ if (ptr[i] != buff[i % 4])
+ {
+ fprintf(stderr, "Byte # %lu is %x instead of %x",
+ i, (uint) ptr[i], (uint) buff[i % 4]);
+ DBUG_DUMP("mem", ptr +(ulong) (i > 16 ? i - 16 : 0),
+ (i > 16 ? 16 : i) + (i + 16 < length ? 16 : length - i));
+ DBUG_RETURN(1);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Read whole record content, and check content (put with offset)
+
+ SYNOPSIS
+ read_and_check_content()
+ rec The record header buffer
+ buffer The buffer to read the record in
+ skip Skip this number of bytes ot the record content
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec,
+ uchar *buffer, uint skip)
+{
+ int res= 0;
+ translog_size_t len;
+ DBUG_ENTER("read_and_check_content");
+ DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2);
+ if ((len= translog_read_record(rec->lsn, 0, rec->record_length,
+ buffer, NULL)) != rec->record_length)
+ {
+ fprintf(stderr, "Requested %lu byte, read %lu\n",
+ (ulong) rec->record_length, (ulong) len);
+ res= 1;
+ }
+ res|= check_content(buffer + skip, rec->record_length - skip);
+ DBUG_RETURN(res);
+}
+
+
+static uint32 get_len()
+{
+ uint32 rec_len;
+ do
+ {
+ rec_len= random() /
+ (RAND_MAX / (LONG_BUFFER_SIZE - MIN_REC_LENGTH - 1)) + MIN_REC_LENGTH;
+ } while (rec_len >= LONG_BUFFER_SIZE);
+ return rec_len;
+}
+
+int main(int argc __attribute__((unused)), char *argv[])
+{
+ uint32 i;
+ uint32 rec_len;
+ uint pagen;
+ uchar long_tr_id[6];
+ uchar lsn_buff[23]=
+ {
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA,
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA,
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55
+ };
+ uchar *long_buffer= malloc(LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2);
+ PAGECACHE pagecache;
+ LSN lsn, lsn_base, first_lsn;
+ TRANSLOG_HEADER_BUFFER rec;
+ LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 2];
+ struct st_translog_scanner_data scanner;
+ int rc;
+
+ MY_INIT(argv[0]);
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+ if (maria_log_remove())
+ exit(1);
+
+ {
+ uchar buff[4];
+ for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i++)
+ {
+ if (i % 4 == 0)
+ int4store(buff, (i >> 2));
+ long_buffer[i]= buff[i % 4];
+ }
+ }
+
+ bzero(long_tr_id, 6);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ if (ma_control_file_create_or_open(TRUE))
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE, 0)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ 0, 0, &translog_example_table_init))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ exit(1);
+ }
+ /* Suppressing of automatic record writing */
+ trn->first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID;
+
+ plan(((ITERATIONS - 1) * 4 + 1) * 2);
+
+ srandom(122334817L);
+
+ long_tr_id[5]= 0xff;
+
+ int4store(long_tr_id, 0);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
+ trn->short_id= 0;
+ trn->first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
+ if (translog_write_record(&lsn, LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ trn, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, parts,
+ NULL, NULL))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ ok(0, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE");
+ lsn_base= first_lsn= lsn;
+
+ for (i= 1; i < ITERATIONS; i++)
+ {
+ if (i % 2)
+ {
+ lsn_store(lsn_buff, lsn_base);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE;
+ trn->short_id= i % 0xFFFF;
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_1LSN_EXAMPLE, trn, NULL,
+ LSN_STORE_SIZE, TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "1 Can't write reference before record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_FIXED_RECORD_1LSN_EXAMPLE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_FIXED_RECORD_1LSN_EXAMPLE");
+ lsn_store(lsn_buff, lsn_base);
+ rec_len= get_len();
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len;
+ trn->short_id= i % 0xFFFF;
+ if (translog_write_record(&lsn,
+ LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE,
+ trn, NULL, LSN_STORE_SIZE + rec_len,
+ TRANSLOG_INTERNAL_PARTS + 2,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "1 Can't write var reference before record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE");
+ }
+ else
+ {
+ lsn_store(lsn_buff, lsn_base);
+ lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
+ parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].length= 23;
+ trn->short_id= i % 0xFFFF;
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_2LSN_EXAMPLE,
+ trn, NULL, 23, TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "0 Can't write reference before record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_FIXED_RECORD_2LSN_EXAMPLE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_FIXED_RECORD_2LSN_EXAMPLE");
+ lsn_store(lsn_buff, lsn_base);
+ lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
+ rec_len= get_len();
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE * 2;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len;
+ trn->short_id= i % 0xFFFF;
+ if (translog_write_record(&lsn,
+ LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE,
+ trn, NULL, LSN_STORE_SIZE * 2 + rec_len,
+ TRANSLOG_INTERNAL_PARTS + 2,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "0 Can't write var reference before record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE");
+ }
+ int4store(long_tr_id, i);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
+ trn->short_id= i % 0xFFFF;
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ trn, NULL, 6,
+ TRANSLOG_INTERNAL_PARTS + 1, parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE");
+
+ lsn_base= lsn;
+
+ rec_len= get_len();
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= rec_len;
+ trn->short_id= i % 0xFFFF;
+ if (translog_write_record(&lsn,
+ LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE,
+ trn, NULL, rec_len,
+ TRANSLOG_INTERNAL_PARTS + 1, parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE");
+ }
+
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+
+ if (ma_control_file_create_or_open(TRUE))
+ {
+ fprintf(stderr, "pass2: Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE, 0)) == 0)
+ {
+ fprintf(stderr, "pass2: Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ 0, READONLY, &translog_example_table_init))
+ {
+ fprintf(stderr, "pass2: Can't init loghandler (%d)\n", errno);
+ exit(1);
+ }
+
+ srandom(122334817L);
+
+ rc= 1;
+
+ {
+ int len= translog_read_record_header(first_lsn, &rec);
+ if (len == RECHEADER_READ_ERROR)
+ {
+ fprintf(stderr, "translog_read_record_header failed (%d)\n", errno);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (rec.type !=LOGREC_FIXED_RECORD_0LSN_EXAMPLE || rec.short_trid != 0 ||
+ rec.record_length != 6 || uint4korr(rec.header) != 0 ||
+ ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF ||
+ first_lsn != rec.lsn)
+ {
+ fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_0LSN_EXAMPLE "
+ "data read(0)\n"
+ "type %u, strid %u, len %u, i: %u, 4: %u 5: %u, "
+ "lsn(0x%lu,0x%lx)\n",
+ (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length,
+ (uint)uint4korr(rec.header), (uint) rec.header[4],
+ (uint) rec.header[5],
+ LSN_IN_PARTS(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+ lsn= first_lsn;
+ if (translog_scanner_init(first_lsn, 1, &scanner, 0))
+ {
+ fprintf(stderr, "scanner init failed\n");
+ goto err;
+ }
+ for (i= 1;; i++)
+ {
+ len= translog_read_next_record_header(&scanner, &rec);
+ if (len == RECHEADER_READ_ERROR)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (len == RECHEADER_READ_EOF)
+ {
+ if (i != ITERATIONS)
+ {
+ fprintf(stderr, "EOL met at iteration %u instead of %u\n",
+ i, ITERATIONS);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ break;
+ }
+
+ if (i % 2)
+ {
+ LSN ref;
+ ref= lsn_korr(rec.header);
+ if (rec.type != LOGREC_FIXED_RECORD_1LSN_EXAMPLE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != LSN_STORE_SIZE || ref != lsn)
+ {
+ fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_1LSN_EXAMPLE "
+ "data read(%d)"
+ "type %u, strid %u, len %u, ref(%lu,0x%lx), lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ LSN_IN_PARTS(ref), LSN_IN_PARTS(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ else
+ {
+ LSN ref1, ref2;
+ ref1= lsn_korr(rec.header);
+ ref2= lsn_korr(rec.header + LSN_STORE_SIZE);
+ if (rec.type != LOGREC_FIXED_RECORD_2LSN_EXAMPLE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 23 ||
+ ref1 != lsn ||
+ ref2 != first_lsn ||
+ ((uchar)rec.header[22]) != 0x55 ||
+ ((uchar)rec.header[21]) != 0xAA ||
+ ((uchar)rec.header[20]) != 0x55 ||
+ ((uchar)rec.header[19]) != 0xAA ||
+ ((uchar)rec.header[18]) != 0x55 ||
+ ((uchar)rec.header[17]) != 0xAA ||
+ ((uchar)rec.header[16]) != 0x55 ||
+ ((uchar)rec.header[15]) != 0xAA ||
+ ((uchar)rec.header[14]) != 0x55)
+ {
+ fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_2LSN_EXAMPLE "
+ "data read(%d) "
+ "type %u, strid %u, len %u, ref1(%lu,0x%lx), "
+ "ref2(%lu,0x%lx) %x%x%x%x%x%x%x%x%x "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ LSN_IN_PARTS(ref1), LSN_IN_PARTS(ref2),
+ (uint) rec.header[14], (uint) rec.header[15],
+ (uint) rec.header[16], (uint) rec.header[17],
+ (uint) rec.header[18], (uint) rec.header[19],
+ (uint) rec.header[20], (uint) rec.header[21],
+ (uint) rec.header[22],
+ LSN_IN_PARTS(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+
+ len= translog_read_next_record_header(&scanner, &rec);
+ if (len == RECHEADER_READ_ERROR)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header (var) "
+ "failed (%d)\n", i, errno);
+ goto err;
+ }
+ if (len == RECHEADER_READ_EOF)
+ {
+ fprintf(stderr, "EOL met at the middle of iteration (first var) %u "
+ "instead of beginning of %u\n", i, ITERATIONS);
+ goto err;
+ }
+ if (i % 2)
+ {
+ LSN ref;
+ ref= lsn_korr(rec.header);
+ rec_len= get_len();
+ if (rec.type !=LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len + LSN_STORE_SIZE ||
+ len != 12 || ref != lsn ||
+ check_content(rec.header + LSN_STORE_SIZE, len - LSN_STORE_SIZE))
+ {
+ fprintf(stderr, "Incorrect LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE "
+ "data read(%d)"
+ "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), "
+ "hdr len: %d (%d), "
+ "ref(%lu,0x%lx), lsn(%lu,0x%lx) (%d), content: %d\n",
+ i, (uint) rec.type,
+ rec.type !=LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE,
+ (uint) rec.short_trid,
+ rec.short_trid != (i % 0xFFFF),
+ (ulong) rec.record_length, (ulong) rec_len,
+ rec.record_length != rec_len + LSN_STORE_SIZE,
+ len,
+ len != 12,
+ LSN_IN_PARTS(ref), LSN_IN_PARTS(rec.lsn),
+ (ref != lsn),
+ check_content(rec.header + LSN_STORE_SIZE,
+ len - LSN_STORE_SIZE));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE "
+ "in whole rec read lsn(%lu,0x%lx)\n",
+ LSN_IN_PARTS(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ else
+ {
+ LSN ref1, ref2;
+ ref1= lsn_korr(rec.header);
+ ref2= lsn_korr(rec.header + LSN_STORE_SIZE);
+ rec_len= get_len();
+ if (rec.type != LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len + LSN_STORE_SIZE * 2 ||
+ len != 19 ||
+ ref1 != lsn ||
+ ref2 != first_lsn ||
+ check_content(rec.header + LSN_STORE_SIZE * 2,
+ len - LSN_STORE_SIZE * 2))
+ {
+ fprintf(stderr, "Incorrect LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE "
+ " data read(%d) "
+ "type %u, strid %u, len %lu != %lu + 14, hdr len: %d, "
+ "ref1(%lu,0x%lx), ref2(%lu,0x%lx), "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (ulong) rec.record_length, (ulong) rec_len,
+ len,
+ LSN_IN_PARTS(ref1), LSN_IN_PARTS(ref2),
+ LSN_IN_PARTS(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE * 2))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE "
+ "in whole rec read lsn(%lu,0x%lx)\n",
+ LSN_IN_PARTS(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+
+ len= translog_read_next_record_header(&scanner, &rec);
+ if (len == RECHEADER_READ_ERROR)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (len == RECHEADER_READ_EOF)
+ {
+ fprintf(stderr, "EOL met at the middle of iteration %u "
+ "instead of beginning of %u\n", i, ITERATIONS);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (rec.type != LOGREC_FIXED_RECORD_0LSN_EXAMPLE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 6 || uint4korr(rec.header) != i ||
+ ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF)
+ {
+ fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_0LSN_EXAMPLE "
+ "data read(%d)\n"
+ "type %u, strid %u, len %u, i: %u, 4: %u 5: %u "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ (uint)uint4korr(rec.header), (uint) rec.header[4],
+ (uint) rec.header[5],
+ LSN_IN_PARTS(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+
+ lsn= rec.lsn;
+
+ len= translog_read_next_record_header(&scanner, &rec);
+ rec_len= get_len();
+ if (rec.type != LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len ||
+ len != 9 || check_content(rec.header, len))
+ {
+ fprintf(stderr, "Incorrect LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE "
+ "data read(%d) "
+ "type %u, strid %u, len %lu != %lu, hdr len: %d, "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (ulong) rec.record_length, (ulong) rec_len,
+ len, LSN_IN_PARTS(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, 0))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE "
+ "in whole rec read lsn(%lu,0x%lx)\n",
+ LSN_IN_PARTS(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+ }
+ }
+
+ rc= 0;
+err:
+ if (rc)
+ ok(0, "read record");
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+ if (maria_log_remove())
+ exit(1);
+
+ return (test(exit_status()));
+}
diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c
new file mode 100644
index 00000000000..54c1d82be8f
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c
@@ -0,0 +1,479 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+#include <tap.h>
+#include "../trnman.h"
+
+extern my_bool maria_log_remove();
+extern void translog_example_table_init();
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+
+/*#define LOG_FLAGS TRANSLOG_SECTOR_PROTECTION | TRANSLOG_PAGE_CRC */
+#define LOG_FLAGS 0
+/*#define LONG_BUFFER_SIZE (1024L*1024L*1024L + 1024L*1024L*512)*/
+#define LONG_BUFFER_SIZE (1024L*1024L*1024L)
+#define MIN_REC_LENGTH 30
+#define SHOW_DIVIDER 10
+#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
+#define ITERATIONS 3
+#define WRITERS 3
+static uint number_of_writers= WRITERS;
+
+static pthread_cond_t COND_thread_count;
+static pthread_mutex_t LOCK_thread_count;
+static uint thread_count;
+
+static ulong lens[WRITERS][ITERATIONS];
+static LSN lsns1[WRITERS][ITERATIONS];
+static LSN lsns2[WRITERS][ITERATIONS];
+static uchar *long_buffer;
+
+/*
+ Get pseudo-random length of the field in
+ limits [MIN_REC_LENGTH..LONG_BUFFER_SIZE]
+
+ SYNOPSIS
+ get_len()
+
+ RETURN
+ length - length >= 0 length <= LONG_BUFFER_SIZE
+*/
+
+static uint32 get_len()
+{
+ uint32 rec_len;
+ do
+ {
+ rec_len= random() /
+ (RAND_MAX / (LONG_BUFFER_SIZE - MIN_REC_LENGTH - 1)) + MIN_REC_LENGTH;
+ } while (rec_len >= LONG_BUFFER_SIZE);
+ return rec_len;
+}
+
+
+/*
+ Check that the buffer filled correctly
+
+ SYNOPSIS
+ check_content()
+ ptr Pointer to the buffer
+ length length of the buffer
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool check_content(uchar *ptr, ulong length)
+{
+ ulong i;
+ for (i= 0; i < length; i++)
+ {
+ if (((uchar)ptr[i]) != (i & 0xFF))
+ {
+ fprintf(stderr, "Byte # %lu is %x instead of %x",
+ i, (uint) ptr[i], (uint) (i & 0xFF));
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
+/*
+ Read whole record content, and check content (put with offset)
+
+ SYNOPSIS
+ read_and_check_content()
+ rec The record header buffer
+ buffer The buffer to read the record in
+ skip Skip this number of bytes ot the record content
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+
+static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec,
+ uchar *buffer, uint skip)
+{
+ int res= 0;
+ translog_size_t len;
+
+ if ((len= translog_read_record(rec->lsn, 0, rec->record_length,
+ buffer, NULL)) != rec->record_length)
+ {
+ fprintf(stderr, "Requested %lu byte, read %lu\n",
+ (ulong) rec->record_length, (ulong) len);
+ res= 1;
+ }
+ res|= check_content(buffer + skip, rec->record_length - skip);
+ return(res);
+}
+
+void writer(int num)
+{
+ LSN lsn;
+ TRN trn;
+ uchar long_tr_id[6];
+ uint i;
+
+ trn.short_id= num;
+ trn.first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
+ for (i= 0; i < ITERATIONS; i++)
+ {
+ uint len= get_len();
+ lens[num][i]= len;
+ LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 1];
+
+ int2store(long_tr_id, num);
+ int4store(long_tr_id + 2, i);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ &trn, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write LOGREC_FIXED_RECORD_0LSN_EXAMPLE record #%lu "
+ "thread %i\n", (ulong) i, num);
+ translog_destroy();
+ pthread_mutex_lock(&LOCK_thread_count);
+ ok(0, "write records");
+ pthread_mutex_unlock(&LOCK_thread_count);
+ return;
+ }
+ lsns1[num][i]= lsn;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= len;
+ if (translog_write_record(&lsn,
+ LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE,
+ &trn, NULL,
+ len, TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i);
+ translog_destroy();
+ pthread_mutex_lock(&LOCK_thread_count);
+ ok(0, "write records");
+ pthread_mutex_unlock(&LOCK_thread_count);
+ return;
+ }
+ lsns2[num][i]= lsn;
+ pthread_mutex_lock(&LOCK_thread_count);
+ ok(1, "write records");
+ pthread_mutex_unlock(&LOCK_thread_count);
+ }
+ return;
+}
+
+
+static void *test_thread_writer(void *arg)
+{
+ int param= *((int*) arg);
+
+ my_thread_init();
+
+ writer(param);
+
+ pthread_mutex_lock(&LOCK_thread_count);
+ thread_count--;
+ ok(1, "writer finished"); /* just to show progress */
+ VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are
+ ready */
+ pthread_mutex_unlock(&LOCK_thread_count);
+ free((uchar*) arg);
+ my_thread_end();
+ return(0);
+}
+
+
+int main(int argc __attribute__((unused)),
+ char **argv __attribute__ ((unused)))
+{
+ uint32 i;
+ uint pagen;
+ PAGECACHE pagecache;
+ LSN first_lsn;
+ TRANSLOG_HEADER_BUFFER rec;
+ struct st_translog_scanner_data scanner;
+ pthread_t tid;
+ pthread_attr_t thr_attr;
+ int *param, error;
+ int rc;
+
+ plan(WRITERS + ITERATIONS * WRITERS * 3);
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+ long_buffer= malloc(LONG_BUFFER_SIZE + 7 * 2 + 2);
+ if (long_buffer == 0)
+ {
+ fprintf(stderr, "End of memory\n");
+ exit(1);
+ }
+ for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i++)
+ long_buffer[i]= (i & 0xFF);
+
+ MY_INIT(argv[0]);
+ if (maria_log_remove())
+ exit(1);
+
+
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+
+ if ((error= pthread_cond_init(&COND_thread_count, NULL)))
+ {
+ fprintf(stderr, "COND_thread_count: %d from pthread_cond_init "
+ "(errno: %d)\n", error, errno);
+ exit(1);
+ }
+ if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST)))
+ {
+ fprintf(stderr, "LOCK_thread_count: %d from pthread_cond_init "
+ "(errno: %d)\n", error, errno);
+ exit(1);
+ }
+ if ((error= pthread_attr_init(&thr_attr)))
+ {
+ fprintf(stderr, "Got error: %d from pthread_attr_init "
+ "(errno: %d)\n", error, errno);
+ exit(1);
+ }
+ if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED)))
+ {
+ fprintf(stderr,
+ "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n",
+ error, errno);
+ exit(1);
+ }
+
+#ifdef HAVE_THR_SETCONCURRENCY
+ VOID(thr_setconcurrency(2));
+#endif
+
+ my_thread_global_init();
+
+ if (ma_control_file_create_or_open(TRUE))
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE, 0)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ LOG_FLAGS, 0, &translog_example_table_init))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ exit(1);
+ }
+ /* Suppressing of automatic record writing */
+ dummy_transaction_object.first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID;
+
+ srandom(122334817L);
+ {
+ LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 1];
+ uchar long_tr_id[6]=
+ {
+ 0x11, 0x22, 0x33, 0x44, 0x55, 0x66
+ };
+
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
+ dummy_transaction_object.first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
+ if (translog_write_record(&first_lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ &dummy_transaction_object, NULL, 6,
+ TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write the first record\n");
+ translog_destroy();
+ exit(1);
+ }
+ }
+
+
+ if ((error= pthread_mutex_lock(&LOCK_thread_count)))
+ {
+ fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_lock "
+ "(errno: %d)\n", error, errno);
+ exit(1);
+ }
+
+ while (number_of_writers != 0)
+ {
+ param= (int*) malloc(sizeof(int));
+ *param= number_of_writers - 1;
+ if ((error= pthread_create(&tid, &thr_attr, test_thread_writer,
+ (void*) param)))
+ {
+ fprintf(stderr, "Got error: %d from pthread_create (errno: %d)\n",
+ error, errno);
+ exit(1);
+ }
+ thread_count++;
+ number_of_writers--;
+ }
+ pthread_mutex_unlock(&LOCK_thread_count);
+
+ pthread_attr_destroy(&thr_attr);
+
+ /* wait finishing */
+ if ((error= pthread_mutex_lock(&LOCK_thread_count)))
+ fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_lock\n", error);
+ while (thread_count)
+ {
+ if ((error= pthread_cond_wait(&COND_thread_count, &LOCK_thread_count)))
+ fprintf(stderr, "COND_thread_count: %d from pthread_cond_wait\n", error);
+ }
+ if ((error= pthread_mutex_unlock(&LOCK_thread_count)))
+ fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_unlock\n", error);
+
+ /* Find last LSN and flush up to it (all our log) */
+ {
+ LSN max= 0;
+ for (i= 0; i < WRITERS; i++)
+ {
+ if (cmp_translog_addr(lsns2[i][ITERATIONS - 1], max) > 0)
+ max= lsns2[i][ITERATIONS - 1];
+ }
+ translog_flush(max);
+ }
+
+ rc= 1;
+
+ {
+ uint indeces[WRITERS];
+ uint index, stage;
+ int len;
+ bzero(indeces, sizeof(uint) * WRITERS);
+
+ bzero(indeces, sizeof(indeces));
+
+ if (translog_scanner_init(first_lsn, 1, &scanner, 0))
+ {
+ fprintf(stderr, "scanner init failed\n");
+ goto err;
+ }
+ for (i= 0;; i++)
+ {
+ len= translog_read_next_record_header(&scanner, &rec);
+
+ if (len == RECHEADER_READ_ERROR)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (len == RECHEADER_READ_EOF)
+ {
+ if (i != WRITERS * ITERATIONS * 2)
+ {
+ fprintf(stderr, "EOL met at iteration %u instead of %u\n",
+ i, ITERATIONS * WRITERS * 2);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ break;
+ }
+ index= indeces[rec.short_trid] / 2;
+ stage= indeces[rec.short_trid] % 2;
+ if (stage == 0)
+ {
+ if (rec.type !=LOGREC_FIXED_RECORD_0LSN_EXAMPLE ||
+ rec.record_length != 6 ||
+ uint2korr(rec.header) != rec.short_trid ||
+ index != uint4korr(rec.header + 2) ||
+ cmp_translog_addr(lsns1[rec.short_trid][index], rec.lsn) != 0)
+ {
+ fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_0LSN_EXAMPLE "
+ "data read(%d)\n"
+ "type %u, strid %u %u, len %u, i: %u %u, "
+ "lsn(%lu,0x%lx) (%lu,0x%lx)\n",
+ i, (uint) rec.type,
+ (uint) rec.short_trid, (uint) uint2korr(rec.header),
+ (uint) rec.record_length,
+ (uint) index, (uint) uint4korr(rec.header + 2),
+ LSN_IN_PARTS(rec.lsn),
+ LSN_IN_PARTS(lsns1[rec.short_trid][index]));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ else
+ {
+ if (rec.type != LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE ||
+ len != 9 ||
+ rec.record_length != lens[rec.short_trid][index] ||
+ cmp_translog_addr(lsns2[rec.short_trid][index], rec.lsn) != 0 ||
+ check_content(rec.header, (uint)len))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE "
+ "data read(%d) "
+ "thread: %d, iteration %d, stage %d\n"
+ "type %u (%d), len %d, length %lu %lu (%d) "
+ "lsn(%lu,0x%lx) (%lu,0x%lx)\n",
+ i, (uint) rec.short_trid, index, stage,
+ (uint) rec.type, (rec.type !=
+ LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE),
+ len,
+ (ulong) rec.record_length, lens[rec.short_trid][index],
+ (rec.record_length != lens[rec.short_trid][index]),
+ LSN_IN_PARTS(rec.lsn),
+ LSN_IN_PARTS(lsns2[rec.short_trid][index]));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, 0))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE "
+ "in whole rec read lsn(%lu,0x%lx)\n",
+ LSN_IN_PARTS(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ ok(1, "record read");
+ translog_free_record_header(&rec);
+ indeces[rec.short_trid]++;
+ }
+ }
+
+ rc= 0;
+err:
+ if (rc)
+ ok(0, "record read");
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+ if (maria_log_remove())
+ exit(1);
+
+ return(exit_status());
+}
diff --git a/storage/maria/unittest/ma_test_loghandler_noflush-t.c b/storage/maria/unittest/ma_test_loghandler_noflush-t.c
new file mode 100644
index 00000000000..60483c3debc
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler_noflush-t.c
@@ -0,0 +1,132 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+#include <tap.h>
+#include "../trnman.h"
+
+extern my_bool maria_log_remove();
+extern void translog_example_table_init();
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+#define PCACHE_PAGE TRANSLOG_PAGE_SIZE
+#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
+#define LOG_FLAGS 0
+
+static char *first_translog_file= (char*)"maria_log.00000001";
+
+int main(int argc __attribute__((unused)), char *argv[])
+{
+ uint pagen;
+ int rc= 1;
+ uchar long_tr_id[6];
+ PAGECACHE pagecache;
+ LSN first_lsn;
+ MY_STAT st;
+ TRANSLOG_HEADER_BUFFER rec;
+ LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 1];
+
+ MY_INIT(argv[0]);
+
+ plan(1);
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+ if (maria_log_remove())
+ exit(1);
+ /* be sure that we have no logs in the directory*/
+ if (my_stat(CONTROL_FILE_BASE_NAME, &st, MYF(0)))
+ my_delete(CONTROL_FILE_BASE_NAME, MYF(0));
+ if (my_stat(first_translog_file, &st, MYF(0)))
+ my_delete(first_translog_file, MYF(0));
+
+ bzero(long_tr_id, 6);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ if (ma_control_file_create_or_open(TRUE))
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ PCACHE_PAGE, 0)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ LOG_FLAGS, 0, &translog_example_table_init))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ exit(1);
+ }
+ /* Suppressing of automatic record writing */
+ dummy_transaction_object.first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID;
+
+ int4store(long_tr_id, 0);
+ long_tr_id[5]= 0xff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
+ if (translog_write_record(&first_lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ &dummy_transaction_object, NULL, 6,
+ TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ exit(1);
+ }
+
+ translog_size_t len= translog_read_record_header(first_lsn, &rec);
+ if (len == 0)
+ {
+ fprintf(stderr, "translog_read_record_header failed (%d)\n", errno);
+ goto err;
+ }
+ if (rec.type !=LOGREC_FIXED_RECORD_0LSN_EXAMPLE || rec.short_trid != 0 ||
+ rec.record_length != 6 || uint4korr(rec.header) != 0 ||
+ ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF ||
+ first_lsn != rec.lsn)
+ {
+ fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_0LSN_EXAMPLE "
+ "data read(0)\n"
+ "type: %u (%d) strid: %u (%d) len: %u (%d) i: %u (%d), "
+ "4: %u (%d) 5: %u (%d) "
+ "lsn(%lu,0x%lx) (%d)\n",
+ (uint) rec.type, (rec.type !=LOGREC_FIXED_RECORD_0LSN_EXAMPLE),
+ (uint) rec.short_trid, (rec.short_trid != 0),
+ (uint) rec.record_length, (rec.record_length != 6),
+ (uint) uint4korr(rec.header), (uint4korr(rec.header) != 0),
+ (uint) rec.header[4], (((uchar)rec.header[4]) != 0),
+ (uint) rec.header[5], (((uchar)rec.header[5]) != 0xFF),
+ LSN_IN_PARTS(rec.lsn), (first_lsn != rec.lsn));
+ goto err;
+ }
+
+ ok(1, "read OK");
+ rc= 0;
+
+err:
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+ if (maria_log_remove())
+ exit(1);
+
+ exit(rc);
+}
diff --git a/storage/maria/unittest/ma_test_loghandler_nologs-t.c b/storage/maria/unittest/ma_test_loghandler_nologs-t.c
new file mode 100644
index 00000000000..b0dc08e6894
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler_nologs-t.c
@@ -0,0 +1,179 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+#include <tap.h>
+#include "../trnman.h"
+
+extern my_bool maria_log_remove();
+extern void example_loghandler_init();
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+#define PCACHE_PAGE TRANSLOG_PAGE_SIZE
+#define LOG_FILE_SIZE (8*1024L*1024L)
+#define LOG_FLAGS 0
+#define LONG_BUFFER_SIZE (LOG_FILE_SIZE + LOG_FILE_SIZE / 2)
+
+
+int main(int argc __attribute__((unused)), char *argv[])
+{
+ ulong i;
+ uint pagen;
+ uchar long_tr_id[6];
+ PAGECACHE pagecache;
+ LSN lsn;
+ LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 1];
+ uchar *long_buffer= malloc(LONG_BUFFER_SIZE);
+
+ MY_INIT(argv[0]);
+
+ plan(2);
+
+ bzero(&pagecache, sizeof(pagecache));
+ bzero(long_buffer, LONG_BUFFER_SIZE);
+ maria_data_root= ".";
+ if (maria_log_remove())
+ exit(1);
+
+ bzero(long_tr_id, 6);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ if (ma_control_file_create_or_open(TRUE))
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ PCACHE_PAGE, 0)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ LOG_FLAGS, 0, &translog_example_table_init))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ exit(1);
+ }
+ /* Suppressing of automatic record writing */
+ dummy_transaction_object.first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID;
+
+ /* write more then 1 file */
+ int4store(long_tr_id, 0);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ &dummy_transaction_object, NULL, 6,
+ TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ exit(1);
+ }
+
+ for(i= 0; i < LOG_FILE_SIZE/6 && LSN_FILE_NO(lsn) == 1; i++)
+ {
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ &dummy_transaction_object, NULL, 6,
+ TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ exit(1);
+ }
+ }
+
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+
+ {
+ MY_STAT stat_buff;
+ char file_name[FN_REFLEN];
+ for (i= 1; i <= 2; i++)
+ {
+ translog_filename_by_fileno(i, file_name);
+ if (my_stat(file_name, &stat_buff, MY_WME) == NULL)
+ {
+ fprintf(stderr, "No file '%s'\n", file_name);
+ exit(1);
+ }
+ if (my_delete(file_name, MYF(MY_WME)) != 0)
+ {
+ fprintf(stderr, "Error %d during removing file'%s'\n",
+ errno, file_name);
+ exit(1);
+ }
+ }
+ }
+
+ if (ma_control_file_create_or_open(TRUE))
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ PCACHE_PAGE, 0)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ LOG_FLAGS, 0, &translog_example_table_init))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ exit(1);
+ }
+ /* Suppressing of automatic record writing */
+ dummy_transaction_object.first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID;
+
+ ok(1, "Log init OK");
+
+ int4store(long_tr_id, 0);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ &dummy_transaction_object, NULL, 6,
+ TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ exit(1);
+ }
+
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+
+ if (!translog_is_file(3))
+ {
+ fprintf(stderr, "No file #3\n");
+ exit(1);
+ }
+
+ ok(1, "New log is OK");
+
+ if (maria_log_remove())
+ exit(1);
+ exit(0);
+}
diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c
new file mode 100644
index 00000000000..8bc5cd5a45b
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c
@@ -0,0 +1,186 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+#include <tap.h>
+#include "../trnman.h"
+
+extern my_bool maria_log_remove();
+extern void translog_example_table_init();
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+#define PCACHE_PAGE TRANSLOG_PAGE_SIZE
+#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
+#define LOG_FLAGS 0
+
+static char *first_translog_file= (char*)"maria_log.00000001";
+static char *file1_name= (char*)"page_cache_test_file_1";
+static PAGECACHE_FILE file1;
+
+
+/**
+ @brief Dummy pagecache callback.
+*/
+
+static my_bool
+dummy_callback(uchar *page __attribute__((unused)),
+ pgcache_page_no_t page_no __attribute__((unused)),
+ uchar* data_ptr __attribute__((unused)))
+{
+ return 0;
+}
+
+
+/**
+ @brief Dummy pagecache callback.
+*/
+
+static void
+dummy_fail_callback(uchar* data_ptr __attribute__((unused)))
+{
+ return;
+}
+
+
+int main(int argc __attribute__((unused)), char *argv[])
+{
+ uint pagen;
+ uchar long_tr_id[6];
+ PAGECACHE pagecache;
+ LSN lsn;
+ MY_STAT st, *stat;
+ LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 1];
+
+ MY_INIT(argv[0]);
+
+ plan(1);
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+ if (maria_log_remove())
+ exit(1);
+ /* be sure that we have no logs in the directory*/
+ if (my_stat(CONTROL_FILE_BASE_NAME, &st, MYF(0)))
+ my_delete(CONTROL_FILE_BASE_NAME, MYF(0));
+ if (my_stat(first_translog_file, &st, MYF(0)))
+ my_delete(first_translog_file, MYF(0));
+
+ bzero(long_tr_id, 6);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler_pagecache.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler_pagecache.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ if (ma_control_file_create_or_open(TRUE))
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ PCACHE_PAGE, 0)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ LOG_FLAGS, 0, &translog_example_table_init))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ exit(1);
+ }
+ /* Suppressing of automatic record writing */
+ dummy_transaction_object.first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID;
+
+ if ((stat= my_stat(first_translog_file, &st, MYF(0))) == 0)
+ {
+ fprintf(stderr, "There is no %s (%d)\n", first_translog_file, errno);
+ exit(1);
+ }
+ if (st.st_size != TRANSLOG_PAGE_SIZE)
+ {
+ fprintf(stderr,
+ "incorrect initial size of %s: %ld instead of %ld\n",
+ first_translog_file, (long)st.st_size, (long)TRANSLOG_PAGE_SIZE);
+ exit(1);
+ }
+ int4store(long_tr_id, 0);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
+ dummy_transaction_object.first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ &dummy_transaction_object, NULL, 6,
+ TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ exit(1);
+ }
+
+ if ((file1.file= my_open(file1_name,
+ O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
+ {
+ fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ pagecache_file_init(file1, &dummy_callback, &dummy_callback,
+ &dummy_fail_callback, NULL);
+ if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0)
+ {
+ fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+
+ {
+ uchar page[PCACHE_PAGE];
+
+ bzero(page, PCACHE_PAGE);
+#define PAGE_LSN_OFFSET 0
+ lsn_store(page + PAGE_LSN_OFFSET, lsn);
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)page,
+ PAGECACHE_LSN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0, LSN_IMPOSSIBLE);
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ }
+ if ((stat= my_stat(first_translog_file, &st, MYF(0))) == 0)
+ {
+ fprintf(stderr, "can't stat %s (%d)\n", first_translog_file, errno);
+ exit(1);
+ }
+ if (st.st_size != TRANSLOG_PAGE_SIZE * 2)
+ {
+ fprintf(stderr,
+ "incorrect initial size of %s: %ld instead of %ld\n",
+ first_translog_file,
+ (long)st.st_size, (long)(TRANSLOG_PAGE_SIZE * 2));
+ ok(0, "log triggered");
+ exit(1);
+ }
+ ok(1, "log triggered");
+
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+ my_delete(CONTROL_FILE_BASE_NAME, MYF(0));
+ my_delete(first_translog_file, MYF(0));
+ my_delete(file1_name, MYF(0));
+
+ exit(0);
+}
diff --git a/storage/maria/unittest/ma_test_loghandler_purge-t.c b/storage/maria/unittest/ma_test_loghandler_purge-t.c
new file mode 100644
index 00000000000..ac6fd33e327
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler_purge-t.c
@@ -0,0 +1,176 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+#include <tap.h>
+#include "../trnman.h"
+
+extern my_bool maria_log_remove();
+extern void translog_example_table_init();
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+#define PCACHE_PAGE TRANSLOG_PAGE_SIZE
+#define LOG_FILE_SIZE (8*1024L*1024L)
+#define LOG_FLAGS 0
+#define LONG_BUFFER_SIZE (LOG_FILE_SIZE + LOG_FILE_SIZE / 2)
+
+
+int main(int argc __attribute__((unused)), char *argv[])
+{
+ ulong i;
+ uint pagen;
+ uchar long_tr_id[6];
+ PAGECACHE pagecache;
+ LSN lsn;
+ LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 1];
+ uchar *long_buffer= malloc(LONG_BUFFER_SIZE);
+
+ MY_INIT(argv[0]);
+
+ plan(4);
+
+ bzero(&pagecache, sizeof(pagecache));
+ bzero(long_buffer, LONG_BUFFER_SIZE);
+ maria_data_root= ".";
+ if (maria_log_remove())
+ exit(1);
+
+ bzero(long_tr_id, 6);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ if (ma_control_file_create_or_open(TRUE))
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ PCACHE_PAGE, 0)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ LOG_FLAGS, 0, &translog_example_table_init))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ exit(1);
+ }
+ /* Suppressing of automatic record writing */
+ dummy_transaction_object.first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID;
+
+ /* write more then 1 file */
+ int4store(long_tr_id, 0);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ &dummy_transaction_object, NULL, 6,
+ TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ exit(1);
+ }
+
+ translog_purge(lsn);
+ if (!translog_is_file(1))
+ {
+ fprintf(stderr, "First file was removed after first record\n");
+ translog_destroy();
+ exit(1);
+ }
+ ok(1, "First is not removed");
+
+ for(i= 0; i < LOG_FILE_SIZE/6 && LSN_FILE_NO(lsn) == 1; i++)
+ {
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ &dummy_transaction_object, NULL, 6,
+ TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ exit(1);
+ }
+ }
+
+ translog_purge(lsn);
+ if (translog_is_file(1))
+ {
+ fprintf(stderr, "First file was not removed.\n");
+ translog_destroy();
+ exit(1);
+ }
+
+ ok(1, "First file is removed");
+
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= LONG_BUFFER_SIZE;
+ if (translog_write_record(&lsn,
+ LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE,
+ &dummy_transaction_object, NULL, LONG_BUFFER_SIZE,
+ TRANSLOG_INTERNAL_PARTS + 1, parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write variable record\n");
+ translog_destroy();
+ exit(1);
+ }
+
+ translog_purge(lsn);
+ if (!translog_is_file(2) || !translog_is_file(3))
+ {
+ fprintf(stderr, "Second file (%d) or third file (%d) is not present.\n",
+ translog_is_file(2), translog_is_file(3));
+ translog_destroy();
+ exit(1);
+ }
+
+ ok(1, "Second and third files are not removed");
+
+ int4store(long_tr_id, 0);
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
+ if (translog_write_record(&lsn,
+ LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
+ &dummy_transaction_object, NULL, 6,
+ TRANSLOG_INTERNAL_PARTS + 1,
+ parts, NULL, NULL))
+ {
+ fprintf(stderr, "Can't write last record\n");
+ translog_destroy();
+ exit(1);
+ }
+
+ translog_purge(lsn);
+ if (translog_is_file(2))
+ {
+ fprintf(stderr, "Second file is not removed\n");
+ translog_destroy();
+ exit(1);
+ }
+
+ ok(1, "Second file is removed");
+
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+ if (maria_log_remove())
+ exit(1);
+ exit(0);
+}
diff --git a/storage/maria/unittest/test_file.c b/storage/maria/unittest/test_file.c
new file mode 100644
index 00000000000..215fd54a819
--- /dev/null
+++ b/storage/maria/unittest/test_file.c
@@ -0,0 +1,77 @@
+#include <tap.h>
+#include <my_sys.h>
+#include <my_dir.h>
+#include "test_file.h"
+
+
+/*
+ Check that file contance correspond to descriptor
+
+ SYNOPSIS
+ test_file()
+ file File to test
+ file_name Path (and name) of file which is tested
+ size size of file
+ buff_size size of buffer which is enought to check the file
+ desc file descriptor to check with
+
+ RETURN
+ 1 file if OK
+ 0 error
+*/
+
+int test_file(PAGECACHE_FILE file, char *file_name,
+ off_t size, size_t buff_size, struct file_desc *desc)
+{
+ MY_STAT stat_buff, *stat;
+ unsigned char *buffr= my_malloc(buff_size, MYF(0));
+ off_t pos= 0;
+ size_t byte;
+ int step= 0;
+ int res= 1; /* ok */
+
+ if ((stat= my_stat(file_name, &stat_buff, MYF(0))) == NULL)
+ {
+ diag("Can't stat() %s (errno: %d)\n", file_name, errno);
+ res= 0;
+ goto err;
+ }
+ if (stat->st_size != size)
+ {
+ diag("file %s size is %lu (should be %lu)\n",
+ file_name, (ulong) stat->st_size, (ulong) size);
+ res= 0; /* failed */
+ /* continue to get more information */
+ }
+
+ /* check content */
+ my_seek(file.file, 0, SEEK_SET, MYF(MY_WME));
+ while (desc[step].length != 0)
+ {
+ if (my_read(file.file, (char*)buffr, desc[step].length, MYF(0)) !=
+ desc[step].length)
+ {
+ diag("Can't read %u bytes from %s (file: %d errno: %d)\n",
+ (uint)desc[step].length, file_name, file.file, errno);
+ res= 0;
+ goto err;
+ }
+ for (byte= 0; byte < desc[step].length; byte++)
+ {
+ if (buffr[byte] != desc[step].content)
+ {
+ diag("content of %s mismatch 0x%x in position %lu instead of 0x%x\n",
+ file_name, (uint) buffr[byte], (ulong) (pos + byte),
+ desc[step].content);
+ res= 0;
+ goto err;
+ }
+ }
+ pos+= desc[step].length;
+ step++;
+ }
+
+err:
+ my_free(buffr, 0);
+ return res;
+}
diff --git a/storage/maria/unittest/test_file.h b/storage/maria/unittest/test_file.h
new file mode 100644
index 00000000000..293c692717e
--- /dev/null
+++ b/storage/maria/unittest/test_file.h
@@ -0,0 +1,14 @@
+#include <m_string.h>
+#include "../ma_pagecache.h"
+
+/*
+ File content descriptor
+*/
+struct file_desc
+{
+ unsigned int length;
+ unsigned char content;
+};
+
+int test_file(PAGECACHE_FILE file, char *file_name,
+ off_t size, size_t buff_size, struct file_desc *desc);
diff --git a/storage/maria/unittest/trnman-t.c b/storage/maria/unittest/trnman-t.c
new file mode 100644
index 00000000000..103f76cf776
--- /dev/null
+++ b/storage/maria/unittest/trnman-t.c
@@ -0,0 +1,195 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <tap.h>
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <my_atomic.h>
+#include <lf.h>
+#include <m_string.h>
+#include "../trnman.h"
+
+pthread_mutex_t rt_mutex;
+pthread_attr_t attr;
+size_t stacksize= 0;
+#define STACK_SIZE (((int)stacksize-2048)*STACK_DIRECTION)
+
+int rt_num_threads;
+int litmus;
+
+/*
+ create and end (commit or rollback) transactions randomly
+*/
+#define MAX_ITER 100
+pthread_handler_t test_trnman(void *arg)
+{
+ uint x, y, i, n;
+ TRN *trn[MAX_ITER];
+ pthread_mutex_t mutexes[MAX_ITER];
+ pthread_cond_t conds[MAX_ITER];
+ int m= (*(int *)arg);
+
+ for (i= 0; i < MAX_ITER; i++)
+ {
+ pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST);
+ pthread_cond_init(&conds[i], 0);
+ }
+
+ for (x= ((int)(intptr)(&m)); m > 0; )
+ {
+ y= x= (x*LL(3628273133) + LL(1500450271)) % LL(9576890767); /* three prime numbers */
+ m-= n= x % MAX_ITER;
+ for (i= 0; i < n; i++)
+ {
+ trn[i]= trnman_new_trn(&mutexes[i], &conds[i], &m + STACK_SIZE);
+ if (!trn[i])
+ {
+ diag("trnman_new_trn() failed");
+ litmus++;
+ }
+ }
+ for (i= 0; i < n; i++)
+ {
+ y= (y*19 + 7) % 31;
+ trnman_end_trn(trn[i], y & 1);
+ }
+ }
+ for (i= 0; i < MAX_ITER; i++)
+ {
+ pthread_mutex_destroy(&mutexes[i]);
+ pthread_cond_destroy(&conds[i]);
+ }
+ pthread_mutex_lock(&rt_mutex);
+ rt_num_threads--;
+ pthread_mutex_unlock(&rt_mutex);
+
+ return 0;
+}
+#undef MAX_ITER
+
+void run_test(const char *test, pthread_handler handler, int n, int m)
+{
+ pthread_t *threads;
+ ulonglong now= my_getsystime();
+ int i;
+
+ litmus= 0;
+
+ threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0));
+ if (!threads)
+ {
+ diag("Out of memory");
+ abort();
+ }
+
+ diag("Testing %s with %d threads, %d iterations... ", test, n, m);
+ rt_num_threads= n;
+ for (i= 0; i < n ; i++)
+ if (pthread_create(threads+i, &attr, handler, &m))
+ {
+ diag("Could not create thread");
+ abort();
+ }
+ for (i= 0 ; i < n ; i++)
+ pthread_join(threads[i], 0);
+ now= my_getsystime()-now;
+ ok(litmus == 0, "Tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus);
+ my_free((void*)threads, MYF(0));
+}
+
+#define ok_read_from(T1, T2, RES) \
+ i= trnman_can_read_from(trn[T1], trn[T2]->trid); \
+ ok(i == RES, "trn" #T1 " %s read from trn" #T2, i ? "can" : "cannot")
+#define start_transaction(T) \
+ trn[T]= trnman_new_trn(&mutexes[T], &conds[T], &i + STACK_SIZE)
+#define commit(T) trnman_commit_trn(trn[T])
+#define abort(T) trnman_abort_trn(trn[T])
+
+#define Ntrns 4
+void test_trnman_read_from()
+{
+ TRN *trn[Ntrns];
+ pthread_mutex_t mutexes[Ntrns];
+ pthread_cond_t conds[Ntrns];
+ int i;
+
+ for (i= 0; i < Ntrns; i++)
+ {
+ pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST);
+ pthread_cond_init(&conds[i], 0);
+ }
+
+ start_transaction(0); /* start trn1 */
+ start_transaction(1); /* start trn2 */
+ ok_read_from(1, 0, 0);
+ commit(0); /* commit trn1 */
+ start_transaction(2); /* start trn4 */
+ abort(2); /* abort trn4 */
+ start_transaction(3); /* start trn5 */
+ ok_read_from(3, 0, 1);
+ ok_read_from(3, 1, 0);
+ ok_read_from(3, 2, 0);
+ commit(1); /* commit trn2 */
+ ok_read_from(3, 1, 0);
+ commit(3); /* commit trn5 */
+
+ for (i= 0; i < Ntrns; i++)
+ {
+ pthread_mutex_destroy(&mutexes[i]);
+ pthread_cond_destroy(&conds[i]);
+ }
+}
+
+int main(int argc __attribute__((unused)), char **argv)
+{
+ MY_INIT(argv[0]);
+ my_init();
+
+ plan(6);
+
+ if (my_atomic_initialize())
+ return exit_status();
+
+ pthread_mutex_init(&rt_mutex, 0);
+ pthread_attr_init(&attr);
+#ifdef HAVE_PTHREAD_ATTR_GETSTACKSIZE
+ pthread_attr_getstacksize(&attr, &stacksize);
+ if (stacksize == 0)
+#endif
+ stacksize= PTHREAD_STACK_MIN;
+
+#define CYCLES 10000
+#define THREADS 10
+
+ trnman_init(0);
+
+ test_trnman_read_from();
+ run_test("trnman", test_trnman, THREADS, CYCLES);
+
+ diag("mallocs: %d", trnman_allocated_transactions);
+ {
+ ulonglong now= my_getsystime();
+ trnman_destroy();
+ now= my_getsystime()-now;
+ diag("trnman_destroy: %g", ((double)now)/1e7);
+ }
+
+ pthread_mutex_destroy(&rt_mutex);
+ my_end(0);
+ return exit_status();
+}
+
diff --git a/storage/myisam/Makefile.am b/storage/myisam/Makefile.am
index f50c312b8e4..4bd0b177daa 100644
--- a/storage/myisam/Makefile.am
+++ b/storage/myisam/Makefile.am
@@ -97,8 +97,8 @@ libmyisam_a_SOURCES = mi_open.c mi_extra.c mi_info.c mi_rkey.c \
mi_delete_table.c mi_rename.c mi_check.c \
mi_keycache.c mi_preload.c \
ft_parser.c ft_stopwords.c ft_static.c \
- ft_update.c ft_boolean_search.c ft_nlq_search.c sort.c \
- ha_myisam.cc \
+ ft_update.c ft_boolean_search.c ft_nlq_search.c \
+ sort.c ha_myisam.cc ft_myisam.c \
rt_index.c rt_key.c rt_mbr.c rt_split.c sp_key.c
CLEANFILES = test?.MY? FT?.MY? isam.log mi_test_all rt_test.MY? sp_test.MY?
diff --git a/storage/myisam/ft_boolean_search.c b/storage/myisam/ft_boolean_search.c
index 5e7a955793f..03d6bdb2cde 100644
--- a/storage/myisam/ft_boolean_search.c
+++ b/storage/myisam/ft_boolean_search.c
@@ -162,7 +162,7 @@ static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
{
/* ORDER BY word DESC, ndepth DESC */
- int i= mi_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1,
+ int i= ha_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1,
(uchar*) (*a)->word+1,(*a)->len-1,0,0);
if (!i)
i=CMP_NUM((*b)->ndepth,(*a)->ndepth);
@@ -196,7 +196,7 @@ static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param,
case FT_TOKEN_WORD:
ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root,
sizeof(FTB_WORD) +
- (info->trunc ? MI_MAX_KEY_BUFF :
+ (info->trunc ? HA_MAX_KEY_BUFF :
word_len * ftb_param->ftb->charset->mbmaxlen +
HA_FT_WLEN +
ftb_param->ftb->info->s->rec_reflength));
@@ -345,7 +345,6 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
uint off, extra=HA_FT_WLEN+info->s->base.rec_reflength;
uchar *lastkey_buf=ftbw->word+ftbw->off;
- LINT_INIT(off);
if (ftbw->flags & FTB_FLAG_TRUNC)
lastkey_buf+=ftbw->len;
@@ -395,7 +394,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
if (!r && !ftbw->off)
{
- r= mi_compare_text(ftb->charset,
+ r= ha_compare_text(ftb->charset,
info->lastkey+1,
info->lastkey_length-extra-1,
(uchar*) ftbw->word+1,
@@ -868,7 +867,7 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2)
{
ftbw= ftb->list[c];
- if (mi_compare_text(ftb->charset, (uchar*)word, len,
+ if (ha_compare_text(ftb->charset, (uchar*)word, len,
(uchar*)ftbw->word+1, ftbw->len-1,
(my_bool)(ftbw->flags&FTB_FLAG_TRUNC), 0) > 0)
b= c;
@@ -878,7 +877,7 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
for (; c >= 0; c--)
{
ftbw= ftb->list[c];
- if (mi_compare_text(ftb->charset, (uchar*)word, len,
+ if (ha_compare_text(ftb->charset, (uchar*)word, len,
(uchar*)ftbw->word + 1,ftbw->len - 1,
(my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0))
break;
diff --git a/storage/myisam/ft_eval.c b/storage/myisam/ft_eval.c
index 7eb78861e5e..de01510fdd7 100644
--- a/storage/myisam/ft_eval.c
+++ b/storage/myisam/ft_eval.c
@@ -48,7 +48,7 @@ int main(int argc, char *argv[])
recinfo[0].type=FIELD_SKIP_ENDSPACE;
recinfo[0].length=docid_length;
recinfo[1].type=FIELD_BLOB;
- recinfo[1].length= 4+mi_portable_sizeof_char_ptr;
+ recinfo[1].length= 4+portable_sizeof_char_ptr;
/* Define a key over the first column */
keyinfo[0].seg=keyseg;
diff --git a/storage/myisam/ft_myisam.c b/storage/myisam/ft_myisam.c
new file mode 100644
index 00000000000..bef3fbfd5f5
--- /dev/null
+++ b/storage/myisam/ft_myisam.c
@@ -0,0 +1,36 @@
+/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+/*
+ This function is for interface functions between fulltext and myisam
+*/
+
+#include "ftdefs.h"
+
+FT_INFO *ft_init_search(uint flags, void *info, uint keynr,
+ uchar *query, uint query_len, CHARSET_INFO *cs,
+ uchar *record)
+{
+ FT_INFO *res;
+ if (flags & FT_BOOL)
+ res= ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len,cs);
+ else
+ res= ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags,
+ record);
+ return res;
+}
diff --git a/storage/myisam/ft_nlq_search.c b/storage/myisam/ft_nlq_search.c
index 8df8c2da4eb..eb563638d36 100644
--- a/storage/myisam/ft_nlq_search.c
+++ b/storage/myisam/ft_nlq_search.c
@@ -103,7 +103,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
{
if (keylen &&
- mi_compare_text(aio->charset,info->lastkey+1,
+ ha_compare_text(aio->charset,info->lastkey+1,
info->lastkey_length-extra-1, keybuff+1,keylen-1,0,0))
break;
diff --git a/storage/myisam/ft_parser.c b/storage/myisam/ft_parser.c
index df2423aa50f..042a999fffa 100644
--- a/storage/myisam/ft_parser.c
+++ b/storage/myisam/ft_parser.c
@@ -31,7 +31,7 @@ typedef struct st_my_ft_parser_param
static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2)
{
- return mi_compare_text(cs, (uchar*) w1->pos, w1->len,
+ return ha_compare_text(cs, (uchar*) w1->pos, w1->len,
(uchar*) w2->pos, w2->len, 0, 0);
}
diff --git a/storage/myisam/ft_static.c b/storage/myisam/ft_static.c
index 610c20eede6..d48bedc9e3b 100644
--- a/storage/myisam/ft_static.c
+++ b/storage/myisam/ft_static.c
@@ -54,20 +54,6 @@ const struct _ft_vft _ft_vft_boolean = {
ft_boolean_get_relevance, ft_boolean_reinit_search
};
-
-FT_INFO *ft_init_search(uint flags, void *info, uint keynr,
- uchar *query, uint query_len, CHARSET_INFO *cs,
- uchar *record)
-{
- FT_INFO *res;
- if (flags & FT_BOOL)
- res= ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len,cs);
- else
- res= ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags,
- record);
- return res;
-}
-
const char *ft_stopword_file = 0;
const char *ft_precompiled_stopwords[] = {
diff --git a/storage/myisam/ft_stopwords.c b/storage/myisam/ft_stopwords.c
index 59866d9a351..8aefffbee1d 100644
--- a/storage/myisam/ft_stopwords.c
+++ b/storage/myisam/ft_stopwords.c
@@ -29,7 +29,7 @@ static TREE *stopwords3=NULL;
static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)),
FT_STOPWORD *w1, FT_STOPWORD *w2)
{
- return mi_compare_text(default_charset_info,
+ return ha_compare_text(default_charset_info,
(uchar *)w1->pos,w1->len,
(uchar *)w2->pos,w2->len,0,0);
}
@@ -51,10 +51,11 @@ static int ft_add_stopword(const char *w)
int ft_init_stopwords()
{
+ DBUG_ENTER("ft_init_stopwords");
if (!stopwords3)
{
if (!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0))))
- return -1;
+ DBUG_RETURN(-1);
init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp,
0,
(ft_stopword_file ? (tree_element_free)&FT_STOPWORD_free : 0),
@@ -70,10 +71,10 @@ int ft_init_stopwords()
int error=-1;
if (!*ft_stopword_file)
- return 0;
+ DBUG_RETURN(0);
if ((fd=my_open(ft_stopword_file, O_RDONLY, MYF(MY_WME))) == -1)
- return -1;
+ DBUG_RETURN(-1);
len=(uint)my_seek(fd, 0L, MY_SEEK_END, MYF(0));
my_seek(fd, 0L, MY_SEEK_SET, MYF(0));
if (!(start=buffer=my_malloc(len+1, MYF(MY_WME))))
@@ -90,7 +91,7 @@ err1:
my_free(buffer, MYF(0));
err0:
my_close(fd, MYF(MY_WME));
- return error;
+ DBUG_RETURN(error);
}
else
{
@@ -100,13 +101,14 @@ err0:
for (;*sws;sws++)
{
if (ft_add_stopword(*sws))
- return -1;
+ DBUG_RETURN(-1);
}
ft_stopword_file="(built-in)"; /* for SHOW VARIABLES */
}
- return 0;
+ DBUG_RETURN(0);
}
+
int is_stopword(char *word, uint len)
{
FT_STOPWORD sw;
@@ -118,6 +120,8 @@ int is_stopword(char *word, uint len)
void ft_free_stopwords()
{
+ DBUG_ENTER("ft_free_stopwords");
+
if (stopwords3)
{
delete_tree(stopwords3); /* purecov: inspected */
@@ -125,4 +129,5 @@ void ft_free_stopwords()
stopwords3=0;
}
ft_stopword_file= 0;
+ DBUG_VOID_RETURN;
}
diff --git a/storage/myisam/ft_test1.c b/storage/myisam/ft_test1.c
index e49c47bb268..b37935a0d7a 100644
--- a/storage/myisam/ft_test1.c
+++ b/storage/myisam/ft_test1.c
@@ -75,12 +75,12 @@ static int run_test(const char *filename)
/* First define 2 columns */
recinfo[0].type=extra_field;
- recinfo[0].length= (extra_field == FIELD_BLOB ? 4 + mi_portable_sizeof_char_ptr :
+ recinfo[0].length= (extra_field == FIELD_BLOB ? 4 + portable_sizeof_char_ptr :
extra_length);
if (extra_field == FIELD_VARCHAR)
recinfo[0].length+= HA_VARCHAR_PACKLENGTH(extra_length);
recinfo[1].type=key_field;
- recinfo[1].length= (key_field == FIELD_BLOB ? 4+mi_portable_sizeof_char_ptr :
+ recinfo[1].length= (key_field == FIELD_BLOB ? 4+portable_sizeof_char_ptr :
key_length);
if (key_field == FIELD_VARCHAR)
recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length);
diff --git a/storage/myisam/ft_update.c b/storage/myisam/ft_update.c
index e3e4c62158f..d1548e32870 100644
--- a/storage/myisam/ft_update.c
+++ b/storage/myisam/ft_update.c
@@ -180,7 +180,7 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const uchar *rec1, const uchar *rec2)
{
if ((ftsi1.pos != ftsi2.pos) &&
(!ftsi1.pos || !ftsi2.pos ||
- mi_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len,
+ ha_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len,
(uchar*) ftsi2.pos,ftsi2.len,0,0)))
DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT);
}
@@ -209,7 +209,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, uchar *keybuf,
error=0;
while(old_word->pos && new_word->pos)
{
- cmp= mi_compare_text(cs, (uchar*) old_word->pos,old_word->len,
+ cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len,
(uchar*) new_word->pos,new_word->len,0,0);
cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5);
diff --git a/storage/myisam/fulltext.h b/storage/myisam/fulltext.h
index 856e93e034d..9aef2d0d002 100644
--- a/storage/myisam/fulltext.h
+++ b/storage/myisam/fulltext.h
@@ -20,18 +20,8 @@
#include "myisamdef.h"
#include "ft_global.h"
-#define HA_FT_WTYPE HA_KEYTYPE_FLOAT
-#define HA_FT_WLEN 4
-#define FT_SEGS 2
-
-#define ft_sintXkorr(A) mi_sint4korr(A)
-#define ft_intXstore(T,A) mi_int4store(T,A)
-
-extern const HA_KEYSEG ft_keysegs[FT_SEGS];
-
int _mi_ft_cmp(MI_INFO *, uint, const uchar *, const uchar *);
int _mi_ft_add(MI_INFO *, uint, uchar *, const uchar *, my_off_t);
int _mi_ft_del(MI_INFO *, uint, uchar *, const uchar *, my_off_t);
uint _mi_ft_convert_to_ft2(MI_INFO *, uint, uchar *);
-
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index e74464a1834..70116b80c1f 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -22,6 +22,7 @@
#include "mysql_priv.h"
#include <mysql/plugin.h>
#include <m_ctype.h>
+#include <my_bit.h>
#include <myisampack.h>
#include "ha_myisam.h"
#include <stdarg.h>
@@ -56,7 +57,7 @@ static handler *myisam_create_handler(handlerton *hton,
// collect errors printed by mi_check routines
-static void mi_check_print_msg(MI_CHECK *param, const char* msg_type,
+static void mi_check_print_msg(HA_CHECK *param, const char* msg_type,
const char *fmt, va_list args)
{
THD* thd = (THD*)param->thd;
@@ -255,30 +256,31 @@ int table2myisam(TABLE *table_arg, MI_KEYDEF **keydef_out,
DBUG_PRINT("loop", ("found: 0x%lx recpos: %d minpos: %d length: %d",
(long) found, recpos, minpos, length));
if (recpos != minpos)
- { // Reserved space (Null bits?)
+ {
+ /* reserve space for null bits */
bzero((char*) recinfo_pos, sizeof(*recinfo_pos));
- recinfo_pos->type= (int) FIELD_NORMAL;
+ recinfo_pos->type= FIELD_NORMAL;
recinfo_pos++->length= (uint16) (minpos - recpos);
}
if (!found)
break;
if (found->flags & BLOB_FLAG)
- recinfo_pos->type= (int) FIELD_BLOB;
+ recinfo_pos->type= FIELD_BLOB;
else if (found->type() == MYSQL_TYPE_VARCHAR)
recinfo_pos->type= FIELD_VARCHAR;
else if (!(options & HA_OPTION_PACK_RECORD))
- recinfo_pos->type= (int) FIELD_NORMAL;
+ recinfo_pos->type= FIELD_NORMAL;
else if (found->zero_pack())
- recinfo_pos->type= (int) FIELD_SKIP_ZERO;
+ recinfo_pos->type= FIELD_SKIP_ZERO;
else
- recinfo_pos->type= (int) ((length <= 3 ||
- (found->flags & ZEROFILL_FLAG)) ?
- FIELD_NORMAL :
- found->type() == MYSQL_TYPE_STRING ||
- found->type() == MYSQL_TYPE_VAR_STRING ?
- FIELD_SKIP_ENDSPACE :
- FIELD_SKIP_PRESPACE);
+ recinfo_pos->type= ((length <= 3 ||
+ (found->flags & ZEROFILL_FLAG)) ?
+ FIELD_NORMAL :
+ found->type() == MYSQL_TYPE_STRING ||
+ found->type() == MYSQL_TYPE_VAR_STRING ?
+ FIELD_SKIP_ENDSPACE :
+ FIELD_SKIP_PRESPACE);
if (found->null_ptr)
{
recinfo_pos->null_bit= found->null_bit;
@@ -304,7 +306,7 @@ int table2myisam(TABLE *table_arg, MI_KEYDEF **keydef_out,
Check for underlying table conformance
SYNOPSIS
- check_definition()
+ myisam_check_definition()
t1_keyinfo in First table key definition
t1_recinfo in First table record definition
t1_keys in Number of keys in first table
@@ -446,13 +448,13 @@ int check_definition(MI_KEYDEF *t1_keyinfo, MI_COLUMNDEF *t1_recinfo,
extern "C" {
-volatile int *killed_ptr(MI_CHECK *param)
+volatile int *killed_ptr(HA_CHECK *param)
{
/* In theory Unsafe conversion, but should be ok for now */
return (int*) &(((THD *)(param->thd))->killed);
}
-void mi_check_print_error(MI_CHECK *param, const char *fmt,...)
+void mi_check_print_error(HA_CHECK *param, const char *fmt,...)
{
param->error_printed|=1;
param->out_flag|= O_DATA_LOST;
@@ -462,7 +464,7 @@ void mi_check_print_error(MI_CHECK *param, const char *fmt,...)
va_end(args);
}
-void mi_check_print_info(MI_CHECK *param, const char *fmt,...)
+void mi_check_print_info(HA_CHECK *param, const char *fmt,...)
{
va_list args;
va_start(args, fmt);
@@ -470,7 +472,7 @@ void mi_check_print_info(MI_CHECK *param, const char *fmt,...)
va_end(args);
}
-void mi_check_print_warning(MI_CHECK *param, const char *fmt,...)
+void mi_check_print_warning(HA_CHECK *param, const char *fmt,...)
{
param->warning_printed=1;
param->out_flag|= O_DATA_LOST;
@@ -725,7 +727,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
{
if (!file) return HA_ADMIN_INTERNAL_ERROR;
int error;
- MI_CHECK param;
+ HA_CHECK param;
MYISAM_SHARE* share = file->s;
const char *old_proc_info=thd->proc_info;
@@ -736,7 +738,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
param.db_name= table->s->db.str;
param.table_name= table->alias;
param.testflag = check_opt->flags | T_CHECK | T_SILENT;
- param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method;
+ param.stats_method= (enum_handler_stats_method)thd->variables.myisam_stats_method;
if (!(table->db_stat & HA_READ_ONLY))
param.testflag|= T_STATISTICS;
@@ -817,7 +819,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt)
{
int error=0;
- MI_CHECK param;
+ HA_CHECK param;
MYISAM_SHARE* share = file->s;
myisamchk_init(&param);
@@ -828,7 +830,7 @@ int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt)
param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS |
T_DONT_CHECK_CHECKSUM);
param.using_global_keycache = 1;
- param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method;
+ param.stats_method= (enum_handler_stats_method)thd->variables.myisam_stats_method;
if (!(share->state.changed & STATE_NOT_ANALYZED))
return HA_ADMIN_ALREADY_DONE;
@@ -877,7 +879,7 @@ int ha_myisam::restore(THD* thd, HA_CHECK_OPT *check_opt)
err:
{
- MI_CHECK param;
+ HA_CHECK param;
myisamchk_init(&param);
param.thd= thd;
param.op_name= "restore";
@@ -940,7 +942,7 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt)
err:
{
- MI_CHECK param;
+ HA_CHECK param;
myisamchk_init(&param);
param.thd= thd;
param.op_name= "backup";
@@ -956,7 +958,7 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt)
int ha_myisam::repair(THD* thd, HA_CHECK_OPT *check_opt)
{
int error;
- MI_CHECK param;
+ HA_CHECK param;
ha_rows start_records;
if (!file) return HA_ADMIN_INTERNAL_ERROR;
@@ -1006,7 +1008,7 @@ int ha_myisam::optimize(THD* thd, HA_CHECK_OPT *check_opt)
{
int error;
if (!file) return HA_ADMIN_INTERNAL_ERROR;
- MI_CHECK param;
+ HA_CHECK param;
myisamchk_init(&param);
param.thd = thd;
@@ -1025,7 +1027,7 @@ int ha_myisam::optimize(THD* thd, HA_CHECK_OPT *check_opt)
}
-int ha_myisam::repair(THD *thd, MI_CHECK &param, bool do_optimize)
+int ha_myisam::repair(THD *thd, HA_CHECK &param, bool do_optimize)
{
int error=0;
uint local_testflag=param.testflag;
@@ -1213,7 +1215,7 @@ int ha_myisam::assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt)
if (error != HA_ADMIN_OK)
{
/* Send error to user */
- MI_CHECK param;
+ HA_CHECK param;
myisamchk_init(&param);
param.thd= thd;
param.op_name= "assign_to_keycache";
@@ -1277,7 +1279,7 @@ int ha_myisam::preload_keys(THD* thd, HA_CHECK_OPT *check_opt)
err:
{
- MI_CHECK param;
+ HA_CHECK param;
myisamchk_init(&param);
param.thd= thd;
param.op_name= "preload_keys";
@@ -1384,7 +1386,7 @@ int ha_myisam::enable_indexes(uint mode)
else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
{
THD *thd=current_thd;
- MI_CHECK param;
+ HA_CHECK param;
const char *save_proc_info=thd->proc_info;
thd_proc_info(thd, "Creating index");
myisamchk_init(&param);
@@ -1393,7 +1395,8 @@ int ha_myisam::enable_indexes(uint mode)
T_CREATE_MISSING_KEYS);
param.myf_rw&= ~MY_WAIT_IF_FULL;
param.sort_buffer_length= thd->variables.myisam_sort_buff_size;
- param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method;
+ param.stats_method=
+ (enum_handler_stats_method)thd->variables.myisam_stats_method;
param.tmpdir=&mysql_tmpdir_list;
if ((error= (repair(thd,param,0) != HA_ADMIN_OK)) && param.retry_repair)
{
@@ -1678,9 +1681,15 @@ int ha_myisam::rnd_next(uchar *buf)
return error;
}
-int ha_myisam::restart_rnd_next(uchar *buf, uchar *pos)
+int ha_myisam::remember_rnd_pos()
+{
+ position((uchar*) 0);
+ return 0;
+}
+
+int ha_myisam::restart_rnd_next(uchar *buf)
{
- return rnd_pos(buf,pos);
+ return rnd_pos(buf, ref);
}
int ha_myisam::rnd_pos(uchar *buf, uchar *pos)
@@ -1896,7 +1905,7 @@ void ha_myisam::get_auto_increment(ulonglong offset, ulonglong increment,
{
ulonglong nr;
int error;
- uchar key[MI_MAX_KEY_LENGTH];
+ uchar key[HA_MAX_KEY_LENGTH];
if (!table->s->next_number_key_offset)
{ // Autoincrement at key-start
diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h
index ca44ae9ad87..076e31c07e8 100644
--- a/storage/myisam/ha_myisam.h
+++ b/storage/myisam/ha_myisam.h
@@ -21,6 +21,7 @@
/* class for the the myisam handler */
#include <myisam.h>
+#include <myisamchk.h>
#include <ft_global.h>
#define HA_RECOVER_NONE 0 /* No automatic recover */
@@ -39,7 +40,7 @@ class ha_myisam: public handler
ulonglong int_table_flags;
char *data_file_name, *index_file_name;
bool can_enable_indexes;
- int repair(THD *thd, MI_CHECK &param, bool optimize);
+ int repair(THD *thd, HA_CHECK &param, bool optimize);
public:
ha_myisam(handlerton *hton, TABLE_SHARE *table_arg);
@@ -56,8 +57,8 @@ class ha_myisam: public handler
HA_READ_ORDER | HA_KEYREAD_ONLY);
}
uint max_supported_keys() const { return MI_MAX_KEY; }
- uint max_supported_key_length() const { return MI_MAX_KEY_LENGTH; }
- uint max_supported_key_part_length() const { return MI_MAX_KEY_LENGTH; }
+ uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; }
+ uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; }
uint checksum() const;
int open(const char *name, int mode, uint test_if_locked);
@@ -93,7 +94,8 @@ class ha_myisam: public handler
int rnd_init(bool scan);
int rnd_next(uchar *buf);
int rnd_pos(uchar * buf, uchar *pos);
- int restart_rnd_next(uchar *buf, uchar *pos);
+ int remember_rnd_pos();
+ int restart_rnd_next(uchar *buf);
void position(const uchar *record);
int info(uint);
int extra(enum ha_extra_function operation);
diff --git a/storage/myisam/mi_cache.c b/storage/myisam/mi_cache.c
index d6dcc431a8d..1ccf038a570 100644
--- a/storage/myisam/mi_cache.c
+++ b/storage/myisam/mi_cache.c
@@ -97,8 +97,8 @@ int _mi_read_cache(IO_CACHE *info, uchar *buff, my_off_t pos, uint length,
DBUG_PRINT("error",
("Error %d reading next-multi-part block (Got %d bytes)",
my_errno, (int) read_length));
- if (!my_errno || my_errno == -1)
- my_errno=HA_ERR_WRONG_IN_RECORD;
+ if (!my_errno || my_errno == -1 || my_errno == HA_ERR_FILE_TOO_SHORT)
+ my_errno= HA_ERR_WRONG_IN_RECORD;
DBUG_RETURN(1);
}
bzero(buff+read_length,MI_BLOCK_INFO_HEADER_LENGTH - in_buff_length -
diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c
index 3cc82b832c9..fc9d3a346dd 100644
--- a/storage/myisam/mi_check.c
+++ b/storage/myisam/mi_check.c
@@ -59,14 +59,14 @@
/* Functions defined in this file */
-static int check_k_link(MI_CHECK *param, MI_INFO *info,uint nr);
-static int chk_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo,
+static int check_k_link(HA_CHECK *param, MI_INFO *info,uint nr);
+static int chk_index(HA_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo,
my_off_t page, uchar *buff, ha_rows *keys,
ha_checksum *key_checksum, uint level);
static uint isam_key_length(MI_INFO *info,MI_KEYDEF *keyinfo);
static ha_checksum calc_checksum(ha_rows count);
static int writekeys(MI_SORT_PARAM *sort_param);
-static int sort_one_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo,
+static int sort_one_index(HA_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo,
my_off_t pagepos, File new_file);
static int sort_key_read(MI_SORT_PARAM *sort_param,void *key);
static int sort_ft_key_read(MI_SORT_PARAM *sort_param,void *key);
@@ -80,13 +80,13 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param,
reg1 SORT_KEY_BLOCKS *key_block,
uchar *key, my_off_t prev_block);
static int sort_delete_record(MI_SORT_PARAM *sort_param);
-/*static int flush_pending_blocks(MI_CHECK *param);*/
-static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks,
+/*static int flush_pending_blocks(HA_CHECK *param);*/
+static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
uint buffer_length);
static ha_checksum mi_byte_checksum(const uchar *buf, uint length);
-static void set_data_file_type(SORT_INFO *sort_info, MYISAM_SHARE *share);
+static void set_data_file_type(MI_SORT_INFO *sort_info, MYISAM_SHARE *share);
-void myisamchk_init(MI_CHECK *param)
+void myisamchk_init(HA_CHECK *param)
{
bzero((uchar*) param,sizeof(*param));
param->opt_follow_links=1;
@@ -108,7 +108,7 @@ void myisamchk_init(MI_CHECK *param)
/* Check the status flags for the table */
-int chk_status(MI_CHECK *param, register MI_INFO *info)
+int chk_status(HA_CHECK *param, register MI_INFO *info)
{
MYISAM_SHARE *share=info->s;
@@ -136,7 +136,7 @@ int chk_status(MI_CHECK *param, register MI_INFO *info)
/* Check delete links */
-int chk_del(MI_CHECK *param, register MI_INFO *info, uint test_flag)
+int chk_del(HA_CHECK *param, register MI_INFO *info, uint test_flag)
{
reg2 ha_rows i;
uint delete_link_length;
@@ -245,7 +245,7 @@ wrong:
/* Check delete links in index file */
-static int check_k_link(MI_CHECK *param, register MI_INFO *info, uint nr)
+static int check_k_link(HA_CHECK *param, register MI_INFO *info, uint nr)
{
my_off_t next_link;
uint block_size=(nr+1)*MI_MIN_KEY_BLOCK_LENGTH;
@@ -323,7 +323,7 @@ static int check_k_link(MI_CHECK *param, register MI_INFO *info, uint nr)
/* Check sizes of files */
-int chk_size(MI_CHECK *param, register MI_INFO *info)
+int chk_size(HA_CHECK *param, register MI_INFO *info)
{
int error=0;
register my_off_t skr,size;
@@ -399,7 +399,7 @@ int chk_size(MI_CHECK *param, register MI_INFO *info)
/* Check keys */
-int chk_key(MI_CHECK *param, register MI_INFO *info)
+int chk_key(HA_CHECK *param, register MI_INFO *info)
{
uint key,found_keys=0,full_text_keys=0,result=0;
ha_rows keys;
@@ -584,7 +584,7 @@ do_stat:
} /* chk_key */
-static int chk_index_down(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
+static int chk_index_down(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
my_off_t page, uchar *buff, ha_rows *keys,
ha_checksum *key_checksum, uint level)
{
@@ -731,13 +731,13 @@ int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
/* Check if index is ok */
-static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
+static int chk_index(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
my_off_t page, uchar *buff, ha_rows *keys,
ha_checksum *key_checksum, uint level)
{
int flag;
uint used_length,comp_flag,nod_flag,key_length=0;
- uchar key[MI_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos;
+ uchar key[HA_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos;
my_off_t next_page,record;
char llbuff[22];
uint diff_pos[2];
@@ -934,7 +934,7 @@ static uint isam_key_length(MI_INFO *info, register MI_KEYDEF *keyinfo)
/* Check that record-link is ok */
-int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend)
+int chk_data_link(HA_CHECK *param, MI_INFO *info,int extend)
{
int error,got_error,flag;
uint key,left_length,b_type,field;
@@ -944,7 +944,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend)
uchar *record= 0, *to;
char llbuff[22],llbuff2[22],llbuff3[22];
ha_checksum intern_record_checksum;
- ha_checksum key_checksum[MI_MAX_POSSIBLE_KEY];
+ ha_checksum key_checksum[HA_MAX_POSSIBLE_KEY];
my_bool static_row_size;
MI_KEYDEF *keyinfo;
MI_BLOCK_INFO block_info;
@@ -992,6 +992,9 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend)
if (*killed_ptr(param))
goto err2;
switch (info->s->data_file_type) {
+ case BLOCK_RECORD:
+ DBUG_ASSERT(0); /* Impossible */
+ break;
case STATIC_RECORD:
if (my_b_read(&param->read_cache,(uchar*) record,
info->s->base.pack_reclength))
@@ -1005,7 +1008,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend)
del_length+=info->s->base.pack_reclength;
continue; /* Record removed */
}
- param->glob_crc+= mi_static_checksum(info,record);
+ param->glob_crc+= (*info->s->calc_check_checksum)(info,record);
used+=info->s->base.pack_reclength;
break;
case DYNAMIC_RECORD:
@@ -1159,7 +1162,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend)
}
else
{
- info->checksum=mi_checksum(info,record);
+ info->checksum= (*info->s->calc_check_checksum)(info,record);
if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE))
{
if (_mi_rec_check(info,record, info->rec_buff,block_info.rec_len,
@@ -1205,10 +1208,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend)
llstr(start_recpos,llbuff));
got_error=1;
}
- if (static_row_size)
- param->glob_crc+= mi_static_checksum(info,record);
- else
- param->glob_crc+= mi_checksum(info,record);
+ param->glob_crc+= (*info->s->calc_check_checksum)(info,record);
link_used+= (block_info.filepos - start_recpos);
used+= (pos-start_recpos);
} /* switch */
@@ -1429,7 +1429,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend)
then recrate all indexes.
*/
-static int mi_drop_all_indexes(MI_CHECK *param, MI_INFO *info, my_bool force)
+static int mi_drop_all_indexes(HA_CHECK *param, MI_INFO *info, my_bool force)
{
MYISAM_SHARE *share= info->s;
MI_STATE_INFO *state= &share->state;
@@ -1512,7 +1512,7 @@ static int mi_drop_all_indexes(MI_CHECK *param, MI_INFO *info, my_bool force)
/* Recover old table by reading each record and writing all keys */
/* Save new datafile-name in temp_filename */
-int mi_repair(MI_CHECK *param, register MI_INFO *info,
+int mi_repair(HA_CHECK *param, register MI_INFO *info,
char * name, int rep_quick)
{
int error,got_error;
@@ -1521,7 +1521,7 @@ int mi_repair(MI_CHECK *param, register MI_INFO *info,
File new_file;
MYISAM_SHARE *share=info->s;
char llbuff[22],llbuff2[22];
- SORT_INFO sort_info;
+ MI_SORT_INFO sort_info;
MI_SORT_PARAM sort_param;
DBUG_ENTER("mi_repair");
@@ -1889,7 +1889,7 @@ int movepoint(register MI_INFO *info, uchar *record, my_off_t oldpos,
/* Tell system that we want all memory for our cache */
-void lock_memory(MI_CHECK *param __attribute__((unused)))
+void lock_memory(HA_CHECK *param __attribute__((unused)))
{
#ifdef SUN_OS /* Key-cacheing thrases on sun 4.1 */
if (param->opt_lock_memory)
@@ -1905,7 +1905,7 @@ void lock_memory(MI_CHECK *param __attribute__((unused)))
/* Flush all changed blocks to disk */
-int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file)
+int flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file)
{
if (flush_key_blocks(key_cache, file, FLUSH_RELEASE))
{
@@ -1920,12 +1920,12 @@ int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file)
/* Sort index for more efficent reads */
-int mi_sort_index(MI_CHECK *param, register MI_INFO *info, char * name)
+int mi_sort_index(HA_CHECK *param, register MI_INFO *info, char * name)
{
reg2 uint key;
reg1 MI_KEYDEF *keyinfo;
File new_file;
- my_off_t index_pos[MI_MAX_POSSIBLE_KEY];
+ my_off_t index_pos[HA_MAX_POSSIBLE_KEY];
uint r_locks,w_locks;
int old_lock;
MYISAM_SHARE *share=info->s;
@@ -2020,12 +2020,12 @@ err2:
/* Sort records recursive using one index */
-static int sort_one_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
+static int sort_one_index(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
my_off_t pagepos, File new_file)
{
uint length,nod_flag,used_length, key_length;
uchar *buff,*keypos,*endpos;
- uchar key[MI_MAX_POSSIBLE_KEY_BUFF];
+ uchar key[HA_MAX_POSSIBLE_KEY_BUFF];
my_off_t new_page_pos,next_page;
char llbuff[22];
DBUG_ENTER("sort_one_index");
@@ -2140,12 +2140,12 @@ int change_to_newfile(const char * filename, const char * old_ext,
/* Locks a whole file */
/* Gives an error-message if file can't be locked */
-int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type,
+int lock_file(HA_CHECK *param, File file, my_off_t start, int lock_type,
const char *filetype, const char *filename)
{
if (my_lock(file,lock_type,start,F_TO_EOF,
param->testflag & T_WAIT_FOREVER ? MYF(MY_SEEK_NOT_DONE) :
- MYF(MY_SEEK_NOT_DONE | MY_DONT_WAIT)))
+ MYF(MY_SEEK_NOT_DONE | MY_SHORT_WAIT)))
{
mi_check_print_error(param," %d when locking %s '%s'",my_errno,filetype,filename);
param->error_printed=2; /* Don't give that data is crashed */
@@ -2157,7 +2157,7 @@ int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type,
/* Copy a block between two files */
-int filecopy(MI_CHECK *param, File to,File from,my_off_t start,
+int filecopy(HA_CHECK *param, File to,File from,my_off_t start,
my_off_t length, const char *type)
{
char tmp_buff[IO_SIZE],*buff;
@@ -2208,7 +2208,7 @@ err:
<>0 Error
*/
-int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
+int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info,
const char * name, int rep_quick)
{
int got_error;
@@ -2222,7 +2222,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
HA_KEYSEG *keyseg;
ulong *rec_per_key_part;
char llbuff[22];
- SORT_INFO sort_info;
+ MI_SORT_INFO sort_info;
ulonglong key_map;
DBUG_ENTER("mi_repair_by_sort");
LINT_INIT(key_map);
@@ -2627,7 +2627,7 @@ err:
<>0 Error
*/
-int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
+int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info,
const char * name, int rep_quick)
{
#ifndef THREAD
@@ -2646,7 +2646,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
char llbuff[22];
IO_CACHE new_data_cache; /* For non-quick repair. */
IO_CACHE_SHARE io_share;
- SORT_INFO sort_info;
+ MI_SORT_INFO sort_info;
ulonglong key_map;
pthread_attr_t thr_attr;
ulong max_pack_reclength;
@@ -2671,14 +2671,14 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
/*
Quick repair (not touching data file, rebuilding indexes):
{
- Read cache is (MI_CHECK *param)->read_cache using info->dfile.
+ Read cache is (HA_CHECK *param)->read_cache using info->dfile.
}
Non-quick repair (rebuilding data file and indexes):
{
Master thread:
- Read cache is (MI_CHECK *param)->read_cache using info->dfile.
+ Read cache is (HA_CHECK *param)->read_cache using info->dfile.
Write cache is (MI_INFO *info)->rec_cache using new_file.
Slave threads:
@@ -3123,7 +3123,7 @@ err:
static int sort_key_read(MI_SORT_PARAM *sort_param, void *key)
{
int error;
- SORT_INFO *sort_info=sort_param->sort_info;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
MI_INFO *info=sort_info->info;
DBUG_ENTER("sort_key_read");
@@ -3150,7 +3150,7 @@ static int sort_key_read(MI_SORT_PARAM *sort_param, void *key)
static int sort_ft_key_read(MI_SORT_PARAM *sort_param, void *key)
{
int error;
- SORT_INFO *sort_info=sort_param->sort_info;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
MI_INFO *info=sort_info->info;
FT_WORD *wptr=0;
DBUG_ENTER("sort_ft_key_read");
@@ -3237,8 +3237,8 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
my_off_t pos;
uchar *to;
MI_BLOCK_INFO block_info;
- SORT_INFO *sort_info=sort_param->sort_info;
- MI_CHECK *param=sort_info->param;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
MI_INFO *info=sort_info->info;
MYISAM_SHARE *share=info->s;
char llbuff[22],llbuff2[22];
@@ -3248,6 +3248,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
DBUG_RETURN(1);
switch (share->data_file_type) {
+ case BLOCK_RECORD:
+ DBUG_ASSERT(0); /* Impossible */
+ break;
case STATIC_RECORD:
for (;;)
{
@@ -3272,7 +3275,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
{
if (sort_param->calc_checksum)
param->glob_crc+= (info->checksum=
- mi_static_checksum(info,sort_param->record));
+ (*info->s->calc_check_checksum)(info,
+ sort_param->
+ record));
DBUG_RETURN(0);
}
if (!sort_param->fix_datafile && sort_param->master)
@@ -3548,7 +3553,8 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
if (sort_param->read_cache.error < 0)
DBUG_RETURN(1);
if (sort_param->calc_checksum)
- info->checksum= mi_checksum(info, sort_param->record);
+ info->checksum= (*info->s->calc_check_checksum)(info,
+ sort_param->record);
if ((param->testflag & (T_EXTEND | T_REP)) || searching)
{
if (_mi_rec_check(info, sort_param->record, sort_param->rec_buff,
@@ -3633,7 +3639,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
info->packed_length=block_info.rec_len;
if (sort_param->calc_checksum)
param->glob_crc+= (info->checksum=
- mi_checksum(info, sort_param->record));
+ (*info->s->calc_check_checksum)(info,
+ sort_param->
+ record));
DBUG_RETURN(0);
}
}
@@ -3663,8 +3671,8 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
ulong block_length,reclength;
uchar *from;
uchar block_buff[8];
- SORT_INFO *sort_info=sort_param->sort_info;
- MI_CHECK *param=sort_info->param;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
MI_INFO *info=sort_info->info;
MYISAM_SHARE *share=info->s;
DBUG_ENTER("sort_write_record");
@@ -3672,6 +3680,9 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
if (sort_param->fix_datafile)
{
switch (sort_info->new_data_file_type) {
+ case BLOCK_RECORD:
+ DBUG_ASSERT(0); /* Impossible */
+ break;
case STATIC_RECORD:
if (my_b_write(&info->rec_cache,sort_param->record,
share->base.pack_reclength))
@@ -3681,7 +3692,6 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
}
sort_param->filepos+=share->base.pack_reclength;
info->s->state.split++;
- /* sort_info->param->glob_crc+=mi_static_checksum(info, sort_param->record); */
break;
case DYNAMIC_RECORD:
if (! info->blobs)
@@ -3690,7 +3700,7 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
{
/* must be sure that local buffer is big enough */
reclength=info->s->base.pack_reclength+
- _my_calc_total_blob_length(info,sort_param->record)+
+ _mi_calc_total_blob_length(info,sort_param->record)+
ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+
MI_DYN_DELETE_BLOCK_HEADER;
if (sort_info->buff_length < reclength)
@@ -3704,10 +3714,9 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
from= sort_info->buff+ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER);
}
/* We can use info->checksum here as only one thread calls this. */
- info->checksum=mi_checksum(info,sort_param->record);
+ info->checksum= (*info->s->calc_check_checksum)(info,sort_param->record);
reclength=_mi_rec_pack(info,from,sort_param->record);
flag=0;
- /* sort_info->param->glob_crc+=info->checksum; */
do
{
@@ -3779,24 +3788,25 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a)
{
uint diff_pos[2];
char llbuff[22],llbuff2[22];
- SORT_INFO *sort_info=sort_param->sort_info;
- MI_CHECK *param= sort_info->param;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param= sort_info->param;
int cmp;
if (sort_info->key_block->inited)
{
- cmp=ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
+ cmp=ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey,
(uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE,
diff_pos);
if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
- ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
+ ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey,
(uchar*) a, USE_WHOLE_KEY,
SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos);
else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
{
diff_pos[0]= mi_collect_stats_nonulls_next(sort_param->seg,
sort_param->notnull,
- sort_info->key_block->lastkey,
+ (uchar*) sort_info->
+ key_block->lastkey,
(uchar*)a);
}
sort_param->unique[diff_pos[0]-1]++;
@@ -3819,8 +3829,8 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a)
llstr(sort_info->info->lastpos,llbuff),
llstr(get_record_for_key(sort_info->info,
sort_param->keyinfo,
- sort_info->key_block->
- lastkey),
+ (uchar*) sort_info->
+ key_block->lastkey),
llbuff2));
param->testflag|=T_RETRY_WITHOUT_QUICK;
if (sort_info->param->testflag & T_VERBOSE)
@@ -3841,7 +3851,7 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a)
int sort_ft_buf_flush(MI_SORT_PARAM *sort_param)
{
- SORT_INFO *sort_info=sort_param->sort_info;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
SORT_KEY_BLOCKS *key_block=sort_info->key_block;
MYISAM_SHARE *share=sort_info->info->s;
uint val_off, val_len;
@@ -3851,19 +3861,19 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param)
val_len=share->ft2_keyinfo.keylength;
get_key_full_length_rdonly(val_off, ft_buf->lastkey);
- to=ft_buf->lastkey+val_off;
+ to= (uchar*) ft_buf->lastkey+val_off;
if (ft_buf->buf)
{
/* flushing first-level tree */
- error=sort_insert_key(sort_param,key_block,ft_buf->lastkey,
+ error=sort_insert_key(sort_param,key_block, (uchar*) ft_buf->lastkey,
HA_OFFSET_ERROR);
for (from=to+val_len;
- !error && from < ft_buf->buf;
+ !error && from < (uchar*) ft_buf->buf;
from+= val_len)
{
memcpy(to, from, val_len);
- error=sort_insert_key(sort_param,key_block,ft_buf->lastkey,
+ error=sort_insert_key(sort_param,key_block, (uchar*) ft_buf->lastkey,
HA_OFFSET_ERROR);
}
return error;
@@ -3872,8 +3882,8 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param)
error=flush_pending_blocks(sort_param);
/* updating lastkey with second-level tree info */
ft_intXstore(ft_buf->lastkey+val_off, -ft_buf->count);
- _mi_dpointer(sort_info->info, ft_buf->lastkey+val_off+HA_FT_WLEN,
- share->state.key_root[sort_param->key]);
+ _mi_dpointer(sort_info->info, (uchar*) ft_buf->lastkey+val_off+HA_FT_WLEN,
+ share->state.key_root[sort_param->key]);
/* restoring first level tree data in sort_info/sort_param */
sort_info->key_block=sort_info->key_block_end- sort_info->param->sort_key_blocks;
sort_param->keyinfo=share->keyinfo+sort_param->key;
@@ -3881,14 +3891,14 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param)
/* writing lastkey in first-level tree */
return error ? error :
sort_insert_key(sort_param,sort_info->key_block,
- ft_buf->lastkey,HA_OFFSET_ERROR);
+ (uchar*) ft_buf->lastkey,HA_OFFSET_ERROR);
}
static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a)
{
uint a_len, val_off, val_len, error;
uchar *p;
- SORT_INFO *sort_info=sort_param->sort_info;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
SORT_FT_BUF *ft_buf=sort_info->ft_buf;
SORT_KEY_BLOCKS *key_block=sort_info->key_block;
@@ -3918,9 +3928,9 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a)
}
get_key_full_length_rdonly(val_off, ft_buf->lastkey);
- if (mi_compare_text(sort_param->seg->charset,
+ if (ha_compare_text(sort_param->seg->charset,
((uchar *)a)+1,a_len-1,
- ft_buf->lastkey+1,val_off-1, 0, 0)==0)
+ (uchar*) ft_buf->lastkey+1,val_off-1, 0, 0)==0)
{
if (!ft_buf->buf) /* store in second-level tree */
{
@@ -3936,16 +3946,16 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a)
return 0;
/* converting to two-level tree */
- p=ft_buf->lastkey+val_off;
+ p= (uchar*) ft_buf->lastkey+val_off;
while (key_block->inited)
key_block++;
sort_info->key_block=key_block;
sort_param->keyinfo=& sort_info->info->s->ft2_keyinfo;
- ft_buf->count=(ft_buf->buf - p)/val_len;
+ ft_buf->count=((uchar*) ft_buf->buf - p)/val_len;
/* flushing buffer to second-level tree */
- for (error=0; !error && p < ft_buf->buf; p+= val_len)
+ for (error=0; !error && p < (uchar*) ft_buf->buf; p+= val_len)
error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR);
ft_buf->buf=0;
return error;
@@ -3993,13 +4003,13 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param,
MI_KEY_PARAM s_temp;
MI_INFO *info;
MI_KEYDEF *keyinfo=sort_param->keyinfo;
- SORT_INFO *sort_info= sort_param->sort_info;
- MI_CHECK *param=sort_info->param;
+ MI_SORT_INFO *sort_info= sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
DBUG_ENTER("sort_insert_key");
- anc_buff=key_block->buff;
+ anc_buff= (uchar*) key_block->buff;
info=sort_info->info;
- lastkey=key_block->lastkey;
+ lastkey= (uchar*) key_block->lastkey;
nod_flag= (key_block == sort_info->key_block ? 0 :
info->s->base.key_reflength);
@@ -4012,7 +4022,7 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param,
DBUG_RETURN(1);
}
a_length=2+nod_flag;
- key_block->end_pos=anc_buff+2;
+ key_block->end_pos= (char*) anc_buff+2;
lastkey=0; /* No previous key in block */
}
else
@@ -4020,18 +4030,18 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param,
/* Save pointer to previous block */
if (nod_flag)
- _mi_kpointer(info,key_block->end_pos,prev_block);
+ _mi_kpointer(info,(uchar*) key_block->end_pos,prev_block);
t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,
(uchar*) 0,lastkey,lastkey,key,
&s_temp);
- (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp);
+ (*keyinfo->store_key)(keyinfo, (uchar*) key_block->end_pos+nod_flag,&s_temp);
a_length+=t_length;
mi_putint(anc_buff,a_length,nod_flag);
key_block->end_pos+=t_length;
if (a_length <= keyinfo->block_length)
{
- VOID(_mi_move_key(keyinfo,key_block->lastkey,key));
+ VOID(_mi_move_key(keyinfo,(uchar*) key_block->lastkey,key));
key_block->last_length=a_length-t_length;
DBUG_RETURN(0);
}
@@ -4056,7 +4066,8 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param,
DBUG_DUMP("buff",(uchar*) anc_buff,mi_getint(anc_buff));
/* Write separator-key to block in next level */
- if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos))
+ if (sort_insert_key(sort_param,key_block+1,(uchar*) key_block->lastkey,
+ filepos))
DBUG_RETURN(1);
/* clear old block and write new key in it */
@@ -4072,8 +4083,8 @@ static int sort_delete_record(MI_SORT_PARAM *sort_param)
uint i;
int old_file,error;
uchar *key;
- SORT_INFO *sort_info=sort_param->sort_info;
- MI_CHECK *param=sort_info->param;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
MI_INFO *info=sort_info->info;
DBUG_ENTER("sort_delete_record");
@@ -4129,7 +4140,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param)
uint nod_flag,length;
my_off_t filepos,key_file_length;
SORT_KEY_BLOCKS *key_block;
- SORT_INFO *sort_info= sort_param->sort_info;
+ MI_SORT_INFO *sort_info= sort_param->sort_info;
myf myf_rw=sort_info->param->myf_rw;
MI_INFO *info=sort_info->info;
MI_KEYDEF *keyinfo=sort_param->keyinfo;
@@ -4142,7 +4153,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param)
key_block->inited=0;
length=mi_getint(key_block->buff);
if (nod_flag)
- _mi_kpointer(info,key_block->end_pos,filepos);
+ _mi_kpointer(info,(uchar*) key_block->end_pos,filepos);
key_file_length=info->state->key_file_length;
bzero((uchar*) key_block->buff+length, keyinfo->block_length-length);
if ((filepos=_mi_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR)
@@ -4152,7 +4163,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param)
if (key_file_length == info->state->key_file_length)
{
if (_mi_write_keypage(info, keyinfo, filepos,
- DFLT_INIT_HITS, key_block->buff))
+ DFLT_INIT_HITS, (uchar*) key_block->buff))
DBUG_RETURN(1);
}
else if (my_pwrite(info->s->kfile,(uchar*) key_block->buff,
@@ -4167,7 +4178,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param)
/* alloc space and pointers for key_blocks */
-static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks,
+static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
uint buffer_length)
{
reg1 uint i;
@@ -4204,7 +4215,7 @@ int test_if_almost_full(MI_INFO *info)
/* Recreate table with bigger more alloced record-data */
-int recreate_table(MI_CHECK *param, MI_INFO **org_info, char *filename)
+int recreate_table(HA_CHECK *param, MI_INFO **org_info, char *filename)
{
int error;
MI_INFO info;
@@ -4377,7 +4388,7 @@ end:
/* write suffix to data file if neaded */
-int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile)
+int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile)
{
MI_INFO *info=sort_info->info;
@@ -4398,7 +4409,7 @@ int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile)
/* Update state and myisamchk_time of indexfile */
-int update_state_info(MI_CHECK *param, MI_INFO *info,uint update)
+int update_state_info(HA_CHECK *param, MI_INFO *info,uint update)
{
MYISAM_SHARE *share=info->s;
@@ -4470,7 +4481,7 @@ err:
param->auto_increment is bigger than the biggest key.
*/
-void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
+void update_auto_increment_key(HA_CHECK *param, MI_INFO *info,
my_bool repair_only)
{
uchar *record= 0;
@@ -4702,7 +4713,7 @@ my_bool mi_test_if_sort_rep(MI_INFO *info, ha_rows rows,
static void
-set_data_file_type(SORT_INFO *sort_info, MYISAM_SHARE *share)
+set_data_file_type(MI_SORT_INFO *sort_info, MYISAM_SHARE *share)
{
if ((sort_info->new_data_file_type=share->data_file_type) ==
COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK)
diff --git a/storage/myisam/mi_checksum.c b/storage/myisam/mi_checksum.c
index 4e87de373bd..8c408ef7ff5 100644
--- a/storage/myisam/mi_checksum.c
+++ b/storage/myisam/mi_checksum.c
@@ -19,27 +19,34 @@
ha_checksum mi_checksum(MI_INFO *info, const uchar *buf)
{
- uint i;
ha_checksum crc=0;
- MI_COLUMNDEF *rec=info->s->rec;
+ const uchar *record= buf;
+ MI_COLUMNDEF *column= info->s->rec;
+ MI_COLUMNDEF *column_end= column+ info->s->base.fields;
+ my_bool skip_null_bits= test(info->s->options & HA_OPTION_NULL_FIELDS);
- for (i=info->s->base.fields ; i-- ; buf+=(rec++)->length)
+ for ( ; column != column_end ; buf+= column++->length)
{
const uchar *pos;
ulong length;
- switch (rec->type) {
+
+ if ((record[column->null_pos] & column->null_bit) &&
+ skip_null_bits)
+ continue; /* Null field */
+
+ switch (column->type) {
case FIELD_BLOB:
{
- length=_mi_calc_blob_length(rec->length-
- mi_portable_sizeof_char_ptr,
- buf);
- memcpy((char*) &pos, buf+rec->length- mi_portable_sizeof_char_ptr,
+ length=_mi_calc_blob_length(column->length-
+ portable_sizeof_char_ptr,
+ buf);
+ memcpy((char*) &pos, buf+column->length- portable_sizeof_char_ptr,
sizeof(char*));
break;
}
case FIELD_VARCHAR:
{
- uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length-1);
+ uint pack_length= HA_VARCHAR_PACKLENGTH(column->length-1);
if (pack_length == 1)
length= (ulong) *(uchar*) buf;
else
@@ -48,7 +55,7 @@ ha_checksum mi_checksum(MI_INFO *info, const uchar *buf)
break;
}
default:
- length=rec->length;
+ length=column->length;
pos=buf;
break;
}
diff --git a/storage/myisam/mi_close.c b/storage/myisam/mi_close.c
index 07105aea88d..747555dbdfb 100644
--- a/storage/myisam/mi_close.c
+++ b/storage/myisam/mi_close.c
@@ -75,6 +75,7 @@ int mi_close(register MI_INFO *info)
not change the crashed state.
We can NOT write the state in other cases as other threads
may be using the file at this point
+ IF using --external-locking.
*/
if (share->mode != O_RDONLY && mi_is_crashed(info))
mi_state_info_write(share->kfile, &share->state, 1);
diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c
index 0cac5f08b3b..23f84819949 100644
--- a/storage/myisam/mi_create.c
+++ b/storage/myisam/mi_create.c
@@ -17,6 +17,7 @@
#include "ftdefs.h"
#include "sp_defs.h"
+#include <my_bit.h>
#if defined(MSDOS) || defined(__WIN__)
#ifdef __WIN__
@@ -40,11 +41,11 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
File dfile,file;
int errpos,save_errno, create_mode= O_RDWR | O_TRUNC;
myf create_flag;
- uint fields,length,max_key_length,packed,pointer,real_length_diff,
+ uint fields,length,max_key_length,packed,pack_bytes,pointer,real_length_diff,
key_length,info_length,key_segs,options,min_key_length_skip,
base_pos,long_varchar_count,varchar_length,
max_key_block_length,unique_key_parts,fulltext_keys,offset;
- uint aligned_key_start, block_length;
+ uint aligned_key_start, block_length, res;
ulong reclength, real_reclength,min_pack_length;
char filename[FN_REFLEN],linkname[FN_REFLEN], *linkname_ptr;
ulong pack_reclength;
@@ -56,7 +57,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
HA_KEYSEG *keyseg,tmp_keyseg;
MI_COLUMNDEF *rec;
ulong *rec_per_key_part;
- my_off_t key_root[MI_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE];
+ my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE];
MI_CREATE_INFO tmp_create_info;
DBUG_ENTER("mi_create");
DBUG_PRINT("enter", ("keys: %u columns: %u uniques: %u flags: %u",
@@ -94,7 +95,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
ci->reloc_rows=ci->max_rows; /* Check if wrong parameter */
if (!(rec_per_key_part=
- (ulong*) my_malloc((keys + uniques)*MI_MAX_KEY_SEG*sizeof(long),
+ (ulong*) my_malloc((keys + uniques)*HA_MAX_KEY_SEG*sizeof(long),
MYF(MY_WME | MY_ZEROFILL))))
DBUG_RETURN(my_errno);
@@ -107,6 +108,9 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
rec++,fields++)
{
reclength+=rec->length;
+ if (rec->null_bit)
+ options|= HA_OPTION_NULL_FIELDS;
+
if ((type=(enum en_fieldtype) rec->type) != FIELD_NORMAL &&
type != FIELD_CHECK)
{
@@ -116,10 +120,10 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
share.base.blobs++;
if (pack_reclength != INT_MAX32)
{
- if (rec->length == 4+mi_portable_sizeof_char_ptr)
+ if (rec->length == 4+portable_sizeof_char_ptr)
pack_reclength= INT_MAX32;
else
- pack_reclength+=(1 << ((rec->length-mi_portable_sizeof_char_ptr)*8)); /* Max blob length */
+ pack_reclength+=(1 << ((rec->length-portable_sizeof_char_ptr)*8)); /* Max blob length */
}
}
else if (type == FIELD_SKIP_PRESPACE ||
@@ -141,6 +145,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
long_varchar_count++;
pack_reclength+= 2; /* May be packed on 3 bytes */
}
+ options|= HA_OPTION_NULL_FIELDS; /* Use of mi_checksum() */
}
else if (type != FIELD_SKIP_ZERO)
{
@@ -180,7 +185,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
if (flags & HA_CREATE_TMP_TABLE)
{
options|= HA_OPTION_TMP_TABLE;
- create_mode|= O_EXCL | O_NOFOLLOW;
+ create_mode|= O_NOFOLLOW;
}
if (flags & HA_CREATE_CHECKSUM || (options & HA_OPTION_CHECKSUM))
{
@@ -192,11 +197,11 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
if (flags & HA_CREATE_RELIES_ON_SQL_LAYER)
options|= HA_OPTION_RELIES_ON_SQL_LAYER;
- packed=(packed+7)/8;
+ pack_bytes= (packed+7)/8;
if (pack_reclength != INT_MAX32)
pack_reclength+= reclength+packed +
test(test_all_bits(options, HA_OPTION_CHECKSUM | HA_PACK_RECORD));
- min_pack_length+=packed;
+ min_pack_length+= pack_bytes;
if (!ci->data_file_length && ci->max_rows)
{
@@ -273,7 +278,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
keyseg->type != HA_KEYTYPE_VARBINARY2)
{
my_errno=HA_WRONG_CREATE_OPTION;
- goto err;
+ goto err_no_lock;
}
}
keydef->keysegs+=sp_segs;
@@ -282,7 +287,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
min_key_length_skip+=SPLEN*2*SPDIMS;
#else
my_errno= HA_ERR_UNSUPPORTED;
- goto err;
+ goto err_no_lock;
#endif /*HAVE_SPATIAL*/
}
else if (keydef->flag & HA_FULLTEXT)
@@ -298,7 +303,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
keyseg->type != HA_KEYTYPE_VARTEXT2)
{
my_errno=HA_WRONG_CREATE_OPTION;
- goto err;
+ goto err_no_lock;
}
if (!(keyseg->flag & HA_BLOB_PART) &&
(keyseg->type == HA_KEYTYPE_VARTEXT1 ||
@@ -420,10 +425,10 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
}
} /* if HA_FULLTEXT */
key_segs+=keydef->keysegs;
- if (keydef->keysegs > MI_MAX_KEY_SEG)
+ if (keydef->keysegs > HA_MAX_KEY_SEG)
{
my_errno=HA_WRONG_CREATE_OPTION;
- goto err;
+ goto err_no_lock;
}
/*
key_segs may be 0 in the case when we only want to be able to
@@ -435,7 +440,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
share.state.rec_per_key_part[key_segs-1]=1L;
length+=key_length;
/* Get block length for key, if defined by user */
- block_length= (keydef->block_length ?
+ block_length= (keydef->block_length ?
my_round_up_to_next_power(keydef->block_length) :
myisam_block_size);
block_length= max(block_length, MI_MIN_KEY_BLOCK_LENGTH);
@@ -445,10 +450,10 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
pointer,MI_MAX_KEYPTR_SIZE,
block_length);
if (keydef->block_length > MI_MAX_KEY_BLOCK_LENGTH ||
- length >= MI_MAX_KEY_BUFF)
+ length >= HA_MAX_KEY_BUFF)
{
my_errno=HA_WRONG_CREATE_OPTION;
- goto err;
+ goto err_no_lock;
}
set_if_bigger(max_key_block_length,keydef->block_length);
keydef->keylength= (uint16) key_length;
@@ -495,7 +500,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
"indexes and/or unique constraints.",
MYF(0), name + dirname_length(name));
my_errno= HA_WRONG_CREATE_OPTION;
- goto err;
+ goto err_no_lock;
}
bmove(share.state.header.file_version,(uchar*) myisam_file_magic,4);
@@ -550,9 +555,9 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
share.base.pack_reclength=reclength+ test(options & HA_OPTION_CHECKSUM);
share.base.max_pack_length=pack_reclength;
share.base.min_pack_length=min_pack_length;
- share.base.pack_bits=packed;
+ share.base.pack_bits= pack_bytes;
share.base.fields=fields;
- share.base.pack_fields=packed;
+ share.base.pack_fields= packed;
#ifdef USE_RAID
share.base.raid_type=ci->raid_type;
share.base.raid_chunks=ci->raid_chunks;
@@ -826,13 +831,16 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
}
errpos=0;
pthread_mutex_unlock(&THR_LOCK_myisam);
+ res= 0;
if (my_close(file,MYF(0)))
- goto err;
+ res= my_errno;
my_free((char*) rec_per_key_part,MYF(0));
- DBUG_RETURN(0);
+ DBUG_RETURN(res);
err:
pthread_mutex_unlock(&THR_LOCK_myisam);
+err_no_lock:
+
save_errno=my_errno;
switch (errpos) {
case 3:
diff --git a/storage/myisam/mi_dbug.c b/storage/myisam/mi_dbug.c
index 07c314c43e6..659abdce131 100644
--- a/storage/myisam/mi_dbug.c
+++ b/storage/myisam/mi_dbug.c
@@ -45,6 +45,7 @@ void _mi_print_key(FILE *stream, register HA_KEYSEG *keyseg,
fprintf(stream,"NULL");
continue;
}
+ end++;
}
switch (keyseg->type) {
@@ -91,7 +92,7 @@ void _mi_print_key(FILE *stream, register HA_KEYSEG *keyseg,
key=end;
break;
case HA_KEYTYPE_ULONG_INT:
- l_1=mi_sint4korr(key);
+ l_1=mi_uint4korr(key);
VOID(fprintf(stream,"%lu",(ulong) l_1));
key=end;
break;
diff --git a/storage/myisam/mi_delete.c b/storage/myisam/mi_delete.c
index 6fe31f30c19..88b31d616de 100644
--- a/storage/myisam/mi_delete.c
+++ b/storage/myisam/mi_delete.c
@@ -159,7 +159,7 @@ static int _mi_ck_real_delete(register MI_INFO *info, MI_KEYDEF *keyinfo,
DBUG_RETURN(my_errno=HA_ERR_CRASHED);
}
if (!(root_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
- MI_MAX_KEY_BUFF*2)))
+ HA_MAX_KEY_BUFF*2)))
{
DBUG_PRINT("error",("Couldn't allocate memory"));
DBUG_RETURN(my_errno=ENOMEM);
@@ -221,7 +221,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
my_bool last_key;
uchar *leaf_buff,*keypos;
my_off_t leaf_page,next_block;
- uchar lastkey[MI_MAX_KEY_BUFF];
+ uchar lastkey[HA_MAX_KEY_BUFF];
DBUG_ENTER("d_search");
DBUG_DUMP("page",(uchar*) anc_buff,mi_getint(anc_buff));
@@ -306,7 +306,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
{
leaf_page=_mi_kpos(nod_flag,keypos);
if (!(leaf_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
- MI_MAX_KEY_BUFF*2)))
+ HA_MAX_KEY_BUFF*2)))
{
DBUG_PRINT("error",("Couldn't allocate memory"));
my_errno=ENOMEM;
@@ -365,9 +365,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
{ /* This happens only with packed keys */
DBUG_PRINT("test",("Enlarging of key when deleting"));
if (!_mi_get_last_key(info,keyinfo,anc_buff,lastkey,keypos,&length))
- {
goto err;
- }
ret_value=_mi_insert(info,keyinfo,key,anc_buff,keypos,lastkey,
(uchar*) 0,(uchar*) 0,(my_off_t) 0,(my_bool) 0);
}
@@ -405,7 +403,7 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key,
int ret_value,length;
uint a_length,nod_flag,tmp;
my_off_t next_page;
- uchar keybuff[MI_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key;
+ uchar keybuff[HA_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key;
MYISAM_SHARE *share=info->s;
MI_KEY_PARAM s_temp;
DBUG_ENTER("del");
@@ -422,7 +420,7 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key,
{
next_page= _mi_kpos(nod_flag,endpos);
if (!(next_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
- MI_MAX_KEY_BUFF*2)))
+ HA_MAX_KEY_BUFF*2)))
DBUG_RETURN(-1);
if (!_mi_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,next_buff,0))
ret_value= -1;
@@ -509,7 +507,7 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo,
uint length,anc_length,buff_length,leaf_length,p_length,s_length,nod_flag,
key_reflength,key_length;
my_off_t next_page;
- uchar anc_key[MI_MAX_KEY_BUFF],leaf_key[MI_MAX_KEY_BUFF],
+ uchar anc_key[HA_MAX_KEY_BUFF],leaf_key[HA_MAX_KEY_BUFF],
*buff,*endpos,*next_keypos,*anc_pos,*half_pos,*temp_pos,*prev_key,
*after_key;
MI_KEY_PARAM s_temp;
diff --git a/storage/myisam/mi_dynrec.c b/storage/myisam/mi_dynrec.c
index 9940cf62204..dbfc62b4fda 100644
--- a/storage/myisam/mi_dynrec.c
+++ b/storage/myisam/mi_dynrec.c
@@ -252,7 +252,7 @@ int _mi_write_blob_record(MI_INFO *info, const uchar *record)
extra= (ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+
MI_DYN_DELETE_BLOCK_HEADER+1);
reclength= (info->s->base.pack_reclength +
- _my_calc_total_blob_length(info,record)+ extra);
+ _mi_calc_total_blob_length(info,record)+ extra);
#ifdef NOT_USED /* We now support big rows */
if (reclength > MI_DYN_MAX_ROW_LENGTH)
{
@@ -286,7 +286,7 @@ int _mi_update_blob_record(MI_INFO *info, my_off_t pos, const uchar *record)
extra= (ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+
MI_DYN_DELETE_BLOCK_HEADER);
reclength= (info->s->base.pack_reclength+
- _my_calc_total_blob_length(info,record)+ extra);
+ _mi_calc_total_blob_length(info,record)+ extra);
#ifdef NOT_USED /* We now support big rows */
if (reclength > MI_DYN_MAX_ROW_LENGTH)
{
@@ -969,7 +969,7 @@ uint _mi_rec_pack(MI_INFO *info, register uchar *to,
else
{
char *temp_pos;
- size_t tmp_length=length-mi_portable_sizeof_char_ptr;
+ size_t tmp_length=length-portable_sizeof_char_ptr;
memcpy((uchar*) to,from,tmp_length);
memcpy_fixed(&temp_pos,from+tmp_length,sizeof(char*));
memcpy(to+tmp_length,temp_pos,(size_t) blob->length);
@@ -1090,11 +1090,11 @@ my_bool _mi_rec_check(MI_INFO *info,const uchar *record, uchar *rec_buff,
if (type == FIELD_BLOB)
{
uint blob_length=
- _mi_calc_blob_length(length-mi_portable_sizeof_char_ptr,record);
+ _mi_calc_blob_length(length-portable_sizeof_char_ptr,record);
if (!blob_length && !(flag & bit))
goto err;
if (blob_length)
- to+=length - mi_portable_sizeof_char_ptr+ blob_length;
+ to+=length - portable_sizeof_char_ptr+ blob_length;
}
else if (type == FIELD_SKIP_ZERO)
{
@@ -1277,7 +1277,7 @@ ulong _mi_rec_unpack(register MI_INFO *info, register uchar *to, uchar *from,
}
else if (type == FIELD_BLOB)
{
- uint size_length=rec_length- mi_portable_sizeof_char_ptr;
+ uint size_length=rec_length- portable_sizeof_char_ptr;
ulong blob_length=_mi_calc_blob_length(size_length,from);
ulong from_left= (ulong) (from_end - from);
if (from_left < size_length ||
@@ -1327,7 +1327,7 @@ err:
/* Calc length of blob. Update info in blobs->length */
-ulong _my_calc_total_blob_length(MI_INFO *info, const uchar *record)
+ulong _mi_calc_total_blob_length(MI_INFO *info, const uchar *record)
{
ulong length;
MI_BLOB *blob,*end;
@@ -1361,7 +1361,7 @@ ulong _mi_calc_blob_length(uint length, const uchar *pos)
}
-void _my_store_blob_length(uchar *pos,uint pack_length,uint length)
+void _mi_store_blob_length(uchar *pos,uint pack_length,uint length)
{
switch (pack_length) {
case 1:
@@ -1574,7 +1574,7 @@ int _mi_cmp_dynamic_record(register MI_INFO *info, register const uchar *record)
if (info->s->base.blobs)
{
if (!(buffer=(uchar*) my_alloca(info->s->base.pack_reclength+
- _my_calc_total_blob_length(info,record))))
+ _mi_calc_total_blob_length(info,record))))
DBUG_RETURN(-1);
}
reclength=_mi_rec_pack(info,buffer,record);
@@ -1832,7 +1832,7 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, uchar *buf,
/* VOID(my_seek(info->dfile,filepos,MY_SEEK_SET,MYF(0))); */
if (my_read(info->dfile,(uchar*) to,block_info.data_len,MYF(MY_NABP)))
{
- if (my_errno == -1)
+ if (my_errno == HA_ERR_FILE_TOO_SHORT)
my_errno= HA_ERR_WRONG_IN_RECORD; /* Unexpected end of file */
goto err;
}
diff --git a/storage/myisam/mi_extra.c b/storage/myisam/mi_extra.c
index 1b4c79d13de..d798ef50d7e 100644
--- a/storage/myisam/mi_extra.c
+++ b/storage/myisam/mi_extra.c
@@ -216,7 +216,7 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg)
info->lock_wait=0;
break;
case HA_EXTRA_NO_WAIT_LOCK:
- info->lock_wait=MY_DONT_WAIT;
+ info->lock_wait= MY_SHORT_WAIT;
break;
case HA_EXTRA_NO_KEYS:
if (info->lock_type == F_UNLCK)
@@ -256,15 +256,16 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg)
share->last_version= 0L; /* Impossible version */
pthread_mutex_unlock(&THR_LOCK_myisam);
break;
- case HA_EXTRA_PREPARE_FOR_DELETE:
+ case HA_EXTRA_PREPARE_FOR_RENAME:
+ case HA_EXTRA_PREPARE_FOR_DROP:
pthread_mutex_lock(&THR_LOCK_myisam);
share->last_version= 0L; /* Impossible version */
#ifdef __WIN__REMOVE_OBSOLETE_WORKAROUND
/* Close the isam and data files as Win32 can't drop an open table */
pthread_mutex_lock(&share->intern_lock);
if (flush_key_blocks(share->key_cache, share->kfile,
- (function == HA_EXTRA_FORCE_REOPEN ?
- FLUSH_RELEASE : FLUSH_IGNORE_CHANGED)))
+ (function == HA_EXTRA_PREPARE_FOR_DROP ?
+ FLUSH_IGNORE_CHANGED : FLUSH_RELEASE)))
{
error=my_errno;
share->changed=1;
diff --git a/storage/myisam/mi_key.c b/storage/myisam/mi_key.c
index 3f445ebf44d..94f3f34ec58 100644
--- a/storage/myisam/mi_key.c
+++ b/storage/myisam/mi_key.c
@@ -426,7 +426,7 @@ static int _mi_put_key_in_record(register MI_INFO *info, uint keynr,
/* The above changed info->lastkey2. Inform mi_rnext_same(). */
info->update&= ~HA_STATE_RNEXT_SAME;
- _my_store_blob_length(record+keyseg->start,
+ _mi_store_blob_length(record+keyseg->start,
(uint) keyseg->bit_start,length);
key+=length;
}
diff --git a/storage/myisam/mi_locking.c b/storage/myisam/mi_locking.c
index ec359d13a14..ad23a42ab83 100644
--- a/storage/myisam/mi_locking.c
+++ b/storage/myisam/mi_locking.c
@@ -56,9 +56,15 @@ int mi_lock_database(MI_INFO *info, int lock_type)
case F_UNLCK:
ftparser_call_deinitializer(info);
if (info->lock_type == F_RDLCK)
+ {
count= --share->r_locks;
+ mi_restore_status(info);
+ }
else
+ {
count= --share->w_locks;
+ mi_update_status(info);
+ }
--share->tot_locks;
if (info->lock_type == F_WRLCK && !share->w_locks &&
!share->delay_key_write && flush_key_blocks(share->key_cache,
@@ -84,16 +90,16 @@ int mi_lock_database(MI_INFO *info, int lock_type)
if (share->changed && !share->w_locks)
{
#ifdef HAVE_MMAP
- if ((info->s->mmaped_length != info->s->state.state.data_file_length) &&
- (info->s->nonmmaped_inserts > MAX_NONMAPPED_INSERTS))
- {
- if (info->s->concurrent_insert)
- rw_wrlock(&info->s->mmap_lock);
- mi_remap_file(info, info->s->state.state.data_file_length);
- info->s->nonmmaped_inserts= 0;
- if (info->s->concurrent_insert)
- rw_unlock(&info->s->mmap_lock);
- }
+ if ((info->s->mmaped_length != info->s->state.state.data_file_length) &&
+ (info->s->nonmmaped_inserts > MAX_NONMAPPED_INSERTS))
+ {
+ if (info->s->concurrent_insert)
+ rw_wrlock(&info->s->mmap_lock);
+ mi_remap_file(info, info->s->state.state.data_file_length);
+ info->s->nonmmaped_inserts= 0;
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->mmap_lock);
+ }
#endif
share->state.process= share->last_process=share->this_process;
share->state.unique= info->last_unique= info->this_unique;
@@ -300,6 +306,7 @@ void mi_get_status(void* param, int concurrent_insert)
void mi_update_status(void* param)
{
MI_INFO *info=(MI_INFO*) param;
+ DBUG_ENTER("mi_update_status");
/*
Because someone may have closed the table we point at, we only
update the state if its our own state. This isn't a problem as
@@ -336,20 +343,32 @@ void mi_update_status(void* param)
}
info->opt_flag&= ~WRITE_CACHE_USED;
}
+ DBUG_VOID_RETURN;
}
void mi_restore_status(void *param)
{
MI_INFO *info= (MI_INFO*) param;
+ DBUG_ENTER("mi_restore_status");
+ DBUG_PRINT("info",("key_file: %ld data_file: %ld",
+ (long) info->s->state.state.key_file_length,
+ (long) info->s->state.state.data_file_length));
info->state= &info->s->state.state;
info->append_insert_at_end= 0;
+ DBUG_VOID_RETURN;
}
void mi_copy_status(void* to,void *from)
{
- ((MI_INFO*) to)->state= &((MI_INFO*) from)->save_state;
+ MI_INFO *info= (MI_INFO*) to;
+ DBUG_ENTER("mi_copy_status");
+ info->state= &((MI_INFO*) from)->save_state;
+ DBUG_PRINT("info",("key_file: %ld data_file: %ld",
+ (long) info->state->key_file_length,
+ (long) info->state->data_file_length));
+ DBUG_VOID_RETURN;
}
@@ -377,17 +396,18 @@ void mi_copy_status(void* to,void *from)
my_bool mi_check_status(void *param)
{
MI_INFO *info=(MI_INFO*) param;
+ DBUG_ENTER("mi_check_status");
+ DBUG_PRINT("info",("dellink: %ld r_locks: %u w_locks: %u",
+ (long) info->s->state.dellink, (uint) info->s->r_locks,
+ (uint) info->s->w_locks));
/*
The test for w_locks == 1 is here because this thread has already done an
external lock (in other words: w_locks == 1 means no other threads has
a write lock)
*/
- DBUG_PRINT("info",("dellink: %ld r_locks: %u w_locks: %u",
- (long) info->s->state.dellink, (uint) info->s->r_locks,
- (uint) info->s->w_locks));
- return (my_bool) !(info->s->state.dellink == HA_OFFSET_ERROR ||
+ DBUG_RETURN((my_bool) !(info->s->state.dellink == HA_OFFSET_ERROR ||
(myisam_concurrent_insert == 2 && info->s->r_locks &&
- info->s->w_locks == 1));
+ info->s->w_locks == 1)));
}
@@ -409,10 +429,10 @@ int _mi_readinfo(register MI_INFO *info, int lock_type, int check_keybuffer)
DBUG_RETURN(1);
if (mi_state_info_read_dsk(share->kfile, &share->state, 1))
{
- int error=my_errno ? my_errno : -1;
+ int error= my_errno ? my_errno : HA_ERR_FILE_TOO_SHORT;
VOID(my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF,
MYF(MY_SEEK_NOT_DONE)));
- my_errno=error;
+ my_errno= error;
DBUG_RETURN(1);
}
}
diff --git a/storage/myisam/mi_log.c b/storage/myisam/mi_log.c
index 8b9ca038fec..982ba8b4367 100644
--- a/storage/myisam/mi_log.c
+++ b/storage/myisam/mi_log.c
@@ -133,7 +133,7 @@ void _myisam_log_record(enum myisam_log_commands command, MI_INFO *info,
if (!info->s->base.blobs)
length=info->s->base.reclength;
else
- length=info->s->base.reclength+ _my_calc_total_blob_length(info,record);
+ length=info->s->base.reclength+ _mi_calc_total_blob_length(info,record);
buff[0]=(uchar) command;
mi_int2store(buff+1,info->dfile);
mi_int4store(buff+3,pid);
diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c
index a4f6e1291db..0c0e5261cc1 100644
--- a/storage/myisam/mi_open.c
+++ b/storage/myisam/mi_open.c
@@ -82,8 +82,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
uchar *disk_cache, *disk_pos, *end_pos;
MI_INFO info,*m_info,*old_info;
MYISAM_SHARE share_buff,*share;
- ulong rec_per_key_part[MI_MAX_POSSIBLE_KEY*MI_MAX_KEY_SEG];
- my_off_t key_root[MI_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE];
+ ulong rec_per_key_part[HA_MAX_POSSIBLE_KEY*HA_MAX_KEY_SEG];
+ my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE];
ulonglong max_key_file_length, max_data_file_length;
DBUG_ENTER("mi_open");
@@ -105,7 +105,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
share_buff.state.key_root=key_root;
share_buff.state.key_del=key_del;
share_buff.key_cache= multi_key_cache_search((uchar*) name_buff,
- strlen(name_buff));
+ strlen(name_buff),
+ dflt_key_cache);
DBUG_EXECUTE_IF("myisam_pretend_crashed_table_on_open",
if (strstr(name, "/t1"))
@@ -143,7 +144,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA |
HA_OPTION_TEMP_COMPRESS_RECORD | HA_OPTION_CHECKSUM |
HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE |
- HA_OPTION_RELIES_ON_SQL_LAYER))
+ HA_OPTION_RELIES_ON_SQL_LAYER | HA_OPTION_NULL_FIELDS))
{
DBUG_PRINT("error",("wrong options: 0x%lx", share->options));
my_errno=HA_ERR_OLD_FILE;
@@ -179,7 +180,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
{
if ((lock_error=my_lock(kfile,F_RDLCK,0L,F_TO_EOF,
MYF(open_flags & HA_OPEN_WAIT_IF_LOCKED ?
- 0 : MY_DONT_WAIT))) &&
+ 0 : MY_SHORT_WAIT))) &&
!(open_flags & HA_OPEN_IGNORE_IF_LOCKED))
goto err;
}
@@ -210,7 +211,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
DBUG_PRINT("warning",("saved_base_info_length: %d base_info_length: %d",
len,MI_BASE_INFO_SIZE));
}
- disk_pos= my_n_base_info_read(disk_cache + base_pos, &share->base);
+ disk_pos= mi_n_base_info_read(disk_cache + base_pos, &share->base);
share->state.state_length=base_pos;
if (!(open_flags & HA_OPEN_FOR_REPAIR) &&
@@ -235,8 +236,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
}
key_parts+=fulltext_keys*FT_SEGS;
- if (share->base.max_key_length > MI_MAX_KEY_BUFF || keys > MI_MAX_KEY ||
- key_parts > MI_MAX_KEY * MI_MAX_KEY_SEG)
+ if (share->base.max_key_length > HA_MAX_KEY_BUFF || keys > MI_MAX_KEY ||
+ key_parts > MI_MAX_KEY * HA_MAX_KEY_SEG)
{
DBUG_PRINT("error",("Wrong key info: Max_key_length: %d keys: %d key_parts: %d", share->base.max_key_length, keys, key_parts));
my_errno=HA_ERR_UNSUPPORTED;
@@ -452,7 +453,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
if (share->rec[i].type == (int) FIELD_BLOB)
{
share->blobs[j].pack_length=
- share->rec[i].length-mi_portable_sizeof_char_ptr;;
+ share->rec[i].length-portable_sizeof_char_ptr;;
share->blobs[j].offset=offset;
j++;
}
@@ -739,12 +740,14 @@ void mi_setup_functions(register MYISAM_SHARE *share)
{
share->read_record=_mi_read_pack_record;
share->read_rnd=_mi_read_rnd_pack_record;
- if (!(share->options & HA_OPTION_TEMP_COMPRESS_RECORD))
- share->calc_checksum=0; /* No checksum */
- else if (share->options & HA_OPTION_PACK_RECORD)
+ if ((share->options &
+ (HA_OPTION_PACK_RECORD | HA_OPTION_NULL_FIELDS)))
share->calc_checksum= mi_checksum;
else
share->calc_checksum= mi_static_checksum;
+ share->calc_check_checksum= share->calc_checksum;
+ if (!(share->options & HA_OPTION_TEMP_COMPRESS_RECORD))
+ share->calc_checksum=0; /* No checksum */
}
else if (share->options & HA_OPTION_PACK_RECORD)
{
@@ -754,6 +757,7 @@ void mi_setup_functions(register MYISAM_SHARE *share)
share->compare_record=_mi_cmp_dynamic_record;
share->compare_unique=_mi_cmp_dynamic_unique;
share->calc_checksum= mi_checksum;
+ share->calc_check_checksum= share->calc_checksum;
/* add bits used to pack data to pack_reclength for faster allocation */
share->base.pack_reclength+= share->base.pack_bits;
@@ -777,7 +781,11 @@ void mi_setup_functions(register MYISAM_SHARE *share)
share->update_record=_mi_update_static_record;
share->write_record=_mi_write_static_record;
share->compare_unique=_mi_cmp_static_unique;
- share->calc_checksum= mi_static_checksum;
+ if (share->options & HA_OPTION_NULL_FIELDS)
+ share->calc_checksum= mi_checksum;
+ else
+ share->calc_checksum= mi_static_checksum;
+ share->calc_check_checksum= share->calc_checksum;
}
share->file_read= mi_nommap_pread;
share->file_write= mi_nommap_pwrite;
@@ -1020,7 +1028,7 @@ uint mi_base_info_write(File file, MI_BASE_INFO *base)
}
-uchar *my_n_base_info_read(uchar *ptr, MI_BASE_INFO *base)
+uchar *mi_n_base_info_read(uchar *ptr, MI_BASE_INFO *base)
{
base->keystart = mi_sizekorr(ptr); ptr +=8;
base->max_data_file_length = mi_sizekorr(ptr); ptr +=8;
diff --git a/storage/myisam/mi_packrec.c b/storage/myisam/mi_packrec.c
index 59c98c978ce..387b82de5ee 100644
--- a/storage/myisam/mi_packrec.c
+++ b/storage/myisam/mi_packrec.c
@@ -105,6 +105,7 @@ static void init_bit_buffer(MI_BIT_BUFF *bit_buff,uchar *buffer,uint length);
static uint fill_and_get_bits(MI_BIT_BUFF *bit_buff,uint count);
static void fill_buffer(MI_BIT_BUFF *bit_buff);
static uint max_bit(uint value);
+static uint read_pack_length(uint version, const uchar *buf, ulong *length);
#ifdef HAVE_MMAP
static uchar *_mi_mempack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
MI_BLOCK_INFO *info, uchar **rec_buff_p,
@@ -1035,7 +1036,7 @@ static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
else
{
ulong length=get_bits(bit_buff,rec->space_length_bits);
- uint pack_length=(uint) (end-to)-mi_portable_sizeof_char_ptr;
+ uint pack_length=(uint) (end-to)-portable_sizeof_char_ptr;
if (bit_buff->blob_pos+length > bit_buff->blob_end)
{
bit_buff->error=1;
@@ -1043,7 +1044,7 @@ static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
return;
}
decode_bytes(rec,bit_buff,bit_buff->blob_pos,bit_buff->blob_pos+length);
- _my_store_blob_length((uchar*) to,pack_length,length);
+ _mi_store_blob_length((uchar*) to,pack_length,length);
memcpy_fixed((char*) to+pack_length,(char*) &bit_buff->blob_pos,
sizeof(char*));
bit_buff->blob_pos+=length;
@@ -1624,7 +1625,7 @@ uint save_pack_length(uint version, uchar *block_buff, ulong length)
}
-uint read_pack_length(uint version, const uchar *buf, ulong *length)
+static uint read_pack_length(uint version, const uchar *buf, ulong *length)
{
if (buf[0] < 254)
{
diff --git a/storage/myisam/mi_range.c b/storage/myisam/mi_range.c
index 932a4abd1b3..8bd122c828a 100644
--- a/storage/myisam/mi_range.c
+++ b/storage/myisam/mi_range.c
@@ -260,7 +260,7 @@ static uint _mi_keynr(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
uchar *keypos, uint *ret_max_key)
{
uint nod_flag,keynr,max_key;
- uchar t_buff[MI_MAX_KEY_BUFF],*end;
+ uchar t_buff[HA_MAX_KEY_BUFF],*end;
end= page+mi_getint(page);
nod_flag=mi_test_if_nod(page);
diff --git a/storage/myisam/mi_rkey.c b/storage/myisam/mi_rkey.c
index f1d35810d36..f20b0366683 100644
--- a/storage/myisam/mi_rkey.c
+++ b/storage/myisam/mi_rkey.c
@@ -85,6 +85,8 @@ int mi_rkey(MI_INFO *info, uchar *buf, int inx, const uchar *key,
{
mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
+ if (share->concurrent_insert)
+ rw_unlock(&share->key_root_lock[inx]);
goto err;
}
break;
diff --git a/storage/myisam/mi_search.c b/storage/myisam/mi_search.c
index 2195ac178dd..f4cac27a43f 100644
--- a/storage/myisam/mi_search.c
+++ b/storage/myisam/mi_search.c
@@ -60,7 +60,7 @@ int _mi_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
int error,flag;
uint nod_flag;
uchar *keypos,*maxpos;
- uchar lastkey[MI_MAX_KEY_BUFF],*buff;
+ uchar lastkey[HA_MAX_KEY_BUFF],*buff;
DBUG_ENTER("_mi_search");
DBUG_PRINT("enter",("pos: %lu nextflag: %u lastpos: %lu",
(ulong) pos, nextflag, (ulong) info->lastpos));
@@ -242,7 +242,7 @@ int _mi_seq_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
{
int flag;
uint nod_flag,length,not_used[2];
- uchar t_buff[MI_MAX_KEY_BUFF],*end;
+ uchar t_buff[HA_MAX_KEY_BUFF],*end;
DBUG_ENTER("_mi_seq_search");
LINT_INIT(flag); LINT_INIT(length);
@@ -296,7 +296,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
int key_len_skip, seg_len_pack, key_len_left;
uchar *end, *kseg, *vseg;
uchar *sort_order=keyinfo->seg->charset->sort_order;
- uchar tt_buff[MI_MAX_KEY_BUFF+2], *t_buff=tt_buff+2;
+ uchar tt_buff[HA_MAX_KEY_BUFF+2], *t_buff=tt_buff+2;
uchar *saved_from, *saved_to, *saved_vseg;
uint saved_length=0, saved_prefix_len=0;
uint length_pack;
@@ -920,7 +920,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
DBUG_ENTER("_mi_get_binary_pack_key");
page= *page_pos;
- page_end=page+MI_MAX_KEY_BUFF+1;
+ page_end=page+HA_MAX_KEY_BUFF+1;
start_key=key;
/*
@@ -1238,7 +1238,7 @@ int _mi_search_next(register MI_INFO *info, register MI_KEYDEF *keyinfo,
{
int error;
uint nod_flag;
- uchar lastkey[MI_MAX_KEY_BUFF];
+ uchar lastkey[HA_MAX_KEY_BUFF];
DBUG_ENTER("_mi_search_next");
DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos: %lu",
nextflag, (ulong) info->lastpos,
diff --git a/storage/myisam/mi_test1.c b/storage/myisam/mi_test1.c
index a68bcbed56c..8e491823939 100644
--- a/storage/myisam/mi_test1.c
+++ b/storage/myisam/mi_test1.c
@@ -71,14 +71,16 @@ static int run_test(const char *filename)
/* First define 2 columns */
recinfo[0].type=FIELD_NORMAL; recinfo[0].length=1; /* For NULL bits */
recinfo[1].type=key_field;
- recinfo[1].length= (key_field == FIELD_BLOB ? 4+mi_portable_sizeof_char_ptr :
+ recinfo[1].length= (key_field == FIELD_BLOB ? 4+portable_sizeof_char_ptr :
key_length);
if (key_field == FIELD_VARCHAR)
recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length);;
recinfo[2].type=extra_field;
- recinfo[2].length= (extra_field == FIELD_BLOB ? 4 + mi_portable_sizeof_char_ptr : 24);
+ recinfo[2].length= (extra_field == FIELD_BLOB ? 4 + portable_sizeof_char_ptr : 24);
if (extra_field == FIELD_VARCHAR)
recinfo[2].length+= HA_VARCHAR_PACKLENGTH(recinfo[2].length);
+ recinfo[1].null_bit= null_fields ? 2 : 0;
+
if (opt_unique)
{
recinfo[3].type=FIELD_CHECK;
@@ -630,7 +632,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
key_type= HA_KEYTYPE_VARTEXT1;
break;
case 'k':
- if (key_length < 4 || key_length > MI_MAX_KEY_LENGTH)
+ if (key_length < 4 || key_length > HA_MAX_KEY_LENGTH)
{
fprintf(stderr,"Wrong key length\n");
exit(1);
diff --git a/storage/myisam/mi_test2.c b/storage/myisam/mi_test2.c
index 902801b5e6e..6fb71feb1e7 100644
--- a/storage/myisam/mi_test2.c
+++ b/storage/myisam/mi_test2.c
@@ -26,6 +26,7 @@
#endif
#include "myisamdef.h"
#include <m_ctype.h>
+#include <my_bit.h>
#define STANDARD_LENGTH 37
#define MYISAM_KEYS 6
@@ -187,7 +188,7 @@ int main(int argc, char *argv[])
if (use_blob)
{
recinfo[6].type=FIELD_BLOB;
- recinfo[6].length=4+mi_portable_sizeof_char_ptr;
+ recinfo[6].length=4+portable_sizeof_char_ptr;
recinfo[6].null_bit=0;
recinfo[6].null_pos=0;
}
@@ -605,7 +606,7 @@ int main(int argc, char *argv[])
if (mi_rsame(file,read_record2,(int) i)) goto err;
if (bcmp(read_record,read_record2,reclength) != 0)
{
- printf("is_rsame didn't find same record\n");
+ printf("mi_rsame didn't find same record\n");
goto end;
}
}
@@ -656,10 +657,10 @@ int main(int argc, char *argv[])
sprintf((char*) key2,"%6d",k);
min_key.key= key;
- min_key.length= USE_WHOLE_KEY;
+ min_key.keypart_map= HA_WHOLE_KEY;
min_key.flag= HA_READ_AFTER_KEY;
max_key.key= key2;
- max_key.length= USE_WHOLE_KEY;
+ max_key.keypart_map= HA_WHOLE_KEY;
max_key.flag= HA_READ_BEFORE_KEY;
range_records= mi_records_in_range(file, 0, &min_key, &max_key);
records=0;
@@ -779,8 +780,7 @@ int main(int argc, char *argv[])
{
ulong blob_length,pos;
uchar *ptr;
- longget(blob_length,read_record+blob_pos+4);
- ptr=(uchar*) blob_length;
+ memcpy_fixed(&ptr, read_record+blob_pos+4, sizeof(ptr));
longget(blob_length,read_record+blob_pos);
for (pos=0 ; pos < blob_length ; pos++)
{
diff --git a/storage/myisam/mi_unique.c b/storage/myisam/mi_unique.c
index e490fb683e4..02fcd9289dd 100644
--- a/storage/myisam/mi_unique.c
+++ b/storage/myisam/mi_unique.c
@@ -212,7 +212,7 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const uchar *a, const uchar *b,
if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 ||
type == HA_KEYTYPE_VARTEXT2)
{
- if (mi_compare_text(keyseg->charset, (uchar *) pos_a, a_length,
+ if (ha_compare_text(keyseg->charset, (uchar *) pos_a, a_length,
(uchar *) pos_b, b_length, 0, 1))
return 1;
}
diff --git a/storage/myisam/mi_update.c b/storage/myisam/mi_update.c
index 956334b7806..dc6a1659931 100644
--- a/storage/myisam/mi_update.c
+++ b/storage/myisam/mi_update.c
@@ -23,7 +23,7 @@ int mi_update(register MI_INFO *info, const uchar *oldrec, uchar *newrec)
int flag,key_changed,save_errno;
reg3 my_off_t pos;
uint i;
- uchar old_key[MI_MAX_KEY_BUFF],*new_key;
+ uchar old_key[HA_MAX_KEY_BUFF],*new_key;
bool auto_key_changed=0;
ulonglong changed;
MYISAM_SHARE *share=info->s;
diff --git a/storage/myisam/mi_write.c b/storage/myisam/mi_write.c
index 70ba7a4588a..05372ad12e8 100644
--- a/storage/myisam/mi_write.c
+++ b/storage/myisam/mi_write.c
@@ -346,7 +346,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
int error,flag;
uint nod_flag, search_key_length;
uchar *temp_buff,*keypos;
- uchar keybuff[MI_MAX_KEY_BUFF];
+ uchar keybuff[HA_MAX_KEY_BUFF];
my_bool was_last_key;
my_off_t next_page, dupp_key_pos;
DBUG_ENTER("w_search");
@@ -354,7 +354,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY;
if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
- MI_MAX_KEY_BUFF*2)))
+ HA_MAX_KEY_BUFF*2)))
DBUG_RETURN(-1);
if (!_mi_fetch_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff,0))
goto err;
@@ -545,7 +545,7 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
get_key_length(alen,a);
DBUG_ASSERT(info->ft1_to_ft2==0);
if (alen == blen &&
- mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0)
+ ha_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0)
{
/* yup. converting */
info->ft1_to_ft2=(DYNAMIC_ARRAY *)
@@ -707,7 +707,7 @@ static uchar *_mi_find_last_pos(MI_KEYDEF *keyinfo, uchar *page,
{
uint keys,length,last_length,key_ref_length;
uchar *end,*lastpos,*prevpos;
- uchar key_buff[MI_MAX_KEY_BUFF];
+ uchar key_buff[HA_MAX_KEY_BUFF];
DBUG_ENTER("_mi_find_last_pos");
key_ref_length=2;
@@ -764,7 +764,7 @@ static int _mi_balance_page(register MI_INFO *info, MI_KEYDEF *keyinfo,
length,keys;
uchar *pos,*buff,*extra_buff;
my_off_t next_page,new_pos;
- uchar tmp_part_key[MI_MAX_KEY_BUFF];
+ uchar tmp_part_key[HA_MAX_KEY_BUFF];
DBUG_ENTER("_mi_balance_page");
k_length=keyinfo->keylength;
@@ -930,7 +930,7 @@ static int keys_free(uchar *key, TREE_FREE mode, bulk_insert_param *param)
Probably I can use info->lastkey here, but I'm not sure,
and to be safe I'd better use local lastkey.
*/
- uchar lastkey[MI_MAX_KEY_BUFF];
+ uchar lastkey[HA_MAX_KEY_BUFF];
uint keylen;
MI_KEYDEF *keyinfo;
diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c
index 2300d1e7c23..9f86f14d2ba 100644
--- a/storage/myisam/myisamchk.c
+++ b/storage/myisam/myisamchk.c
@@ -16,10 +16,10 @@
/* Describe, check and repair of MyISAM tables */
#include "fulltext.h"
-
#include <m_ctype.h>
#include <stdarg.h>
#include <my_getopt.h>
+#include <my_bit.h>
#ifdef HAVE_SYS_VADVICE_H
#include <sys/vadvise.h>
#endif
@@ -40,7 +40,6 @@ static const char *set_collation_name, *opt_tmpdir;
static CHARSET_INFO *set_collation;
static long opt_myisam_block_size;
static long opt_key_cache_block_size;
-static const char *my_progname_short;
static int stopwords_inited= 0;
static MY_TMPDIR myisamchk_tmpdir;
@@ -67,9 +66,9 @@ static const char *myisam_stats_method_str="nulls_unequal";
static void get_options(int *argc,char * * *argv);
static void print_version(void);
static void usage(void);
-static int myisamchk(MI_CHECK *param, char *filename);
-static void descript(MI_CHECK *param, register MI_INFO *info, char * name);
-static int mi_sort_records(MI_CHECK *param, register MI_INFO *info,
+static int myisamchk(HA_CHECK *param, char *filename);
+static void descript(HA_CHECK *param, register MI_INFO *info, char * name);
+static int mi_sort_records(HA_CHECK *param, register MI_INFO *info,
char * name, uint sort_key,
my_bool write_info, my_bool update_index);
static int sort_record_index(MI_SORT_PARAM *sort_param, MI_INFO *info,
@@ -77,7 +76,7 @@ static int sort_record_index(MI_SORT_PARAM *sort_param, MI_INFO *info,
my_off_t page,uchar *buff,uint sortkey,
File new_file, my_bool update_index);
-MI_CHECK check_param;
+HA_CHECK check_param;
/* Main program */
@@ -85,7 +84,6 @@ int main(int argc, char **argv)
{
int error;
MY_INIT(argv[0]);
- my_progname_short= my_progname+dirname_length(my_progname);
myisamchk_init(&check_param);
check_param.opt_lock_memory=1; /* Lock memory if possible */
@@ -695,7 +693,7 @@ get_one_option(int optid,
case OPT_STATS_METHOD:
{
int method;
- enum_mi_stats_method method_conv;
+ enum_handler_stats_method method_conv;
LINT_INIT(method_conv);
myisam_stats_method_str= argument;
if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0)
@@ -794,7 +792,7 @@ static void get_options(register int *argc,register char ***argv)
/* Check table */
-static int myisamchk(MI_CHECK *param, char * filename)
+static int myisamchk(HA_CHECK *param, char * filename)
{
int error,lock_type,recreate;
int rep_quick= param->testflag & (T_QUICK | T_FORCE_UNIQUENESS);
@@ -1199,7 +1197,7 @@ end2:
/* Write info about table */
-static void descript(MI_CHECK *param, register MI_INFO *info, char * name)
+static void descript(HA_CHECK *param, register MI_INFO *info, char * name)
{
uint key,keyseg_nr,field,start;
reg3 MI_KEYDEF *keyinfo;
@@ -1464,7 +1462,7 @@ static void descript(MI_CHECK *param, register MI_INFO *info, char * name)
/* Sort records according to one key */
-static int mi_sort_records(MI_CHECK *param,
+static int mi_sort_records(HA_CHECK *param,
register MI_INFO *info, char * name,
uint sort_key,
my_bool write_info,
@@ -1478,7 +1476,7 @@ static int mi_sort_records(MI_CHECK *param,
ha_rows old_record_count;
MYISAM_SHARE *share=info->s;
char llbuff[22],llbuff2[22];
- SORT_INFO sort_info;
+ MI_SORT_INFO sort_info;
MI_SORT_PARAM sort_param;
DBUG_ENTER("sort_records");
@@ -1654,10 +1652,10 @@ static int sort_record_index(MI_SORT_PARAM *sort_param,MI_INFO *info,
uint nod_flag,used_length,key_length;
uchar *temp_buff,*keypos,*endpos;
my_off_t next_page,rec_pos;
- uchar lastkey[MI_MAX_KEY_BUFF];
+ uchar lastkey[HA_MAX_KEY_BUFF];
char llbuff[22];
- SORT_INFO *sort_info= sort_param->sort_info;
- MI_CHECK *param=sort_info->param;
+ MI_SORT_INFO *sort_info= sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
DBUG_ENTER("sort_record_index");
nod_flag=mi_test_if_nod(buff);
@@ -1745,7 +1743,7 @@ err:
static int not_killed= 0;
-volatile int *killed_ptr(MI_CHECK *param __attribute__((unused)))
+volatile int *killed_ptr(HA_CHECK *param __attribute__((unused)))
{
return &not_killed; /* always NULL */
}
@@ -1753,7 +1751,7 @@ volatile int *killed_ptr(MI_CHECK *param __attribute__((unused)))
/* print warnings and errors */
/* VARARGS */
-void mi_check_print_info(MI_CHECK *param __attribute__((unused)),
+void mi_check_print_info(HA_CHECK *param __attribute__((unused)),
const char *fmt,...)
{
va_list args;
@@ -1766,7 +1764,7 @@ void mi_check_print_info(MI_CHECK *param __attribute__((unused)),
/* VARARGS */
-void mi_check_print_warning(MI_CHECK *param, const char *fmt,...)
+void mi_check_print_warning(HA_CHECK *param, const char *fmt,...)
{
va_list args;
DBUG_ENTER("mi_check_print_warning");
@@ -1791,7 +1789,7 @@ void mi_check_print_warning(MI_CHECK *param, const char *fmt,...)
/* VARARGS */
-void mi_check_print_error(MI_CHECK *param, const char *fmt,...)
+void mi_check_print_error(HA_CHECK *param, const char *fmt,...)
{
va_list args;
DBUG_ENTER("mi_check_print_error");
diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h
index 721d6b9f271..64c1a8214c3 100644
--- a/storage/myisam/myisamdef.h
+++ b/storage/myisam/myisamdef.h
@@ -15,8 +15,8 @@
/* This file is included by all internal myisam files */
-#include "myisam.h" /* Structs & some defines */
-#include "myisampack.h" /* packing of keys */
+#include "myisam.h" /* Structs & some defines */
+#include "myisampack.h" /* packing of keys */
#include <my_tree.h>
#ifdef THREAD
#include <my_pthread.h>
@@ -26,15 +26,16 @@
#endif
#if defined(my_write) && !defined(MAP_TO_USE_RAID)
-#undef my_write /* undef map from my_nosys; We need test-if-disk full */
+/* undef map from my_nosys; We need test-if-disk full */
+#undef my_write
#endif
typedef struct st_mi_status_info
{
- ha_rows records; /* Rows in table */
- ha_rows del; /* Removed rows */
- my_off_t empty; /* lost space in datafile */
- my_off_t key_empty; /* lost space in indexfile */
+ ha_rows records; /* Rows in table */
+ ha_rows del; /* Removed rows */
+ my_off_t empty; /* lost space in datafile */
+ my_off_t key_empty; /* lost space in indexfile */
my_off_t key_file_length;
my_off_t data_file_length;
ha_checksum checksum;
@@ -42,347 +43,294 @@ typedef struct st_mi_status_info
typedef struct st_mi_state_info
{
- struct { /* Fileheader */
+ struct
+ { /* Fileheader */
uchar file_version[4];
uchar options[2];
uchar header_length[2];
uchar state_info_length[2];
uchar base_info_length[2];
uchar base_pos[2];
- uchar key_parts[2]; /* Key parts */
- uchar unique_key_parts[2]; /* Key parts + unique parts */
- uchar keys; /* number of keys in file */
- uchar uniques; /* number of UNIQUE definitions */
- uchar language; /* Language for indexes */
- uchar max_block_size_index; /* max keyblock size */
+ uchar key_parts[2]; /* Key parts */
+ uchar unique_key_parts[2]; /* Key parts + unique parts */
+ uchar keys; /* number of keys in file */
+ uchar uniques; /* number of UNIQUE definitions */
+ uchar language; /* Language for indexes */
+ uchar max_block_size_index; /* max keyblock size */
uchar fulltext_keys;
uchar not_used; /* To align to 8 */
} header;
MI_STATUS_INFO state;
- ha_rows split; /* number of split blocks */
- my_off_t dellink; /* Link to next removed block */
+ ha_rows split; /* number of split blocks */
+ my_off_t dellink; /* Link to next removed block */
ulonglong auto_increment;
- ulong process; /* process that updated table last */
- ulong unique; /* Unique number for this process */
- ulong update_count; /* Updated for each write lock */
+ ulong process; /* process that updated table last */
+ ulong unique; /* Unique number for this process */
+ ulong update_count; /* Updated for each write lock */
ulong status;
ulong *rec_per_key_part;
- my_off_t *key_root; /* Start of key trees */
- my_off_t *key_del; /* delete links for trees */
- my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */
-
- ulong sec_index_changed; /* Updated when new sec_index */
- ulong sec_index_used; /* which extra index are in use */
- ulonglong key_map; /* Which keys are in use */
ha_checksum checksum; /* Table checksum */
- ulong version; /* timestamp of create */
- time_t create_time; /* Time when created database */
- time_t recover_time; /* Time for last recover */
- time_t check_time; /* Time for last check */
- uint sortkey; /* sorted by this key (not used) */
+ my_off_t *key_root; /* Start of key trees */
+ my_off_t *key_del; /* delete links for trees */
+ my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */
+
+ ulong sec_index_changed; /* Updated when new sec_index */
+ ulong sec_index_used; /* which extra index are in use */
+ ulonglong key_map; /* Which keys are in use */
+ ulong version; /* timestamp of create */
+ time_t create_time; /* Time when created database */
+ time_t recover_time; /* Time for last recover */
+ time_t check_time; /* Time for last check */
+ uint sortkey; /* sorted by this key (not used) */
uint open_count;
- uint8 changed; /* Changed since myisamchk */
+ uint8 changed; /* Changed since myisamchk */
/* the following isn't saved on disk */
- uint state_diff_length; /* Should be 0 */
- uint state_length; /* Length of state header in file */
+ uint state_diff_length; /* Should be 0 */
+ uint state_length; /* Length of state header in file */
ulong *key_info;
} MI_STATE_INFO;
-#define MI_STATE_INFO_SIZE (24+14*8+7*4+2*2+8)
-#define MI_STATE_KEY_SIZE 8
+#define MI_STATE_INFO_SIZE (24+14*8+7*4+2*2+8)
+#define MI_STATE_KEY_SIZE 8
#define MI_STATE_KEYBLOCK_SIZE 8
-#define MI_STATE_KEYSEG_SIZE 4
-#define MI_STATE_EXTRA_SIZE ((MI_MAX_KEY+MI_MAX_KEY_BLOCK_SIZE)*MI_STATE_KEY_SIZE + MI_MAX_KEY*MI_MAX_KEY_SEG*MI_STATE_KEYSEG_SIZE)
-#define MI_KEYDEF_SIZE (2+ 5*2)
-#define MI_UNIQUEDEF_SIZE (2+1+1)
-#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2)
-#define MI_COLUMNDEF_SIZE (2*3+1)
-#define MI_BASE_INFO_SIZE (5*8 + 8*4 + 4 + 4*2 + 16)
-#define MI_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */
+#define MI_STATE_KEYSEG_SIZE 4
+#define MI_STATE_EXTRA_SIZE ((MI_MAX_KEY+MI_MAX_KEY_BLOCK_SIZE)*MI_STATE_KEY_SIZE + MI_MAX_KEY*HA_MAX_KEY_SEG*MI_STATE_KEYSEG_SIZE)
+#define MI_KEYDEF_SIZE (2+ 5*2)
+#define MI_UNIQUEDEF_SIZE (2+1+1)
+#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2)
+#define MI_COLUMNDEF_SIZE (2*3+1)
+#define MI_BASE_INFO_SIZE (5*8 + 8*4 + 4 + 4*2 + 16)
+#define MI_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */
typedef struct st_mi_base_info
{
- my_off_t keystart; /* Start of keys */
+ my_off_t keystart; /* Start of keys */
my_off_t max_data_file_length;
my_off_t max_key_file_length;
my_off_t margin_key_file_length;
- ha_rows records,reloc; /* Create information */
- ulong mean_row_length; /* Create information */
- ulong reclength; /* length of unpacked record */
- ulong pack_reclength; /* Length of full packed rec. */
+ ha_rows records, reloc; /* Create information */
+ ulong mean_row_length; /* Create information */
+ ulong reclength; /* length of unpacked record */
+ ulong pack_reclength; /* Length of full packed rec. */
ulong min_pack_length;
- ulong max_pack_length; /* Max possibly length of packed rec.*/
+ ulong max_pack_length; /* Max possibly length of packed rec.*/
ulong min_block_length;
- ulong fields, /* fields in table */
- pack_fields; /* packed fields in table */
- uint rec_reflength; /* = 2-8 */
- uint key_reflength; /* = 2-8 */
- uint keys; /* same as in state.header */
- uint auto_key; /* Which key-1 is a auto key */
- uint blobs; /* Number of blobs */
- uint pack_bits; /* Length of packed bits */
- uint max_key_block_length; /* Max block length */
- uint max_key_length; /* Max key length */
+ ulong fields, /* fields in table */
+ pack_fields; /* packed fields in table */
+ uint rec_reflength; /* = 2-8 */
+ uint key_reflength; /* = 2-8 */
+ uint keys; /* same as in state.header */
+ uint auto_key; /* Which key-1 is a auto key */
+ uint blobs; /* Number of blobs */
+ uint pack_bits; /* Length of packed bits */
+ uint max_key_block_length; /* Max block length */
+ uint max_key_length; /* Max key length */
/* Extra allocation when using dynamic record format */
uint extra_alloc_bytes;
uint extra_alloc_procent;
/* Info about raid */
- uint raid_type,raid_chunks;
+ uint raid_type, raid_chunks;
ulong raid_chunksize;
/* The following are from the header */
- uint key_parts,all_key_parts;
+ uint key_parts, all_key_parts;
} MI_BASE_INFO;
- /* Structs used intern in database */
+ /* Structs used intern in database */
-typedef struct st_mi_blob /* Info of record */
+typedef struct st_mi_blob /* Info of record */
{
- ulong offset; /* Offset to blob in record */
- uint pack_length; /* Type of packed length */
- ulong length; /* Calc:ed for each record */
+ ulong offset; /* Offset to blob in record */
+ uint pack_length; /* Type of packed length */
+ ulong length; /* Calc:ed for each record */
} MI_BLOB;
-typedef struct st_mi_isam_pack {
+typedef struct st_mi_isam_pack
+{
ulong header_length;
uint ref_length;
uchar version;
} MI_PACK;
-#define MAX_NONMAPPED_INSERTS 1000
+#define MAX_NONMAPPED_INSERTS 1000
-typedef struct st_mi_isam_share { /* Shared between opens */
+typedef struct st_mi_isam_share
+{ /* Shared between opens */
MI_STATE_INFO state;
MI_BASE_INFO base;
- MI_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */
- MI_KEYDEF *keyinfo; /* Key definitions */
- MI_UNIQUEDEF *uniqueinfo; /* unique definitions */
- HA_KEYSEG *keyparts; /* key part info */
- MI_COLUMNDEF *rec; /* Pointer to field information */
- MI_PACK pack; /* Data about packed records */
- MI_BLOB *blobs; /* Pointer to blobs */
- char *unique_file_name; /* realpath() of index file */
- char *data_file_name, /* Resolved path names from symlinks */
- *index_file_name;
- uchar *file_map; /* mem-map of file if possible */
- KEY_CACHE *key_cache; /* ref to the current key cache */
+ MI_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */
+ MI_KEYDEF *keyinfo; /* Key definitions */
+ MI_UNIQUEDEF *uniqueinfo; /* unique definitions */
+ HA_KEYSEG *keyparts; /* key part info */
+ MI_COLUMNDEF *rec; /* Pointer to field information */
+ MI_PACK pack; /* Data about packed records */
+ MI_BLOB *blobs; /* Pointer to blobs */
+ char *unique_file_name; /* realpath() of index file */
+ char *data_file_name, /* Resolved path names from symlinks */
+ *index_file_name;
+ uchar *file_map; /* mem-map of file if possible */
+ KEY_CACHE *key_cache; /* ref to the current key cache */
MI_DECODE_TREE *decode_trees;
uint16 *decode_tables;
- int (*read_record)(struct st_myisam_info*, my_off_t, uchar*);
- int (*write_record)(struct st_myisam_info*, const uchar*);
- int (*update_record)(struct st_myisam_info*, my_off_t, const uchar*);
- int (*delete_record)(struct st_myisam_info*);
- int (*read_rnd)(struct st_myisam_info*, uchar*, my_off_t, my_bool);
- int (*compare_record)(struct st_myisam_info*, const uchar *);
/* Function to use for a row checksum. */
- ha_checksum (*calc_checksum)(struct st_myisam_info*, const uchar *);
- int (*compare_unique)(struct st_myisam_info*, MI_UNIQUEDEF *,
- const uchar *record, my_off_t pos);
- size_t (*file_read)(MI_INFO *, uchar *, size_t, my_off_t, myf);
- size_t (*file_write)(MI_INFO *, const uchar *, size_t, my_off_t, myf);
+ int(*read_record) (struct st_myisam_info *, my_off_t, uchar*);
+ int(*write_record) (struct st_myisam_info *, const uchar*);
+ int(*update_record) (struct st_myisam_info *, my_off_t, const uchar*);
+ int(*delete_record) (struct st_myisam_info *);
+ int(*read_rnd) (struct st_myisam_info *, uchar*, my_off_t, my_bool);
+ int(*compare_record) (struct st_myisam_info *, const uchar*);
+ ha_checksum(*calc_checksum) (struct st_myisam_info *, const uchar*);
+ /* calculate checksum for a row during check table */
+ ha_checksum(*calc_check_checksum)(struct st_myisam_info *, const uchar *);
+ int(*compare_unique) (struct st_myisam_info *, MI_UNIQUEDEF *,
+ const uchar *record, my_off_t pos);
+ size_t (*file_read) (MI_INFO *, uchar *, size_t, my_off_t, myf);
+ size_t (*file_write) (MI_INFO *, const uchar *, size_t, my_off_t, myf);
invalidator_by_filename invalidator; /* query cache invalidator */
- ulong this_process; /* processid */
- ulong last_process; /* For table-change-check */
- ulong last_version; /* Version on start */
- ulong options; /* Options used */
- ulong min_pack_length; /* Theese are used by packed data */
+ ulong this_process; /* processid */
+ ulong last_process; /* For table-change-check */
+ ulong last_version; /* Version on start */
+ ulong options; /* Options used */
+ ulong min_pack_length; /* Theese are used by packed data */
ulong max_pack_length;
ulong state_diff_length;
- uint rec_reflength; /* rec_reflength in use now */
- uint unique_name_length;
+ uint rec_reflength; /* rec_reflength in use now */
+ uint unique_name_length;
uint32 ftparsers; /* Number of distinct ftparsers + 1 */
- File kfile; /* Shared keyfile */
- File data_file; /* Shared data file */
- int mode; /* mode of file on open */
- uint reopen; /* How many times reopened */
- uint w_locks,r_locks,tot_locks; /* Number of read/write locks */
- uint blocksize; /* blocksize of keyfile */
+ File kfile; /* Shared keyfile */
+ File data_file; /* Shared data file */
+ int mode; /* mode of file on open */
+ uint reopen; /* How many times reopened */
+ uint w_locks, r_locks, tot_locks; /* Number of read/write locks */
+ uint blocksize; /* blocksize of keyfile */
myf write_flag;
enum data_file_type data_file_type;
/* Below flag is needed to make log tables work with concurrent insert */
my_bool is_log_table;
- my_bool changed, /* If changed since lock */
- global_changed, /* If changed since open */
- not_flushed,
- temporary,delay_key_write,
- concurrent_insert;
+ my_bool changed, /* If changed since lock */
+ global_changed, /* If changed since open */
+ not_flushed, temporary, delay_key_write, concurrent_insert;
#ifdef THREAD
THR_LOCK lock;
- pthread_mutex_t intern_lock; /* Locking for use with _locking */
+ pthread_mutex_t intern_lock; /* Locking for use with _locking */
rw_lock_t *key_root_lock;
#endif
my_off_t mmaped_length;
- uint nonmmaped_inserts; /* counter of writing in non-mmaped
- area */
+ /* counter of writing in non-mmaped area */
+ uint nonmmaped_inserts;
rw_lock_t mmap_lock;
} MYISAM_SHARE;
-typedef uint mi_bit_type;
-
-typedef struct st_mi_bit_buff { /* Used for packing of record */
- mi_bit_type current_byte;
- uint bits;
- uchar *pos,*end,*blob_pos,*blob_end;
- uint error;
-} MI_BIT_BUFF;
-
-struct st_myisam_info {
- MYISAM_SHARE *s; /* Shared between open:s */
- MI_STATUS_INFO *state,save_state;
- MI_BLOB *blobs; /* Pointer to blobs */
- MI_BIT_BUFF bit_buff;
+struct st_myisam_info
+{
+ MYISAM_SHARE *s; /* Shared between open:s */
+ MI_STATUS_INFO *state, save_state;
+ MI_BLOB *blobs; /* Pointer to blobs */
+ MI_BIT_BUFF bit_buff;
/* accumulate indexfile changes between write's */
- TREE *bulk_insert;
+ TREE *bulk_insert;
DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */
MEM_ROOT ft_memroot; /* used by the parser */
- MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */
- char *filename; /* parameter to open filename */
- uchar *buff, /* Temp area for key */
- *lastkey,*lastkey2; /* Last used search key */
- uchar *first_mbr_key; /* Searhed spatial key */
- uchar *rec_buff; /* Tempbuff for recordpack */
- uchar *int_keypos, /* Save position for next/previous */
- *int_maxpos; /* -""- */
- uint int_nod_flag; /* -""- */
- uint32 int_keytree_version; /* -""- */
- int (*read_record)(struct st_myisam_info*, my_off_t, uchar*);
+ MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */
+ char *filename; /* parameter to open filename */
+ uchar *buff, /* Temp area for key */
+ *lastkey, *lastkey2; /* Last used search key */
+ uchar *first_mbr_key; /* Searhed spatial key */
+ uchar *rec_buff; /* Tempbuff for recordpack */
+ uchar *int_keypos, /* Save position for next/previous */
+ *int_maxpos; /* -""- */
+ uint int_nod_flag; /* -""- */
+ uint32 int_keytree_version; /* -""- */
+ int(*read_record) (struct st_myisam_info *, my_off_t, uchar *);
invalidator_by_filename invalidator; /* query cache invalidator */
- ulong this_unique; /* uniq filenumber or thread */
- ulong last_unique; /* last unique number */
- ulong this_loop; /* counter for this open */
- ulong last_loop; /* last used counter */
- my_off_t lastpos, /* Last record position */
- nextpos; /* Position to next record */
+ ulong this_unique; /* uniq filenumber or thread */
+ ulong last_unique; /* last unique number */
+ ulong this_loop; /* counter for this open */
+ ulong last_loop; /* last used counter */
+ my_off_t lastpos, /* Last record position */
+ nextpos; /* Position to next record */
my_off_t save_lastpos;
- my_off_t pos; /* Intern variable */
- my_off_t last_keypage; /* Last key page read */
- my_off_t last_search_keypage; /* Last keypage when searching */
+ my_off_t pos; /* Intern variable */
+ my_off_t last_keypage; /* Last key page read */
+ my_off_t last_search_keypage; /* Last keypage when searching */
my_off_t dupp_key_pos;
ha_checksum checksum; /* Temp storage for row checksum */
- /* QQ: the folloing two xxx_length fields should be removed,
- as they are not compatible with parallel repair */
- ulong packed_length,blob_length; /* Length of found, packed record */
- int dfile; /* The datafile */
- uint opt_flag; /* Optim. for space/speed */
- uint update; /* If file changed since open */
- int lastinx; /* Last used index */
- uint lastkey_length; /* Length of key in lastkey */
- uint last_rkey_length; /* Last length in mi_rkey() */
+ /*
+ QQ: the folloing two xxx_length fields should be removed,
+ as they are not compatible with parallel repair
+ */
+ ulong packed_length, blob_length; /* Length of found, packed record */
+ int dfile; /* The datafile */
+ uint opt_flag; /* Optim. for space/speed */
+ uint update; /* If file changed since open */
+ int lastinx; /* Last used index */
+ uint lastkey_length; /* Length of key in lastkey */
+ uint last_rkey_length; /* Last length in mi_rkey() */
enum ha_rkey_function last_key_func; /* CONTAIN, OVERLAP, etc */
- uint save_lastkey_length;
- uint pack_key_length; /* For MYISAMMRG */
+ uint save_lastkey_length;
+ uint pack_key_length; /* For MYISAMMRG */
uint16 last_used_keyseg; /* For MyISAMMRG */
- int errkey; /* Got last error on this key */
- int lock_type; /* How database was locked */
- int tmp_lock_type; /* When locked by readinfo */
- uint data_changed; /* Somebody has changed data */
- uint save_update; /* When using KEY_READ */
- int save_lastinx;
- LIST open_list;
- IO_CACHE rec_cache; /* When cacheing records */
- uint preload_buff_size; /* When preloading indexes */
- myf lock_wait; /* is 0 or MY_DONT_WAIT */
- my_bool was_locked; /* Was locked in panic */
- my_bool append_insert_at_end; /* Set if concurrent insert */
+ int errkey; /* Got last error on this key */
+ int lock_type; /* How database was locked */
+ int tmp_lock_type; /* When locked by readinfo */
+ uint data_changed; /* Somebody has changed data */
+ uint save_update; /* When using KEY_READ */
+ int save_lastinx;
+ LIST open_list;
+ IO_CACHE rec_cache; /* When cacheing records */
+ uint preload_buff_size; /* When preloading indexes */
+ myf lock_wait; /* is 0 or MY_SHORT_WAIT */
+ my_bool was_locked; /* Was locked in panic */
+ my_bool append_insert_at_end; /* Set if concurrent insert */
my_bool quick_mode;
- my_bool page_changed; /* If info->buff can't be used for rnext */
- my_bool buff_used; /* If info->buff has to be reread for rnext */
- my_bool once_flags; /* For MYISAMMRG */
+ /* If info->buff can't be used for rnext */
+ my_bool page_changed;
+ /* If info->buff has to be reread for rnext */
+ my_bool buff_used;
+ my_bool once_flags; /* For MYISAMMRG */
#ifdef __WIN__
my_bool owned_by_merge; /* This MyISAM table is part of a merge union */
#endif
#ifdef THREAD
THR_LOCK_DATA lock;
#endif
- uchar *rtree_recursion_state; /* For RTREE */
- int rtree_recursion_depth;
+ uchar *rtree_recursion_state; /* For RTREE */
+ int rtree_recursion_depth;
};
-typedef struct st_buffpek {
- my_off_t file_pos; /* Where we are in the sort file */
- uchar *base,*key; /* Key pointers */
- ha_rows count; /* Number of rows in table */
- ulong mem_count; /* numbers of keys in memory */
- ulong max_keys; /* Max keys in buffert */
-} BUFFPEK;
-
-typedef struct st_mi_sort_param
-{
- pthread_t thr;
- IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
- DYNAMIC_ARRAY buffpek;
- MI_BIT_BUFF bit_buff; /* For parallel repair of packrec. */
-
- /*
- The next two are used to collect statistics, see update_key_parts for
- description.
- */
- ulonglong unique[MI_MAX_KEY_SEG+1];
- ulonglong notnull[MI_MAX_KEY_SEG+1];
-
- my_off_t pos,max_pos,filepos,start_recpos;
- uint key, key_length,real_key_length,sortbuff_size;
- uint maxbuffers, keys, find_length, sort_keys_length;
- my_bool fix_datafile, master;
- my_bool calc_checksum; /* calculate table checksum */
- MI_KEYDEF *keyinfo;
- HA_KEYSEG *seg;
- SORT_INFO *sort_info;
- uchar **sort_keys;
- uchar *rec_buff;
- void *wordlist, *wordptr;
- MEM_ROOT wordroot;
- uchar *record;
- MY_TMPDIR *tmpdir;
- int (*key_cmp)(struct st_mi_sort_param *, const void *, const void *);
- int (*key_read)(struct st_mi_sort_param *,void *);
- int (*key_write)(struct st_mi_sort_param *, const void *);
- void (*lock_in_memory)(MI_CHECK *);
- NEAR int (*write_keys)(struct st_mi_sort_param *, register uchar **,
- uint , struct st_buffpek *, IO_CACHE *);
- NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint);
- NEAR int (*write_key)(struct st_mi_sort_param *, IO_CACHE *,uchar *,
- uint, uint);
-} MI_SORT_PARAM;
-
- /* Some defines used by isam-funktions */
-
-#define USE_WHOLE_KEY MI_MAX_KEY_BUFF*2 /* Use whole key in _mi_search() */
-#define F_EXTRA_LCK -1
-
- /* bits in opt_flag */
-#define MEMMAP_USED 32
+#define USE_WHOLE_KEY HA_MAX_KEY_BUFF*2 /* Use whole key in _mi_search() */
+#define F_EXTRA_LCK -1
+/* bits in opt_flag */
+#define MEMMAP_USED 32
#define REMEMBER_OLD_POS 64
-#define WRITEINFO_UPDATE_KEYFILE 1
-#define WRITEINFO_NO_UNLOCK 2
+#define WRITEINFO_UPDATE_KEYFILE 1
+#define WRITEINFO_NO_UNLOCK 2
- /* once_flags */
+/* once_flags */
#define USE_PACKED_KEYS 1
#define RRND_PRESERVE_LASTINX 2
- /* bits in state.changed */
-
-#define STATE_CHANGED 1
-#define STATE_CRASHED 2
+/* bits in state.changed */
+#define STATE_CHANGED 1
+#define STATE_CRASHED 2
#define STATE_CRASHED_ON_REPAIR 4
-#define STATE_NOT_ANALYZED 8
+#define STATE_NOT_ANALYZED 8
#define STATE_NOT_OPTIMIZED_KEYS 16
-#define STATE_NOT_SORTED_PAGES 32
-
- /* options to mi_read_cache */
+#define STATE_NOT_SORTED_PAGES 32
-#define READING_NEXT 1
-#define READING_HEADER 2
+/* options to mi_read_cache */
+#define READING_NEXT 1
+#define READING_HEADER 2
-#define mi_getint(x) ((uint) mi_uint2korr(x) & 32767)
+#define mi_getint(x) ((uint) mi_uint2korr(x) & 32767)
#define mi_putint(x,y,nod) { uint16 boh=(nod ? (uint16) 32768 : 0) + (uint16) (y);\
- mi_int2store(x,boh); }
+ mi_int2store(x,boh); }
#define mi_test_if_nod(x) (x[0] & 128 ? info->s->base.key_reflength : 0)
#define mi_mark_crashed(x) do{(x)->s->state.changed|= STATE_CRASHED; \
DBUG_PRINT("error", ("Marked table crashed")); \
@@ -400,13 +348,6 @@ typedef struct st_mi_sort_param
/* Functions to store length of space packed keys, VARCHAR or BLOB keys */
-#define store_key_length_inc(key,length) \
-{ if ((length) < 255) \
- { *(key)++=(length); } \
- else \
- { *(key)=255; mi_int2store((key)+1,(length)); (key)+=3; } \
-}
-
#define store_key_length(key,length) \
{ if ((length) < 255) \
{ *(key)=(length); } \
@@ -430,39 +371,39 @@ typedef struct st_mi_sort_param
#define get_pack_length(length) ((length) >= 255 ? 3 : 1)
-#define MI_MIN_BLOCK_LENGTH 20 /* Because of delete-link */
-#define MI_EXTEND_BLOCK_LENGTH 20 /* Don't use to small record-blocks */
-#define MI_SPLIT_LENGTH ((MI_EXTEND_BLOCK_LENGTH+4)*2)
-#define MI_MAX_DYN_BLOCK_HEADER 20 /* Max prefix of record-block */
+#define MI_MIN_BLOCK_LENGTH 20 /* Because of delete-link */
+#define MI_EXTEND_BLOCK_LENGTH 20 /* Don't use to small record-blocks */
+#define MI_SPLIT_LENGTH ((MI_EXTEND_BLOCK_LENGTH+4)*2)
+#define MI_MAX_DYN_BLOCK_HEADER 20 /* Max prefix of record-block */
#define MI_BLOCK_INFO_HEADER_LENGTH 20
-#define MI_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */
-#define MI_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L)
-#define MI_DYN_MAX_ROW_LENGTH (MI_DYN_MAX_BLOCK_LENGTH - MI_SPLIT_LENGTH)
-#define MI_DYN_ALIGN_SIZE 4 /* Align blocks on this */
-#define MI_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */
-#define MI_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MI_DYN_ALIGN_SIZE-1)))
+#define MI_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */
+#define MI_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L)
+#define MI_DYN_MAX_ROW_LENGTH (MI_DYN_MAX_BLOCK_LENGTH - MI_SPLIT_LENGTH)
+#define MI_DYN_ALIGN_SIZE 4 /* Align blocks on this */
+#define MI_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */
+#define MI_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MI_DYN_ALIGN_SIZE-1)))
#define MI_REC_BUFF_OFFSET ALIGN_SIZE(MI_DYN_DELETE_BLOCK_HEADER+sizeof(uint32))
-#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */
+#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */
-#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */
-#define PACK_TYPE_SPACE_FIELDS 2
-#define PACK_TYPE_ZERO_FILL 4
-#define MI_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */
+#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */
+#define PACK_TYPE_SPACE_FIELDS 2
+#define PACK_TYPE_ZERO_FILL 4
+#define MI_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */
-#define MI_MAX_KEY_BLOCK_SIZE (MI_MAX_KEY_BLOCK_LENGTH/MI_MIN_KEY_BLOCK_LENGTH)
+#define MI_MAX_KEY_BLOCK_SIZE (MI_MAX_KEY_BLOCK_LENGTH/MI_MIN_KEY_BLOCK_LENGTH)
#define MI_BLOCK_SIZE(key_length,data_pointer,key_pointer,block_size) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/(block_size)+1)*(block_size))
-#define MI_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */
-#define MI_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */
+#define MI_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */
+#define MI_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */
-#define MI_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */
+#define MI_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */
#define MI_MIN_ROWS_TO_USE_BULK_INSERT 100
#define MI_MIN_ROWS_TO_DISABLE_INDEXES 100
#define MI_MIN_ROWS_TO_USE_WRITE_CACHE 10
/* The UNIQUE check is done with a hashed long key */
-#define MI_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT
+#define MI_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT
#define mi_unique_store(A,B) mi_int4store((A),(B))
#ifdef THREAD
@@ -474,175 +415,180 @@ extern pthread_mutex_t THR_LOCK_myisam;
#define rw_unlock(A) {}
#endif
- /* Some extern variables */
+/* Some extern variables */
extern LIST *myisam_open_list;
-extern uchar NEAR myisam_file_magic[],NEAR myisam_pack_file_magic[];
-extern uint NEAR myisam_read_vec[],NEAR myisam_readnext_vec[];
+extern uchar NEAR myisam_file_magic[], NEAR myisam_pack_file_magic[];
+extern uint NEAR myisam_read_vec[], NEAR myisam_readnext_vec[];
extern uint myisam_quick_table_bits;
extern File myisam_log_file;
extern ulong myisam_pid;
- /* This is used by _mi_calc_xxx_key_length och _mi_store_key */
+/* This is used by _mi_calc_xxx_key_length och _mi_store_key */
typedef struct st_mi_s_param
{
- uint ref_length,key_length,
- n_ref_length,
- n_length,
- totlength,
- part_of_prev_key,prev_length,pack_marker;
- uchar *key, *prev_key,*next_key_pos;
- bool store_not_null;
+ uint ref_length, key_length,
+ n_ref_length,
+ n_length, totlength, part_of_prev_key, prev_length, pack_marker;
+ uchar *key, *prev_key, *next_key_pos;
+ bool store_not_null;
} MI_KEY_PARAM;
- /* Prototypes for intern functions */
+/* Prototypes for intern functions */
-extern int _mi_read_dynamic_record(MI_INFO *info,my_off_t filepos,uchar *buf);
-extern int _mi_write_dynamic_record(MI_INFO*, const uchar*);
-extern int _mi_update_dynamic_record(MI_INFO*, my_off_t, const uchar*);
+extern int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, uchar *buf);
+extern int _mi_write_dynamic_record(MI_INFO *, const uchar *);
+extern int _mi_update_dynamic_record(MI_INFO *, my_off_t, const uchar *);
extern int _mi_delete_dynamic_record(MI_INFO *info);
-extern int _mi_cmp_dynamic_record(MI_INFO *info,const uchar *record);
-extern int _mi_read_rnd_dynamic_record(MI_INFO *, uchar *,my_off_t, my_bool);
-extern int _mi_write_blob_record(MI_INFO*, const uchar*);
-extern int _mi_update_blob_record(MI_INFO*, my_off_t, const uchar*);
-extern int _mi_read_static_record(MI_INFO *info, my_off_t filepos,uchar *buf);
-extern int _mi_write_static_record(MI_INFO*, const uchar*);
-extern int _mi_update_static_record(MI_INFO*, my_off_t, const uchar*);
+extern int _mi_cmp_dynamic_record(MI_INFO *info, const uchar *record);
+extern int _mi_read_rnd_dynamic_record(MI_INFO *, uchar *, my_off_t, my_bool);
+extern int _mi_write_blob_record(MI_INFO *, const uchar *);
+extern int _mi_update_blob_record(MI_INFO *, my_off_t, const uchar *);
+extern int _mi_read_static_record(MI_INFO *info, my_off_t filepos, uchar *buf);
+extern int _mi_write_static_record(MI_INFO *, const uchar *);
+extern int _mi_update_static_record(MI_INFO *, my_off_t, const uchar *);
extern int _mi_delete_static_record(MI_INFO *info);
-extern int _mi_cmp_static_record(MI_INFO *info,const uchar *record);
-extern int _mi_read_rnd_static_record(MI_INFO*, uchar *,my_off_t, my_bool);
-extern int _mi_ck_write(MI_INFO *info,uint keynr,uchar *key,uint length);
+extern int _mi_cmp_static_record(MI_INFO *info, const uchar *record);
+extern int _mi_read_rnd_static_record(MI_INFO *, uchar *, my_off_t, my_bool);
+extern int _mi_ck_write(MI_INFO *info, uint keynr, uchar *key, uint length);
extern int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo,
uchar *key, uint key_length,
my_off_t *root, uint comp_flag);
-extern int _mi_enlarge_root(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, my_off_t *root);
-extern int _mi_insert(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,
- uchar *anc_buff,uchar *key_pos,uchar *key_buff,
- uchar *father_buff, uchar *father_keypos,
- my_off_t father_page, my_bool insert_last);
-extern int _mi_split_page(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,
- uchar *buff,uchar *key_buff, my_bool insert_last);
-extern uchar *_mi_find_half_pos(uint nod_flag,MI_KEYDEF *keyinfo,uchar *page,
- uchar *key,uint *return_key_length,
- uchar **after_key);
-extern int _mi_calc_static_key_length(MI_KEYDEF *keyinfo,uint nod_flag,
- uchar *key_pos, uchar *org_key,
- uchar *key_buff,
- uchar *key, MI_KEY_PARAM *s_temp);
-extern int _mi_calc_var_key_length(MI_KEYDEF *keyinfo,uint nod_flag,
- uchar *key_pos, uchar *org_key,
- uchar *key_buff,
- uchar *key, MI_KEY_PARAM *s_temp);
-extern int _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag,
- uchar *key_pos, uchar *org_key,
- uchar *prev_key,
- uchar *key, MI_KEY_PARAM *s_temp);
-extern int _mi_calc_bin_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag,
- uchar *key_pos,uchar *org_key,
- uchar *prev_key,
- uchar *key, MI_KEY_PARAM *s_temp);
-void _mi_store_static_key(MI_KEYDEF *keyinfo, uchar *key_pos,
- MI_KEY_PARAM *s_temp);
-void _mi_store_var_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos,
- MI_KEY_PARAM *s_temp);
+extern int _mi_enlarge_root(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key,
+ my_off_t *root);
+extern int _mi_insert(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key,
+ uchar *anc_buff, uchar *key_pos, uchar *key_buff,
+ uchar *father_buff, uchar *father_keypos,
+ my_off_t father_page, my_bool insert_last);
+extern int _mi_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key,
+ uchar *buff, uchar *key_buff, my_bool insert_last);
+extern uchar *_mi_find_half_pos(uint nod_flag, MI_KEYDEF *keyinfo,
+ uchar *page, uchar *key,
+ uint *return_key_length, uchar ** after_key);
+extern int _mi_calc_static_key_length(MI_KEYDEF *keyinfo, uint nod_flag,
+ uchar *key_pos, uchar *org_key,
+ uchar *key_buff, uchar *key,
+ MI_KEY_PARAM *s_temp);
+extern int _mi_calc_var_key_length(MI_KEYDEF *keyinfo, uint nod_flag,
+ uchar *key_pos, uchar *org_key,
+ uchar *key_buff, uchar *key,
+ MI_KEY_PARAM *s_temp);
+extern int _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo, uint nod_flag,
+ uchar *key_pos, uchar *org_key,
+ uchar *prev_key, uchar *key,
+ MI_KEY_PARAM *s_temp);
+extern int _mi_calc_bin_pack_key_length(MI_KEYDEF *keyinfo, uint nod_flag,
+ uchar *key_pos, uchar *org_key,
+ uchar *prev_key, uchar *key,
+ MI_KEY_PARAM *s_temp);
+void _mi_store_static_key(MI_KEYDEF *keyinfo, uchar *key_pos,
+ MI_KEY_PARAM *s_temp);
+void _mi_store_var_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos,
+ MI_KEY_PARAM *s_temp);
#ifdef NOT_USED
-void _mi_store_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos,
- MI_KEY_PARAM *s_temp);
+void _mi_store_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos,
+ MI_KEY_PARAM *s_temp);
#endif
-void _mi_store_bin_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos,
- MI_KEY_PARAM *s_temp);
+void _mi_store_bin_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos,
+ MI_KEY_PARAM *s_temp);
-extern int _mi_ck_delete(MI_INFO *info,uint keynr,uchar *key,uint key_length);
-extern int _mi_readinfo(MI_INFO *info,int lock_flag,int check_keybuffer);
-extern int _mi_writeinfo(MI_INFO *info,uint options);
+extern int _mi_ck_delete(MI_INFO *info, uint keynr, uchar *key,
+ uint key_length);
+extern int _mi_readinfo(MI_INFO *info, int lock_flag, int check_keybuffer);
+extern int _mi_writeinfo(MI_INFO *info, uint options);
extern int _mi_test_if_changed(MI_INFO *info);
extern int _mi_mark_file_changed(MI_INFO *info);
extern int _mi_decrement_open_count(MI_INFO *info);
-extern int _mi_check_index(MI_INFO *info,int inx);
-extern int _mi_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,uint key_len,
- uint nextflag,my_off_t pos);
-extern int _mi_bin_search(struct st_myisam_info *info,MI_KEYDEF *keyinfo,
- uchar *page,uchar *key,uint key_len,uint comp_flag,
- uchar * *ret_pos,uchar *buff, my_bool *was_last_key);
-extern int _mi_seq_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *page,
- uchar *key,uint key_len,uint comp_flag,
- uchar **ret_pos,uchar *buff, my_bool *was_last_key);
-extern int _mi_prefix_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *page,
- uchar *key,uint key_len,uint comp_flag,
- uchar **ret_pos,uchar *buff, my_bool *was_last_key);
-extern my_off_t _mi_kpos(uint nod_flag,uchar *after_key);
-extern void _mi_kpointer(MI_INFO *info,uchar *buff,my_off_t pos);
-extern my_off_t _mi_dpos(MI_INFO *info, uint nod_flag,uchar *after_key);
+extern int _mi_check_index(MI_INFO *info, int inx);
+extern int _mi_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key,
+ uint key_len, uint nextflag, my_off_t pos);
+extern int _mi_bin_search(struct st_myisam_info *info, MI_KEYDEF *keyinfo,
+ uchar *page, uchar *key, uint key_len,
+ uint comp_flag, uchar **ret_pos, uchar *buff,
+ my_bool *was_last_key);
+extern int _mi_seq_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page,
+ uchar *key, uint key_len, uint comp_flag,
+ uchar ** ret_pos, uchar *buff,
+ my_bool *was_last_key);
+extern int _mi_prefix_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page,
+ uchar *key, uint key_len, uint comp_flag,
+ uchar ** ret_pos, uchar *buff,
+ my_bool *was_last_key);
+extern my_off_t _mi_kpos(uint nod_flag, uchar *after_key);
+extern void _mi_kpointer(MI_INFO *info, uchar *buff, my_off_t pos);
+extern my_off_t _mi_dpos(MI_INFO *info, uint nod_flag, uchar *after_key);
extern my_off_t _mi_rec_pos(MYISAM_SHARE *info, uchar *ptr);
-extern void _mi_dpointer(MI_INFO *info, uchar *buff,my_off_t pos);
-extern int ha_key_cmp(HA_KEYSEG *keyseg, uchar *a,uchar *b,
- uint key_length,uint nextflag,uint *diff_length);
-extern uint _mi_get_static_key(MI_KEYDEF *keyinfo,uint nod_flag,uchar * *page,
- uchar *key);
-extern uint _mi_get_pack_key(MI_KEYDEF *keyinfo,uint nod_flag,uchar * *page,
- uchar *key);
+extern void _mi_dpointer(MI_INFO *info, uchar *buff, my_off_t pos);
+extern uint _mi_get_static_key(MI_KEYDEF *keyinfo, uint nod_flag,
+ uchar **page, uchar *key);
+extern uint _mi_get_pack_key(MI_KEYDEF *keyinfo, uint nod_flag, uchar **page,
+ uchar *key);
extern uint _mi_get_binary_pack_key(MI_KEYDEF *keyinfo, uint nod_flag,
- uchar **page_pos, uchar *key);
-extern uchar *_mi_get_last_key(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *keypos,
- uchar *lastkey,uchar *endpos,
- uint *return_key_length);
+ uchar ** page_pos, uchar *key);
+extern uchar *_mi_get_last_key(MI_INFO *info, MI_KEYDEF *keyinfo,
+ uchar *keypos, uchar *lastkey, uchar *endpos,
+ uint *return_key_length);
extern uchar *_mi_get_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page,
- uchar *key, uchar *keypos, uint *return_key_length);
-extern uint _mi_keylength(MI_KEYDEF *keyinfo,uchar *key);
+ uchar *key, uchar *keypos,
+ uint *return_key_length);
+extern uint _mi_keylength(MI_KEYDEF *keyinfo, uchar *key);
extern uint _mi_keylength_part(MI_KEYDEF *keyinfo, register uchar *key,
- HA_KEYSEG *end);
-extern uchar *_mi_move_key(MI_KEYDEF *keyinfo,uchar *to,uchar *from);
-extern int _mi_search_next(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,
- uint key_length,uint nextflag,my_off_t pos);
-extern int _mi_search_first(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos);
-extern int _mi_search_last(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos);
-extern uchar *_mi_fetch_keypage(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t page,
- int level,uchar *buff,int return_buffer);
-extern int _mi_write_keypage(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t page,
- int level, uchar *buff);
-extern int _mi_dispose(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos,
- int level);
-extern my_off_t _mi_new(MI_INFO *info,MI_KEYDEF *keyinfo,int level);
-extern uint _mi_make_key(MI_INFO *info,uint keynr,uchar *key,
- const uchar *record,my_off_t filepos);
-extern uint _mi_pack_key(register MI_INFO *info, uint keynr, uchar *key,
+ HA_KEYSEG *end);
+extern uchar *_mi_move_key(MI_KEYDEF *keyinfo, uchar *to, uchar *from);
+extern int _mi_search_next(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key,
+ uint key_length, uint nextflag, my_off_t pos);
+extern int _mi_search_first(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos);
+extern int _mi_search_last(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos);
+extern uchar *_mi_fetch_keypage(MI_INFO *info, MI_KEYDEF *keyinfo,
+ my_off_t page, int level, uchar *buff,
+ int return_buffer);
+extern int _mi_write_keypage(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t page,
+ int level, uchar *buff);
+extern int _mi_dispose(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos,
+ int level);
+extern my_off_t _mi_new(MI_INFO *info, MI_KEYDEF *keyinfo, int level);
+extern uint _mi_make_key(MI_INFO *info, uint keynr, uchar *key,
+ const uchar *record, my_off_t filepos);
+extern uint _mi_pack_key(MI_INFO *info, uint keynr, uchar *key,
uchar *old, key_part_map keypart_map,
- HA_KEYSEG **last_used_keyseg);
-extern int _mi_read_key_record(MI_INFO *info,my_off_t filepos,uchar *buf);
-extern int _mi_read_cache(IO_CACHE *info,uchar *buff,my_off_t pos,
- uint length,int re_read_if_possibly);
-extern ulonglong retrieve_auto_increment(MI_INFO *info,const uchar *record);
+ HA_KEYSEG ** last_used_keyseg);
+extern int _mi_read_key_record(MI_INFO *info, my_off_t filepos, uchar *buf);
+extern int _mi_read_cache(IO_CACHE *info, uchar *buff, my_off_t pos,
+ uint length, int re_read_if_possibly);
+extern ulonglong retrieve_auto_increment(MI_INFO *info, const uchar *record);
-extern uchar *mi_alloc_rec_buff(MI_INFO *,ulong, uchar**);
+extern uchar *mi_alloc_rec_buff(MI_INFO *, ulong, uchar **);
#define mi_get_rec_buff_ptr(info,buf) \
((((info)->s->options & HA_OPTION_PACK_RECORD) && (buf)) ? \
(buf) - MI_REC_BUFF_OFFSET : (buf))
#define mi_get_rec_buff_len(info,buf) \
(*((uint32 *)(mi_get_rec_buff_ptr(info,buf))))
-extern ulong _mi_rec_unpack(MI_INFO *info,uchar *to,uchar *from,
- ulong reclength);
+extern ulong _mi_rec_unpack(MI_INFO *info, uchar *to, uchar *from,
+ ulong reclength);
extern my_bool _mi_rec_check(MI_INFO *info,const uchar *record, uchar *packpos,
ulong packed_length, my_bool with_checkum);
-extern int _mi_write_part_record(MI_INFO *info,my_off_t filepos,ulong length,
- my_off_t next_filepos,uchar **record,
- ulong *reclength,int *flag);
-extern void _mi_print_key(FILE *stream,HA_KEYSEG *keyseg,const uchar *key,
- uint length);
-extern my_bool _mi_read_pack_info(MI_INFO *info,pbool fix_keys);
-extern int _mi_read_pack_record(MI_INFO *info,my_off_t filepos,uchar *buf);
-extern int _mi_read_rnd_pack_record(MI_INFO*, uchar *,my_off_t, my_bool);
+extern int _mi_write_part_record(MI_INFO *info, my_off_t filepos, ulong length,
+ my_off_t next_filepos, uchar ** record,
+ ulong *reclength, int *flag);
+extern void _mi_print_key(FILE *stream, HA_KEYSEG *keyseg, const uchar *key,
+ uint length);
+extern my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys);
+extern int _mi_read_pack_record(MI_INFO *info, my_off_t filepos, uchar *buf);
+extern int _mi_read_rnd_pack_record(MI_INFO *, uchar *, my_off_t, my_bool);
extern int _mi_pack_rec_unpack(MI_INFO *info, MI_BIT_BUFF *bit_buff,
uchar *to, uchar *from, ulong reclength);
-extern ulonglong mi_safe_mul(ulonglong a,ulonglong b);
+extern ulonglong mi_safe_mul(ulonglong a, ulonglong b);
extern int _mi_ft_update(MI_INFO *info, uint keynr, uchar *keybuf,
- const uchar *oldrec, const uchar *newrec, my_off_t pos);
+ const uchar *oldrec, const uchar *newrec,
+ my_off_t pos);
struct st_sort_info;
-typedef struct st_mi_block_info { /* Parameter to _mi_get_block_info */
+typedef struct st_mi_block_info /* Parameter to _mi_get_block_info */
+{
uchar header[MI_BLOCK_INFO_HEADER_LENGTH];
ulong rec_len;
ulong data_len;
@@ -655,35 +601,37 @@ typedef struct st_mi_block_info { /* Parameter to _mi_get_block_info */
uint offset;
} MI_BLOCK_INFO;
- /* bits in return from _mi_get_block_info */
-
-#define BLOCK_FIRST 1
-#define BLOCK_LAST 2
-#define BLOCK_DELETED 4
-#define BLOCK_ERROR 8 /* Wrong data */
-#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */
-#define BLOCK_FATAL_ERROR 32 /* hardware-error */
-
-#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Nead for recursion */
-#define MAXERR 20
-#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */
-#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE
-#define INDEX_TMP_EXT ".TMM"
-#define DATA_TMP_EXT ".TMD"
-
-#define UPDATE_TIME 1
-#define UPDATE_STAT 2
-#define UPDATE_SORT 4
-#define UPDATE_AUTO_INC 8
-#define UPDATE_OPEN_COUNT 16
-
-#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE)
-#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD)
-#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD)
-#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD)
-
-enum myisam_log_commands {
- MI_LOG_OPEN,MI_LOG_WRITE,MI_LOG_UPDATE,MI_LOG_DELETE,MI_LOG_CLOSE,MI_LOG_EXTRA,MI_LOG_LOCK,MI_LOG_DELETE_ALL
+ /* bits in return from _mi_get_block_info */
+
+#define BLOCK_FIRST 1
+#define BLOCK_LAST 2
+#define BLOCK_DELETED 4
+#define BLOCK_ERROR 8 /* Wrong data */
+#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */
+#define BLOCK_FATAL_ERROR 32 /* hardware-error */
+
+#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Nead for recursion */
+#define MAXERR 20
+#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */
+#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE
+#define INDEX_TMP_EXT ".TMM"
+#define DATA_TMP_EXT ".TMD"
+
+#define UPDATE_TIME 1
+#define UPDATE_STAT 2
+#define UPDATE_SORT 4
+#define UPDATE_AUTO_INC 8
+#define UPDATE_OPEN_COUNT 16
+
+#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE)
+#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD)
+#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD)
+#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD)
+
+enum myisam_log_commands
+{
+ MI_LOG_OPEN, MI_LOG_WRITE, MI_LOG_UPDATE, MI_LOG_DELETE, MI_LOG_CLOSE,
+ MI_LOG_EXTRA, MI_LOG_LOCK, MI_LOG_DELETE_ALL
};
#define myisam_log(a,b,c,d) if (myisam_log_file >= 0) _myisam_log(a,b,c,d)
@@ -693,29 +641,27 @@ enum myisam_log_commands {
#define fast_mi_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _mi_writeinfo((INFO),0)
#define fast_mi_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _mi_readinfo((INFO),F_RDLCK,1)
-#ifdef __cplusplus
+#ifdef __cplusplus
extern "C" {
#endif
-
-extern uint _mi_get_block_info(MI_BLOCK_INFO *,File, my_off_t);
-extern uint _mi_rec_pack(MI_INFO *info,uchar *to,const uchar *from);
+ extern uint _mi_get_block_info(MI_BLOCK_INFO *, File, my_off_t);
+extern uint _mi_rec_pack(MI_INFO *info, uchar *to, const uchar *from);
extern uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
MI_BLOCK_INFO *info, uchar **rec_buff_p,
File file, my_off_t filepos);
-extern void _my_store_blob_length(uchar *pos,uint pack_length,uint length);
-extern void _myisam_log(enum myisam_log_commands command,MI_INFO *info,
- const uchar *buffert,uint length);
+extern void _mi_store_blob_length(uchar *pos, uint pack_length, uint length);
+extern void _myisam_log(enum myisam_log_commands command, MI_INFO *info,
+ const uchar *buffert, uint length);
extern void _myisam_log_command(enum myisam_log_commands command,
- MI_INFO *info, const uchar *buffert,
- uint length, int result);
-extern void _myisam_log_record(enum myisam_log_commands command,MI_INFO *info,
- const uchar *record,my_off_t filepos,
- int result);
+ MI_INFO *info, const uchar *buffert,
+ uint length, int result);
+extern void _myisam_log_record(enum myisam_log_commands command, MI_INFO *info,
+ const uchar *record, my_off_t filepos,
+ int result);
extern void mi_report_error(int errcode, const char *file_name);
extern my_bool _mi_memmap_file(MI_INFO *info);
extern void _mi_unmap_file(MI_INFO *info);
extern uint save_pack_length(uint version, uchar *block_buff, ulong length);
-extern uint read_pack_length(uint version, const uchar *buf, ulong *length);
extern uint calc_pack_length(uint version, ulong length);
extern size_t mi_mmap_pread(MI_INFO *info, uchar *Buffer,
size_t Count, my_off_t offset, myf MyFlags);
@@ -730,7 +676,7 @@ uint mi_state_info_write(File file, MI_STATE_INFO *state, uint pWrite);
uchar *mi_state_info_read(uchar *ptr, MI_STATE_INFO *state);
uint mi_state_info_read_dsk(File file, MI_STATE_INFO *state, my_bool pRead);
uint mi_base_info_write(File file, MI_BASE_INFO *base);
-uchar *my_n_base_info_read(uchar *ptr, MI_BASE_INFO *base);
+uchar *mi_n_base_info_read(uchar *ptr, MI_BASE_INFO *base);
int mi_keyseg_write(File file, const HA_KEYSEG *keyseg);
uchar *mi_keyseg_read(uchar *ptr, HA_KEYSEG *keyseg);
uint mi_keydef_write(File file, MI_KEYDEF *keydef);
@@ -742,23 +688,23 @@ uchar *mi_recinfo_read(uchar *ptr, MI_COLUMNDEF *recinfo);
extern int mi_disable_indexes(MI_INFO *info);
extern int mi_enable_indexes(MI_INFO *info);
extern int mi_indexes_are_disabled(MI_INFO *info);
-ulong _my_calc_total_blob_length(MI_INFO *info, const uchar *record);
+ulong _mi_calc_total_blob_length(MI_INFO *info, const uchar *record);
ha_checksum mi_checksum(MI_INFO *info, const uchar *buf);
ha_checksum mi_static_checksum(MI_INFO *info, const uchar *buf);
my_bool mi_check_unique(MI_INFO *info, MI_UNIQUEDEF *def, uchar *record,
- ha_checksum unique_hash, my_off_t pos);
+ ha_checksum unique_hash, my_off_t pos);
ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const uchar *buf);
int _mi_cmp_static_unique(MI_INFO *info, MI_UNIQUEDEF *def,
- const uchar *record, my_off_t pos);
+ const uchar *record, my_off_t pos);
int _mi_cmp_dynamic_unique(MI_INFO *info, MI_UNIQUEDEF *def,
- const uchar *record, my_off_t pos);
+ const uchar *record, my_off_t pos);
int mi_unique_comp(MI_UNIQUEDEF *def, const uchar *a, const uchar *b,
- my_bool null_are_equal);
-void mi_get_status(void* param, int concurrent_insert);
-void mi_update_status(void* param);
-void mi_restore_status(void* param);
-void mi_copy_status(void* to,void *from);
-my_bool mi_check_status(void* param);
+ my_bool null_are_equal);
+void mi_get_status(void *param, int concurrent_insert);
+void mi_update_status(void *param);
+void mi_restore_status(void *param);
+void mi_copy_status(void *to, void *from);
+my_bool mi_check_status(void *param);
void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows);
extern MI_INFO *test_if_reopen(char *filename);
@@ -770,22 +716,14 @@ my_bool mi_dynmap_file(MI_INFO *info, my_off_t size);
void mi_remap_file(MI_INFO *info, my_off_t size);
/* Functions needed by mi_check */
-volatile int *killed_ptr(MI_CHECK *param);
-void mi_check_print_error _VARARGS((MI_CHECK *param, const char *fmt,...));
-void mi_check_print_warning _VARARGS((MI_CHECK *param, const char *fmt,...));
-void mi_check_print_info _VARARGS((MI_CHECK *param, const char *fmt,...));
-int flush_pending_blocks(MI_SORT_PARAM *param);
-int sort_ft_buf_flush(MI_SORT_PARAM *sort_param);
-int thr_write_keys(MI_SORT_PARAM *sort_param);
+volatile int *killed_ptr(HA_CHECK *param);
+void mi_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...));
+void mi_check_print_warning _VARARGS((HA_CHECK *param, const char *fmt, ...));
+void mi_check_print_info _VARARGS((HA_CHECK *param, const char *fmt, ...));
#ifdef THREAD
pthread_handler_t thr_find_all_keys(void *arg);
#endif
-int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file);
-
-int sort_write_record(MI_SORT_PARAM *sort_param);
-int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, ulong);
-
+int flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file);
#ifdef __cplusplus
}
#endif
-
diff --git a/storage/myisam/myisamlog.c b/storage/myisam/myisamlog.c
index 6566a7a7a02..04c0d9543d7 100644
--- a/storage/myisam/myisamlog.c
+++ b/storage/myisam/myisamlog.c
@@ -808,7 +808,7 @@ static int find_record_with_key(struct file_info *file_info, uchar *record)
{
uint key;
MI_INFO *info=file_info->isam;
- uchar tmp_key[MI_MAX_KEY_BUFF];
+ uchar tmp_key[HA_MAX_KEY_BUFF];
for (key=0 ; key < info->s->base.keys ; key++)
{
diff --git a/storage/myisam/myisampack.c b/storage/myisam/myisampack.c
index 7233ebbff80..8deddcc9f1d 100644
--- a/storage/myisam/myisampack.c
+++ b/storage/myisam/myisampack.c
@@ -305,7 +305,7 @@ static void usage(void)
puts("and you are welcome to modify and redistribute it under the GPL license\n");
puts("Pack a MyISAM-table to take much less space.");
- puts("Keys are not updated, you must run myisamchk -rq on the datafile");
+ puts("Keys are not updated, you must run myisamchk -rq on the index (.MYI) file");
puts("afterwards to update the keys.");
puts("You should give the .MYI file as the filename argument.");
@@ -1008,7 +1008,7 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
/* Calculate pos, end_pos, and max_length for variable length fields. */
if (count->field_type == FIELD_BLOB)
{
- uint field_length=count->field_length -mi_portable_sizeof_char_ptr;
+ uint field_length=count->field_length -portable_sizeof_char_ptr;
ulong blob_length= _mi_calc_blob_length(field_length, start_pos);
memcpy_fixed((char*) &pos, start_pos+field_length,sizeof(char*));
end_pos=pos+blob_length;
@@ -2650,7 +2650,7 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
case FIELD_BLOB:
{
ulong blob_length=_mi_calc_blob_length(field_length-
- mi_portable_sizeof_char_ptr,
+ portable_sizeof_char_ptr,
start_pos);
/* Empty blobs are encoded with a single 1 bit. */
if (!blob_length)
@@ -2667,7 +2667,7 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u",
blob_length, count->length_bits));
write_bits(blob_length,count->length_bits);
- memcpy_fixed(&blob,end_pos-mi_portable_sizeof_char_ptr,
+ memcpy_fixed(&blob,end_pos-portable_sizeof_char_ptr,
sizeof(char*));
blob_end=blob+blob_length;
/* Encode the blob bytes. */
diff --git a/storage/myisam/plug.in b/storage/myisam/plug.in
index 051ec2d54aa..e92b5e56d7f 100644
--- a/storage/myisam/plug.in
+++ b/storage/myisam/plug.in
@@ -1,7 +1,7 @@
-MYSQL_STORAGE_ENGINE(myisam,no, [MyISAM Storage Engine],
- [Traditional non-transactional MySQL tables])
-MYSQL_PLUGIN_DIRECTORY(myisam, [storage/myisam])
-MYSQL_PLUGIN_STATIC(myisam, [libmyisam.a])
-MYSQL_PLUGIN_MANDATORY(myisam) dnl Default
-MYSQL_PLUGIN_DEPENDS_ON_MYSQL_INTERNALS(myisam, [ha_myisam.cc])
+dnl MYSQL_STORAGE_ENGINE(myisam,no, [MyISAM Storage Engine],
+dnl [Traditional non-transactional MySQL tables])
+dnl MYSQL_PLUGIN_DIRECTORY(myisam, [storage/myisam])
+dnl MYSQL_PLUGIN_STATIC(myisam, [libmyisam.a])
+dnl MYSQL_PLUGIN_MANDATORY(myisam) dnl Default
+dnl MYSQL_PLUGIN_DEPENDS_ON_MYSQL_INTERNALS(myisam, [ha_myisam.cc])
diff --git a/storage/myisam/rt_index.c b/storage/myisam/rt_index.c
index 25f9d7c19e4..9db7fe88030 100644
--- a/storage/myisam/rt_index.c
+++ b/storage/myisam/rt_index.c
@@ -534,7 +534,7 @@ static int rtree_insert_req(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key,
DBUG_ENTER("rtree_insert_req");
if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length +
- MI_MAX_KEY_BUFF)))
+ HA_MAX_KEY_BUFF)))
{
my_errno = HA_ERR_OUT_OF_MEM;
DBUG_RETURN(-1); /* purecov: inspected */
@@ -650,7 +650,7 @@ static int rtree_insert_level(MI_INFO *info, uint keynr, uchar *key,
DBUG_PRINT("rtree", ("root was split, grow a new root"));
if (!(new_root_buf = (uchar*)my_alloca((uint)keyinfo->block_length +
- MI_MAX_KEY_BUFF)))
+ HA_MAX_KEY_BUFF)))
{
my_errno = HA_ERR_OUT_OF_MEM;
DBUG_RETURN(-1); /* purecov: inspected */
diff --git a/storage/myisam/sort.c b/storage/myisam/sort.c
index d505d2633ce..b2507ad8886 100644
--- a/storage/myisam/sort.c
+++ b/storage/myisam/sort.c
@@ -15,7 +15,7 @@
/*
Creates a index for a database by reading keys, sorting them and outputing
- them in sorted order through SORT_INFO functions.
+ them in sorted order through MI_SORT_INFO functions.
*/
#include "fulltext.h"
@@ -487,8 +487,8 @@ ok:
int thr_write_keys(MI_SORT_PARAM *sort_param)
{
- SORT_INFO *sort_info=sort_param->sort_info;
- MI_CHECK *param=sort_info->param;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
ulong length, keys;
ulong *rec_per_key_part=param->rec_per_key_part;
int got_error=sort_info->got_error;
@@ -920,7 +920,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
{
count+= buffpek->count;
- buffpek->base= strpos;
+ buffpek->base= (uchar*) strpos;
buffpek->max_keys=maxcount;
strpos+= (uint) (error=(int) info->read_to_buffer(from_file,buffpek,
sort_length));
@@ -958,7 +958,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
{
if (!(error=(int) info->read_to_buffer(from_file,buffpek,sort_length)))
{
- uchar *base=buffpek->base;
+ uchar *base= buffpek->base;
uint max_keys=buffpek->max_keys;
VOID(queue_remove(&queue,0));
@@ -990,7 +990,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
}
}
buffpek=(BUFFPEK*) queue_top(&queue);
- buffpek->base=(uchar *) sort_keys;
+ buffpek->base= (uchar*) sort_keys;
buffpek->max_keys=keys;
do
{
@@ -1005,7 +1005,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
else
{
register uchar *end;
- strpos= buffpek->key;
+ strpos= (uchar*) buffpek->key;
for (end=strpos+buffpek->mem_count*sort_length;
strpos != end ;
strpos+=sort_length)
diff --git a/storage/myisam/sp_test.c b/storage/myisam/sp_test.c
index dee32ba423e..f572c7ab19b 100644
--- a/storage/myisam/sp_test.c
+++ b/storage/myisam/sp_test.c
@@ -79,7 +79,7 @@ int run_test(const char *filename)
/* Define spatial column */
recinfo[1].type=FIELD_BLOB;
- recinfo[1].length=4 + mi_portable_sizeof_char_ptr;
+ recinfo[1].length=4 + portable_sizeof_char_ptr;
diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc
index 3fccb91e9b5..30c9e6f4943 100644
--- a/storage/myisammrg/ha_myisammrg.cc
+++ b/storage/myisammrg/ha_myisammrg.cc
@@ -134,9 +134,11 @@ static const char *ha_myisammrg_exts[] = {
};
extern int table2myisam(TABLE *table_arg, MI_KEYDEF **keydef_out,
MI_COLUMNDEF **recinfo_out, uint *records_out);
-extern int check_definition(MI_KEYDEF *t1_keyinfo, MI_COLUMNDEF *t1_recinfo,
+extern int check_definition(MI_KEYDEF *t1_keyinfo,
+ MI_COLUMNDEF *t1_recinfo,
uint t1_keys, uint t1_recs,
- MI_KEYDEF *t2_keyinfo, MI_COLUMNDEF *t2_recinfo,
+ MI_KEYDEF *t2_keyinfo,
+ MI_COLUMNDEF *t2_recinfo,
uint t2_keys, uint t2_recs, bool strict);
static void split_file_name(const char *file_name,
LEX_STRING *db, LEX_STRING *name);
@@ -877,7 +879,8 @@ int ha_myisammrg::extra(enum ha_extra_function operation)
/* As this is just a mapping, we don't have to force the underlying
tables to be closed */
if (operation == HA_EXTRA_FORCE_REOPEN ||
- operation == HA_EXTRA_PREPARE_FOR_DELETE)
+ operation == HA_EXTRA_PREPARE_FOR_DROP ||
+ operation == HA_EXTRA_PREPARE_FOR_RENAME)
return 0;
return myrg_extra(file,operation,0);
}
diff --git a/storage/myisammrg/ha_myisammrg.h b/storage/myisammrg/ha_myisammrg.h
index 977c45d1435..7516b8193c7 100644
--- a/storage/myisammrg/ha_myisammrg.h
+++ b/storage/myisammrg/ha_myisammrg.h
@@ -51,8 +51,8 @@ class ha_myisammrg: public handler
HA_READ_ORDER | HA_KEYREAD_ONLY);
}
uint max_supported_keys() const { return MI_MAX_KEY; }
- uint max_supported_key_length() const { return MI_MAX_KEY_LENGTH; }
- uint max_supported_key_part_length() const { return MI_MAX_KEY_LENGTH; }
+ uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; }
+ uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; }
double scan_time()
{ return ulonglong2double(stats.data_file_length) / IO_SIZE + file->tables; }
diff --git a/strings/llstr.c b/strings/llstr.c
index 643cf36a311..678f8b05f39 100644
--- a/strings/llstr.c
+++ b/strings/llstr.c
@@ -38,3 +38,4 @@ char *ullstr(longlong value,char *buff)
longlong10_to_str(value,buff,10);
return buff;
}
+
diff --git a/support-files/compiler_warnings.supp b/support-files/compiler_warnings.supp
index c437748c770..703f6c2a272 100644
--- a/support-files/compiler_warnings.supp
+++ b/support-files/compiler_warnings.supp
@@ -70,6 +70,11 @@ db_vrfy.c : .*comparison is always false due to limited range of data type.*
.*/ndb/.* : .*defined but not used.*
#
+# Maria warning that is ok in debug builds
+#
+storage/maria/ma_pagecache.c: .*'info_check_pin' defined but not used
+
+#
# Unexplanable (?) stuff
#
listener.cc : .*conversion from 'SOCKET' to 'int'.*
diff --git a/support-files/magic b/support-files/magic
index 9844142ba93..b3f3b3ea29d 100644
--- a/support-files/magic
+++ b/support-files/magic
@@ -4,12 +4,23 @@
#
0 beshort 0xfe01 MySQL table definition file
>2 byte x Version %d
-0 belong&0xffffff00 0xfefe0300 MySQL MISAM index file
+0 belong&0xffffff00 0xfefe0700 MySQL MyISAM index file
>3 byte x Version %d
-0 belong&0xffffff00 0xfefe0700 MySQL MISAM compressed data file
+0 belong&0xffffff00 0xfefe0800 MySQL MyISAM compressed data file
+>3 byte x Version %d
+0 belong&0xffffff00 0xfefe0900 MySQL Maria index file
+>3 byte x Version %d
+0 belong&0xffffff00 0xfefe0A00 MySQL Maria compressed data file
>3 byte x Version %d
0 belong&0xffffff00 0xfefe0500 MySQL ISAM index file
>3 byte x Version %d
0 belong&0xffffff00 0xfefe0600 MySQL ISAM compressed data file
>3 byte x Version %d
0 string \376bin MySQL replication log
+0 belong&0xffffff00 0xfefe0b00
+>4 string MARIALOG MySQL Maria transaction log file
+>>3 byte x Version %d
+0 belong&0xffffff00 0xfefe0c00
+>4 string MACF MySQL Maria control file
+>>3 byte x Version %d
+
diff --git a/unittest/Makefile.am b/unittest/Makefile.am
index 65fa615fb98..8684fd3fc7d 100644
--- a/unittest/Makefile.am
+++ b/unittest/Makefile.am
@@ -13,7 +13,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-SUBDIRS = mytap . mysys examples
+SUBDIRS = mytap mysys examples
EXTRA_DIST = unit.pl
CLEANFILES = unit
diff --git a/unittest/mysys/Makefile.am b/unittest/mysys/Makefile.am
index be91ef31c9d..36ee285201e 100644
--- a/unittest/mysys/Makefile.am
+++ b/unittest/mysys/Makefile.am
@@ -13,15 +13,15 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include
-AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
+INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \
+ -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
+
+noinst_PROGRAMS = bitmap-t base64-t my_atomic-t
LDADD = $(top_builddir)/unittest/mytap/libmytap.a \
$(top_builddir)/mysys/libmysys.a \
$(top_builddir)/dbug/libdbug.a \
$(top_builddir)/strings/libmystrings.a
-noinst_PROGRAMS = bitmap-t base64-t my_atomic-t
-
# Don't update the files from bitkeeper
%::SCCS/s.%
diff --git a/unittest/mysys/my_atomic-t.c b/unittest/mysys/my_atomic-t.c
index f2bcd360508..a24cbae272a 100644
--- a/unittest/mysys/my_atomic-t.c
+++ b/unittest/mysys/my_atomic-t.c
@@ -17,6 +17,7 @@
#include <my_sys.h>
#include <my_atomic.h>
#include <tap.h>
+#include <lf.h>
/* at least gcc 3.4.5 and 3.4.6 (but not 3.2.3) on RHEL */
#if __GNUC__ == 3 && __GNUC_MINOR__ == 4
@@ -25,22 +26,24 @@
#define GCC_BUG_WORKAROUND
#endif
-int32 a32,b32,c32;
+volatile uint32 a32,b32;
+volatile int32 c32, N;
my_atomic_rwlock_t rwl;
+LF_ALLOCATOR lf_allocator;
+LF_HASH lf_hash;
-pthread_attr_t thr_attr;
-pthread_mutex_t mutex;
-pthread_cond_t cond;
-int N;
+pthread_attr_t attr;
+size_t stacksize= 0;
+#define STACK_SIZE (((int)stacksize-2048)*STACK_DIRECTION)
/* add and sub a random number in a loop. Must get 0 at the end */
pthread_handler_t test_atomic_add_handler(void *arg)
{
- int m=*(int *)arg;
+ int m= (*(int *)arg)/2;
GCC_BUG_WORKAROUND int32 x;
- for (x=((int)((long)(&m))); m ; m--)
+ for (x= ((int)(intptr)(&m)); m ; m--)
{
- x=x*m+0x87654321;
+ x= (x*m+0x87654321) & INT_MAX32;
my_atomic_rwlock_wrlock(&rwl);
my_atomic_add32(&a32, x);
my_atomic_rwlock_wrunlock(&rwl);
@@ -49,10 +52,6 @@ pthread_handler_t test_atomic_add_handler(void *arg)
my_atomic_add32(&a32, -x);
my_atomic_rwlock_wrunlock(&rwl);
}
- pthread_mutex_lock(&mutex);
- N--;
- if (!N) pthread_cond_signal(&cond);
- pthread_mutex_unlock(&mutex);
return 0;
}
@@ -64,30 +63,24 @@ pthread_handler_t test_atomic_add_handler(void *arg)
5. subtract result from a32
must get 0 in a32 at the end
*/
-pthread_handler_t test_atomic_swap_handler(void *arg)
+pthread_handler_t test_atomic_fas_handler(void *arg)
{
- int m=*(int *)arg;
- int32 x;
+ int m= *(int *)arg;
+ uint32 x= my_atomic_add32(&b32, 1);
- my_atomic_rwlock_wrlock(&rwl);
- x=my_atomic_add32(&b32, 1);
- my_atomic_rwlock_wrunlock(&rwl);
-
- my_atomic_rwlock_wrlock(&rwl);
my_atomic_add32(&a32, x);
- my_atomic_rwlock_wrunlock(&rwl);
for (; m ; m--)
{
my_atomic_rwlock_wrlock(&rwl);
- x=my_atomic_swap32(&c32, x);
+ x= my_atomic_fas32(&c32, x);
my_atomic_rwlock_wrunlock(&rwl);
}
if (!x)
{
my_atomic_rwlock_wrlock(&rwl);
- x=my_atomic_swap32(&c32, x);
+ x= my_atomic_fas32(&c32, x);
my_atomic_rwlock_wrunlock(&rwl);
}
@@ -95,111 +88,225 @@ pthread_handler_t test_atomic_swap_handler(void *arg)
my_atomic_add32(&a32, -x);
my_atomic_rwlock_wrunlock(&rwl);
- pthread_mutex_lock(&mutex);
- N--;
- if (!N) pthread_cond_signal(&cond);
- pthread_mutex_unlock(&mutex);
return 0;
}
/*
same as test_atomic_add_handler, but my_atomic_add32 is emulated with
- (slower) my_atomic_cas32
+ my_atomic_cas32 - notice that the slowdown is proportional to the
+ number of CPUs
*/
pthread_handler_t test_atomic_cas_handler(void *arg)
{
- int m=*(int *)arg, ok;
- GCC_BUG_WORKAROUND int32 x,y;
- for (x=((int)((long)(&m))); m ; m--)
+ int m= (*(int *)arg)/2, ok= 0;
+ GCC_BUG_WORKAROUND int32 x, y;
+ for (x= ((int)(intptr)(&m)); m ; m--)
{
my_atomic_rwlock_wrlock(&rwl);
- y=my_atomic_load32(&a32);
+ y= my_atomic_load32(&a32);
my_atomic_rwlock_wrunlock(&rwl);
-
- x=x*m+0x87654321;
+ x= (x*m+0x87654321) & INT_MAX32;
do {
my_atomic_rwlock_wrlock(&rwl);
- ok=my_atomic_cas32(&a32, &y, y+x);
+ ok= my_atomic_cas32(&a32, &y, (uint32)y+x);
my_atomic_rwlock_wrunlock(&rwl);
- } while (!ok);
+ } while (!ok) ;
do {
my_atomic_rwlock_wrlock(&rwl);
- ok=my_atomic_cas32(&a32, &y, y-x);
+ ok= my_atomic_cas32(&a32, &y, y-x);
my_atomic_rwlock_wrunlock(&rwl);
- } while (!ok);
+ } while (!ok) ;
}
- pthread_mutex_lock(&mutex);
- N--;
- if (!N) pthread_cond_signal(&cond);
- pthread_mutex_unlock(&mutex);
+ return 0;
+}
+
+/*
+ pin allocator - alloc and release an element in a loop
+*/
+pthread_handler_t test_lf_pinbox(void *arg)
+{
+ int m= *(int *)arg;
+ int32 x= 0;
+ LF_PINS *pins;
+
+ pins= lf_pinbox_get_pins(&lf_allocator.pinbox, &m + STACK_SIZE);
+
+ for (x= ((int)(intptr)(&m)); m ; m--)
+ {
+ lf_pinbox_put_pins(pins);
+ pins= lf_pinbox_get_pins(&lf_allocator.pinbox, &m + STACK_SIZE);
+ }
+ lf_pinbox_put_pins(pins);
+ return 0;
+}
+
+typedef union {
+ int32 data;
+ void *not_used;
+} TLA;
+
+pthread_handler_t test_lf_alloc(void *arg)
+{
+ int m= (*(int *)arg)/2;
+ int32 x,y= 0;
+ LF_PINS *pins;
+
+ pins= lf_alloc_get_pins(&lf_allocator, &m + STACK_SIZE);
+
+ for (x= ((int)(intptr)(&m)); m ; m--)
+ {
+ TLA *node1, *node2;
+ x= (x*m+0x87654321) & INT_MAX32;
+ node1= (TLA *)lf_alloc_new(pins);
+ node1->data= x;
+ y+= node1->data;
+ node1->data= 0;
+ node2= (TLA *)lf_alloc_new(pins);
+ node2->data= x;
+ y-= node2->data;
+ node2->data= 0;
+ lf_alloc_free(pins, node1);
+ lf_alloc_free(pins, node2);
+ }
+ lf_alloc_put_pins(pins);
+ my_atomic_rwlock_wrlock(&rwl);
+ my_atomic_add32(&a32, y);
+
+ if (my_atomic_add32(&N, -1) == 1)
+ {
+ diag("%d mallocs, %d pins in stack",
+ lf_allocator.mallocs, lf_allocator.pinbox.pins_in_array);
+#ifdef MY_LF_EXTRA_DEBUG
+ a32|= lf_allocator.mallocs - lf_alloc_pool_count(&lf_allocator);
+#endif
+ }
+ my_atomic_rwlock_wrunlock(&rwl);
+ return 0;
+}
+
+#define N_TLH 1000
+pthread_handler_t test_lf_hash(void *arg)
+{
+ int m= (*(int *)arg)/(2*N_TLH);
+ int32 x,y,z,sum= 0, ins= 0;
+ LF_PINS *pins;
+
+ pins= lf_hash_get_pins(&lf_hash, &m + STACK_SIZE);
+
+ for (x= ((int)(intptr)(&m)); m ; m--)
+ {
+ int i;
+ y= x;
+ for (i= 0; i < N_TLH; i++)
+ {
+ x= (x*(m+i)+0x87654321) & INT_MAX32;
+ z= (x<0) ? -x : x;
+ if (lf_hash_insert(&lf_hash, pins, &z))
+ {
+ sum+= z;
+ ins++;
+ }
+ }
+ for (i= 0; i < N_TLH; i++)
+ {
+ y= (y*(m+i)+0x87654321) & INT_MAX32;
+ z= (y<0) ? -y : y;
+ if (lf_hash_delete(&lf_hash, pins, (uchar *)&z, sizeof(z)))
+ sum-= z;
+ }
+ }
+ lf_hash_put_pins(pins);
+ my_atomic_rwlock_wrlock(&rwl);
+ my_atomic_add32(&a32, sum);
+ my_atomic_add32(&b32, ins);
+
+ if (my_atomic_add32(&N, -1) == 1)
+ {
+ diag("%d mallocs, %d pins in stack, %d hash size, %d inserts",
+ lf_hash.alloc.mallocs, lf_hash.alloc.pinbox.pins_in_array,
+ lf_hash.size, b32);
+ a32|= lf_hash.count;
+ }
+ my_atomic_rwlock_wrunlock(&rwl);
return 0;
}
void test_atomic(const char *test, pthread_handler handler, int n, int m)
{
- pthread_t t;
- ulonglong now=my_getsystime();
+ pthread_t *threads;
+ ulonglong now= my_getsystime();
+ int i;
a32= 0;
b32= 0;
c32= 0;
+ threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0));
+ if (!threads)
+ {
+ diag("Out of memory");
+ abort();
+ }
+
diag("Testing %s with %d threads, %d iterations... ", test, n, m);
- for (N=n ; n ; n--)
+ N= n;
+ for (i= 0 ; i < n ; i++)
{
- if (pthread_create(&t, &thr_attr, handler, &m) != 0)
+ if (pthread_create(threads+i, 0, handler, &m) != 0)
{
diag("Could not create thread");
- a32= 1;
- goto err;
+ abort();
}
}
-
- pthread_mutex_lock(&mutex);
- while (N)
- pthread_cond_wait(&cond, &mutex);
- pthread_mutex_unlock(&mutex);
- now=my_getsystime()-now;
-err:
- ok(a32 == 0, "tested %s in %g secs", test, ((double)now)/1e7);
+ for (i= 0 ; i < n ; i++)
+ pthread_join(threads[i], 0);
+ now= my_getsystime()-now;
+ ok(a32 == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, a32);
+ my_free((void *)threads, MYF(0));
}
+
int main()
{
int err;
MY_INIT("my_atomic-t.c");
- diag("N CPUs: %d", my_getncpus());
+ diag("N CPUs: %d, atomic ops: %s", my_getncpus(), MY_ATOMIC_MODE);
err= my_atomic_initialize();
- plan(4);
+ plan(7);
ok(err == 0, "my_atomic_initialize() returned %d", err);
- pthread_attr_init(&thr_attr);
- pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
- pthread_mutex_init(&mutex, 0);
- pthread_cond_init(&cond, 0);
my_atomic_rwlock_init(&rwl);
+ lf_alloc_init(&lf_allocator, sizeof(TLA), offsetof(TLA, not_used));
+ lf_hash_init(&lf_hash, sizeof(int), LF_HASH_UNIQUE, 0, sizeof(int), 0,
+ &my_charset_bin);
-#ifdef HPUX11
-#define CYCLES 1000
+ pthread_attr_init(&attr);
+#ifdef HAVE_PTHREAD_ATTR_GETSTACKSIZE
+ pthread_attr_getstacksize(&attr, &stacksize);
+ if (stacksize == 0)
+#endif
+ stacksize= PTHREAD_STACK_MIN;
+
+#ifdef MY_ATOMIC_MODE_RWLOCKS
+#define CYCLES 3000
#else
-#define CYCLES 10000
+#define CYCLES 300000
#endif
#define THREADS 100
- test_atomic("my_atomic_add32", test_atomic_add_handler, THREADS, CYCLES);
- test_atomic("my_atomic_swap32", test_atomic_swap_handler, THREADS, CYCLES);
- test_atomic("my_atomic_cas32", test_atomic_cas_handler, THREADS, CYCLES);
- /*
- workaround until we know why it crashes randomly on some machine
- (BUG#22320).
- */
- sleep(2);
-
- pthread_mutex_destroy(&mutex);
- pthread_cond_destroy(&cond);
- pthread_attr_destroy(&thr_attr);
+
+ test_atomic("my_atomic_add32", test_atomic_add_handler, THREADS,CYCLES);
+ test_atomic("my_atomic_fas32", test_atomic_fas_handler, THREADS,CYCLES);
+ test_atomic("my_atomic_cas32", test_atomic_cas_handler, THREADS,CYCLES);
+ test_atomic("lf_pinbox", test_lf_pinbox, THREADS,CYCLES);
+ test_atomic("lf_alloc", test_lf_alloc, THREADS,CYCLES);
+ test_atomic("lf_hash", test_lf_hash, THREADS,CYCLES/10);
+
+ lf_hash_destroy(&lf_hash);
+ lf_alloc_destroy(&lf_allocator);
my_atomic_rwlock_destroy(&rwl);
+ my_end(0);
return exit_status();
}
diff --git a/unittest/mytap/tap.c b/unittest/mytap/tap.c
index 4e053e3e745..f9396adbd69 100644
--- a/unittest/mytap/tap.c
+++ b/unittest/mytap/tap.c
@@ -166,9 +166,17 @@ static signal_entry install_signal[]= {
#endif
};
+int skip_big_tests= 0;
+
void
plan(int const count)
{
+ char *config= getenv("MYTAP_CONFIG");
+
+ if (config)
+ skip_big_tests= strcmp(config, "big");
+
+ setvbuf(tapout, 0, _IONBF, 0); /* provide output at once */
/*
Install signal handler
*/
diff --git a/unittest/mytap/tap.h b/unittest/mytap/tap.h
index 31ec47d1ef2..f92fad1101f 100644
--- a/unittest/mytap/tap.h
+++ b/unittest/mytap/tap.h
@@ -62,6 +62,24 @@ extern "C" {
#endif
/**
+ Defines whether "big" tests should be skipped.
+
+ This variable is set by plan() function unless MYTAP_CONFIG environment
+ variable is set to the string "big". It is supposed to be used as
+
+ @code
+ if (skip_big_tests) {
+ skip(1, "Big test skipped");
+ } else {
+ ok(life_universe_and_everything() == 42, "The answer is CORRECT");
+ }
+ @endcode
+
+ @see SKIP_BIG_TESTS
+*/
+extern int skip_big_tests;
+
+/**
@defgroup MyTAP_API MyTAP API
MySQL support for performing unit tests according to TAP.
@@ -81,7 +99,12 @@ extern "C" {
that generate a core, so if you want to override these signals, do
it <em>after</em> you have called the plan() function.
- @param count The planned number of tests to run.
+ It will also set skip_big_tests variable if MYTAP_CONFIG environment
+ variable is defined.
+
+ @see skip_big_tests
+
+ @param count The planned number of tests to run.
*/
void plan(int count);
@@ -161,6 +184,24 @@ void skip(int how_many, char const *reason, ...)
/**
+ Helper macro to skip a group of "big" tests. It is used in the following
+ manner:
+
+ @code
+ SKIP_BIG_TESTS(1)
+ {
+ ok(life_universe_and_everything() == 42, "The answer is CORRECT");
+ }
+ @endcode
+
+ @see skip_big_tests
+ */
+
+#define SKIP_BIG_TESTS(COUNT) \
+ if (skip_big_tests) skip((COUNT), "big test"); else
+
+
+/**
Print a diagnostics message.
@param fmt Diagnostics message in printf() format.
diff --git a/unittest/unit.pl b/unittest/unit.pl
index 9d328985012..b83132581f9 100644
--- a/unittest/unit.pl
+++ b/unittest/unit.pl
@@ -14,8 +14,9 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-use Test::Harness qw(&runtests $verbose);
+use Test::Harness;
use File::Find;
+use Getopt::Long;
use strict;
@@ -35,6 +36,15 @@ unit - Run unit tests in directory
=cut
+my $big=1;
+
+my $result = GetOptions (
+ "big!" => \$big,
+ "verbose!" => \$Test::Harness::verbose,
+);
+
+$ENV{'MYTAP_CONFIG'} = $big ? "big" : "";
+
my $cmd = shift;
if (defined $cmd && exists $dispatch{$cmd}) {