author     Alexander Barkov <bar@mariadb.org>    2017-10-18 14:11:55 +0400
committer  Alexander Barkov <bar@mariadb.org>    2017-10-18 14:11:55 +0400
commit     30e7d6709f7fb0f70a07c80a1a06614ca23da5f4 (patch)
tree       9ba11fd32d82ac5c1364a537532ac896c4dd2058
parent     75aabd03d57f85d63d57b25a239b4f930a3ae3c0 (diff)
parent     3bc094d32a360b7d51600cf11bc4ce24117ecb78 (diff)
download   mariadb-git-30e7d6709f7fb0f70a07c80a1a06614ca23da5f4.tar.gz
Merge remote-tracking branch 'origin/10.2' into bb-10.2-ext
-rw-r--r-- extra/mariabackup/backup_copy.cc | 3
-rw-r--r-- extra/mariabackup/write_filt.cc | 2
-rw-r--r-- extra/mariabackup/xtrabackup.cc | 35
-rw-r--r-- mysql-test/lib/My/SafeProcess.pm | 7
-rw-r--r-- mysql-test/lib/My/SafeProcess/CMakeLists.txt | 1
-rw-r--r-- mysql-test/lib/My/SafeProcess/safe_kill_win.cc | 82
-rw-r--r-- mysql-test/r/cte_nonrecursive.result | 36
-rw-r--r-- mysql-test/r/cte_recursive.result | 31
-rw-r--r-- mysql-test/r/func_json.result | 20
-rw-r--r-- mysql-test/r/gis-precise.result | 19
-rw-r--r-- mysql-test/r/gis2.result | 24
-rw-r--r-- mysql-test/r/information_schema.result | 32
-rw-r--r-- mysql-test/r/type_float.result | 26
-rw-r--r-- mysql-test/suite/innodb/disabled.def | 9
-rw-r--r-- mysql-test/suite/innodb/r/innodb-alter.result | 30
-rw-r--r-- mysql-test/suite/innodb/r/innodb-online-alter-gis.result | 10
-rw-r--r-- mysql-test/suite/innodb/r/innodb_defrag_concurrent.result | 17
-rw-r--r-- mysql-test/suite/innodb/r/log_file_size.result | 8
-rw-r--r-- mysql-test/suite/innodb/t/innodb-alter.test | 28
-rw-r--r-- mysql-test/suite/innodb/t/innodb-online-alter-gis.test | 10
-rw-r--r-- mysql-test/suite/innodb/t/innodb_defrag_concurrent.test | 28
-rw-r--r-- mysql-test/suite/innodb/t/log_file_size.test | 25
-rw-r--r-- mysql-test/suite/innodb_gis/r/alter_spatial_index.result | 16
-rw-r--r-- mysql-test/suite/innodb_gis/t/alter_spatial_index.test | 20
-rw-r--r-- mysql-test/suite/mariabackup/xb_file_key_management.result | 1
-rw-r--r-- mysql-test/suite/mariabackup/xb_file_key_management.test | 1
-rw-r--r-- mysql-test/t/cte_nonrecursive.test | 19
-rw-r--r-- mysql-test/t/func_json.test | 16
-rw-r--r-- mysql-test/t/gis-precise.test | 19
-rw-r--r-- mysql-test/t/gis2.test | 28
-rw-r--r-- mysql-test/t/information_schema.test | 26
-rw-r--r-- mysql-test/t/type_float.test | 15
-rw-r--r-- plugin/aws_key_management/CMakeLists.txt | 2
-rw-r--r-- sql/event_data_objects.cc | 30
-rw-r--r-- sql/events.cc | 18
-rw-r--r-- sql/field.cc | 13
-rw-r--r-- sql/field.h | 4
-rw-r--r-- sql/item.h | 2
-rw-r--r-- sql/item_geofunc.cc | 12
-rw-r--r-- sql/item_jsonfunc.cc | 22
-rw-r--r-- sql/item_sum.cc | 2
-rw-r--r-- sql/item_xmlfunc.cc | 24
-rw-r--r-- sql/log_event.cc | 20
-rw-r--r-- sql/mysqld.cc | 4
-rw-r--r-- sql/opt_range.cc | 40
-rw-r--r-- sql/opt_range_mrr.cc | 12
-rw-r--r-- sql/opt_sum.cc | 4
-rw-r--r-- sql/opt_table_elimination.cc | 8
-rw-r--r-- sql/parse_file.cc | 2
-rw-r--r-- sql/partition_info.cc | 2
-rw-r--r-- sql/rpl_record.cc | 6
-rw-r--r-- sql/rpl_record_old.cc | 6
-rw-r--r-- sql/share/errmsg-utf8.txt | 38
-rw-r--r-- sql/spatial.cc | 2
-rw-r--r-- sql/sql_acl.cc | 2
-rw-r--r-- sql/sql_base.cc | 2
-rw-r--r-- sql/sql_cache.cc | 4
-rw-r--r-- sql/sql_cte.cc | 10
-rw-r--r-- sql/sql_db.cc | 4
-rw-r--r-- sql/sql_join_cache.cc | 6
-rw-r--r-- sql/sql_lex.cc | 27
-rw-r--r-- sql/sql_lex.h | 1
-rw-r--r-- sql/sql_parse.cc | 7
-rw-r--r-- sql/sql_plugin.cc | 20
-rw-r--r-- sql/sql_repl.cc | 6
-rw-r--r-- sql/sql_select.cc | 52
-rw-r--r-- sql/sql_show.cc | 20
-rw-r--r-- sql/sql_statistics.cc | 20
-rw-r--r-- sql/sql_table.cc | 18
-rw-r--r-- sql/sql_test.cc | 2
-rw-r--r-- sql/sql_time.cc | 2
-rw-r--r-- sql/sql_trigger.cc | 5
-rw-r--r-- sql/sql_update.cc | 2
-rw-r--r-- sql/sql_view.cc | 6
-rw-r--r-- sql/sql_yacc.yy | 4
-rw-r--r-- sql/strfunc.cc | 2
-rw-r--r-- sql/sys_vars.cc | 2
-rw-r--r-- sql/table.cc | 6
-rw-r--r-- sql/unireg.cc | 8
-rw-r--r-- storage/connect/csort.cpp | 16
-rw-r--r-- storage/connect/domdoc.cpp | 3
-rw-r--r-- storage/connect/filamap.cpp | 12
-rw-r--r-- storage/connect/filamgz.cpp | 8
-rw-r--r-- storage/connect/filamtxt.cpp | 9
-rw-r--r-- storage/connect/filamzip.cpp | 4
-rw-r--r-- storage/connect/fmdlex.c | 8
-rw-r--r-- storage/connect/macutil.cpp | 4
-rw-r--r-- storage/connect/myconn.cpp | 2
-rw-r--r-- storage/connect/odbconn.cpp | 2
-rw-r--r-- storage/connect/plgdbutl.cpp | 2
-rw-r--r-- storage/connect/tabfmt.cpp | 6
-rw-r--r-- storage/connect/tabmac.cpp | 4
-rw-r--r-- storage/connect/value.cpp | 4
-rw-r--r-- storage/connect/xobject.cpp | 4
-rw-r--r-- storage/innobase/btr/btr0btr.cc | 98
-rw-r--r-- storage/innobase/btr/btr0bulk.cc | 3
-rw-r--r-- storage/innobase/btr/btr0cur.cc | 1
-rw-r--r-- storage/innobase/btr/btr0defragment.cc | 11
-rw-r--r-- storage/innobase/buf/buf0flu.cc | 2
-rw-r--r-- storage/innobase/dict/dict0dict.cc | 3
-rw-r--r-- storage/innobase/dict/dict0mem.cc | 61
-rw-r--r-- storage/innobase/fil/fil0fil.cc | 226
-rw-r--r-- storage/innobase/fsp/fsp0sysspace.cc | 3
-rw-r--r-- storage/innobase/fts/fts0que.cc | 39
-rw-r--r-- storage/innobase/handler/ha_innodb.cc | 60
-rw-r--r-- storage/innobase/handler/handler0alter.cc | 18
-rw-r--r-- storage/innobase/include/btr0cur.h | 1
-rw-r--r-- storage/innobase/include/dict0dict.h | 8
-rw-r--r-- storage/innobase/include/dict0dict.ic | 16
-rw-r--r-- storage/innobase/include/fts0fts.h | 4
-rw-r--r-- storage/innobase/include/os0file.h | 28
-rw-r--r-- storage/innobase/include/row0mysql.h | 3
-rw-r--r-- storage/innobase/include/srv0srv.h | 6
-rw-r--r-- storage/innobase/lock/lock0lock.cc | 14
-rw-r--r-- storage/innobase/log/log0recv.cc | 12
-rw-r--r-- storage/innobase/os/os0file.cc | 106
-rw-r--r-- storage/innobase/row/row0ins.cc | 4
-rw-r--r-- storage/innobase/row/row0log.cc | 2
-rw-r--r-- storage/innobase/row/row0upd.cc | 3
-rw-r--r-- storage/innobase/srv/srv0start.cc | 63
-rw-r--r-- storage/innobase/sync/sync0rw.cc | 13
-rw-r--r-- storage/innobase/trx/trx0trx.cc | 1
-rw-r--r-- storage/rocksdb/CMakeLists.txt | 23
-rw-r--r-- storage/rocksdb/build_rocksdb.cmake | 29
-rw-r--r-- storage/rocksdb/ha_rocksdb.cc | 829
-rw-r--r-- storage/rocksdb/ha_rocksdb.h | 72
-rw-r--r-- storage/rocksdb/ha_rocksdb_proto.h | 1
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc | 174
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc | 141
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc | 21
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/my.cnf | 3
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result | 14
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result | 13
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result | 26
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result | 103
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result | 4
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result | 10
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result | 73
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result | 1009
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result | 490
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/drop_table.result | 3
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result | 2
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result | 2
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result | 4
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result | 80
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result | 6
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/index.result | 27
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/index_primary.result | 23
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result | 27
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result | 27
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result | 18
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result | 37
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/perf_context.result | 3
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result | 52
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/show_engine.result | 33
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result | 4
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result | 2
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result | 4
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result | 709
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result | 510
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result | 82
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result | 389
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/r/write_sync.result | 25
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/suite.pm | 4
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test | 6
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test | 8
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test | 26
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test | 137
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test | 8
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test | 22
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test | 79
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test | 18
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test | 153
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/disabled.def | 9
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/drop_table.test | 7
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test | 3
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc | 3
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test | 4
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test | 120
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/index.inc | 34
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/index_primary.test | 32
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test | 49
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/perf_context.test | 4
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test | 2
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/show_engine.test | 13
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test | 39
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test | 2
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt | 2
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test | 780
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt | 1
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test | 500
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test | 87
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt | 1
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test | 300
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test | 1
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb/t/write_sync.test | 23
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result | 9
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result | 39
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test | 29
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test | 2
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf | 1
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result | 100
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result | 14
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result | 79
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result | 64
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result | 4
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result | 68
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result | 64
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result | 14
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result | 53
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result | 93
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result | 7
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result | 19
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test | 18
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test | 16
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test | 17
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test | 18
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test | 4
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test | 18
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test | 18
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test | 16
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test | 17
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test | 49
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test | 6
-rw-r--r-- storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test | 21
-rw-r--r-- storage/rocksdb/rdb_buff.h | 16
-rw-r--r-- storage/rocksdb/rdb_compact_filter.h | 8
-rw-r--r-- storage/rocksdb/rdb_datadic.cc | 375
-rw-r--r-- storage/rocksdb/rdb_datadic.h | 67
-rw-r--r-- storage/rocksdb/rdb_i_s.cc | 16
-rw-r--r-- storage/rocksdb/rdb_index_merge.cc | 186
-rw-r--r-- storage/rocksdb/rdb_index_merge.h | 70
-rw-r--r-- storage/rocksdb/rdb_mariadb_server_port.h | 3
-rw-r--r-- storage/rocksdb/rdb_perf_context.cc | 22
-rw-r--r-- storage/rocksdb/rdb_perf_context.h | 16
-rw-r--r-- storage/rocksdb/rdb_utils.cc | 26
m--------- storage/rocksdb/rocksdb | 0
-rw-r--r-- storage/xtradb/buf/buf0flu.cc | 3
-rw-r--r-- storage/xtradb/fil/fil0fil.cc | 162
-rw-r--r-- storage/xtradb/os/os0file.cc | 21
-rw-r--r-- strings/json_lib.c | 2
-rw-r--r-- unittest/mysys/ma_dyncol-t.c | 2
-rw-r--r-- unittest/mysys/my_getopt-t.c | 2
243 files changed, 10217 insertions, 1290 deletions
diff --git a/extra/mariabackup/backup_copy.cc b/extra/mariabackup/backup_copy.cc
index 19cb768cd01..dfe482963a6 100644
--- a/extra/mariabackup/backup_copy.cc
+++ b/extra/mariabackup/backup_copy.cc
@@ -1707,7 +1707,8 @@ copy_back()
if it exists. */
ds_data = ds_create(dst_dir, DS_TYPE_LOCAL);
- if (!file_exists("ib_logfile0")) {
+ MY_STAT stat_arg;
+ if (!my_stat("ib_logfile0", &stat_arg, MYF(0)) || !stat_arg.st_size) {
/* After completed --prepare, redo log files are redundant.
We must delete any redo logs at the destination, so that
the database will not jump to a different log sequence number
diff --git a/extra/mariabackup/write_filt.cc b/extra/mariabackup/write_filt.cc
index a0633818405..76b66fa9953 100644
--- a/extra/mariabackup/write_filt.cc
+++ b/extra/mariabackup/write_filt.cc
@@ -79,7 +79,7 @@ wf_incremental_init(xb_write_filt_ctxt_t *ctxt, char *dst_name,
cp->delta_buf_base = static_cast<byte *>(malloc(buf_size));
memset(cp->delta_buf_base, 0, buf_size);
cp->delta_buf = static_cast<byte *>
- (ut_align(cp->delta_buf_base, UNIV_PAGE_SIZE_MAX));
+ (ut_align(cp->delta_buf_base, cursor->page_size.physical()));
/* write delta meta info */
snprintf(meta_name, sizeof(meta_name), "%s%s", dst_name,
diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc
index f4c0af45cd7..6bc506f14d0 100644
--- a/extra/mariabackup/xtrabackup.cc
+++ b/extra/mariabackup/xtrabackup.cc
@@ -2460,7 +2460,7 @@ static os_thread_ret_t log_copying_thread(void*)
log_copying_running = false;
my_thread_end();
- os_thread_exit(NULL);
+ os_thread_exit();
return(0);
}
@@ -2483,7 +2483,7 @@ static os_thread_ret_t io_watching_thread(void*)
io_watching_thread_running = false;
- os_thread_exit(NULL);
+ os_thread_exit();
return(0);
}
@@ -2523,7 +2523,7 @@ data_copy_thread_func(
pthread_mutex_unlock(&ctxt->count_mutex);
my_thread_end();
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
@@ -4022,8 +4022,7 @@ xb_space_create_file(
}
ret = os_file_set_size(path, *file,
- FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE,
- false);
+ FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE);
if (!ret) {
msg("xtrabackup: cannot set size for file %s\n", path);
os_file_close(*file);
@@ -4414,13 +4413,20 @@ xtrabackup_apply_delta(
if (offset_on_page == 0xFFFFFFFFUL)
break;
+ uchar *buf = incremental_buffer + page_in_buffer * page_size;
+ const os_offset_t off = os_offset_t(offset_on_page)*page_size;
+
+ if (off == 0) {
+ /* Read tablespace size from page 0,
+ and extend the file to specified size.*/
+ os_offset_t n_pages = mach_read_from_4(buf + FSP_HEADER_OFFSET + FSP_SIZE);
+ success = os_file_set_size(dst_path, dst_file, n_pages*page_size);
+ if (!success)
+ goto error;
+ }
+
success = os_file_write(IORequestWrite,
- dst_path, dst_file,
- incremental_buffer +
- page_in_buffer * page_size,
- (offset_on_page <<
- page_size_shift),
- page_size);
+ dst_path, dst_file, buf, off, page_size);
if (!success) {
goto error;
}
@@ -4430,8 +4436,10 @@ xtrabackup_apply_delta(
}
free(incremental_buffer_base);
- if (src_file != OS_FILE_CLOSED)
+ if (src_file != OS_FILE_CLOSED) {
os_file_close(src_file);
+ os_file_delete(0,src_path);
+ }
if (dst_file != OS_FILE_CLOSED)
os_file_close(dst_file);
return TRUE;
@@ -4785,7 +4793,8 @@ xtrabackup_prepare_func(char** argv)
if (!ok) goto error_cleanup;
}
- srv_operation = SRV_OPERATION_RESTORE;
+ srv_operation = xtrabackup_export
+ ? SRV_OPERATION_RESTORE_EXPORT : SRV_OPERATION_RESTORE;
if (innodb_init_param()) {
goto error_cleanup;
diff --git a/mysql-test/lib/My/SafeProcess.pm b/mysql-test/lib/My/SafeProcess.pm
index f3ee772cca3..3260a6ed593 100644
--- a/mysql-test/lib/My/SafeProcess.pm
+++ b/mysql-test/lib/My/SafeProcess.pm
@@ -336,9 +336,14 @@ sub start_kill {
sub dump_core {
my ($self)= @_;
- return if IS_WINDOWS;
my $pid= $self->{SAFE_PID};
die "Can't get core from not started process" unless defined $pid;
+
+ if (IS_WINDOWS) {
+ system("$safe_kill $pid dump");
+ return 1;
+ }
+
_verbose("Sending ABRT to $self");
kill ("ABRT", $pid);
return 1;
diff --git a/mysql-test/lib/My/SafeProcess/CMakeLists.txt b/mysql-test/lib/My/SafeProcess/CMakeLists.txt
index ec93f94a3e8..ff842f3468f 100644
--- a/mysql-test/lib/My/SafeProcess/CMakeLists.txt
+++ b/mysql-test/lib/My/SafeProcess/CMakeLists.txt
@@ -25,6 +25,7 @@ SET(INSTALL_ARGS
IF (WIN32)
MYSQL_ADD_EXECUTABLE(my_safe_process safe_process_win.cc ${INSTALL_ARGS})
MYSQL_ADD_EXECUTABLE(my_safe_kill safe_kill_win.cc ${INSTALL_ARGS})
+ TARGET_LINK_LIBRARIES(my_safe_kill dbghelp psapi)
ELSE()
MYSQL_ADD_EXECUTABLE(my_safe_process safe_process.cc ${INSTALL_ARGS})
ENDIF()
diff --git a/mysql-test/lib/My/SafeProcess/safe_kill_win.cc b/mysql-test/lib/My/SafeProcess/safe_kill_win.cc
index 2ac29c61bc7..e5ec33af571 100644
--- a/mysql-test/lib/My/SafeProcess/safe_kill_win.cc
+++ b/mysql-test/lib/My/SafeProcess/safe_kill_win.cc
@@ -25,6 +25,80 @@
#include <stdio.h>
#include <signal.h>
#include <stdlib.h>
+#include <psapi.h>
+#include <DbgHelp.h>
+
+static int create_dump(DWORD pid)
+{
+ char path[MAX_PATH];
+ char working_dir[MAX_PATH];
+ int ret= -1;
+ HANDLE process= INVALID_HANDLE_VALUE;
+ HANDLE file= INVALID_HANDLE_VALUE;
+ char *p;
+
+ process = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, (DWORD)pid);
+ if (!process)
+ {
+ fprintf(stderr,"safe_kill : cannot open process pid=%u to create dump, last error %u\n",
+ pid, GetLastError());
+ goto exit;
+ }
+
+ DWORD size = MAX_PATH;
+ if (QueryFullProcessImageName(process, 0, path, &size) == 0)
+ {
+ fprintf(stderr,"safe_kill : cannot read process path for pid %u, last error %u\n",
+ pid, GetLastError());
+ goto exit;
+ }
+
+ if ((p = strrchr(path, '.')) == 0)
+ p= path + strlen(path);
+
+ strncpy(p, ".dmp", path + MAX_PATH - p);
+
+ /* Create dump in current directory.*/
+ const char *filename= strrchr(path, '\\');
+ if (filename == 0)
+ filename = path;
+ else
+ filename++;
+
+ if (!GetCurrentDirectory(MAX_PATH, working_dir))
+ {
+ fprintf(stderr, "GetCurrentDirectory failed, last error %u",GetLastError());
+ goto exit;
+ }
+
+ file = CreateFile(filename, GENERIC_READ | GENERIC_WRITE,
+ 0, 0, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0);
+
+ if (file == INVALID_HANDLE_VALUE)
+ {
+ fprintf(stderr,"safe_kill : CreateFile() failed for file %s, working dir %s, last error = %u\n",
+ filename, working_dir, GetLastError());
+ goto exit;
+ }
+
+ if (!MiniDumpWriteDump(process, pid, file, MiniDumpNormal, 0,0,0))
+ {
+ fprintf(stderr, "Failed to write minidump to %s, working dir %s, last error %u\n",
+ filename, working_dir, GetLastError());
+ goto exit;
+ }
+
+ ret = 0;
+ fprintf(stderr, "Minidump written to %s, directory %s\n", filename, working_dir);
+
+exit:
+ if(process!= 0 && process != INVALID_HANDLE_VALUE)
+ CloseHandle(process);
+
+ if (file != 0 && file != INVALID_HANDLE_VALUE)
+ CloseHandle(file);
+ return ret;
+}
int main(int argc, const char** argv )
{
@@ -37,12 +111,16 @@ int main(int argc, const char** argv )
signal(SIGBREAK, SIG_IGN);
signal(SIGTERM, SIG_IGN);
- if (argc != 2) {
- fprintf(stderr, "safe_kill <pid>\n");
+ if ((argc != 2 && argc != 3) || (argc == 3 && strcmp(argv[2],"dump"))) {
+ fprintf(stderr, "safe_kill <pid> [dump]\n");
exit(2);
}
pid= atoi(argv[1]);
+ if (argc == 3)
+ {
+ return create_dump(pid);
+ }
_snprintf(safe_process_name, sizeof(safe_process_name),
"safe_process[%d]", pid);
diff --git a/mysql-test/r/cte_nonrecursive.result b/mysql-test/r/cte_nonrecursive.result
index 2fceebd1971..3ad6fb8fabe 100644
--- a/mysql-test/r/cte_nonrecursive.result
+++ b/mysql-test/r/cte_nonrecursive.result
@@ -86,7 +86,7 @@ select * from t2,t where t2.c=t.a;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 4 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t2.c 2
-2 SUBQUERY t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary; Using filesort
+2 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary; Using filesort
explain
select * from t2, (select a, count(*) from t1 where b >= 'c' group by a) as t
where t2.c=t.a;
@@ -176,7 +176,7 @@ select * from t2 where c in (select c from t);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 4 Using where
1 PRIMARY <derived2> ref key0 key0 8 test.t2.c 2 Using where; FirstMatch(t2)
-2 SUBQUERY t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary; Using filesort
+2 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary; Using filesort
explain
select * from t2
where c in (select c from (select count(*) as c from t1
@@ -245,8 +245,8 @@ select * from t as r1, t as r2 where r1.a=r2.a;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 8 Using where
1 PRIMARY <derived3> ref key0 key0 5 r1.a 2
-3 SUBQUERY t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary
-2 SUBQUERY t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary
+3 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary
+2 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary
explain
select * from (select distinct a from t1 where b >= 'c') as r1,
(select distinct a from t1 where b >= 'c') as r2
@@ -370,7 +370,7 @@ select * from t2,t where t2.c=t.a;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 4 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t2.c 2
-2 SUBQUERY t1 ALL NULL NULL NULL NULL 8 Using where
+2 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where
3 UNION t2 ALL NULL NULL NULL NULL 4 Using where
NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL
explain
@@ -598,7 +598,7 @@ select * from v2;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 4 Using where
1 PRIMARY <derived3> ref key0 key0 5 test.t2.c 2
-3 SUBQUERY t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary; Using filesort
+3 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary; Using filesort
# with clause in the specification of a view that whose definition
# table alias for a with table
create view v3 as
@@ -1055,3 +1055,27 @@ deallocate prepare stmt1;
deallocate prepare stmt2;
drop view v1,v2;
drop table t1,t2;
+#
+# MDEV-13796: UNION of two materialized CTEs
+#
+CREATE TABLE t1 (id int, k int);
+CREATE TABLE t2 (id int);
+INSERT INTO t1 VALUES (3,5), (1,7), (4,3);
+INSERT INTO t2 VALUES (4), (3), (2);
+WITH d1 AS (SELECT SUM(k) FROM t1, t2 as t2 WHERE t1.id = t2.id),
+d2 AS (SELECT SUM(k) FROM t1, t2 as t2 WHERE t1.id = t2.id)
+SELECT * FROM d1 UNION SELECT * FROM d2;
+SUM(k)
+8
+explain WITH d1 AS (SELECT SUM(k) FROM t1, t2 as t2 WHERE t1.id = t2.id),
+d2 AS (SELECT SUM(k) FROM t1, t2 as t2 WHERE t1.id = t2.id)
+SELECT * FROM d1 UNION SELECT * FROM d2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 9
+2 DERIVED t1 ALL NULL NULL NULL NULL 3
+2 DERIVED t2 ALL NULL NULL NULL NULL 3 Using where; Using join buffer (flat, BNL join)
+4 UNION <derived3> ALL NULL NULL NULL NULL 9
+3 DERIVED t1 ALL NULL NULL NULL NULL 3
+3 DERIVED t2 ALL NULL NULL NULL NULL 3 Using where; Using join buffer (flat, BNL join)
+NULL UNION RESULT <union1,4> ALL NULL NULL NULL NULL NULL
+DROP TABLE t1,t2;
diff --git a/mysql-test/r/cte_recursive.result b/mysql-test/r/cte_recursive.result
index 946ba16ac5c..a4f32927cf1 100644
--- a/mysql-test/r/cte_recursive.result
+++ b/mysql-test/r/cte_recursive.result
@@ -86,7 +86,7 @@ select t2.a from t1,t2 where t1.a+1=t2.a
select * from t1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 30
-2 SUBQUERY t2 ALL NULL NULL NULL NULL 5 Using where
+2 DERIVED t2 ALL NULL NULL NULL NULL 5 Using where
3 UNION t1 ALL NULL NULL NULL NULL 5
3 UNION t2 ALL NULL NULL NULL NULL 5 Using where; Using join buffer (flat, BNL join)
NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL
@@ -114,7 +114,7 @@ select t2.a from t1,t2 where t1.a+1=t2.a
select * from t1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 5
-2 SUBQUERY t2 ALL NULL NULL NULL NULL 5 Using where
+2 DERIVED t2 ALL NULL NULL NULL NULL 5 Using where
3 RECURSIVE UNION <derived2> ALL NULL NULL NULL NULL 5
3 RECURSIVE UNION t2 ALL NULL NULL NULL NULL 5 Using where; Using join buffer (flat, BNL join)
NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL
@@ -691,13 +691,13 @@ id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 100.00 Using where
1 PRIMARY <derived3> ref key0 key0 5 c.h_id 2 100.00
1 PRIMARY <derived3> ref key0 key0 5 c.w_id 2 100.00
-3 SUBQUERY folks ALL NULL NULL NULL NULL 12 100.00 Using where
+3 DERIVED folks ALL NULL NULL NULL NULL 12 100.00 Using where
4 RECURSIVE UNION <derived2> ALL NULL NULL NULL NULL 2 100.00
4 RECURSIVE UNION p ALL NULL NULL NULL NULL 12 100.00 Using where; Using join buffer (flat, BNL join)
5 RECURSIVE UNION <derived2> ALL NULL NULL NULL NULL 2 100.00
5 RECURSIVE UNION p ALL NULL NULL NULL NULL 12 100.00 Using where; Using join buffer (flat, BNL join)
NULL UNION RESULT <union3,4,5> ALL NULL NULL NULL NULL NULL NULL
-2 UNCACHEABLE SUBQUERY <derived3> ALL NULL NULL NULL NULL 12 100.00 Using where
+2 DERIVED <derived3> ALL NULL NULL NULL NULL 12 100.00 Using where
Warnings:
Note 1003 with recursive ancestor_couple_ids as (/* select#2 */ select `a`.`father` AS `h_id`,`a`.`mother` AS `w_id` from `coupled_ancestors` `a` where `a`.`father` is not null and `a`.`mother` is not null), coupled_ancestors as (/* select#3 */ select `test`.`folks`.`id` AS `id`,`test`.`folks`.`name` AS `name`,`test`.`folks`.`dob` AS `dob`,`test`.`folks`.`father` AS `father`,`test`.`folks`.`mother` AS `mother` from `test`.`folks` where `test`.`folks`.`name` = 'Me' union all /* select#4 */ select `test`.`p`.`id` AS `id`,`test`.`p`.`name` AS `name`,`test`.`p`.`dob` AS `dob`,`test`.`p`.`father` AS `father`,`test`.`p`.`mother` AS `mother` from `test`.`folks` `p` join `ancestor_couple_ids` `fa` where `test`.`p`.`id` = `fa`.`h_id` union all /* select#5 */ select `test`.`p`.`id` AS `id`,`test`.`p`.`name` AS `name`,`test`.`p`.`dob` AS `dob`,`test`.`p`.`father` AS `father`,`test`.`p`.`mother` AS `mother` from `test`.`folks` `p` join `ancestor_couple_ids` `ma` where `test`.`p`.`id` = `ma`.`w_id`)/* select#1 */ select `h`.`name` AS `name`,`h`.`dob` AS `dob`,`w`.`name` AS `name`,`w`.`dob` AS `dob` from `ancestor_couple_ids` `c` join `coupled_ancestors` `h` join `coupled_ancestors` `w` where `h`.`id` = `c`.`h_id` and `w`.`id` = `c`.`w_id`
# simple mutual recursion
@@ -877,7 +877,7 @@ where p.id = a.father or p.id = a.mother
select * from ancestors;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 12 100.00
-2 SUBQUERY folks ALL NULL NULL NULL NULL 12 100.00 Using where
+2 DERIVED folks ALL NULL NULL NULL NULL 12 100.00 Using where
3 RECURSIVE UNION p ALL NULL NULL NULL NULL 12 100.00
3 RECURSIVE UNION <derived2> ALL NULL NULL NULL NULL 12 100.00 Using where; Using join buffer (flat, BNL join)
NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL NULL
@@ -1236,7 +1236,7 @@ where p.id = ma.mother
select * from ancestors;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 12
-2 SUBQUERY folks ALL NULL NULL NULL NULL 12 Using where
+2 DERIVED folks ALL NULL NULL NULL NULL 12 Using where
3 RECURSIVE UNION p ALL PRIMARY NULL NULL NULL 12
3 RECURSIVE UNION <derived2> ref key0 key0 5 test.p.id 2
4 RECURSIVE UNION p ALL PRIMARY NULL NULL NULL 12
@@ -1300,14 +1300,14 @@ from prev_gen
select ancestors.name, ancestors.dob from ancestors;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived4> ALL NULL NULL NULL NULL 24
-4 SUBQUERY folks ALL NULL NULL NULL NULL 12 Using where
+4 DERIVED folks ALL NULL NULL NULL NULL 12 Using where
6 RECURSIVE UNION <derived3> ALL NULL NULL NULL NULL 12
-5 RECURSIVE UNION <derived4> ALL NULL NULL NULL NULL 24
-NULL UNION RESULT <union4,6,5> ALL NULL NULL NULL NULL NULL
-3 SUBQUERY folks ALL NULL NULL NULL NULL 12 Using where
+3 DERIVED folks ALL NULL NULL NULL NULL 12 Using where
2 RECURSIVE UNION folks ALL PRIMARY NULL NULL NULL 12
2 RECURSIVE UNION <derived3> ALL NULL NULL NULL NULL 12 Using where; Using join buffer (flat, BNL join)
NULL UNION RESULT <union3,2> ALL NULL NULL NULL NULL NULL
+5 RECURSIVE UNION <derived4> ALL NULL NULL NULL NULL 24
+NULL UNION RESULT <union4,6,5> ALL NULL NULL NULL NULL NULL
explain FORMAT=JSON
with recursive
prev_gen
@@ -1353,7 +1353,6 @@ EXPLAIN
{
"query_block": {
"select_id": 4,
- "operation": "UNION",
"table": {
"table_name": "folks",
"access_type": "ALL",
@@ -1382,7 +1381,6 @@ EXPLAIN
{
"query_block": {
"select_id": 3,
- "operation": "UNION",
"table": {
"table_name": "folks",
"access_type": "ALL",
@@ -1489,7 +1487,6 @@ EXPLAIN
{
"query_block": {
"select_id": 3,
- "operation": "UNION",
"table": {
"table_name": "v",
"access_type": "ALL",
@@ -1757,7 +1754,6 @@ EXPLAIN
{
"query_block": {
"select_id": 2,
- "operation": "UNION",
"table": {
"table_name": "t1",
"access_type": "ALL",
@@ -1840,7 +1836,7 @@ select t2.a from t1,t2 where t1.a+1=t2.a
select * from t1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 5
-2 SUBQUERY t2 ALL NULL NULL NULL NULL 5 Using where
+2 DERIVED t2 ALL NULL NULL NULL NULL 5 Using where
4 RECURSIVE UNION <derived2> ALL NULL NULL NULL NULL 5
4 RECURSIVE UNION t2 ALL NULL NULL NULL NULL 5 Using where; Using join buffer (flat, BNL join)
NULL UNION RESULT <union2,4> ALL NULL NULL NULL NULL NULL
@@ -2387,7 +2383,6 @@ ANALYZE
{
"query_block": {
"select_id": 2,
- "operation": "UNION",
"table": {
"message": "No tables used"
}
@@ -2794,7 +2789,7 @@ SELECT c1 FROM t, cte
) SELECT COUNT(*) FROM cte;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 4 4.00 100.00 100.00
-2 SUBQUERY t ALL NULL NULL NULL NULL 4 4.00 100.00 100.00
+2 DERIVED t ALL NULL NULL NULL NULL 4 4.00 100.00 100.00
3 RECURSIVE UNION t ALL NULL NULL NULL NULL 4 4.00 100.00 100.00
3 RECURSIVE UNION <derived2> ALL NULL NULL NULL NULL 4 4.00 100.00 100.00 Using join buffer (flat, BNL join)
NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL 0.00 NULL NULL
@@ -2812,7 +2807,7 @@ SELECT c2 FROM t, cte
) SELECT COUNT(*) FROM cte;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 4 4.00 100.00 100.00
-2 SUBQUERY t ALL NULL NULL NULL NULL 4 4.00 100.00 100.00
+2 DERIVED t ALL NULL NULL NULL NULL 4 4.00 100.00 100.00
3 RECURSIVE UNION t ALL NULL NULL NULL NULL 4 4.00 100.00 100.00
3 RECURSIVE UNION <derived2> ALL NULL NULL NULL NULL 4 4.00 100.00 100.00 Using join buffer (flat, BNL join)
NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL 0.00 NULL NULL
diff --git a/mysql-test/r/func_json.result b/mysql-test/r/func_json.result
index b1d3d96aa73..15e4fbec605 100644
--- a/mysql-test/r/func_json.result
+++ b/mysql-test/r/func_json.result
@@ -152,6 +152,9 @@ json_contains('[{"abc":"def", "def":"abc"}]', '["foo","bar"]')
select json_contains('[{"abc":"def", "def":"abc"}, "bar"]', '["bar", {}]');
json_contains('[{"abc":"def", "def":"abc"}, "bar"]', '["bar", {}]')
1
+select json_contains('[{"a":"b"},{"c":"d"}]','{"c":"d"}');
+json_contains('[{"a":"b"},{"c":"d"}]','{"c":"d"}')
+1
select json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.key2[1]");
json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.key2[1]")
1
@@ -402,6 +405,13 @@ abc
select json_unquote('abc');
json_unquote('abc')
abc
+create table t1 (c VARCHAR(8)) DEFAULT CHARSET=latin1;
+insert into t1 values ('abc'),('def');
+select json_object('foo', json_unquote(json_object('bar', c)),'qux', c) as fld from t1;
+fld
+{"foo": "{\"bar\": \"abc\"}", "qux": "abc"}
+{"foo": "{\"bar\": \"def\"}", "qux": "def"}
+drop table t1;
select json_object("a", json_object("b", "abcd"));
json_object("a", json_object("b", "abcd"))
{"a": {"b": "abcd"}}
@@ -443,6 +453,11 @@ json_length('{"a": 1, "b": {"c": 30}}', '$.b')
select json_length('{"a": 1, "b": {"c": 30}}');
json_length('{"a": 1, "b": {"c": 30}}')
2
+select json_length('{}{');
+json_length('{}{')
+NULL
+Warnings:
+Warning 4038 Syntax error in JSON text in argument 1 to function 'json_length' at position 3
create table json (j INT);
show create table json;
Table Create Table
@@ -705,6 +720,11 @@ json_data
SELECT JSON_OBJECT("user","Jožko Mrkvičká") as json_data;
json_data
{"user": "Jožko Mrkvičká"}
+select json_contains_path('{"foo":"bar"}', 'one', '$[]');
+json_contains_path('{"foo":"bar"}', 'one', '$[]')
+NULL
+Warnings:
+Warning 4042 Syntax error in JSON path in argument 3 to function 'json_contains_path' at position 3
#
# Start of 10.3 tests
#
diff --git a/mysql-test/r/gis-precise.result b/mysql-test/r/gis-precise.result
index f816278a0ba..292dfe0462c 100644
--- a/mysql-test/r/gis-precise.result
+++ b/mysql-test/r/gis-precise.result
@@ -485,6 +485,25 @@ ST_Touches(ST_PolygonFromText('POLYGON((0 0,0 5,5 5,5 0,0 0))'),ST_PointFromText
select ST_Touches(ST_PointFromText('POINT(0 0)'),ST_PointFromText('POINT(0 0)'));
ST_Touches(ST_PointFromText('POINT(0 0)'),ST_PointFromText('POINT(0 0)'))
0
+SELECT ST_RELATE(
+ST_DIFFERENCE(
+GEOMETRYFROMTEXT('
+ MULTILINESTRING(
+ ( 12841 36140, 8005 31007, 26555 31075, 52765 41191,
+ 28978 6548, 45720 32057, 53345 3221 ),
+ ( 8304 59107, 25233 31592, 40502 25303, 8205 42940 ),
+ ( 7829 7305, 58841 56759, 64115 8512, 37562 54145, 2210 14701 ),
+ ( 20379 2805, 40807 27770, 28147 14883, 26439 29383, 55663 5086 ),
+ ( 35944 64702, 14433 23728, 49317 26241, 790 16941 )
+ )
+ '),
+GEOMETRYFROMTEXT('POINT(46061 13545)')
+),
+GEOMETRYFROMTEXT('POINT(4599 60359)'),
+'F*FFFF**F'
+ ) as relate_res;
+relate_res
+0
DROP TABLE IF EXISTS p1;
CREATE PROCEDURE p1(dist DOUBLE, geom TEXT)
BEGIN
diff --git a/mysql-test/r/gis2.result b/mysql-test/r/gis2.result
index 214431e1d2d..c0b476e080b 100644
--- a/mysql-test/r/gis2.result
+++ b/mysql-test/r/gis2.result
@@ -12,3 +12,27 @@ WHERE ST_Contains(point_data, GeomFromText('Point(38.0248492 23.8512726)'));
id
2
DROP TABLE t1;
+create table t1 (p point default "qwer");
+ERROR 22003: Cannot get geometry object from data you send to the GEOMETRY field
+create table t1 (p point default 0);
+ERROR 22003: Cannot get geometry object from data you send to the GEOMETRY field
+create table t1 (p point not null default st_geometryfromtext('point 0)'));
+ERROR 42000: Invalid default value for 'p'
+create table t1 (p point not null default st_geometryfromtext('point(0 0)'));
+insert into t1 values(default);
+select st_astext(p) from t1;
+st_astext(p)
+POINT(0 0)
+drop table t1;
+create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1))));
+set timestamp=10;
+insert into t1 values(default);
+ERROR 22007: Incorrect POINT value: 'GEOMETRYCOLLECTION' for column 'p' at row 1
+drop table t1;
+SET timestamp=default;
+create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1))));
+set timestamp=10;
+alter table t1 add column i int;
+ERROR 22007: Incorrect POINT value: 'GEOMETRYCOLLECTION' for column 'p' at row 1
+drop table t1;
+SET timestamp=default;
diff --git a/mysql-test/r/information_schema.result b/mysql-test/r/information_schema.result
index 01a7099e307..1c073881a9d 100644
--- a/mysql-test/r/information_schema.result
+++ b/mysql-test/r/information_schema.result
@@ -2140,3 +2140,35 @@ drop database db1;
connection default;
disconnect con1;
set global sql_mode=default;
+USE test;
+#
+# End of 10.0 tests
+#
+#
+# Start of 10.1 tests
+#
+#
+# MDEV-13242 Wrong results for queries with row constructors and information_schema
+#
+CREATE TABLE tt1(c1 INT);
+CREATE TABLE tt2(c2 INT);
+SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (('tt1', 'c1'));
+count(*)
+1
+SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (('tt2', 'c2'));
+count(*)
+1
+SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (('tt1','c1'),('tt2', 'c2'));
+count(*)
+2
+SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (SELECT 'tt1','c1' FROM dual UNION SELECT 'tt2', 'c2' FROM dual);
+count(*)
+2
+SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name='tt1' AND column_name='c1') OR (table_name='tt2' AND column_name='c2');
+count(*)
+2
+SELECT column_name FROM information_schema.columns WHERE (table_name, column_name) IN (('tt1','c1'),('tt2', 'c2')) ORDER BY column_name;
+column_name
+c1
+c2
+DROP TABLE tt1, tt2;
diff --git a/mysql-test/r/type_float.result b/mysql-test/r/type_float.result
index 9a92ff21e9f..57cdd1561df 100644
--- a/mysql-test/r/type_float.result
+++ b/mysql-test/r/type_float.result
@@ -772,5 +772,31 @@ t1 CREATE TABLE `t1` (
) ENGINE=MyISAM DEFAULT CHARSET=latin1
drop table if exists t1;
#
+# MDEV-11586 UNION of FLOAT type results in erroneous precision
+#
+CREATE TABLE t1 (f FLOAT);
+INSERT INTO t1 VALUES (1.1);
+SELECT f FROM t1 UNION SELECT 1;
+f
+1.100000023841858
+1
+SELECT 1 UNION SELECT f FROM t1;
+1
+1
+1.100000023841858
+SELECT f FROM t1 UNION SELECT 2147483647;
+f
+1.100000023841858
+2147483647
+SELECT 2147483647 UNION SELECT f FROM t1;
+2147483647
+2147483647
+1.100000023841858
+SELECT CASE WHEN 0 THEN (SELECT f FROM t1) ELSE 2147483647 END AS c1,
+CASE WHEN 1 THEN 2147483647 ELSE (SELECT f FROM t1) END AS c2;
+c1 c2
+2147483647 2147483647
+DROP TABLE t1;
+#
# End of 10.2 tests
#
diff --git a/mysql-test/suite/innodb/disabled.def b/mysql-test/suite/innodb/disabled.def
index 9a92e99df2e..c435de278b9 100644
--- a/mysql-test/suite/innodb/disabled.def
+++ b/mysql-test/suite/innodb/disabled.def
@@ -10,14 +10,5 @@
#
##############################################################################
-innodb_defragment_fill_factor : MDEV-11336 Fix and enable innodb_defragment
-innodb.defrag_mdl-9155 : MDEV-11336 Fix and enable innodb_defragment
-innodb.innodb_defrag_concurrent : MDEV-11336 Fix and enable innodb_defragment
-innodb.innodb_defrag_stats : MDEV-11336 Fix and enable innodb_defragment
-innodb.innodb_defrag_stats_many_tables : MDEV-11336 Fix and enable innodb_defragment
-innodb.innodb_defragment : MDEV-11336 Fix and enable innodb_defragment
-innodb.innodb_defragment_fill_factor : MDEV-11336 Fix and enable innodb_defragment
-innodb.innodb_defragment_small : MDEV-11336 Fix and enable innodb_defragment
-innodb.innodb_defrag_binlog : MDEV-11336 Fix and enable innodb_defragment
innodb-wl5980-alter : MDEV-9469 / MDEV-13668 extra crash in 10.2
create-index-debug : MDEV-13680 InnoDB may crash when btr_page_alloc() fails
diff --git a/mysql-test/suite/innodb/r/innodb-alter.result b/mysql-test/suite/innodb/r/innodb-alter.result
index aa78f55c78f..b06c6060375 100644
--- a/mysql-test/suite/innodb/r/innodb-alter.result
+++ b/mysql-test/suite/innodb/r/innodb-alter.result
@@ -857,3 +857,33 @@ DROP TABLE dest_db.t1;
DROP TABLE source_db.t1;
DROP DATABASE source_db;
DROP DATABASE dest_db;
+USE test;
+#
+# MDEV-14038 ALTER TABLE does not exit on error with InnoDB + bad default function
+#
+CREATE TABLE t1 (a INT NOT NULL DEFAULT 0) ENGINE=InnoDB;
+iNSERT INTO t1 VALUES (10);
+ALTER TABLE t1 ADD b TINYINT NOT NULL DEFAULT if(unix_timestamp()>1,1000,0);
+ERROR 22003: Out of range value for column 'b' at row 1
+SELECT * FROM t1;
+a
+10
+DROP TABLE t1;
+CREATE TABLE t1 (a INT NOT NULL DEFAULT 0) ENGINE=InnoDB;
+iNSERT INTO t1 VALUES (10);
+ALTER TABLE t1 ADD b DATE NOT NULL DEFAULT if(unix_timestamp()>1,TIMESTAMP'2001-01-01 10:20:30',0);
+affected rows: 0
+info: Records: 0 Duplicates: 0 Warnings: 0
+SELECT * FROM t1;
+a b
+10 2001-01-01
+DROP TABLE t1;
+CREATE TABLE t1 (a INT NOT NULL DEFAULT 0) ENGINE=InnoDB;
+iNSERT INTO t1 VALUES (10);
+ALTER TABLE t1 ADD b TIME NOT NULL DEFAULT if(unix_timestamp()>1,TIMESTAMP'2001-01-01 10:20:30',0);
+affected rows: 0
+info: Records: 0 Duplicates: 0 Warnings: 0
+SELECT * FROM t1;
+a b
+10 10:20:30
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb/r/innodb-online-alter-gis.result b/mysql-test/suite/innodb/r/innodb-online-alter-gis.result
index c7daac48e48..79c0f2386aa 100644
--- a/mysql-test/suite/innodb/r/innodb-online-alter-gis.result
+++ b/mysql-test/suite/innodb/r/innodb-online-alter-gis.result
@@ -37,3 +37,13 @@ Level Code Message
show errors;
Level Code Message
drop table t1;
+#
+# MDEV-14038 ALTER TABLE does not exit on error with InnoDB + bad default function
+#
+CREATE TABLE t1 (a INT) ENGINE=InnoDB;
+ALTER TABLE t1 ADD COLUMN b LINESTRING DEFAULT POINT(1,1);
+ERROR 22007: Incorrect LINESTRING value: 'POINT' for column 'b' at row 1
+DESCRIBE t1;
+Field Type Null Key Default Extra
+a int(11) YES NULL
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb/r/innodb_defrag_concurrent.result b/mysql-test/suite/innodb/r/innodb_defrag_concurrent.result
index ff32bf694cb..d10727b95b4 100644
--- a/mysql-test/suite/innodb/r/innodb_defrag_concurrent.result
+++ b/mysql-test/suite/innodb/r/innodb_defrag_concurrent.result
@@ -3,7 +3,15 @@ select @@global.innodb_stats_persistent;
@@global.innodb_stats_persistent
0
set global innodb_defragment_stats_accuracy = 80;
-CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), c INT, KEY second(a, b),KEY third(c)) ENGINE=INNODB;
+CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
+b VARCHAR(256),
+c INT,
+g GEOMETRY NOT NULL,
+t VARCHAR(256),
+KEY second(a, b),
+KEY third(c),
+SPATIAL gk(g),
+FULLTEXT INDEX fti(t)) ENGINE=INNODB;
connect con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
connect con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
connect con3,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
@@ -40,9 +48,9 @@ count(stat_value) > 0
connection con1;
optimize table t1;;
connection default;
-INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000);;
+INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000, Point(1,1),'More like a test but different.');;
connection con2;
-INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000);;
+INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000, Point(1,1),'Totally different text book.');;
connection con3;
DELETE FROM t1 where a between 1 and 100;;
connection con4;
@@ -59,6 +67,9 @@ disconnect con4;
optimize table t1;
Table Op Msg_type Msg_text
test.t1 optimize status OK
+check table t1 extended;
+Table Op Msg_type Msg_text
+test.t1 check status OK
select count(*) from t1;
count(*)
15723
diff --git a/mysql-test/suite/innodb/r/log_file_size.result b/mysql-test/suite/innodb/r/log_file_size.result
index b576061e74b..e049b34ad81 100644
--- a/mysql-test/suite/innodb/r/log_file_size.result
+++ b/mysql-test/suite/innodb/r/log_file_size.result
@@ -1,4 +1,12 @@
CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB;
+SELECT * FROM INFORMATION_SCHEMA.ENGINES
+WHERE engine = 'innodb'
+AND support IN ('YES', 'DEFAULT', 'ENABLED');
+ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
+FOUND 1 /InnoDB: Log file .*ib_logfile0 size 0 is too small/ in mysqld.1.err
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
BEGIN;
INSERT INTO t1 VALUES (42);
SELECT * FROM t1;
diff --git a/mysql-test/suite/innodb/t/innodb-alter.test b/mysql-test/suite/innodb/t/innodb-alter.test
index 5e681f96b4a..d936dcad15c 100644
--- a/mysql-test/suite/innodb/t/innodb-alter.test
+++ b/mysql-test/suite/innodb/t/innodb-alter.test
@@ -494,6 +494,34 @@ eval ALTER TABLE $source_db.t1 DROP INDEX index2, algorithm=inplace;
eval DROP TABLE $source_db.t1;
eval DROP DATABASE $source_db;
eval DROP DATABASE $dest_db;
+USE test;
+--echo #
+--echo # MDEV-14038 ALTER TABLE does not exit on error with InnoDB + bad default function
+--echo #
+CREATE TABLE t1 (a INT NOT NULL DEFAULT 0) ENGINE=InnoDB;
+iNSERT INTO t1 VALUES (10);
+--error ER_WARN_DATA_OUT_OF_RANGE
+ALTER TABLE t1 ADD b TINYINT NOT NULL DEFAULT if(unix_timestamp()>1,1000,0);
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# DATETIME-to-DATE truncation is OK
+CREATE TABLE t1 (a INT NOT NULL DEFAULT 0) ENGINE=InnoDB;
+iNSERT INTO t1 VALUES (10);
+--enable_info
+ALTER TABLE t1 ADD b DATE NOT NULL DEFAULT if(unix_timestamp()>1,TIMESTAMP'2001-01-01 10:20:30',0);
+--disable_info
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# DATETIME-to-TIME truncation is OK
+CREATE TABLE t1 (a INT NOT NULL DEFAULT 0) ENGINE=InnoDB;
+iNSERT INTO t1 VALUES (10);
+--enable_info
+ALTER TABLE t1 ADD b TIME NOT NULL DEFAULT if(unix_timestamp()>1,TIMESTAMP'2001-01-01 10:20:30',0);
+--disable_info
+SELECT * FROM t1;
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb/t/innodb-online-alter-gis.test b/mysql-test/suite/innodb/t/innodb-online-alter-gis.test
index 64d07ba23aa..2cb88d398bb 100644
--- a/mysql-test/suite/innodb/t/innodb-online-alter-gis.test
+++ b/mysql-test/suite/innodb/t/innodb-online-alter-gis.test
@@ -19,3 +19,13 @@ ALTER ONLINE TABLE t1 ADD PRIMARY KEY(a),DROP INDEX d, LOCK=SHARED;
show warnings;
show errors;
drop table t1;
+
+--echo #
+--echo # MDEV-14038 ALTER TABLE does not exit on error with InnoDB + bad default function
+--echo #
+
+CREATE TABLE t1 (a INT) ENGINE=InnoDB;
+--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD
+ALTER TABLE t1 ADD COLUMN b LINESTRING DEFAULT POINT(1,1);
+DESCRIBE t1;
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb/t/innodb_defrag_concurrent.test b/mysql-test/suite/innodb/t/innodb_defrag_concurrent.test
index f596fab2a15..bbcd72f1a3a 100644
--- a/mysql-test/suite/innodb/t/innodb_defrag_concurrent.test
+++ b/mysql-test/suite/innodb/t/innodb_defrag_concurrent.test
@@ -16,7 +16,26 @@ select @@global.innodb_stats_persistent;
set global innodb_defragment_stats_accuracy = 80;
# Create table.
-CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), c INT, KEY second(a, b),KEY third(c)) ENGINE=INNODB;
+#
+# TODO: Currently we do not defragment spatial indexes,
+# because doing it properly would require
+# appropriate logic around the SSN (split
+# sequence number).
+#
+# Also do not defragment auxiliary tables related to FULLTEXT INDEX.
+#
+# Both types added to this test to make sure they do not cause
+# problems.
+#
+CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
+b VARCHAR(256),
+c INT,
+g GEOMETRY NOT NULL,
+t VARCHAR(256),
+KEY second(a, b),
+KEY third(c),
+SPATIAL gk(g),
+FULLTEXT INDEX fti(t)) ENGINE=INNODB;
connect (con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
connect (con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
@@ -36,7 +55,7 @@ let $i = $data_size;
while ($i)
{
eval
- INSERT INTO t1 VALUES ($data_size + 1 - $i, REPEAT('A', 256), $i);
+ INSERT INTO t1 VALUES ($data_size + 1 - $i, REPEAT('A', 256), $i, Point($i,$i), 'This is a test message.');
dec $i;
}
--enable_query_log
@@ -69,10 +88,10 @@ connection con1;
--send optimize table t1;
connection default;
---send INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000);
+--send INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000, Point(1,1),'More like a test but different.');
connection con2;
---send INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000);
+--send INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000, Point(1,1),'Totally different text book.');
connection con3;
--send DELETE FROM t1 where a between 1 and 100;
@@ -103,6 +122,7 @@ disconnect con3;
disconnect con4;
optimize table t1;
+check table t1 extended;
select count(*) from t1;
select count(*) from t1 force index (second);
diff --git a/mysql-test/suite/innodb/t/log_file_size.test b/mysql-test/suite/innodb/t/log_file_size.test
index 206444115fc..140198de4ab 100644
--- a/mysql-test/suite/innodb/t/log_file_size.test
+++ b/mysql-test/suite/innodb/t/log_file_size.test
@@ -23,14 +23,33 @@ call mtr.add_suppression("InnoDB: Log file .*ib_logfile[01].* size");
call mtr.add_suppression("InnoDB: Unable to open .*ib_logfile0. to check native AIO read support");
FLUSH TABLES;
--enable_query_log
+let MYSQLD_DATADIR= `select @@datadir`;
+CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB;
+
+--source include/shutdown_mysqld.inc
+--move_file $MYSQLD_DATADIR/ib_logfile0 $MYSQLD_DATADIR/ib_logfile.old
+write_file $MYSQLD_DATADIR/ib_logfile0;
+EOF
+let $check_no_innodb=SELECT * FROM INFORMATION_SCHEMA.ENGINES
+WHERE engine = 'innodb'
+AND support IN ('YES', 'DEFAULT', 'ENABLED');
--let $restart_parameters= --innodb-thread-concurrency=1 --innodb-log-file-size=1m --innodb-log-files-in-group=2
---source include/restart_mysqld.inc
+--source include/start_mysqld.inc
+
+eval $check_no_innodb;
+--remove_file $MYSQLD_DATADIR/ib_logfile0
+--move_file $MYSQLD_DATADIR/ib_logfile.old $MYSQLD_DATADIR/ib_logfile.0
+--source include/shutdown_mysqld.inc
+let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err;
+let SEARCH_PATTERN= InnoDB: Log file .*ib_logfile0 size 0 is too small;
+--source include/search_pattern_in_file.inc
+--source include/start_mysqld.inc
+CHECK TABLE t1;
--let $restart_parameters= --innodb-thread-concurrency=100 --innodb-log-file-size=10M --innodb-log-files-in-group=2
--source include/restart_mysqld.inc
-CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB;
BEGIN;
INSERT INTO t1 VALUES (42);
@@ -52,9 +71,7 @@ SELECT * FROM t1;
INSERT INTO t1 VALUES (0),(123);
-let MYSQLD_DATADIR= `select @@datadir`;
let SEARCH_ABORT = NOT FOUND;
-let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err;
BEGIN;
DELETE FROM t1 WHERE a>0;
diff --git a/mysql-test/suite/innodb_gis/r/alter_spatial_index.result b/mysql-test/suite/innodb_gis/r/alter_spatial_index.result
index 17f1f7e1b06..a945e68aeb1 100644
--- a/mysql-test/suite/innodb_gis/r/alter_spatial_index.result
+++ b/mysql-test/suite/innodb_gis/r/alter_spatial_index.result
@@ -743,3 +743,19 @@ ALTER TABLE t1 ADD SPATIAL INDEX(p);
ALTER TABLE t1 FORCE, LOCK=NONE;
ERROR 0A000: LOCK=NONE is not supported. Reason: Do not support online operation on table with GIS index. Try LOCK=SHARED
DROP TABLE t1;
+create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1)))) ENGINE=innodb;
+set timestamp=10;
+insert into t1 values(default);
+ERROR 22007: Incorrect POINT value: 'GEOMETRYCOLLECTION' for column 'p' at row 1
+drop table t1;
+SET timestamp=default;
+create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1)))) ENGINE=innodb;
+set timestamp=10;
+alter table t1 add column i int;
+ERROR 22007: Incorrect POINT value: 'GEOMETRYCOLLECTION' for column 'p' at row 1
+drop table t1;
+SET timestamp=default;
+CREATE OR REPLACE TABLE t1 (a INT) ENGINE=InnoDB;
+ALTER TABLE t1 ADD COLUMN b POINT DEFAULT '0';
+ERROR 22003: Cannot get geometry object from data you send to the GEOMETRY field
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb_gis/t/alter_spatial_index.test b/mysql-test/suite/innodb_gis/t/alter_spatial_index.test
index 2b834ac69a6..703a89b4065 100644
--- a/mysql-test/suite/innodb_gis/t/alter_spatial_index.test
+++ b/mysql-test/suite/innodb_gis/t/alter_spatial_index.test
@@ -743,3 +743,23 @@ ALTER TABLE t1 ADD SPATIAL INDEX(p);
--error ER_ALTER_OPERATION_NOT_SUPPORTED_REASON
ALTER TABLE t1 FORCE, LOCK=NONE;
DROP TABLE t1;
+
+create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1)))) ENGINE=innodb;
+set timestamp=10;
+--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD
+insert into t1 values(default);
+drop table t1;
+SET timestamp=default;
+
+create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1)))) ENGINE=innodb;
+set timestamp=10;
+--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD
+alter table t1 add column i int;
+drop table t1;
+SET timestamp=default;
+
+CREATE OR REPLACE TABLE t1 (a INT) ENGINE=InnoDB;
+--error ER_CANT_CREATE_GEOMETRY_OBJECT
+ALTER TABLE t1 ADD COLUMN b POINT DEFAULT '0';
+DROP TABLE t1;
+
diff --git a/mysql-test/suite/mariabackup/xb_file_key_management.result b/mysql-test/suite/mariabackup/xb_file_key_management.result
index 8972da32f8b..721d10a9d91 100644
--- a/mysql-test/suite/mariabackup/xb_file_key_management.result
+++ b/mysql-test/suite/mariabackup/xb_file_key_management.result
@@ -9,6 +9,7 @@ INSERT INTO t VALUES('foobar2');
# remove datadir
# xtrabackup move back
# restart server
+ib_logfile0
SELECT * FROM t;
c
foobar1
diff --git a/mysql-test/suite/mariabackup/xb_file_key_management.test b/mysql-test/suite/mariabackup/xb_file_key_management.test
index 3887a889aaa..2a176952053 100644
--- a/mysql-test/suite/mariabackup/xb_file_key_management.test
+++ b/mysql-test/suite/mariabackup/xb_file_key_management.test
@@ -24,6 +24,7 @@ exec $XTRABACKUP --prepare --target-dir=$targetdir;
--enable_result_log
--list_files $targetdir ib_logfile*
+--cat_file $targetdir/ib_logfile0
SELECT * FROM t;
DROP TABLE t;
diff --git a/mysql-test/t/cte_nonrecursive.test b/mysql-test/t/cte_nonrecursive.test
index 980bff01694..57b7ae1658f 100644
--- a/mysql-test/t/cte_nonrecursive.test
+++ b/mysql-test/t/cte_nonrecursive.test
@@ -724,3 +724,22 @@ deallocate prepare stmt2;
drop view v1,v2;
drop table t1,t2;
+
+--echo #
+--echo # MDEV-13796: UNION of two materialized CTEs
+--echo #
+
+CREATE TABLE t1 (id int, k int);
+CREATE TABLE t2 (id int);
+INSERT INTO t1 VALUES (3,5), (1,7), (4,3);
+INSERT INTO t2 VALUES (4), (3), (2);
+
+let $q=
+WITH d1 AS (SELECT SUM(k) FROM t1, t2 as t2 WHERE t1.id = t2.id),
+ d2 AS (SELECT SUM(k) FROM t1, t2 as t2 WHERE t1.id = t2.id)
+SELECT * FROM d1 UNION SELECT * FROM d2;
+
+eval $q;
+eval explain $q;
+
+DROP TABLE t1,t2;
diff --git a/mysql-test/t/func_json.test b/mysql-test/t/func_json.test
index a34cede537c..47ed0c3ca75 100644
--- a/mysql-test/t/func_json.test
+++ b/mysql-test/t/func_json.test
@@ -56,6 +56,7 @@ select json_contains('[1, {"a":1}]', '{}');
select json_contains('[1, {"a":1}]', '{"a":1}');
select json_contains('[{"abc":"def", "def":"abc"}]', '["foo","bar"]');
select json_contains('[{"abc":"def", "def":"abc"}, "bar"]', '["bar", {}]');
+select json_contains('[{"a":"b"},{"c":"d"}]','{"c":"d"}');
select json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.key2[1]");
select json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.key2[10]");
@@ -163,6 +164,14 @@ drop table t1;
select json_unquote('"abc"');
select json_unquote('abc');
+#
+# MDEV-13703 Illegal mix of collations for operation 'json_object' on using JSON_UNQUOTE as an argument.
+#
+create table t1 (c VARCHAR(8)) DEFAULT CHARSET=latin1;
+insert into t1 values ('abc'),('def');
+
+select json_object('foo', json_unquote(json_object('bar', c)),'qux', c) as fld from t1;
+drop table t1;
select json_object("a", json_object("b", "abcd"));
select json_object("a", '{"b": "abcd"}');
@@ -179,6 +188,7 @@ select json_length('{}');
select json_length('[1, 2, {"a": 3}]');
select json_length('{"a": 1, "b": {"c": 30}}', '$.b');
select json_length('{"a": 1, "b": {"c": 30}}');
+select json_length('{}{');
create table json (j INT);
show create table json;
@@ -361,6 +371,12 @@ select json_array(5,json_query('[1,2]','$'));
SELECT JSON_ARRAY('1. ě 2. š 3. č 4. ř 5. ž 6. ý 7. á 8. í 9. é 10. ů 11. ú') AS json_data;
SELECT JSON_OBJECT("user","Jožko Mrkvičká") as json_data;
+#
+# MDEV-12312 JSON_CONTAINS_PATH does not detect invalid path and returns TRUE.
+#
+
+select json_contains_path('{"foo":"bar"}', 'one', '$[]');
+
--echo #
--echo # Start of 10.3 tests
--echo #
diff --git a/mysql-test/t/gis-precise.test b/mysql-test/t/gis-precise.test
index 1f8259bb828..07fabae6025 100644
--- a/mysql-test/t/gis-precise.test
+++ b/mysql-test/t/gis-precise.test
@@ -363,5 +363,24 @@ select ST_Touches(ST_LineFromText('LINESTRING(0 0,5 5)'),ST_PointFromText('POINT
select ST_Touches(ST_PolygonFromText('POLYGON((0 0,0 5,5 5,5 0,0 0))'),ST_PointFromText('POINT(0 0)'));
select ST_Touches(ST_PointFromText('POINT(0 0)'),ST_PointFromText('POINT(0 0)'));
+# MDEV-12705 10.1.18-MariaDB-1~jessie - mysqld got signal 11.
+SELECT ST_RELATE(
+ ST_DIFFERENCE(
+ GEOMETRYFROMTEXT('
+ MULTILINESTRING(
+ ( 12841 36140, 8005 31007, 26555 31075, 52765 41191,
+ 28978 6548, 45720 32057, 53345 3221 ),
+ ( 8304 59107, 25233 31592, 40502 25303, 8205 42940 ),
+ ( 7829 7305, 58841 56759, 64115 8512, 37562 54145, 2210 14701 ),
+ ( 20379 2805, 40807 27770, 28147 14883, 26439 29383, 55663 5086 ),
+ ( 35944 64702, 14433 23728, 49317 26241, 790 16941 )
+ )
+ '),
+ GEOMETRYFROMTEXT('POINT(46061 13545)')
+ ),
+ GEOMETRYFROMTEXT('POINT(4599 60359)'),
+ 'F*FFFF**F'
+ ) as relate_res;
+
--source include/gis_debug.inc
diff --git a/mysql-test/t/gis2.test b/mysql-test/t/gis2.test
index b734ab19ecd..9731e2a91d0 100644
--- a/mysql-test/t/gis2.test
+++ b/mysql-test/t/gis2.test
@@ -15,3 +15,31 @@ SELECT id FROM t1
WHERE ST_Contains(point_data, GeomFromText('Point(38.0248492 23.8512726)'));
DROP TABLE t1;
+#
+# MDEV-13923 Assertion `!is_set() || (m_status == DA_OK_BULK && is_bulk_op())' failed upon altering table with geometry field
+#
+--error ER_CANT_CREATE_GEOMETRY_OBJECT
+create table t1 (p point default "qwer");
+--error ER_CANT_CREATE_GEOMETRY_OBJECT
+create table t1 (p point default 0);
+--error ER_INVALID_DEFAULT
+create table t1 (p point not null default st_geometryfromtext('point 0)'));
+create table t1 (p point not null default st_geometryfromtext('point(0 0)'));
+insert into t1 values(default);
+select st_astext(p) from t1;
+drop table t1;
+
+create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1))));
+set timestamp=10;
+--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD
+insert into t1 values(default);
+drop table t1;
+SET timestamp=default;
+
+create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1))));
+set timestamp=10;
+--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD
+alter table t1 add column i int;
+drop table t1;
+SET timestamp=default;
+
diff --git a/mysql-test/t/information_schema.test b/mysql-test/t/information_schema.test
index 157b4c69508..417390dcafe 100644
--- a/mysql-test/t/information_schema.test
+++ b/mysql-test/t/information_schema.test
@@ -1861,3 +1861,29 @@ disconnect con1;
--source include/wait_until_count_sessions.inc
set global sql_mode=default;
+
+USE test;
+
+--echo #
+--echo # End of 10.0 tests
+--echo #
+
+
+--echo #
+--echo # Start of 10.1 tests
+--echo #
+
+
+--echo #
+--echo # MDEV-13242 Wrong results for queries with row constructors and information_schema
+--echo #
+
+CREATE TABLE tt1(c1 INT);
+CREATE TABLE tt2(c2 INT);
+SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (('tt1', 'c1'));
+SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (('tt2', 'c2'));
+SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (('tt1','c1'),('tt2', 'c2'));
+SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (SELECT 'tt1','c1' FROM dual UNION SELECT 'tt2', 'c2' FROM dual);
+SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name='tt1' AND column_name='c1') OR (table_name='tt2' AND column_name='c2');
+SELECT column_name FROM information_schema.columns WHERE (table_name, column_name) IN (('tt1','c1'),('tt2', 'c2')) ORDER BY column_name;
+DROP TABLE tt1, tt2;
diff --git a/mysql-test/t/type_float.test b/mysql-test/t/type_float.test
index 4e8cee75d63..2d7c4428507 100644
--- a/mysql-test/t/type_float.test
+++ b/mysql-test/t/type_float.test
@@ -534,5 +534,20 @@ show create table t1;
drop table if exists t1;
--echo #
+--echo # MDEV-11586 UNION of FLOAT type results in erroneous precision
+--echo #
+
+CREATE TABLE t1 (f FLOAT);
+INSERT INTO t1 VALUES (1.1);
+SELECT f FROM t1 UNION SELECT 1;
+SELECT 1 UNION SELECT f FROM t1;
+SELECT f FROM t1 UNION SELECT 2147483647;
+SELECT 2147483647 UNION SELECT f FROM t1;
+SELECT CASE WHEN 0 THEN (SELECT f FROM t1) ELSE 2147483647 END AS c1,
+ CASE WHEN 1 THEN 2147483647 ELSE (SELECT f FROM t1) END AS c2;
+DROP TABLE t1;
+
+
+--echo #
--echo # End of 10.2 tests
--echo #
diff --git a/plugin/aws_key_management/CMakeLists.txt b/plugin/aws_key_management/CMakeLists.txt
index 913bd8b16ed..aa93fc3aa03 100644
--- a/plugin/aws_key_management/CMakeLists.txt
+++ b/plugin/aws_key_management/CMakeLists.txt
@@ -117,7 +117,7 @@ ELSE()
IF(CMAKE_VERSION LESS "3.0")
SET(GIT_TAG "1.0.8")
ELSE()
- SET(GIT_TAG "1.1.27")
+ SET(GIT_TAG "1.2.11")
ENDIF()
SET(AWS_SDK_PATCH_COMMAND )
diff --git a/sql/event_data_objects.cc b/sql/event_data_objects.cc
index 7b08d1754ad..ef334233085 100644
--- a/sql/event_data_objects.cc
+++ b/sql/event_data_objects.cc
@@ -1478,19 +1478,33 @@ end:
bool save_tx_read_only= thd->tx_read_only;
thd->tx_read_only= false;
- if (WSREP(thd))
- {
+  /*
+    This code processes event execution and has no client connection.
+    Event execution is about to run a prepared DROP EVENT statement,
+    but thd->lex->sql_command is still set to SQLCOM_CREATE_PROCEDURE.
+    DROP EVENT will be written to the binlog, and we have to replicate
+    it so that all nodes keep consistent event definitions.
+    Wsrep DDL replication is triggered inside Events::drop_event(), and
+    here we prepare the THD so that DDL replication is possible: this
+    essentially requires setting sql_command to SQLCOM_DROP_EVENT.
+    We switch sql_command only for the duration of the DDL replication.
+  */
+ const enum_sql_command sql_command_save= thd->lex->sql_command;
+ const bool sql_command_set= WSREP(thd);
+
+ if (sql_command_set)
thd->lex->sql_command = SQLCOM_DROP_EVENT;
- WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL);
- }
ret= Events::drop_event(thd, &dbname, &name, FALSE);
- WSREP_TO_ISOLATION_END;
+ if (sql_command_set)
+ {
+ WSREP_TO_ISOLATION_END;
+ thd->lex->sql_command = sql_command_save;
+ }
-#ifdef WITH_WSREP
- error:
-#endif
thd->tx_read_only= save_tx_read_only;
thd->security_ctx->master_access= saved_master_access;
}
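
A minimal standalone sketch of the save/switch/restore pattern that the comment
above describes; Thd, Lex and drop_event_stub below are simplified stand-ins for
the server's THD/LEX machinery and Events::drop_event(), not the real API.

  #include <cassert>

  // Stand-in types (assumptions, not the server classes).
  enum enum_sql_command { SQLCOM_CREATE_PROCEDURE, SQLCOM_DROP_EVENT };
  struct Lex { enum_sql_command sql_command; };
  struct Thd { Lex *lex; bool wsrep_enabled; };

  static bool drop_event_stub(Thd *) { return false; }  // stands in for Events::drop_event()

  // Present the statement as DROP EVENT only while the drop (and its DDL
  // replication) runs, then restore the caller's sql_command.
  bool drop_event_for_scheduler(Thd *thd)
  {
    const enum_sql_command saved= thd->lex->sql_command;
    const bool switched= thd->wsrep_enabled;

    if (switched)
      thd->lex->sql_command= SQLCOM_DROP_EVENT;

    const bool ret= drop_event_stub(thd);

    if (switched)
      thd->lex->sql_command= saved;  // restore for the rest of event execution
    return ret;
  }

  int main()
  {
    Lex lex= { SQLCOM_CREATE_PROCEDURE };
    Thd thd= { &lex, true };
    drop_event_for_scheduler(&thd);
    assert(lex.sql_command == SQLCOM_CREATE_PROCEDURE);  // sql_command restored
    return 0;
  }
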
diff --git a/sql/events.cc b/sql/events.cc
index 6a38d4d3a1f..6d71fcdcb39 100644
--- a/sql/events.cc
+++ b/sql/events.cc
@@ -335,6 +335,7 @@ Events::create_event(THD *thd, Event_parse_data *parse_data)
if (check_access(thd, EVENT_ACL, parse_data->dbname.str, NULL, NULL, 0, 0))
DBUG_RETURN(TRUE);
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (lock_object_name(thd, MDL_key::EVENT,
parse_data->dbname.str, parse_data->name.str))
@@ -417,6 +418,10 @@ Events::create_event(THD *thd, Event_parse_data *parse_data)
thd->restore_stmt_binlog_format(save_binlog_format);
DBUG_RETURN(ret);
+#ifdef WITH_WSREP
+ error:
+ DBUG_RETURN(TRUE);
+#endif /* WITH_WSREP */
}
@@ -457,6 +462,9 @@ Events::update_event(THD *thd, Event_parse_data *parse_data,
if (check_access(thd, EVENT_ACL, parse_data->dbname.str, NULL, NULL, 0, 0))
DBUG_RETURN(TRUE);
+
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
+
if (lock_object_name(thd, MDL_key::EVENT,
parse_data->dbname.str, parse_data->name.str))
DBUG_RETURN(TRUE);
@@ -541,6 +549,10 @@ Events::update_event(THD *thd, Event_parse_data *parse_data,
thd->restore_stmt_binlog_format(save_binlog_format);
DBUG_RETURN(ret);
+#ifdef WITH_WSREP
+error:
+ DBUG_RETURN(TRUE);
+#endif /* WITH_WSREP */
}
@@ -582,6 +594,8 @@ Events::drop_event(THD *thd, const LEX_CSTRING *dbname,
if (check_access(thd, EVENT_ACL, dbname->str, NULL, NULL, 0, 0))
DBUG_RETURN(TRUE);
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
+
/*
Turn off row binlogging of this statement and use statement-based so
that all supporting tables are updated for DROP EVENT command.
@@ -603,6 +617,10 @@ Events::drop_event(THD *thd, const LEX_CSTRING *dbname,
thd->restore_stmt_binlog_format(save_binlog_format);
DBUG_RETURN(ret);
+#ifdef WITH_WSREP
+error:
+ DBUG_RETURN(TRUE);
+#endif
}
diff --git a/sql/field.cc b/sql/field.cc
index c7aa0dce16e..e7329feecb1 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -240,7 +240,7 @@ static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]=
//MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP
MYSQL_TYPE_FLOAT, MYSQL_TYPE_VARCHAR,
//MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24
- MYSQL_TYPE_FLOAT, MYSQL_TYPE_FLOAT,
+ MYSQL_TYPE_DOUBLE, MYSQL_TYPE_FLOAT,
//MYSQL_TYPE_DATE MYSQL_TYPE_TIME
MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR,
//MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR
@@ -2240,15 +2240,15 @@ Field *Field::clone(MEM_ROOT *root, my_ptrdiff_t diff)
return tmp;
}
-void Field::set_default()
+int Field::set_default()
{
if (default_value)
{
Query_arena backup_arena;
table->in_use->set_n_backup_active_arena(table->expr_arena, &backup_arena);
- (void) default_value->expr->save_in_field(this, 0);
+ int rc= default_value->expr->save_in_field(this, 0);
table->in_use->restore_active_arena(table->expr_arena, &backup_arena);
- return;
+ return rc;
}
/* Copy constant value stored in s->default_values */
my_ptrdiff_t l_offset= (my_ptrdiff_t) (table->s->default_values -
@@ -2257,6 +2257,7 @@ void Field::set_default()
if (maybe_null_in_table())
*null_ptr= ((*null_ptr & (uchar) ~null_bit) |
(null_ptr[l_offset] & null_bit));
+ return 0;
}
@@ -9558,7 +9559,7 @@ Field_bit::unpack(uchar *to, const uchar *from, const uchar *from_end,
}
-void Field_bit::set_default()
+int Field_bit::set_default()
{
if (bit_len > 0)
{
@@ -9566,7 +9567,7 @@ void Field_bit::set_default()
uchar bits= get_rec_bits(bit_ptr + col_offset, bit_ofs, bit_len);
set_rec_bits(bits, bit_ptr, bit_ofs, bit_len);
}
- Field::set_default();
+ return Field::set_default();
}
/*
diff --git a/sql/field.h b/sql/field.h
index 17b84e058a8..951d6940a88 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -958,7 +958,7 @@ public:
my_ptrdiff_t l_offset= (my_ptrdiff_t) (record - table->record[0]);
return ptr + l_offset;
}
- virtual void set_default();
+ virtual int set_default();
bool has_update_default_function() const
{
@@ -3762,7 +3762,7 @@ public:
virtual uchar *pack(uchar *to, const uchar *from, uint max_length);
virtual const uchar *unpack(uchar *to, const uchar *from,
const uchar *from_end, uint param_data);
- virtual void set_default();
+ virtual int set_default();
Field *new_key_field(MEM_ROOT *root, TABLE *new_table,
uchar *new_ptr, uint32 length,
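
The change of Field::set_default() from void to int above lets errors from
evaluating a default-value expression reach the caller, which the new
gis2/alter_spatial_index tests rely on. A minimal standalone illustration of
that idea, using simplified stand-in types rather than the server's Field and
Item classes:

  #include <cstdio>

  // Stand-in for a default-value expression: storing it either succeeds (0)
  // or fails with a non-zero error code (assumption, not the server API).
  struct Expr
  {
    int result_code;
    int save_in_field() const { return result_code; }
  };

  struct Field
  {
    const Expr *default_value;          // NULL means "constant default"
    int set_default()
    {
      if (default_value)
        return default_value->save_in_field();  // propagate evaluation errors
      return 0;                         // copying a constant default cannot fail
    }
  };

  int main()
  {
    Expr bad_default= { 1 };            // e.g. a geometry default that cannot be built
    Field f= { &bad_default };
    if (f.set_default())                // the caller can now abort the statement
      std::puts("default expression failed; statement aborted");
    return 0;
  }
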
diff --git a/sql/item.h b/sql/item.h
index a926ee9aa85..82106fea92b 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -4680,6 +4680,8 @@ public:
bool fix_fields(THD *thd, Item **it);
void cleanup();
+ Item *get_orig_item() const { return orig_item; }
+
/* Methods of getting value which should be cached in the cache */
void save_val(Field *to);
double val_real();
diff --git a/sql/item_geofunc.cc b/sql/item_geofunc.cc
index 04952739e85..16d4cf7cefa 100644
--- a/sql/item_geofunc.cc
+++ b/sql/item_geofunc.cc
@@ -66,9 +66,9 @@ String *Item_func_geometry_from_text::val_str(String *str)
srid= (uint32)args[1]->val_int();
str->set_charset(&my_charset_bin);
+ str->length(0);
if (str->reserve(SRID_SIZE, 512))
return 0;
- str->length(0);
str->q_append(srid);
if ((null_value= !Geometry::create_from_wkt(&buffer, &trs, str, 0)))
return 0;
@@ -1323,6 +1323,8 @@ static int setup_relate_func(Geometry *g1, Geometry *g2,
}
else
func->repeat_expression(shape_a);
+ if (func->reserve_op_buffer(1))
+ return 1;
func->add_operation(op_matrix(nc%3), 1);
if (do_store_shapes)
{
@@ -1493,11 +1495,13 @@ longlong Item_func_spatial_precise_rel::val_int()
Gcalc_function::op_intersection, 2);
func.add_operation(Gcalc_function::op_internals, 1);
shape_a= func.get_next_expression_pos();
- if ((null_value= g1.store_shapes(&trn)))
+ if ((null_value= g1.store_shapes(&trn)) ||
+ func.reserve_op_buffer(1))
break;
func.add_operation(Gcalc_function::op_internals, 1);
shape_b= func.get_next_expression_pos();
- if ((null_value= g2.store_shapes(&trn)))
+ if ((null_value= g2.store_shapes(&trn)) ||
+ func.reserve_op_buffer(1))
break;
func.add_operation(Gcalc_function::v_find_t |
Gcalc_function::op_intersection, 2);
@@ -1732,6 +1736,8 @@ int Item_func_buffer::Transporter::single_point(double x, double y)
{
if (buffer_op == Gcalc_function::op_difference)
{
+ if (m_fn->reserve_op_buffer(1))
+ return 1;
m_fn->add_operation(Gcalc_function::op_false, 0);
return 0;
}
diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc
index 2f4c1ef8e46..b30d32c36ac 100644
--- a/sql/item_jsonfunc.cc
+++ b/sql/item_jsonfunc.cc
@@ -581,7 +581,8 @@ String *Item_func_json_quote::val_str(String *str)
void Item_func_json_unquote::fix_length_and_dec()
{
- collation.set(&my_charset_utf8_general_ci);
+ collation.set(&my_charset_utf8_general_ci,
+ DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII);
max_length= args[0]->max_length;
maybe_null= 1;
}
@@ -1011,6 +1012,8 @@ static int check_contains(json_engine_t *js, json_engine_t *value)
case JSON_VALUE_ARRAY:
if (value->value_type != JSON_VALUE_ARRAY)
{
+ loc_js= *value;
+ set_js= FALSE;
while (json_scan_next(js) == 0 && js->state != JST_ARRAY_END)
{
int c_level, v_scalar;
@@ -1021,6 +1024,11 @@ static int check_contains(json_engine_t *js, json_engine_t *value)
if (!(v_scalar= json_value_scalar(js)))
c_level= json_get_level(js);
+ if (set_js)
+ *value= loc_js;
+ else
+ set_js= TRUE;
+
if (check_contains(js, value))
{
if (json_skip_level(js))
@@ -1452,7 +1460,8 @@ void Item_func_json_array::fix_length_and_dec()
if (arg_count == 0)
{
- collation.set(&my_charset_utf8_general_ci);
+ collation.set(&my_charset_utf8_general_ci,
+ DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII);
tmp_val.set_charset(&my_charset_utf8_general_ci);
max_length= 2;
return;
@@ -2126,6 +2135,7 @@ longlong Item_func_json_length::val_int()
json_engine_t je;
uint length= 0;
uint array_counters[JSON_DEPTH_LIMIT];
+ int err;
if ((null_value= args[0]->null_value))
return 0;
@@ -2167,7 +2177,7 @@ longlong Item_func_json_length::val_int()
if (json_value_scalar(&je))
return 1;
- while (json_scan_next(&je) == 0 &&
+ while (!(err= json_scan_next(&je)) &&
je.state != JST_OBJ_END && je.state != JST_ARRAY_END)
{
switch (je.state)
@@ -2186,6 +2196,12 @@ longlong Item_func_json_length::val_int()
};
}
+ if (!err)
+ {
+ /* Parse to the end of the JSON just to check it's valid. */
+ while (json_scan_next(&je) == 0) {}
+ }
+
if (!je.s.error)
return length;
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index 0c0b5a64953..4a3f107796d 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -3593,7 +3593,7 @@ bool Item_func_group_concat::setup(THD *thd)
syntax of this function). If there is no ORDER BY clause, we don't
create this tree.
*/
- init_tree(tree, (uint) MY_MIN(thd->variables.max_heap_table_size,
+ init_tree(tree, (size_t)MY_MIN(thd->variables.max_heap_table_size,
thd->variables.sortbuff_size/16), 0,
tree_key_length,
group_concat_key_cmp_with_order, NULL, (void*) this,
diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc
index ba33d103d0c..2b3d2374405 100644
--- a/sql/item_xmlfunc.cc
+++ b/sql/item_xmlfunc.cc
@@ -176,7 +176,7 @@ public:
{
nodebeg= (MY_XML_NODE*) pxml->ptr();
nodeend= (MY_XML_NODE*) (pxml->ptr() + pxml->length());
- numnodes= nodeend - nodebeg;
+ numnodes= (uint)(nodeend - nodebeg);
}
void prepare(String *nodeset)
{
@@ -615,7 +615,7 @@ public:
if ((node->parent == flt->num) &&
(node->type == MY_XML_NODE_TEXT))
{
- fake->set_value(node->beg, node->end - node->beg,
+ fake->set_value(node->beg, (uint)(node->end - node->beg),
collation.collation);
if (args[1]->val_int())
return 1;
@@ -817,7 +817,7 @@ String *Item_nodeset_func_predicate::val_nodeset(String *str)
Item_func *comp_func= (Item_func*)args[1];
uint pos= 0, size;
prepare(str);
- size= fltend - fltbeg;
+ size= (uint)(fltend - fltbeg);
for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++)
{
nodeset_func->context_cache.length(0);
@@ -836,7 +836,7 @@ String *Item_nodeset_func_elementbyindex::val_nodeset(String *nodeset)
Item_nodeset_func *nodeset_func= (Item_nodeset_func*) args[0];
prepare(nodeset);
MY_XPATH_FLT *flt;
- uint pos, size= fltend - fltbeg;
+ uint pos, size= (uint)(fltend - fltbeg);
for (pos= 0, flt= fltbeg; flt < fltend; flt++)
{
nodeset_func->context_cache.length(0);
@@ -995,7 +995,7 @@ static Item *create_comparator(MY_XPATH *xpath,
else if (a->type() == Item::XPATH_NODESET &&
b->type() == Item::XPATH_NODESET)
{
- uint len= xpath->query.end - context->beg;
+ uint len= (uint)(xpath->query.end - context->beg);
set_if_smaller(len, 32);
my_printf_error(ER_UNKNOWN_ERROR,
"XPATH error: "
@@ -1399,7 +1399,7 @@ MY_XPATH_FUNC *
my_xpath_function(const char *beg, const char *end)
{
MY_XPATH_FUNC *k, *function_names;
- uint length= end-beg;
+ uint length= (uint)(end-beg);
switch (length)
{
case 1: return 0;
@@ -1961,7 +1961,7 @@ static int my_xpath_parse_PrimaryExpr_literal(MY_XPATH *xpath)
return 0;
xpath->item= new (xpath->thd->mem_root)
Item_string(xpath->thd, xpath->prevtok.beg + 1,
- xpath->prevtok.end - xpath->prevtok.beg - 2,
+ (uint)(xpath->prevtok.end - xpath->prevtok.beg - 2),
xpath->cs);
return 1;
}
@@ -2499,13 +2499,13 @@ static int my_xpath_parse_Number(MY_XPATH *xpath)
if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DOT))
{
xpath->item= new (thd->mem_root) Item_int(thd, xpath->prevtok.beg,
- xpath->prevtok.end - xpath->prevtok.beg);
+ (uint)(xpath->prevtok.end - xpath->prevtok.beg));
return 1;
}
my_xpath_parse_term(xpath, MY_XPATH_LEX_DIGITS);
xpath->item= new (thd->mem_root) Item_float(thd, beg,
- xpath->prevtok.end - beg);
+ (uint)(xpath->prevtok.end - beg));
return 1;
}
@@ -2632,7 +2632,7 @@ my_xpath_parse_VariableReference(MY_XPATH *xpath)
{
xpath->item= NULL;
DBUG_ASSERT(xpath->query.end > dollar_pos);
- uint len= xpath->query.end - dollar_pos;
+ uint len= (uint)(xpath->query.end - dollar_pos);
set_if_smaller(len, 32);
my_printf_error(ER_UNKNOWN_ERROR, "Unknown XPATH variable at: '%.*s'",
MYF(0), len, dollar_pos);
@@ -2660,7 +2660,7 @@ my_xpath_parse_NodeTest_QName(MY_XPATH *xpath)
if (!my_xpath_parse_QName(xpath))
return 0;
DBUG_ASSERT(xpath->context);
- uint len= xpath->prevtok.end - xpath->prevtok.beg;
+ uint len= (uint)(xpath->prevtok.end - xpath->prevtok.beg);
xpath->context= nametestfunc(xpath, xpath->axis, xpath->context,
xpath->prevtok.beg, len);
return 1;
@@ -2759,7 +2759,7 @@ bool Item_xml_str_func::fix_fields(THD *thd, Item **ref)
if (!rc)
{
- uint clen= xpath.query.end - xpath.lasttok.beg;
+ uint clen= (uint)(xpath.query.end - xpath.lasttok.beg);
set_if_smaller(clen, 32);
my_printf_error(ER_UNKNOWN_ERROR, "XPATH syntax error: '%.*s'",
MYF(0), clen, xpath.lasttok.beg);
diff --git a/sql/log_event.cc b/sql/log_event.cc
index f57fa7d2718..98b1f858fee 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -1131,7 +1131,7 @@ int append_query_string(CHARSET_INFO *csinfo, String *to,
*ptr++= '\'';
}
- to->length(orig_len + ptr - beg);
+ to->length((uint32)(orig_len + ptr - beg));
return 0;
}
#endif
@@ -10036,7 +10036,7 @@ Execute_load_query_log_event::do_apply_event(rpl_group_info *rgi)
p= strmake(p, STRING_WITH_LEN(" INTO "));
p= strmake(p, query+fn_pos_end, q_len-fn_pos_end);
- error= Query_log_event::do_apply_event(rgi, buf, p-buf);
+ error= Query_log_event::do_apply_event(rgi, buf, (uint32)(p-buf));
/* Forging file name for deletion in same buffer */
*fname_end= 0;
@@ -10488,7 +10488,7 @@ int Rows_log_event::do_add_row_data(uchar *row_data, size_t length)
if (static_cast<size_t>(m_rows_end - m_rows_cur) <= length)
{
size_t const block_size= 1024;
- ulong cur_size= m_rows_cur - m_rows_buf;
+ size_t cur_size= m_rows_cur - m_rows_buf;
DBUG_EXECUTE_IF("simulate_too_big_row_case1",
cur_size= UINT_MAX32 - (block_size * 10);
length= UINT_MAX32 - (block_size * 10););
@@ -10501,21 +10501,21 @@ int Rows_log_event::do_add_row_data(uchar *row_data, size_t length)
DBUG_EXECUTE_IF("simulate_too_big_row_case4",
cur_size= UINT_MAX32 - (block_size * 10);
length= (block_size * 10) - block_size + 1;);
- ulong remaining_space= UINT_MAX32 - cur_size;
+ size_t remaining_space= UINT_MAX32 - cur_size;
/* Check that the new data fits within remaining space and we can add
block_size without wrapping.
*/
- if (length > remaining_space ||
+ if (cur_size > UINT_MAX32 || length > remaining_space ||
((length + block_size) > remaining_space))
{
sql_print_error("The row data is greater than 4GB, which is too big to "
"write to the binary log.");
DBUG_RETURN(ER_BINLOG_ROW_LOGGING_FAILED);
}
- ulong const new_alloc=
+ size_t const new_alloc=
block_size * ((cur_size + length + block_size - 1) / block_size);
- uchar* const new_buf= (uchar*)my_realloc((uchar*)m_rows_buf, (uint) new_alloc,
+ uchar* const new_buf= (uchar*)my_realloc((uchar*)m_rows_buf, new_alloc,
MYF(MY_ALLOW_ZERO_PTR|MY_WME));
if (unlikely(!new_buf))
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
@@ -11248,11 +11248,11 @@ bool Rows_log_event::write_compressed()
uchar *m_rows_cur_tmp = m_rows_cur;
bool ret = true;
uint32 comlen, alloc_size;
- comlen= alloc_size= binlog_get_compress_len(m_rows_cur_tmp - m_rows_buf_tmp);
+ comlen= alloc_size= binlog_get_compress_len((uint32)(m_rows_cur_tmp - m_rows_buf_tmp));
m_rows_buf = (uchar *)my_safe_alloca(alloc_size);
if(m_rows_buf &&
!binlog_buf_compress((const char *)m_rows_buf_tmp, (char *)m_rows_buf,
- m_rows_cur_tmp - m_rows_buf_tmp, &comlen))
+ (uint32)(m_rows_cur_tmp - m_rows_buf_tmp), &comlen))
{
m_rows_cur= comlen + m_rows_buf;
ret= Log_event::write();
@@ -12488,7 +12488,7 @@ Rows_log_event::write_row(rpl_group_info *rgi,
the size of the first row and use that value to initialize
storage engine for bulk insertion */
DBUG_ASSERT(!(m_curr_row > m_curr_row_end));
- ulong estimated_rows= 0;
+ ha_rows estimated_rows= 0;
if (m_curr_row < m_curr_row_end)
estimated_rows= (m_rows_end - m_curr_row) / (m_curr_row_end - m_curr_row);
else if (m_curr_row == m_curr_row_end)
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index d0993151f76..218594b82b4 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -8800,8 +8800,8 @@ static int mysql_init_variables(void)
/* Set directory paths */
mysql_real_data_home_len=
- strmake_buf(mysql_real_data_home,
- get_relative_path(MYSQL_DATADIR)) - mysql_real_data_home;
+ (uint)(strmake_buf(mysql_real_data_home,
+ get_relative_path(MYSQL_DATADIR)) - mysql_real_data_home);
/* Replication parameters */
master_info_file= (char*) "master.info",
relay_log_info_file= (char*) "relay-log.info";
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 5d31dd1662a..024bb9dd933 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -499,9 +499,9 @@ int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
if (trees_next == trees_end)
{
const int realloc_ratio= 2; /* Double size for next round */
- uint old_elements= (trees_end - trees);
- uint old_size= sizeof(SEL_TREE**) * old_elements;
- uint new_size= old_size * realloc_ratio;
+ size_t old_elements= (trees_end - trees);
+ size_t old_size= sizeof(SEL_TREE**) * old_elements;
+ size_t new_size= old_size * realloc_ratio;
SEL_TREE **new_trees;
if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size)))
return -1;
@@ -846,10 +846,10 @@ SEL_TREE::SEL_TREE(SEL_TREE *arg, bool without_merges,
SEL_IMERGE::SEL_IMERGE(SEL_IMERGE *arg, uint cnt,
RANGE_OPT_PARAM *param) : Sql_alloc()
{
- uint elements= (arg->trees_end - arg->trees);
+ size_t elements= (arg->trees_end - arg->trees);
if (elements > PREALLOCED_TREES)
{
- uint size= elements * sizeof (SEL_TREE **);
+ size_t size= elements * sizeof (SEL_TREE **);
if (!(trees= (SEL_TREE **)alloc_root(param->mem_root, size)))
goto mem_err;
}
@@ -951,7 +951,7 @@ int imerge_list_or_list(RANGE_OPT_PARAM *param,
uint rc;
bool is_last_check_pass= FALSE;
SEL_IMERGE *imerge= im1->head();
- uint elems= imerge->trees_next-imerge->trees;
+ uint elems= (uint)(imerge->trees_next-imerge->trees);
MEM_ROOT *mem_root= current_thd->mem_root;
im1->empty();
@@ -1051,7 +1051,7 @@ int imerge_list_or_tree(RANGE_OPT_PARAM *param,
SEL_TREE *or_tree= new (mem_root) SEL_TREE (tree, FALSE, param);
if (or_tree)
{
- uint elems= imerge->trees_next-imerge->trees;
+ uint elems= (uint)(imerge->trees_next-imerge->trees);
rc= imerge->or_sel_tree_with_checks(param, elems, or_tree,
TRUE, &is_last_check_pass);
if (!is_last_check_pass)
@@ -2897,7 +2897,7 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
uint keynr;
uint max_quick_key_parts= 0;
MY_BITMAP *used_fields= &table->cond_set;
- double table_records= table->stat_records();
+ double table_records= (double)table->stat_records();
DBUG_ENTER("calculate_cond_selectivity_for_table");
table->cond_selectivity= 1.0;
@@ -3994,8 +3994,8 @@ int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
store_length_array,
range_par->min_key,
range_par->max_key,
- tmp_min_key - range_par->min_key,
- tmp_max_key - range_par->max_key,
+ (uint)(tmp_min_key - range_par->min_key),
+ (uint)(tmp_max_key - range_par->max_key),
flag,
&ppar->part_iter);
if (!res)
@@ -4659,7 +4659,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
}
}
- uint n_child_scans= imerge->trees_next - imerge->trees;
+ size_t n_child_scans= imerge->trees_next - imerge->trees;
if (!n_child_scans)
DBUG_RETURN(NULL);
@@ -5203,7 +5203,7 @@ bool prepare_search_best_index_intersect(PARAM *param,
INDEX_SCAN_INFO **scan_ptr;
INDEX_SCAN_INFO *cpk_scan= NULL;
TABLE *table= param->table;
- uint n_index_scans= tree->index_scans_end - tree->index_scans;
+ uint n_index_scans= (uint)(tree->index_scans_end - tree->index_scans);
if (!n_index_scans)
return 1;
@@ -5846,7 +5846,7 @@ TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
}
}
- count= tree->index_scans_end - tree->index_scans;
+ count= (uint)(tree->index_scans_end - tree->index_scans);
for (i= 0; i < count; i++)
{
index_scan= tree->index_scans[i];
@@ -6506,7 +6506,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
intersect_scans_best););
*are_all_covering= intersect->is_covering;
- uint best_num= intersect_scans_best - intersect_scans;
+ uint best_num= (uint)(intersect_scans_best - intersect_scans);
ror_intersect_cpy(intersect, intersect_best);
/*
@@ -6688,7 +6688,7 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
TRP_ROR_INTERSECT *trp;
if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
DBUG_RETURN(trp);
- uint best_num= (ror_scan_mark - tree->ror_scans);
+ uint best_num= (uint)(ror_scan_mark - tree->ror_scans);
if (!(trp->first_scan= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
sizeof(ROR_SCAN_INFO*)*
best_num)))
@@ -11476,7 +11476,7 @@ int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
DBUG_RETURN(0);
}
- uint count= ranges.elements - (cur_range - (QUICK_RANGE**) ranges.buffer);
+ uint count= ranges.elements - (uint)(cur_range - (QUICK_RANGE**) ranges.buffer);
if (count == 0)
{
/* Ranges have already been used up before. None is left for read. */
@@ -11521,7 +11521,7 @@ int QUICK_RANGE_SELECT_GEOM::get_next()
DBUG_RETURN(result);
}
- uint count= ranges.elements - (cur_range - (QUICK_RANGE**) ranges.buffer);
+ uint count= ranges.elements - (uint)(cur_range - (QUICK_RANGE**) ranges.buffer);
if (count == 0)
{
/* Ranges have already been used up before. None is left for read. */
@@ -11975,7 +11975,7 @@ void QUICK_SELECT_I::add_key_and_length(String *key_names,
bool *first)
{
char buf[64];
- uint length;
+ size_t length;
KEY *key_info= head->key_info + index;
if (*first)
@@ -12529,7 +12529,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
{
cur_group_prefix_len+= cur_part->store_length;
++cur_group_key_parts;
- max_key_part= cur_part - cur_index_info->key_part + 1;
+ max_key_part= (uint)(cur_part - cur_index_info->key_part) + 1;
used_key_parts_map.set_bit(max_key_part);
}
else
@@ -13252,7 +13252,7 @@ get_field_keypart(KEY *index, Field *field)
part < end; part++)
{
if (field->eq(part->field))
- return part - index->key_part + 1;
+ return (uint)(part - index->key_part + 1);
}
return 0;
}
diff --git a/sql/opt_range_mrr.cc b/sql/opt_range_mrr.cc
index b3350191d13..ace6208fd77 100644
--- a/sql/opt_range_mrr.cc
+++ b/sql/opt_range_mrr.cc
@@ -199,9 +199,9 @@ walk_right_n_up:
{
{
RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i];
- uint min_key_length= cur->min_key - seq->param->min_key;
- uint max_key_length= cur->max_key - seq->param->max_key;
- uint len= cur->min_key - cur[-1].min_key;
+ size_t min_key_length= cur->min_key - seq->param->min_key;
+ size_t max_key_length= cur->max_key - seq->param->max_key;
+ size_t len= cur->min_key - cur[-1].min_key;
if (!(min_key_length == max_key_length &&
!memcmp(cur[-1].min_key, cur[-1].max_key, len) &&
!key_tree->min_flag && !key_tree->max_flag))
@@ -238,7 +238,7 @@ walk_up_n_right:
/* Ok got a tuple */
RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i];
- uint min_key_length= cur->min_key - seq->param->min_key;
+ uint min_key_length= (uint)(cur->min_key - seq->param->min_key);
range->ptr= (char*)(intptr)(key_tree->part);
if (cur->min_key_flag & GEOM_FLAG)
@@ -256,13 +256,13 @@ walk_up_n_right:
range->range_flag= cur->min_key_flag | cur->max_key_flag;
range->start_key.key= seq->param->min_key;
- range->start_key.length= cur->min_key - seq->param->min_key;
+ range->start_key.length= (uint)(cur->min_key - seq->param->min_key);
range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
HA_READ_KEY_EXACT);
range->end_key.key= seq->param->max_key;
- range->end_key.length= cur->max_key - seq->param->max_key;
+ range->end_key.length= (uint)(cur->max_key - seq->param->max_key);
range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
HA_READ_AFTER_KEY);
range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts);
diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc
index ab587b8b279..8a75aaed8d6 100644
--- a/sql/opt_sum.cc
+++ b/sql/opt_sum.cc
@@ -768,12 +768,12 @@ static bool matching_cond(bool max_fl, TABLE_REF *ref, KEY *keyinfo,
key_part_map org_key_part_used= *key_part_used;
if (eq_type || between || max_fl == less_fl)
{
- uint length= (key_ptr-ref->key_buff)+part->store_length;
+ uint length= (uint)(key_ptr-ref->key_buff)+part->store_length;
if (ref->key_length < length)
{
/* Ultimately ref->key_length will contain the length of the search key */
ref->key_length= length;
- ref->key_parts= (part - keyinfo->key_part) + 1;
+ ref->key_parts= (uint)(part - keyinfo->key_part) + 1;
}
if (!*prefix_len && part+1 == field_part)
*prefix_len= length;
diff --git a/sql/opt_table_elimination.cc b/sql/opt_table_elimination.cc
index d3d1bc97a70..191360a6969 100644
--- a/sql/opt_table_elimination.cc
+++ b/sql/opt_table_elimination.cc
@@ -848,7 +848,7 @@ bool check_func_dependency(JOIN *join,
*/
uint and_level=0;
build_eq_mods_for_cond(join->thd, &dac, &last_eq_mod, &and_level, cond);
- if (!(dac.n_equality_mods= last_eq_mod - dac.equality_mods))
+ if (!(dac.n_equality_mods= (uint)(last_eq_mod - dac.equality_mods)))
return FALSE; /* No useful conditions */
List<Dep_module> bound_modules;
@@ -1061,7 +1061,7 @@ bool Dep_analysis_context::setup_equality_modules_deps(List<Dep_module>
eq_mod < equality_mods + n_equality_mods;
eq_mod++)
{
- deps_recorder.expr_offset= eq_mod - equality_mods;
+ deps_recorder.expr_offset= (uint)(eq_mod - equality_mods);
deps_recorder.visited_other_tables= FALSE;
eq_mod->unbound_args= 0;
@@ -1079,7 +1079,7 @@ bool Dep_analysis_context::setup_equality_modules_deps(List<Dep_module>
Dep_value_field* field_val;
while ((field_val= it++))
{
- uint offs= field_val->bitmap_offset + eq_mod - equality_mods;
+ uint offs= (uint)(field_val->bitmap_offset + eq_mod - equality_mods);
bitmap_set_bit(&expr_deps, offs);
}
}
@@ -1158,7 +1158,7 @@ void build_eq_mods_for_cond(THD *thd, Dep_analysis_context *ctx,
if (cond->type() == Item_func::COND_ITEM)
{
List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
- uint orig_offset= *eq_mod - ctx->equality_mods;
+ size_t orig_offset= *eq_mod - ctx->equality_mods;
/* AND/OR */
if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
diff --git a/sql/parse_file.cc b/sql/parse_file.cc
index 196feabb235..94f72b7b492 100644
--- a/sql/parse_file.cc
+++ b/sql/parse_file.cc
@@ -256,7 +256,7 @@ sql_create_definition_file(const LEX_CSTRING *dir,
File handler;
IO_CACHE file;
char path[FN_REFLEN+1]; // +1 to put temporary file name for sure
- int path_end;
+ size_t path_end;
File_option *param;
DBUG_ENTER("sql_create_definition_file");
DBUG_PRINT("enter", ("Dir: %s, file: %s, base %p",
diff --git a/sql/partition_info.cc b/sql/partition_info.cc
index cc6553fc158..47fb60ea12a 100644
--- a/sql/partition_info.cc
+++ b/sql/partition_info.cc
@@ -1674,7 +1674,7 @@ void partition_info::print_no_partition_found(TABLE *table_arg, myf errflag)
bool partition_info::set_part_expr(THD *thd, char *start_token, Item *item_ptr,
char *end_token, bool is_subpart)
{
- uint expr_len= end_token - start_token;
+ size_t expr_len= end_token - start_token;
char *func_string= (char*) thd->memdup(start_token, expr_len);
if (!func_string)
diff --git a/sql/rpl_record.cc b/sql/rpl_record.cc
index 51e93003c56..1b1059cc529 100644
--- a/sql/rpl_record.cc
+++ b/sql/rpl_record.cc
@@ -80,7 +80,7 @@ pack_row(TABLE *table, MY_BITMAP const* cols,
unsigned int null_mask= 1U;
for ( ; (field= *p_field) ; p_field++)
{
- if (bitmap_is_set(cols, p_field - table->field))
+ if (bitmap_is_set(cols, (uint)(p_field - table->field)))
{
my_ptrdiff_t offset;
if (field->is_null(rec_offset))
@@ -262,7 +262,7 @@ unpack_row(rpl_group_info *rgi,
No need to bother about columns that does not exist: they have
gotten default values when being emptied above.
*/
- if (bitmap_is_set(cols, field_ptr - begin_ptr))
+ if (bitmap_is_set(cols, (uint)(field_ptr - begin_ptr)))
{
if ((null_mask & 0xFF) == 0)
{
@@ -434,7 +434,7 @@ unpack_row(rpl_group_info *rgi,
if (master_reclength)
{
if (*field_ptr)
- *master_reclength = (*field_ptr)->ptr - table->record[0];
+ *master_reclength = (ulong)((*field_ptr)->ptr - table->record[0]);
else
*master_reclength = table->s->reclength;
}
diff --git a/sql/rpl_record_old.cc b/sql/rpl_record_old.cc
index a252bbff0f5..fd37c6f9142 100644
--- a/sql/rpl_record_old.cc
+++ b/sql/rpl_record_old.cc
@@ -134,7 +134,7 @@ unpack_row_old(rpl_group_info *rgi,
{
Field *const f= *field_ptr;
- if (bitmap_is_set(cols, field_ptr - begin_ptr))
+ if (bitmap_is_set(cols, (uint)(field_ptr - begin_ptr)))
{
f->move_field_offset(offset);
ptr= f->unpack(f->ptr, ptr, row_buffer_end, 0);
@@ -149,14 +149,14 @@ unpack_row_old(rpl_group_info *rgi,
}
}
else
- bitmap_clear_bit(rw_set, field_ptr - begin_ptr);
+ bitmap_clear_bit(rw_set, (uint)(field_ptr - begin_ptr));
}
*row_end = ptr;
if (master_reclength)
{
if (*field_ptr)
- *master_reclength = (*field_ptr)->ptr - table->record[0];
+ *master_reclength = (ulong)((*field_ptr)->ptr - table->record[0]);
else
*master_reclength = table->s->reclength;
}
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt
index a6462cbca9b..054378ed268 100644
--- a/sql/share/errmsg-utf8.txt
+++ b/sql/share/errmsg-utf8.txt
@@ -7675,8 +7675,11 @@ ER_SLAVE_SAME_ID
ER_FLASHBACK_NOT_SUPPORTED
eng "Flashback does not support %s %s"
-# MARIAROCKS-TODO: Should we add RocksDB error messages here or use some other
-# solution?
+
+
+#
+# MyRocks error messages
+#
ER_KEYS_OUT_OF_ORDER
eng "Keys are out order during bulk load"
@@ -7710,39 +7713,40 @@ ER_UNSUPPORTED_COLLATION
ER_METADATA_INCONSISTENCY
eng "Table '%s' does not exist, but metadata information exists inside MyRocks. This is a sign of data inconsistency. Please check if '%s.frm' exists, and try to restore it if it does not exist."
-ER_KEY_CREATE_DURING_ALTER
- eng "MyRocks failed creating new key definitions during alter."
-
-ER_SK_POPULATE_DURING_ALTER
- eng "MyRocks failed populating secondary key during alter."
-
ER_CF_DIFFERENT
eng "Column family ('%s') flag (%d) is different from an existing flag (%d). Assign a new CF flag, or do not change existing CF flag."
+ER_RDB_TTL_DURATION_FORMAT
+ eng "TTL duration (%s) in MyRocks must be an unsigned non-null 64-bit integer."
+
ER_RDB_STATUS_GENERAL
- eng "Status error %d received from RocksDB: %s"
+ eng "Status error %d received from RocksDB: %s"
ER_RDB_STATUS_MSG
- eng "%s, Status error %d received from RocksDB: %s"
-
-ER_NET_OK_PACKET_TOO_LARGE
- eng "OK packet too large"
+ eng "%s, Status error %d received from RocksDB: %s"
ER_RDB_TTL_UNSUPPORTED
- eng "TTL support is currently disabled when table has secondary indexes or hidden PK."
+ eng "TTL support is currently disabled when table has a hidden PK."
ER_RDB_TTL_COL_FORMAT
eng "TTL column (%s) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration."
-ER_RDB_TTL_DURATION_FORMAT
- eng "TTL duration (%s) in MyRocks must be an unsigned non-null 64-bit integer."
-
ER_PER_INDEX_CF_DEPRECATED
eng "The per-index column family option has been deprecated"
+ER_KEY_CREATE_DURING_ALTER
+ eng "MyRocks failed creating new key definitions during alter."
+
+ER_SK_POPULATE_DURING_ALTER
+ eng "MyRocks failed populating secondary key during alter."
+
+# MyRocks messages end
ER_SUM_FUNC_WITH_WINDOW_FUNC_AS_ARG
eng "Window functions can not be used as arguments to group functions."
+ER_NET_OK_PACKET_TOO_LARGE
+ eng "OK packet too large"
+
ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION
eng "Illegal parameter data types %s and %s for operation '%s'"
ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
diff --git a/sql/spatial.cc b/sql/spatial.cc
index 8817e82d6c4..357e311543f 100644
--- a/sql/spatial.cc
+++ b/sql/spatial.cc
@@ -2565,7 +2565,7 @@ uint Gis_multi_polygon::init_from_opresult(String *bin,
n_poly++;
}
bin->write_at_position(np_pos, n_poly);
- return opres - opres_orig;
+ return (uint)(opres - opres_orig);
}
diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc
index b0550fb5189..2c192d661f4 100644
--- a/sql/sql_acl.cc
+++ b/sql/sql_acl.cc
@@ -12398,7 +12398,7 @@ static bool parse_com_change_user_packet(MPVIO_EXT *mpvio, uint packet_length)
char *end= user + packet_length;
/* Safe because there is always a trailing \0 at the end of the packet */
char *passwd= strend(user) + 1;
- uint user_len= passwd - user - 1;
+ uint user_len= (uint)(passwd - user - 1);
char *db= passwd;
char db_buff[SAFE_NAME_LEN + 1]; // buffer to store db in utf8
char user_buff[USERNAME_LENGTH + 1]; // buffer to store user in utf8
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index c0736130cfe..699633a11c1 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -5479,7 +5479,7 @@ find_field_in_table(THD *thd, TABLE *table, const char *name, uint length,
if (field_ptr && *field_ptr)
{
- *cached_field_index_ptr= field_ptr - table->field;
+ *cached_field_index_ptr= (uint)(field_ptr - table->field);
field= *field_ptr;
}
else
diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc
index 7849e16a839..f28ce514b29 100644
--- a/sql/sql_cache.cc
+++ b/sql/sql_cache.cc
@@ -4286,7 +4286,7 @@ my_bool Query_cache::move_by_type(uchar **border,
*pprev = block->pprev,
*pnext = block->pnext,
*new_block =(Query_cache_block *) *border;
- uint tablename_offset = block->table()->table() - block->table()->db();
+ size_t tablename_offset = block->table()->table() - block->table()->db();
char *data = (char*) block->data();
uchar *key;
size_t key_length;
@@ -4595,7 +4595,7 @@ uint Query_cache::filename_2_table_key (char *key, const char *path,
filename= tablename + dirname_length(tablename + 2) + 2;
/* Find start of databasename */
for (dbname= filename - 2 ; dbname[-1] != FN_LIBCHAR ; dbname--) ;
- *db_length= (filename - dbname) - 1;
+ *db_length= (uint32)(filename - dbname) - 1;
DBUG_PRINT("qcache", ("table '%-.*s.%s'", *db_length, dbname, filename));
DBUG_RETURN((uint) (strmake(strmake(key, dbname,
diff --git a/sql/sql_cte.cc b/sql/sql_cte.cc
index c163044547f..54a56103d21 100644
--- a/sql/sql_cte.cc
+++ b/sql/sql_cte.cc
@@ -990,7 +990,7 @@ With_element *st_select_lex::find_table_def_in_with_clauses(TABLE_LIST *table)
been done yet.
*/
if (with_elem && sl->master_unit() == with_elem->spec)
- break;
+ break;
With_clause *with_clause=sl->get_with_clause();
if (with_clause)
{
@@ -1038,13 +1038,21 @@ bool TABLE_LIST::set_as_with_table(THD *thd, With_element *with_elem)
}
with= with_elem;
if (!with_elem->is_referenced() || with_elem->is_recursive)
+ {
derived= with_elem->spec;
+ if (derived->get_master() != select_lex &&
+ !is_with_table_recursive_reference())
+ {
+ derived->move_as_slave(select_lex);
+ }
+ }
else
{
if(!(derived= with_elem->clone_parsed_spec(thd, this)))
return true;
derived->with_element= with_elem;
}
+ derived->first_select()->linkage= DERIVED_TABLE_TYPE;
with_elem->inc_references();
return false;
}
diff --git a/sql/sql_db.cc b/sql/sql_db.cc
index f91b92b1d4f..7860fa6d550 100644
--- a/sql/sql_db.cc
+++ b/sql/sql_db.cc
@@ -1006,7 +1006,7 @@ update_binlog:
These DDL methods and logging are protected with the exclusive
metadata lock on the schema.
*/
- if (write_to_binlog(thd, query, query_pos -1 - query, db, db_len))
+ if (write_to_binlog(thd, query, (uint)(query_pos -1 - query), db, db_len))
{
error= true;
goto exit;
@@ -1024,7 +1024,7 @@ update_binlog:
These DDL methods and logging are protected with the exclusive
metadata lock on the schema.
*/
- if (write_to_binlog(thd, query, query_pos -1 - query, db, db_len))
+ if (write_to_binlog(thd, query, (uint)(query_pos -1 - query), db, db_len))
{
error= true;
goto exit;
diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc
index 41741f3dcc7..e6ef8a4be9f 100644
--- a/sql/sql_join_cache.cc
+++ b/sql/sql_join_cache.cc
@@ -406,7 +406,7 @@ void JOIN_CACHE::create_flag_fields()
}
/* Theoretically the new value of flag_fields can be less than the old one */
- flag_fields= copy-field_descr;
+ flag_fields= (uint)(copy-field_descr);
}
@@ -1374,7 +1374,7 @@ uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
}
/* Save the offset of the field to put it later at the end of the record */
if (copy->referenced_field_no)
- copy->offset= cp-curr_rec_pos;
+ copy->offset= (uint)(cp-curr_rec_pos);
switch (copy->type) {
case CACHE_BLOB:
@@ -1778,7 +1778,7 @@ uint JOIN_CACHE::read_flag_fields()
memcpy(copy->str, pos, copy->length);
pos+= copy->length;
}
- return (pos-init_pos);
+ return (uint)(pos-init_pos);
}
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index a479fc9fe30..5f198e40254 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -2392,6 +2392,30 @@ st_select_lex_node *st_select_lex_node:: insert_chain_before(
return this;
}
+
+/*
+ Detach the node from its master and attach it to a new master
+*/
+
+void st_select_lex_node::move_as_slave(st_select_lex_node *new_master)
+{
+ exclude_from_tree();
+ if (new_master->slave)
+ {
+ st_select_lex_node *curr= new_master->slave;
+ for ( ; curr->next ; curr= curr->next) ;
+ prev= &curr->next;
+ }
+ else
+ {
+ prev= &new_master->slave;
+ new_master->slave= this;
+ }
+ next= 0;
+ master= new_master;
+}
+
+
/*
Exclude a node from the tree lex structure, but leave it in the global
list of nodes.
@@ -4521,7 +4545,8 @@ void st_select_lex::set_explain_type(bool on_the_fly)
pos_in_table_list=NULL for e.g. post-join aggregation JOIN_TABs.
*/
if (tab->table && tab->table->pos_in_table_list &&
- tab->table->pos_in_table_list->with)
+ tab->table->pos_in_table_list->with &&
+ tab->table->pos_in_table_list->with->is_recursive)
{
uses_cte= true;
break;
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 9eeb9652022..3f02158b9b1 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -631,6 +631,7 @@ public:
}
st_select_lex_node *insert_chain_before(st_select_lex_node **ptr_pos_to_insert,
st_select_lex_node *end_chain_node);
+ void move_as_slave(st_select_lex_node *new_master);
friend class st_select_lex_unit;
friend bool mysql_new_select(LEX *lex, bool move_down, SELECT_LEX *sel);
friend bool mysql_make_view(THD *thd, TABLE_SHARE *share, TABLE_LIST *table,
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index fc9a1eab758..de4bfd0436b 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -5245,7 +5245,6 @@ end_with_restore_list:
if (res)
break;
- WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
switch (lex->sql_command) {
case SQLCOM_CREATE_EVENT:
{
@@ -5279,7 +5278,6 @@ end_with_restore_list:
&lex->spname->m_name);
break;
case SQLCOM_DROP_EVENT:
- WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (!(res= Events::drop_event(thd,
&lex->spname->m_db, &lex->spname->m_name,
lex->if_exists())))
@@ -6019,7 +6017,6 @@ end_with_restore_list:
Note: SQLCOM_CREATE_VIEW also handles 'ALTER VIEW' commands
as specified through the thd->lex->create_view->mode flag.
*/
- WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= mysql_create_view(thd, first_table, thd->lex->create_view->mode);
break;
}
@@ -6035,7 +6032,6 @@ end_with_restore_list:
case SQLCOM_CREATE_TRIGGER:
{
/* Conditionally writes to binlog. */
- WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= mysql_create_or_drop_trigger(thd, all_tables, 1);
break;
@@ -6043,7 +6039,6 @@ end_with_restore_list:
case SQLCOM_DROP_TRIGGER:
{
/* Conditionally writes to binlog. */
- WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= mysql_create_or_drop_trigger(thd, all_tables, 0);
break;
}
@@ -6108,13 +6103,11 @@ end_with_restore_list:
my_ok(thd);
break;
case SQLCOM_INSTALL_PLUGIN:
- WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (! (res= mysql_install_plugin(thd, &thd->lex->comment,
&thd->lex->ident)))
my_ok(thd);
break;
case SQLCOM_UNINSTALL_PLUGIN:
- WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (! (res= mysql_uninstall_plugin(thd, &thd->lex->comment,
&thd->lex->ident)))
my_ok(thd);
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
index 4d25de0f299..89ac8559a62 100644
--- a/sql/sql_plugin.cc
+++ b/sql/sql_plugin.cc
@@ -2128,12 +2128,16 @@ bool mysql_install_plugin(THD *thd, const LEX_CSTRING *name,
bool error;
int argc=orig_argc;
char **argv=orig_argv;
+ unsigned long event_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE] =
+ { MYSQL_AUDIT_GENERAL_CLASSMASK };
DBUG_ENTER("mysql_install_plugin");
tables.init_one_table("mysql", 5, "plugin", 6, "plugin", TL_WRITE);
if (!opt_noacl && check_table_access(thd, INSERT_ACL, &tables, FALSE, 1, FALSE))
DBUG_RETURN(TRUE);
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
+
/* need to open before acquiring LOCK_plugin or it will deadlock */
if (! (table = open_ltable(thd, &tables, TL_WRITE,
MYSQL_LOCK_IGNORE_TIMEOUT)))
@@ -2166,8 +2170,6 @@ bool mysql_install_plugin(THD *thd, const LEX_CSTRING *name,
See also mysql_uninstall_plugin() and initialize_audit_plugin()
*/
- unsigned long event_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE] =
- { MYSQL_AUDIT_GENERAL_CLASSMASK };
if (mysql_audit_general_enabled())
mysql_audit_acquire_plugins(thd, event_class_mask);
@@ -2199,6 +2201,10 @@ err:
if (argv)
free_defaults(argv);
DBUG_RETURN(error);
+#ifdef WITH_WSREP
+error:
+ DBUG_RETURN(TRUE);
+#endif /* WITH_WSREP */
}
@@ -2265,6 +2271,8 @@ bool mysql_uninstall_plugin(THD *thd, const LEX_CSTRING *name,
TABLE_LIST tables;
LEX_CSTRING dl= *dl_arg;
bool error= false;
+ unsigned long event_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE] =
+ { MYSQL_AUDIT_GENERAL_CLASSMASK };
DBUG_ENTER("mysql_uninstall_plugin");
tables.init_one_table("mysql", 5, "plugin", 6, "plugin", TL_WRITE);
@@ -2272,6 +2280,8 @@ bool mysql_uninstall_plugin(THD *thd, const LEX_CSTRING *name,
if (!opt_noacl && check_table_access(thd, DELETE_ACL, &tables, FALSE, 1, FALSE))
DBUG_RETURN(TRUE);
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
+
/* need to open before acquiring LOCK_plugin or it will deadlock */
if (! (table= open_ltable(thd, &tables, TL_WRITE, MYSQL_LOCK_IGNORE_TIMEOUT)))
DBUG_RETURN(TRUE);
@@ -2297,8 +2307,6 @@ bool mysql_uninstall_plugin(THD *thd, const LEX_CSTRING *name,
See also mysql_install_plugin() and initialize_audit_plugin()
*/
- unsigned long event_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE] =
- { MYSQL_AUDIT_GENERAL_CLASSMASK };
if (mysql_audit_general_enabled())
mysql_audit_acquire_plugins(thd, event_class_mask);
@@ -2329,6 +2337,10 @@ bool mysql_uninstall_plugin(THD *thd, const LEX_CSTRING *name,
mysql_mutex_unlock(&LOCK_plugin);
DBUG_RETURN(error);
+#ifdef WITH_WSREP
+error:
+ DBUG_RETURN(TRUE);
+#endif /* WITH_WSREP */
}
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 1f802b7de83..c7663e26750 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -2680,7 +2680,7 @@ static int send_events(binlog_send_info *info, IO_CACHE* log, LOG_INFO* linfo,
Gtid_list_log_event glev(&info->until_binlog_state, 0);
if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg) ||
- fake_gtid_list_event(info, &glev, &info->errmsg, my_b_tell(log)))
+ fake_gtid_list_event(info, &glev, &info->errmsg, (uint32)my_b_tell(log)))
{
info->error= ER_UNKNOWN_ERROR;
return 1;
@@ -2690,7 +2690,7 @@ static int send_events(binlog_send_info *info, IO_CACHE* log, LOG_INFO* linfo,
if (info->until_gtid_state &&
is_until_reached(info, &ev_offset, event_type, &info->errmsg,
- my_b_tell(log)))
+ (uint32)my_b_tell(log)))
{
if (info->errmsg)
{
@@ -2745,7 +2745,7 @@ static int send_one_binlog_file(binlog_send_info *info,
if (end_pos <= 1)
{
/** end of file or error */
- return end_pos;
+ return (int)end_pos;
}
/**
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index e4ab9c0b405..90bd191a79b 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -6142,7 +6142,7 @@ double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint,
{
TABLE *table= s->table;
double sel= table->cond_selectivity;
- double table_records= table->stat_records();
+ double table_records= (double)table->stat_records();
dbl_records= table_records * sel;
return dbl_records;
}
@@ -6168,7 +6168,7 @@ double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint,
if (s->table->quick_condition_rows != s->found_records)
records= s->table->quick_condition_rows;
- dbl_records= records;
+ dbl_records= (double)records;
return dbl_records;
}
@@ -6859,7 +6859,7 @@ static void choose_initial_table_order(JOIN *join)
if ((emb_subq= get_emb_subq(*tab)))
break;
}
- uint n_subquery_tabs= tabs_end - tab;
+ uint n_subquery_tabs= (uint)(tabs_end - tab);
if (!n_subquery_tabs)
DBUG_VOID_RETURN;
@@ -6887,7 +6887,7 @@ static void choose_initial_table_order(JOIN *join)
last_tab_for_subq < subq_tabs_end &&
get_emb_subq(*last_tab_for_subq) == cur_subq_nest;
last_tab_for_subq++) {}
- uint n_subquery_tables= last_tab_for_subq - subq_tab;
+ uint n_subquery_tables= (uint)(last_tab_for_subq - subq_tab);
/*
Walk the original array and find where this subquery would have been
@@ -6905,7 +6905,7 @@ static void choose_initial_table_order(JOIN *join)
if (!need_tables)
{
/* Move away the top-level tables that are after top_level_tab */
- uint top_tail_len= last_top_level_tab - top_level_tab - 1;
+ size_t top_tail_len= last_top_level_tab - top_level_tab - 1;
memmove(top_level_tab + 1 + n_subquery_tables, top_level_tab + 1,
sizeof(JOIN_TAB*)*top_tail_len);
last_top_level_tab += n_subquery_tables;
@@ -7651,7 +7651,7 @@ double JOIN::get_examined_rows()
JOIN_TAB *tab= first_breadth_first_tab();
JOIN_TAB *prev_tab= tab;
- examined_rows= tab->get_examined_rows();
+ examined_rows= (double)tab->get_examined_rows();
while ((tab= next_breadth_first_tab(first_breadth_first_tab(),
top_join_tab_count, tab)))
@@ -7949,7 +7949,7 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
}
if (keyparts > 1)
{
- ref_keyuse_steps[keyparts-2]= keyuse - prev_ref_keyuse;
+ ref_keyuse_steps[keyparts-2]= (uint16)(keyuse - prev_ref_keyuse);
prev_ref_keyuse= keyuse;
}
}
@@ -9311,8 +9311,8 @@ bool JOIN::get_best_combination()
j= j->bush_root_tab;
}
- top_join_tab_count= join_tab_ranges.head()->end -
- join_tab_ranges.head()->start;
+ top_join_tab_count= (uint)(join_tab_ranges.head()->end -
+ join_tab_ranges.head()->start);
update_depend_map(this);
DBUG_RETURN(0);
@@ -10888,7 +10888,7 @@ static uint make_join_orderinfo(JOIN *join)
if (join->need_tmp)
return join->table_count;
tab= join->get_sort_by_join_tab();
- return tab ? tab-join->join_tab : join->table_count;
+ return tab ? (uint)(tab-join->join_tab) : join->table_count;
}
/*
@@ -11905,8 +11905,8 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
str.append(" final_pushdown_cond");
print_where(tab->select_cond, str.c_ptr_safe(), QT_ORDINARY););
}
- uint n_top_tables= join->join_tab_ranges.head()->end -
- join->join_tab_ranges.head()->start;
+ uint n_top_tables= (uint)(join->join_tab_ranges.head()->end -
+ join->join_tab_ranges.head()->start);
join->join_tab[n_top_tables - 1].next_select=0; /* Set by do_select */
@@ -12130,7 +12130,7 @@ ha_rows JOIN_TAB::get_examined_rows()
SQL_SELECT *sel= filesort? filesort->select : this->select;
if (sel && sel->quick && use_quick != 2)
- examined_rows= sel->quick->records;
+ examined_rows= (double)sel->quick->records;
else if (type == JT_NEXT || type == JT_ALL ||
type == JT_HASH || type ==JT_HASH_NEXT)
{
@@ -12140,19 +12140,19 @@ ha_rows JOIN_TAB::get_examined_rows()
@todo This estimate is wrong, a LIMIT query may examine much more rows
than the LIMIT itself.
*/
- examined_rows= limit;
+ examined_rows= (double)limit;
}
else
{
if (table->is_filled_at_execution())
- examined_rows= records;
+ examined_rows= (double)records;
else
{
/*
handler->info(HA_STATUS_VARIABLE) has been called in
make_join_statistics()
*/
- examined_rows= table->stat_records();
+ examined_rows= (double)table->stat_records();
}
}
}
@@ -13981,7 +13981,7 @@ static int compare_fields_by_table_order(Item *field1,
tab2= tab2->bush_root_tab;
}
- cmp= tab1 - tab2;
+ cmp= (int)(tab1 - tab2);
if (!cmp)
{
@@ -17145,7 +17145,7 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields,
share->default_values= table->record[1]+alloc_length;
}
copy_func[0]=0; // End marker
- param->func_count= copy_func - param->items_to_copy;
+ param->func_count= (uint)(copy_func - param->items_to_copy);
setup_tmp_table_column_bitmaps(table, bitmaps);
@@ -17829,7 +17829,7 @@ bool create_internal_tmp_table(TABLE *table, KEY *keyinfo,
Emulate behaviour by making column not-nullable when creating the
table.
*/
- uint cols= (*recinfo-start_recinfo);
+ uint cols= (uint)(*recinfo-start_recinfo);
start_recinfo[cols-1].null_bit= 0;
}
}
@@ -21047,7 +21047,7 @@ static int test_if_order_by_key(JOIN *join,
(1) this is an extended key
(2) we've reached its end
*/
- key_parts= (key_part - table->key_info[idx].key_part);
+ key_parts= (uint)(key_part - table->key_info[idx].key_part);
if (have_pk_suffix &&
reverse == 0 && // all were =const so far
key_parts == table->key_info[idx].ext_key_parts &&
@@ -24688,7 +24688,7 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta,
}
else
{
- double examined_rows= get_examined_rows();
+ double examined_rows= (double)get_examined_rows();
eta->rows_set= true;
eta->rows= (ha_rows) examined_rows;
@@ -26073,8 +26073,8 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
Start from quick select's rows and cost. These are always cheaper than
full index scan/cost.
*/
- double best_rows= table->quick_rows[keynr];
- double best_cost= table->quick_costs[keynr];
+ double best_rows= (double)table->quick_rows[keynr];
+ double best_cost= (double)table->quick_costs[keynr];
/*
Check if ref(const) access was possible on this index.
@@ -26108,7 +26108,7 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
if (ref_rows > 0)
{
- double tmp= ref_rows;
+ double tmp= (double)ref_rows;
/* Reuse the cost formula from best_access_path: */
set_if_smaller(tmp, (double) tab->join->thd->variables.max_seeks_for_key);
if (table->covering_keys.is_set(keynr))
@@ -26119,7 +26119,7 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
if (tmp < best_cost)
{
best_cost= tmp;
- best_rows= ref_rows;
+ best_rows= (double)ref_rows;
}
}
}
@@ -26232,7 +26232,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
if (join)
{
- uint tablenr= tab - join->join_tab;
+ uint tablenr= (uint)(tab - join->join_tab);
read_time= join->best_positions[tablenr].read_time;
for (uint i= tablenr+1; i < join->table_count; i++)
fanout*= join->best_positions[i].records_read; // fanout is always >= 1
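The sql_select.cc changes above all follow one pattern: the difference of two pointers (into the join_tab, key_part or items_to_copy arrays) has type ptrdiff_t, and a row-count value has type ha_rows, so assigning either to a 32-bit uint/int or to a double narrows the value and draws compiler warnings unless the conversion is spelled out. A minimal standalone sketch of the pattern, with hypothetical names not taken from the server sources:

#include <cstddef>
#include <cstdio>

struct JoinTab { int dummy; };

// Hypothetical helper showing the cast pattern used in the hunks above:
// the pointer difference is ptrdiff_t (usually 64-bit), the caller wants
// a table index as a 32-bit unsigned value, so the narrowing is explicit.
static unsigned table_index(const JoinTab *tab, const JoinTab *base)
{
  return (unsigned)(tab - base);
}

int main()
{
  JoinTab tabs[4];
  std::printf("%u\n", table_index(&tabs[2], &tabs[0]));  // prints 2
  return 0;
}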
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index 4dcc519029c..116af627adc 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -3460,7 +3460,7 @@ static bool show_status_array(THD *thd, const char *wild,
prefix_end=strnmov(name_buffer, prefix, sizeof(name_buffer)-1);
if (*prefix)
*prefix_end++= '_';
- len=name_buffer + sizeof(name_buffer) - prefix_end;
+ len=(int)(name_buffer + sizeof(name_buffer) - prefix_end);
#ifdef WITH_WSREP
bool is_wsrep_var= FALSE;
@@ -3803,6 +3803,15 @@ bool uses_only_table_name_fields(Item *item, TABLE_LIST *table)
return 0;
}
}
+ else if (item->type() == Item::ROW_ITEM)
+ {
+ Item_row *item_row= static_cast<Item_row*>(item);
+ for (uint i= 0; i < item_row->cols(); i++)
+ {
+ if (!uses_only_table_name_fields(item_row->element_index(i), table))
+ return 0;
+ }
+ }
else if (item->type() == Item::FIELD_ITEM)
{
Item_field *item_field= (Item_field*)item;
@@ -3822,6 +3831,11 @@ bool uses_only_table_name_fields(Item *item, TABLE_LIST *table)
item_field->field_name.length)))
return 0;
}
+ else if (item->type() == Item::EXPR_CACHE_ITEM)
+ {
+ Item_cache_wrapper *tmp= static_cast<Item_cache_wrapper*>(item);
+ return uses_only_table_name_fields(tmp->get_orig_item(), table);
+ }
else if (item->type() == Item::REF_ITEM)
return uses_only_table_name_fields(item->real_item(), table);
@@ -5435,7 +5449,7 @@ static void store_column_type(TABLE *table, Field *field, CHARSET_INFO *cs,
*/
tmp_buff= strchr(column_type.c_ptr_safe(), ' ');
table->field[offset]->store(column_type.ptr(),
- (tmp_buff ? tmp_buff - column_type.ptr() :
+ (tmp_buff ? (uint)(tmp_buff - column_type.ptr()) :
column_type.length()), cs);
is_blob= (field->type() == MYSQL_TYPE_BLOB);
@@ -6405,7 +6419,7 @@ static int get_schema_views_record(THD *thd, TABLE_LIST *tables,
table->field[5]->store(STRING_WITH_LEN("NO"), cs);
}
- definer_len= (strxmov(definer, tables->definer.user.str, "@",
+ definer_len= (uint)(strxmov(definer, tables->definer.user.str, "@",
tables->definer.host.str, NullS) - definer);
table->field[6]->store(definer, definer_len, cs);
if (tables->view_suid)
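The two new branches in uses_only_table_name_fields() extend its recursive walk: a ROW_ITEM passes each of its elements back through the same check, and an EXPR_CACHE_ITEM is checked through the item it wraps. A self-contained sketch of that recursion shape over a simplified expression tree; the classes below are stand-ins for illustration, not the server's Item hierarchy:

#include <memory>
#include <string>
#include <vector>

struct Item { virtual ~Item() = default; };
struct FieldItem : Item {
  std::string field_name;
  explicit FieldItem(std::string n) : field_name(std::move(n)) {}
};
struct RowItem : Item { std::vector<std::unique_ptr<Item>> elements; };
struct CacheWrapper : Item { std::unique_ptr<Item> orig; };

// Returns false as soon as any leaf references a field outside the allowed
// set, mirroring how the server function recurses into row items and cache
// wrappers before accepting or rejecting the whole condition.
static bool uses_only_allowed_fields(const Item *item)
{
  if (auto *f = dynamic_cast<const FieldItem *>(item))
    return f->field_name == "table_name" || f->field_name == "table_schema";
  if (auto *r = dynamic_cast<const RowItem *>(item)) {
    for (const auto &e : r->elements)
      if (!uses_only_allowed_fields(e.get()))
        return false;
    return true;
  }
  if (auto *c = dynamic_cast<const CacheWrapper *>(item))
    return uses_only_allowed_fields(c->orig.get());
  return true;
}

int main()
{
  auto row = std::make_unique<RowItem>();
  row->elements.push_back(std::make_unique<FieldItem>("table_name"));
  row->elements.push_back(std::make_unique<FieldItem>("engine"));
  // The "engine" leaf is outside the allowed set, so the row is rejected.
  return uses_only_allowed_fields(row.get()) ? 0 : 1;
}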
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 224b7541dee..e1465d47f72 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -843,7 +843,7 @@ public:
else
{
stat_field->set_notnull();
- stat_field->store(table->collected_stats->cardinality);
+ stat_field->store(table->collected_stats->cardinality,true);
}
}
@@ -1054,7 +1054,7 @@ public:
switch (i) {
case COLUMN_STAT_MIN_VALUE:
if (table_field->type() == MYSQL_TYPE_BIT)
- stat_field->store(table_field->collected_stats->min_value->val_int());
+ stat_field->store(table_field->collected_stats->min_value->val_int(),true);
else
{
table_field->collected_stats->min_value->val_str(&val);
@@ -1063,7 +1063,7 @@ public:
break;
case COLUMN_STAT_MAX_VALUE:
if (table_field->type() == MYSQL_TYPE_BIT)
- stat_field->store(table_field->collected_stats->max_value->val_int());
+ stat_field->store(table_field->collected_stats->max_value->val_int(),true);
else
{
table_field->collected_stats->max_value->val_str(&val);
@@ -1630,7 +1630,7 @@ public:
of the parameters to be passed to the constructor of the Unique object.
*/
- Count_distinct_field(Field *field, uint max_heap_table_size)
+ Count_distinct_field(Field *field, size_t max_heap_table_size)
{
table_field= field;
tree_key_length= field->pack_length();
@@ -1728,7 +1728,7 @@ class Count_distinct_field_bit: public Count_distinct_field
{
public:
- Count_distinct_field_bit(Field *field, uint max_heap_table_size)
+ Count_distinct_field_bit(Field *field, size_t max_heap_table_size)
{
table_field= field;
tree_key_length= sizeof(ulonglong);
@@ -1824,7 +1824,7 @@ public:
if ((calc_state=
(Prefix_calc_state *) thd->alloc(sizeof(Prefix_calc_state)*key_parts)))
{
- uint keyno= key_info-table->key_info;
+ uint keyno= (uint)(key_info-table->key_info);
for (i= 0, state= calc_state; i < key_parts; i++, state++)
{
/*
@@ -2438,7 +2438,7 @@ int alloc_histograms_for_table_share(THD* thd, TABLE_SHARE *table_share,
inline
void Column_statistics_collected::init(THD *thd, Field *table_field)
{
- uint max_heap_table_size= thd->variables.max_heap_table_size;
+ size_t max_heap_table_size= (size_t)thd->variables.max_heap_table_size;
TABLE *table= table_field->table;
uint pk= table->s->primary_key;
@@ -3719,14 +3719,14 @@ double get_column_avg_frequency(Field * field)
*/
if (!table->s->field)
{
- res= table->stat_records();
+ res= (double)table->stat_records();
return res;
}
Column_statistics *col_stats= field->read_stats;
if (!col_stats)
- res= table->stat_records();
+ res= (double)table->stat_records();
else
res= col_stats->get_avg_frequency();
return res;
@@ -3765,7 +3765,7 @@ double get_column_range_cardinality(Field *field,
double res;
TABLE *table= field->table;
Column_statistics *col_stats= field->read_stats;
- double tab_records= table->stat_records();
+ double tab_records= (double)table->stat_records();
if (!col_stats)
return tab_records;
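The store(...,true) changes in sql_statistics.cc pass the collected values with the unsigned flag set: cardinality and the BIT min/max values are unsigned 64-bit quantities, and without the flag a value above LLONG_MAX would travel through the signed path and come out negative. A small standalone illustration of the difference, in plain C++ rather than the server's Field API:

#include <cstdio>

int main()
{
  unsigned long long cardinality = 18446744073709551615ULL;  // > LLONG_MAX

  // What effectively happens without the unsigned flag: the value is
  // interpreted through a signed 64-bit path and prints as negative.
  long long as_signed = (long long)cardinality;
  std::printf("signed path:   %lld\n", as_signed);     // -1

  // With the unsigned flag the full range is preserved.
  std::printf("unsigned path: %llu\n", cardinality);   // 18446744073709551615
  return 0;
}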
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index cfc571b22ef..98453b15586 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -230,7 +230,7 @@ uint explain_filename(THD* thd,
{
db_name= table_name;
/* calculate the length */
- db_name_len= tmp_p - db_name;
+ db_name_len= (int)(tmp_p - db_name);
tmp_p++;
table_name= tmp_p;
}
@@ -252,7 +252,7 @@ uint explain_filename(THD* thd,
case 's':
if ((tmp_p[1] == 'P' || tmp_p[1] == 'p') && tmp_p[2] == '#')
{
- part_name_len= tmp_p - part_name - 1;
+ part_name_len= (int)(tmp_p - part_name - 1);
subpart_name= tmp_p + 3;
tmp_p+= 3;
}
@@ -284,7 +284,7 @@ uint explain_filename(THD* thd,
}
if (part_name)
{
- table_name_len= part_name - table_name - 3;
+ table_name_len= (int)(part_name - table_name - 3);
if (subpart_name)
subpart_name_len= strlen(subpart_name);
else
@@ -357,7 +357,7 @@ uint explain_filename(THD* thd,
to_p= strnmov(to_p, " */", end_p - to_p);
}
DBUG_PRINT("exit", ("to '%s'", to));
- DBUG_RETURN(to_p - to);
+ DBUG_RETURN((uint)(to_p - to));
}
@@ -553,7 +553,7 @@ uint build_table_filename(char *buff, size_t bufflen, const char *db,
pos= strxnmov(pos, end - pos, tbbuff, ext, NullS);
DBUG_PRINT("exit", ("buff: '%s'", buff));
- DBUG_RETURN(pos - buff);
+ DBUG_RETURN((uint)(pos - buff));
}
@@ -2134,7 +2134,7 @@ static uint32 comment_length(THD *thd, uint32 comment_pos,
for (query+= 3; query < query_end; query++)
{
if (query[-1] == '*' && query[0] == '/')
- return (char*) query - *comment_start + 1;
+ return (uint32)((char*) query - *comment_start + 1);
}
return 0;
}
@@ -2724,7 +2724,7 @@ bool quick_rm_table(THD *thd, handlerton *base, const char *db,
bool error= 0;
DBUG_ENTER("quick_rm_table");
- uint path_length= table_path ?
+ size_t path_length= table_path ?
(strxnmov(path, sizeof(path) - 1, table_path, reg_ext, NullS) - path) :
build_table_filename(path, sizeof(path)-1, db, table_name, reg_ext, flags);
if (mysql_file_delete(key_file_frm, path, MYF(0)))
@@ -6550,7 +6550,7 @@ static bool fill_alter_inplace_info(THD *thd,
table_key;
ha_alter_info->index_add_buffer
[ha_alter_info->index_add_count++]=
- new_key - ha_alter_info->key_info_buffer;
+ (uint)(new_key - ha_alter_info->key_info_buffer);
/* Mark all old fields which are used in newly created index. */
DBUG_PRINT("info", ("index changed: '%s'", table_key->name));
}
@@ -6574,7 +6574,7 @@ static bool fill_alter_inplace_info(THD *thd,
/* Key not found. Add the offset of the key to the add buffer. */
ha_alter_info->index_add_buffer
[ha_alter_info->index_add_count++]=
- new_key - ha_alter_info->key_info_buffer;
+ (uint)(new_key - ha_alter_info->key_info_buffer);
DBUG_PRINT("info", ("index added: '%s'", new_key->name));
}
else
diff --git a/sql/sql_test.cc b/sql/sql_test.cc
index 1baa5c3d983..39693de80ae 100644
--- a/sql/sql_test.cc
+++ b/sql/sql_test.cc
@@ -172,7 +172,7 @@ TEST_join(JOIN *join)
in order not to garble the tabular output below.
*/
String ref_key_parts[MAX_TABLES];
- int tables_in_range= jt_range->end - jt_range->start;
+ int tables_in_range= (int)(jt_range->end - jt_range->start);
for (i= 0; i < tables_in_range; i++)
{
JOIN_TAB *tab= jt_range->start + i;
diff --git a/sql/sql_time.cc b/sql/sql_time.cc
index c8ec1fc7f6a..24aa7b1b8a6 100644
--- a/sql/sql_time.cc
+++ b/sql/sql_time.cc
@@ -274,7 +274,7 @@ to_ascii(CHARSET_INFO *cs,
*dst++= static_cast<char>(wc);
}
*dst= '\0';
- return dst - dst0;
+ return (uint)(dst - dst0);
}
diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc
index 291d55d61a2..bc452d59d24 100644
--- a/sql/sql_trigger.cc
+++ b/sql/sql_trigger.cc
@@ -453,6 +453,7 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create)
my_error(ER_BINLOG_CREATE_ROUTINE_NEED_SUPER, MYF(0));
DBUG_RETURN(TRUE);
}
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (!create)
{
@@ -616,6 +617,10 @@ end:
my_ok(thd);
DBUG_RETURN(result);
+#ifdef WITH_WSREP
+ error:
+ DBUG_RETURN(true);
+#endif /* WITH_WSREP */
}
/**
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index c6959509a08..83c2e105f07 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -80,7 +80,7 @@ bool compare_record(const TABLE *table)
{
if (field->real_maybe_null())
{
- uchar null_byte_index= field->null_ptr - table->record[0];
+ uchar null_byte_index= (uchar)(field->null_ptr - table->record[0]);
if (((table->record[0][null_byte_index]) & field->null_bit) !=
((table->record[1][null_byte_index]) & field->null_bit))
diff --git a/sql/sql_view.cc b/sql/sql_view.cc
index 75d8841d25c..32fa481395e 100644
--- a/sql/sql_view.cc
+++ b/sql/sql_view.cc
@@ -430,6 +430,8 @@ bool mysql_create_view(THD *thd, TABLE_LIST *views,
lex->link_first_table_back(view, link_to_local);
view->open_type= OT_BASE_ONLY;
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
+
if (check_dependencies_in_with_clauses(lex->with_clauses_list))
{
res= TRUE;
@@ -708,6 +710,10 @@ err:
lex->link_first_table_back(view, link_to_local);
unit->cleanup();
DBUG_RETURN(res || thd->is_error());
+#ifdef WITH_WSREP
+ error:
+ DBUG_RETURN(true);
+#endif /* WITH_WSREP */
}
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 3505d103710..005cbd5fd02 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -392,8 +392,8 @@ LEX::create_item_for_sp_var(LEX_CSTRING *name, sp_variable *spvar,
DBUG_ASSERT(spcont && spvar);
/* Position and length of the SP variable name in the query. */
- pos_in_q= start_in_q - sphead->m_tmp_query;
- len_in_q= end_in_q - start_in_q;
+ pos_in_q= (uint)(start_in_q - sphead->m_tmp_query);
+ len_in_q= (uint)(end_in_q - start_in_q);
item= new (thd->mem_root)
Item_splocal(thd, name, spvar->offset, spvar->sql_type(),
diff --git a/sql/strfunc.cc b/sql/strfunc.cc
index b09eadb098e..1c0b672fbcc 100644
--- a/sql/strfunc.cc
+++ b/sql/strfunc.cc
@@ -339,7 +339,7 @@ int find_string_in_array(LEX_CSTRING * const haystack, LEX_CSTRING * const needl
if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length,
(uchar *) needle->str, needle->length))
{
- return (pos - haystack);
+ return (int)(pos - haystack);
}
return -1;
}
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index a399ccdd5e0..a2df923b2ad 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -1469,7 +1469,7 @@ static Sys_var_ulonglong Sys_max_heap_table_size(
"max_heap_table_size",
"Don't allow creation of heap tables bigger than this",
SESSION_VAR(max_heap_table_size), CMD_LINE(REQUIRED_ARG),
- VALID_RANGE(16384, (ulonglong)~(intptr)0), DEFAULT(16*1024*1024),
+ VALID_RANGE(16384, SIZE_T_MAX), DEFAULT(16*1024*1024),
BLOCK_SIZE(1024));
static ulong mdl_locks_cache_size;
diff --git a/sql/table.cc b/sql/table.cc
index 7131e9d4a7d..593c0eda3a8 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -2523,7 +2523,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
the correct null_bytes can now be set, since bitfields have been taken
into account
*/
- share->null_bytes= (null_pos - (uchar*) null_flags +
+ share->null_bytes= (uint)(null_pos - (uchar*) null_flags +
(null_bit_pos + 7) / 8);
share->last_null_bit_pos= null_bit_pos;
share->null_bytes_for_compare= null_bits_are_used ? share->null_bytes : 0;
@@ -6013,8 +6013,8 @@ Field_iterator_table_ref::get_or_create_column_ref(THD *thd, TABLE_LIST *parent_
/* The field belongs to a merge view or information schema table. */
Field_translator *translated_field= view_field_it.field_translator();
nj_col= new Natural_join_column(translated_field, table_ref);
- field_count= table_ref->field_translation_end -
- table_ref->field_translation;
+ field_count= (uint)(table_ref->field_translation_end -
+ table_ref->field_translation);
}
else
{
diff --git a/sql/unireg.cc b/sql/unireg.cc
index 5d5b82ba015..b49c3cfbb09 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -981,13 +981,18 @@ static bool make_empty_rec(THD *thd, uchar *buff, uint table_options,
null_count+= field->length & 7;
if (field->default_value && !field->default_value->flags &&
- !(field->flags & BLOB_FLAG))
+ (!(field->flags & BLOB_FLAG) ||
+ field->real_field_type() == MYSQL_TYPE_GEOMETRY))
{
Item *expr= field->default_value->expr;
+
int res= !expr->fixed && // may be already fixed if ALTER TABLE
expr->fix_fields(thd, &expr);
if (!res)
res= expr->save_in_field(regfield, 1);
+ if (!res && (field->flags & BLOB_FLAG))
+ regfield->reset();
+
/* If not ok or warning of level 'note' */
if (res != 0 && res != 3)
{
@@ -996,6 +1001,7 @@ static bool make_empty_rec(THD *thd, uchar *buff, uint table_options,
delete regfield; //To avoid memory leak
goto err;
}
+ delete regfield; //To avoid memory leak
}
else if (regfield->real_type() == MYSQL_TYPE_ENUM &&
(field->flags & NOT_NULL_FLAG))
diff --git a/storage/connect/csort.cpp b/storage/connect/csort.cpp
index 13f325d8f3f..670131b8fd2 100644
--- a/storage/connect/csort.cpp
+++ b/storage/connect/csort.cpp
@@ -351,7 +351,7 @@ void CSORT::Qstx(int *base, int *max)
zlo = zhi = cnm = 0; // Avoid warning message
- lo = max - base; // Number of elements as longs
+ lo = (int)(max - base); // Number of elements as longs
if (Dup)
cnm = Cmpnum(lo);
@@ -472,7 +472,7 @@ void CSORT::Qstx(int *base, int *max)
i = him + 1;
if (Pof)
- Pof[him - Pex] = Pof[mid - Pex] = i - j;
+ Pof[him - Pex] = Pof[mid - Pex] = (int)(i - j);
/*******************************************************************/
/* Look at sizes of the two partitions, do the smaller one first */
@@ -481,8 +481,8 @@ void CSORT::Qstx(int *base, int *max)
/* But only repeat (recursively or by branching) if the partition */
/* is of at least size THRESH. */
/*******************************************************************/
- lo = j - base;
- hi = max - i;
+ lo = (int)(j - base);
+ hi = (int)(max - i);
if (Dup) { // Update progress information
zlo = Cmpnum(lo);
@@ -726,7 +726,7 @@ void CSORT::Qstc(int *base, int *max)
zlo = zhi = cnm = 0; // Avoid warning message
- lo = max - base; // Number of elements as longs
+ lo = (int)(max - base); // Number of elements as longs
if (Dup)
cnm = Cmpnum(lo);
@@ -853,7 +853,7 @@ void CSORT::Qstc(int *base, int *max)
/* the offset array values indicating break point and block size. */
/*******************************************************************/
if (Pof)
- Pof[lt - Pex] = Pof[(jj - 1) - Pex] = jj - lt;
+ Pof[lt - Pex] = Pof[(jj - 1) - Pex] = (int)(jj - lt);
/*******************************************************************/
/* Look at sizes of the two partitions, do the smaller one first */
@@ -862,8 +862,8 @@ void CSORT::Qstc(int *base, int *max)
/* But only repeat (recursively or by branching) if the partition */
/* is of at least size THRESH. */
/*******************************************************************/
- lo = lt - base;
- hi = gt - Swix;
+ lo = (int)(lt - base);
+ hi = (int)(gt - Swix);
if (Dup) { // Update progress information
zlo = Cmpnum(lo);
diff --git a/storage/connect/domdoc.cpp b/storage/connect/domdoc.cpp
index e24e10835c1..ba8eb829abd 100644
--- a/storage/connect/domdoc.cpp
+++ b/storage/connect/domdoc.cpp
@@ -13,6 +13,7 @@
#elif defined(MSX4)
#import "msxml4.dll" //Causes error C2872: DOMNodeType: ambiguous symbol ??
#elif defined(MSX6)
+#pragma warning(suppress : 4192)
#import "msxml6.dll" //Causes error C2872: DOMNodeType: ambiguous symbol ??
#else // MSX4
#error MSX? is not defined
@@ -540,7 +541,7 @@ PXNODE DOMNODE::AddChildNode(PGLOBAL g, PCSZ name, PXNODE np)
// If name has the format m[n] only m is taken as node name
if ((p = strchr(name, '[')))
- pn = BufAlloc(g, name, p - name);
+ pn = BufAlloc(g, name, (int)(p - name));
else
pn = name;
diff --git a/storage/connect/filamap.cpp b/storage/connect/filamap.cpp
index 84dff422db7..67481136d81 100644
--- a/storage/connect/filamap.cpp
+++ b/storage/connect/filamap.cpp
@@ -247,7 +247,7 @@ int MAPFAM::GetRowID(void)
/***********************************************************************/
int MAPFAM::GetPos(void)
{
- return Fpos - Memory;
+ return (int)(Fpos - Memory);
} // end of GetPos
/***********************************************************************/
@@ -255,7 +255,7 @@ int MAPFAM::GetPos(void)
/***********************************************************************/
int MAPFAM::GetNextPos(void)
{
- return Mempos - Memory;
+ return (int)(Mempos - Memory);
} // end of GetNextPos
/***********************************************************************/
@@ -368,7 +368,7 @@ int MAPFAM::ReadBuffer(PGLOBAL g)
} // endif Mempos
// Set caller line buffer
- len = (Mempos - Fpos) - n;
+ len = (int)(Mempos - Fpos) - n;
// Don't rely on ENDING setting
if (len > 0 && *(Mempos - 2) == '\r')
@@ -428,7 +428,7 @@ int MAPFAM::DeleteRecords(PGLOBAL g, int irc)
/* not required here, just setting of future Spos and Tpos. */
/*******************************************************************/
Tpos = Spos = Fpos;
- } else if ((n = Fpos - Spos) > 0) {
+ } else if ((n = (int)(Fpos - Spos)) > 0) {
/*******************************************************************/
/* Non consecutive line to delete. Move intermediate lines. */
/*******************************************************************/
@@ -461,7 +461,7 @@ int MAPFAM::DeleteRecords(PGLOBAL g, int irc)
/*****************************************************************/
/* Remove extra records. */
/*****************************************************************/
- n = Tpos - Memory;
+ n = (int)(Tpos - Memory);
#if defined(__WIN__)
DWORD drc = SetFilePointer(fp->Handle, n, NULL, FILE_BEGIN);
@@ -627,7 +627,7 @@ int MBKFAM::ReadBuffer(PGLOBAL g)
break;
// Set caller line buffer
- len = (Mempos - Fpos) - Ending;
+ len = (int)(Mempos - Fpos) - Ending;
memcpy(Tdbp->GetLine(), Fpos, len);
Tdbp->GetLine()[len] = '\0';
return RC_OK;
diff --git a/storage/connect/filamgz.cpp b/storage/connect/filamgz.cpp
index df366ef15f9..3078935e8a4 100644
--- a/storage/connect/filamgz.cpp
+++ b/storage/connect/filamgz.cpp
@@ -537,7 +537,7 @@ int ZBKFAM::ReadBuffer(PGLOBAL g)
while (*NxtLine++ != '\n') ;
// Set caller line buffer
- n = NxtLine - CurLine - Ending;
+ n = (int)(NxtLine - CurLine - Ending);
memcpy(Tdbp->GetLine(), CurLine, n);
Tdbp->GetLine()[n] = '\0';
return RC_OK;
@@ -588,7 +588,7 @@ int ZBKFAM::ReadBuffer(PGLOBAL g)
for (NxtLine = CurLine; *NxtLine++ != '\n';) ;
// Set caller line buffer
- n = NxtLine - CurLine - Ending;
+ n = (int)(NxtLine - CurLine - Ending);
memcpy(Tdbp->GetLine(), CurLine, n);
Tdbp->GetLine()[n] = '\0';
Rbuf = (CurBlk == Block - 1) ? Last : Nrec;
@@ -1087,7 +1087,7 @@ bool ZLBFAM::SetPos(PGLOBAL g, int pos __attribute__((unused)))
/***********************************************************************/
int ZLBFAM::ReadBuffer(PGLOBAL g)
{
- int n;
+ size_t n;
void *rdbuf;
/*********************************************************************/
@@ -1299,7 +1299,7 @@ int ZLBFAM::WriteBuffer(PGLOBAL g)
else
NxtLine = CurLine + Lrecl;
- BlkLen = NxtLine - To_Buf;
+ BlkLen = (int)(NxtLine - To_Buf);
if (WriteCompressedBuffer(g)) {
Closing = TRUE; // To tell CloseDB about a Write error
diff --git a/storage/connect/filamtxt.cpp b/storage/connect/filamtxt.cpp
index c456ee9e9b7..12727b66335 100644
--- a/storage/connect/filamtxt.cpp
+++ b/storage/connect/filamtxt.cpp
@@ -1351,7 +1351,7 @@ int BLKFAM::GetPos(void)
/***********************************************************************/
int BLKFAM::GetNextPos(void)
{
- return Fpos + NxtLine - CurLine;
+ return (int)(Fpos + NxtLine - CurLine);
} // end of GetNextPos
/***********************************************************************/
@@ -1396,7 +1396,8 @@ int BLKFAM::SkipRecord(PGLOBAL, bool header)
/***********************************************************************/
int BLKFAM::ReadBuffer(PGLOBAL g)
{
- int i, n, rc = RC_OK;
+ int i, rc = RC_OK;
+ size_t n;
/*********************************************************************/
/* Sequential reading when Placed is not true. */
@@ -1497,7 +1498,7 @@ int BLKFAM::ReadBuffer(PGLOBAL g)
fin:
// Store the current record file position for Delete and Update
- Fpos = BlkPos[CurBlk] + CurLine - To_Buf;
+ Fpos = (int)(BlkPos[CurBlk] + CurLine - To_Buf);
return rc;
} // end of ReadBuffer
@@ -1524,7 +1525,7 @@ int BLKFAM::WriteBuffer(PGLOBAL g)
// Now start the writing process.
NxtLine = CurLine + strlen(CurLine);
- BlkLen = NxtLine - To_Buf;
+ BlkLen = (int)(NxtLine - To_Buf);
if (fwrite(To_Buf, 1, BlkLen, Stream) != (size_t)BlkLen) {
sprintf(g->Message, MSG(FWRITE_ERROR), strerror(errno));
diff --git a/storage/connect/filamzip.cpp b/storage/connect/filamzip.cpp
index dfd9343af76..f94362a3d87 100644
--- a/storage/connect/filamzip.cpp
+++ b/storage/connect/filamzip.cpp
@@ -748,7 +748,7 @@ UNZFAM::UNZFAM(PUNZFAM txfp) : MAPFAM(txfp)
/***********************************************************************/
int UNZFAM::GetFileLength(PGLOBAL g)
{
- int len = (zutp && zutp->entryopen) ? Top - Memory
+ int len = (zutp && zutp->entryopen) ? (int)(Top - Memory)
: TXTFAM::GetFileLength(g) * 3;
if (trace)
@@ -1088,7 +1088,7 @@ int ZIPFAM::WriteBuffer(PGLOBAL g)
// Prepare to write the new line
strcat(strcpy(To_Buf, Tdbp->GetLine()), (Bin) ? CrLf : "\n");
- len = strchr(To_Buf, '\n') - To_Buf + 1;
+ len = (int)(strchr(To_Buf, '\n') - To_Buf + 1);
return zutp->writeEntry(g, To_Buf, len);
} // end of WriteBuffer
diff --git a/storage/connect/fmdlex.c b/storage/connect/fmdlex.c
index ef4f7bfc65a..4bf075acf42 100644
--- a/storage/connect/fmdlex.c
+++ b/storage/connect/fmdlex.c
@@ -283,7 +283,7 @@ static void yy_fatal_error YY_PROTO(( const char msg[] ));
*/
#define YY_DO_BEFORE_ACTION \
yytext_ptr = yy_bp; \
- yyleng = yy_cp - yy_bp; \
+ yyleng = (int)(yy_cp - yy_bp); \
yy_hold_char = *yy_cp; \
*yy_cp = '\0'; \
yy_c_buf_p = yy_cp;
@@ -695,7 +695,7 @@ case YY_STATE_EOF(dqt):
case YY_END_OF_BUFFER:
{
/* Amount of text matched not including the EOB char. */
- int yy_amount_of_matched_text = yy_cp - yytext_ptr - 1;
+ int yy_amount_of_matched_text = (int)(yy_cp - yytext_ptr - 1);
/* Undo the effects of YY_DO_BEFORE_ACTION. */
*yy_cp = yy_hold_char;
@@ -862,7 +862,7 @@ static int yy_get_next_buffer()
/* Try to read more data. */
/* First move last chars to start of buffer. */
- number_to_move = yy_c_buf_p - yytext_ptr;
+ number_to_move = (int)(yy_c_buf_p - yytext_ptr);
for ( i = 0; i < number_to_move; ++i )
*(dest++) = *(source++);
@@ -888,7 +888,7 @@ static int yy_get_next_buffer()
/* just a shorter name for the current buffer */
YY_BUFFER_STATE b = yy_current_buffer;
- int yy_c_buf_p_offset = yy_c_buf_p - b->yy_ch_buf;
+ int yy_c_buf_p_offset = (int)(yy_c_buf_p - b->yy_ch_buf);
b->yy_buf_size *= 2;
b->yy_ch_buf = (char *)
diff --git a/storage/connect/macutil.cpp b/storage/connect/macutil.cpp
index b9600bdac2e..f95f3adcc6e 100644
--- a/storage/connect/macutil.cpp
+++ b/storage/connect/macutil.cpp
@@ -230,13 +230,13 @@ bool MACINFO::GetOneInfo(PGLOBAL g, int flag, void *v, int lv)
case 11: // Description
if ((p = strstr(Curp->Description, " - Packet Scheduler Miniport"))) {
strncpy(buf, Curp->Description, p - Curp->Description);
- i = p - Curp->Description;
+ i = (int)(p - Curp->Description);
strncpy(buf, Curp->Description, i);
buf[i] = 0;
p = buf;
} else if ((p = strstr(Curp->Description,
" - Miniport d'ordonnancement de paquets"))) {
- i = p - Curp->Description;
+ i = (int)(p - Curp->Description);
strncpy(buf, Curp->Description, i);
buf[i] = 0;
p = buf;
diff --git a/storage/connect/myconn.cpp b/storage/connect/myconn.cpp
index 08bb24e14df..28e6f076e77 100644
--- a/storage/connect/myconn.cpp
+++ b/storage/connect/myconn.cpp
@@ -248,7 +248,7 @@ PQRYRES MyColumns(PGLOBAL g, THD *thd, const char *host, const char *db,
while (true) {
p2 = strchr(p1, '\'');
- len = MY_MAX(len, p2 - p1);
+ len = MY_MAX(len, (int)(p2 - p1));
if (*++p2 != ',') break;
p1 = p2 + 2;
} // endwhile
diff --git a/storage/connect/odbconn.cpp b/storage/connect/odbconn.cpp
index 70a0a6a1450..3b0cb562672 100644
--- a/storage/connect/odbconn.cpp
+++ b/storage/connect/odbconn.cpp
@@ -2427,7 +2427,7 @@ int ODBConn::GetCatInfo(CATPARM *cap)
else if (vlen[n] == SQL_NULL_DATA)
pval[n]->SetNull(true);
else if (crp->Type == TYPE_STRING/* && vlen[n] != SQL_NULL_DATA*/)
- pval[n]->SetValue_char(pbuf[n], vlen[n]);
+ pval[n]->SetValue_char(pbuf[n], (int)vlen[n]);
else
pval[n]->SetNull(false);
diff --git a/storage/connect/plgdbutl.cpp b/storage/connect/plgdbutl.cpp
index 25da3162516..e46d260203e 100644
--- a/storage/connect/plgdbutl.cpp
+++ b/storage/connect/plgdbutl.cpp
@@ -540,7 +540,7 @@ bool EvalLikePattern(LPCSTR sp, LPCSTR tp)
{
LPSTR p;
char c;
- int n;
+ ssize_t n;
bool b, t = false;
if (trace)
diff --git a/storage/connect/tabfmt.cpp b/storage/connect/tabfmt.cpp
index 516601a5eb4..f616f24d16b 100644
--- a/storage/connect/tabfmt.cpp
+++ b/storage/connect/tabfmt.cpp
@@ -934,7 +934,7 @@ int TDBCSV::ReadBuffer(PGLOBAL g)
if (p) {
//len = p++ - p2;
- len = p - p2 - 1;;
+ len = (int)(p - p2 - 1);
// if (Sep != ' ')
// for (; *p == ' '; p++) ; // Skip blanks
@@ -978,7 +978,7 @@ int TDBCSV::ReadBuffer(PGLOBAL g)
return RC_NF;
} else if ((p = strchr(p2, Sep)))
- len = p - p2;
+ len = (int)(p - p2);
else if (i == Fields - 1)
len = strlen(p2);
else if (Accept && Maxerr == 0) {
@@ -996,7 +996,7 @@ int TDBCSV::ReadBuffer(PGLOBAL g)
} else
len = 0;
- Offset[i] = p2 - To_Line;
+ Offset[i] = (int)(p2 - To_Line);
if (Mode != MODE_UPDATE)
Fldlen[i] = len;
diff --git a/storage/connect/tabmac.cpp b/storage/connect/tabmac.cpp
index a28b5d7108c..8260ab65391 100644
--- a/storage/connect/tabmac.cpp
+++ b/storage/connect/tabmac.cpp
@@ -367,13 +367,13 @@ void MACCOL::ReadColumn(PGLOBAL g)
case 11: // Description
if ((p = strstr(adp->Description, " - Packet Scheduler Miniport"))) {
strncpy(buf, adp->Description, p - adp->Description);
- i = p - adp->Description;
+ i = (int)(p - adp->Description);
strncpy(buf, adp->Description, i);
buf[i] = 0;
p = buf;
} else if ((p = strstr(adp->Description,
" - Miniport d'ordonnancement de paquets"))) {
- i = p - adp->Description;
+ i = (int)(p - adp->Description);
strncpy(buf, adp->Description, i);
buf[i] = 0;
p = buf;
diff --git a/storage/connect/value.cpp b/storage/connect/value.cpp
index a80da808548..eae72984ca6 100644
--- a/storage/connect/value.cpp
+++ b/storage/connect/value.cpp
@@ -1738,7 +1738,7 @@ DECVAL::DECVAL(PSZ s) : TYPVAL<PSZ>(s)
if (s) {
char *p = strchr(Strp, '.');
- Prec = (p) ? Len - (p - Strp) : 0;
+ Prec = (p) ? (int)(Len - (p - Strp)) : 0;
} // endif s
Type = TYPE_DECIM;
@@ -2647,7 +2647,7 @@ bool DTVAL::SetValue_char(const char *p, int n)
// Trim trailing blanks
for (p2 = p + n -1; p < p2 && *p2 == ' '; p2--);
- if ((rc = (n = p2 - p + 1) > Len))
+ if ((rc = (n = (int)(p2 - p + 1)) > Len))
n = Len;
memcpy(Sdate, p, n);
diff --git a/storage/connect/xobject.cpp b/storage/connect/xobject.cpp
index 85af3779701..c595ce5d6c4 100644
--- a/storage/connect/xobject.cpp
+++ b/storage/connect/xobject.cpp
@@ -204,7 +204,7 @@ STRING::STRING(PGLOBAL g, uint n, PCSZ str)
*Strp = 0;
Next = GetNext();
- Size = Next - Strp;
+ Size = (int)(Next - Strp);
Trc = false;
} else {
// This should normally never happen
@@ -239,7 +239,7 @@ char *STRING::Realloc(uint len)
p = Strp;
Next = GetNext();
- Size = Next - p;
+ Size = (int)(Next - p);
return p;
} // end of Realloc
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 3e9f26ad125..e638af8a217 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -77,22 +77,85 @@ btr_corruption_report(
/*
Latching strategy of the InnoDB B-tree
--------------------------------------
-A tree latch protects all non-leaf nodes of the tree. Each node of a tree
-also has a latch of its own.
-
-A B-tree operation normally first acquires an S-latch on the tree. It
-searches down the tree and releases the tree latch when it has the
-leaf node latch. To save CPU time we do not acquire any latch on
-non-leaf nodes of the tree during a search, those pages are only bufferfixed.
-
-If an operation needs to restructure the tree, it acquires an X-latch on
-the tree before searching to a leaf node. If it needs, for example, to
-split a leaf,
-(1) InnoDB decides the split point in the leaf,
-(2) allocates a new page,
-(3) inserts the appropriate node pointer to the first non-leaf level,
-(4) releases the tree X-latch,
-(5) and then moves records from the leaf to the new allocated page.
+
+Acquisition of node pointer page latches is protected by the index->lock latch.
+
+Before MariaDB 10.2.2, all node pointer pages were protected by index->lock
+either in S (shared) or X (exclusive) mode and block->lock was not acquired on
+node pointer pages.
+
+After MariaDB 10.2.2, block->lock S-latch or X-latch is used to protect
+node pointer pages, and acquisition of node pointer page latches is protected
+by index->lock.
+
+(0) Definition: B-tree level.
+
+(0.1) The leaf pages of the B-tree are at level 0.
+
+(0.2) The parent of a page at level L has level L+1. (The level of the
+root page is equal to the tree height.)
+
+(0.3) The B-tree lock (index->lock) is the parent of the root page and
+has a level = tree height + 1.
+
+Index->lock has 3 possible locking modes:
+
+(1) S-latch:
+
+(1.1) All latches for pages must be obtained in descending order of tree level.
+
+(1.2) Before obtaining the first node pointer page latch at a given B-tree
+level, the parent latch (at the next level up) must be held.
+
+(1.3) If a node pointer page is already latched at a given level,
+the only other latch we may obtain at that level is on its right sibling page.
+
+(1.4) Release of the node pointer page latches must be done in
+child-to-parent order. (Prevents deadlocks when obtained index->lock
+in SX mode).
+
+(1.4.1) A level L node pointer page latch can be released only when
+no latches are held at child levels, i.e. at levels < L.
+
+(1.4.2) All node pointer page latches must be released together, without
+obtaining any new latches in between.
+
+(1.5) [implied by (1.1), (1.2)] The root page latch must be the first node
+pointer latch obtained.
+
+(2) SX-latch:
+
+In this case rules (1.2) and (1.3) from S-latch case are relaxed and
+merged into (2.2) and rule (1.4) is removed. Thus, latch acquisition
+can be skipped at some tree levels and latches can be obtained in
+a less restricted order.
+
+(2.1) [identical to (1.1)]: All latches for pages must be obtained in descending
+order of tree level.
+
+(2.2) When a node pointer latch at level L is obtained,
+the left sibling page latch at the same level or some ancestor
+page latch (at level > L) must be held.
+
+(2.3) [implied by (2.1), (2.2)] The first node pointer page latch obtained can
+be any node pointer page.
+
+(3) X-latch:
+
+Node pointer latches can be obtained in any order.
+
+NOTE: The new rules after MariaDB 10.2.2 do not affect the latching rules for leaf pages:
+
+An index->lock S-latch is needed for reads during the node pointer traversal. When the leaf
+level is reached, index->lock can be released (and, with the MariaDB 10.2.2 changes, all
+node pointer latches). Left-to-right index traversal at the leaf page level can be done
+safely by obtaining the right sibling leaf page latch and then releasing the old page latch.
+
+Single leaf page modifications (BTR_MODIFY_LEAF) are protected by index->lock
+S-latch.
+
+B-tree operations involving page splits or merges (BTR_MODIFY_TREE) and page
+allocations are protected by index->lock X-latch.
Node pointers
-------------
@@ -1041,7 +1104,8 @@ btr_free_root(
{
fseg_header_t* header;
- ut_ad(mtr_memo_contains_flagged(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_flagged(mtr, block, MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
ut_ad(mtr->is_named_space(block->page.id.space()));
btr_search_drop_page_hash_index(block);
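The S-latch rules above fix both the acquisition order (top-down, with the parent held while the first child at the next level is latched) and the release order (child-to-parent, with all node pointer latches dropped once the leaf is reached). A minimal standalone sketch of that order, using std::shared_mutex as a stand-in for index->lock and block->lock; this is an illustration only, not InnoDB code:

#include <shared_mutex>
#include <vector>

struct Page {
  std::shared_mutex latch;          // stand-in for block->lock
  std::vector<Page*> children;
};
struct Index {
  std::shared_mutex lock;           // stand-in for index->lock
  Page *root;
};

// Descend to the leftmost leaf: latches are taken in descending tree-level
// order (1.1), the root first (1.5), each child while its parent is still
// held (1.2); once the leaf is latched, all node pointer latches are released
// in child-to-parent order (1.4), deepest level first.
static Page *descend_to_leftmost_leaf(Index &index)
{
  std::shared_lock<std::shared_mutex> index_guard(index.lock);
  std::vector<Page*> latched;
  Page *page = index.root;
  page->latch.lock_shared();
  latched.push_back(page);
  while (!page->children.empty()) {
    Page *child = page->children.front();
    child->latch.lock_shared();
    if (child->children.empty()) {
      for (auto it = latched.rbegin(); it != latched.rend(); ++it)
        (*it)->latch.unlock_shared();
      return child;                 // caller releases the leaf latch
    }
    latched.push_back(child);
    page = child;
  }
  return page;                      // root is itself a leaf
}

int main()
{
  Page leaf, root;
  root.children.push_back(&leaf);
  Index index{ {}, &root };
  Page *p = descend_to_leftmost_leaf(index);
  p->latch.unlock_shared();
  return 0;
}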
diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc
index 139e3116d06..5fae57fe2d4 100644
--- a/storage/innobase/btr/btr0bulk.cc
+++ b/storage/innobase/btr/btr0bulk.cc
@@ -564,8 +564,7 @@ PageBulk::storeExt(
page_cur->block = m_block;
dberr_t err = btr_store_big_rec_extern_fields(
- &btr_pcur, NULL, offsets, big_rec, m_mtr,
- BTR_STORE_INSERT_BULK);
+ &btr_pcur, offsets, big_rec, m_mtr, BTR_STORE_INSERT_BULK);
ut_ad(page_offset(m_cur_rec) == page_offset(page_cur->rec));
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index bb13b6a71d3..4bb87cfaafb 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -6647,7 +6647,6 @@ btr_store_big_rec_extern_fields(
btr_pcur_t* pcur, /*!< in/out: a persistent cursor. if
btr_mtr is restarted, then this can
be repositioned. */
- const upd_t* upd, /*!< in: update vector */
ulint* offsets, /*!< in/out: rec_get_offsets() on
pcur. the "external storage" flags
in offsets will correctly correspond
diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc
index 335b4fc220d..70444ca1830 100644
--- a/storage/innobase/btr/btr0defragment.cc
+++ b/storage/innobase/btr/btr0defragment.cc
@@ -564,7 +564,7 @@ btr_defragment_merge_pages(
page_get_infimum_rec(from_page));
node_ptr = dict_index_build_node_ptr(
index, rec, page_get_page_no(from_page),
- heap, level + 1);
+ heap, level);
btr_insert_on_non_leaf_level(0, index, level+1,
node_ptr, mtr);
}
@@ -797,11 +797,16 @@ DECLARE_THREAD(btr_defragment_thread)(void*)
now = ut_timer_now();
mtr_start(&mtr);
- btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr);
cursor = btr_pcur_get_btr_cur(pcur);
index = btr_cur_get_index(cursor);
- first_block = btr_cur_get_block(cursor);
mtr.set_named_space(index->space);
+ /* To follow the latching order defined in WL#6326, acquire index->lock X-latch.
+ This entitles us to acquire page latches in any order for the index. */
+ mtr_x_lock(&index->lock, &mtr);
+ /* This will acquire index->lock SX-latch, which per WL#6363 is allowed
+ when we are already holding the X-latch. */
+ btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr);
+ first_block = btr_cur_get_block(cursor);
last_block = btr_defragment_n_pages(first_block, index,
srv_defragment_n_pages,
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 8575de8bfa3..e7a2a844330 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -824,7 +824,6 @@ buf_flush_update_zip_checksum(
static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm));
mach_write_to_8(page + FIL_PAGE_LSN, lsn);
- memset(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
}
@@ -1077,7 +1076,6 @@ buf_flush_write_block_low(
bpage->newest_modification);
ut_a(page_zip_verify_checksum(frame, bpage->size.physical()));
- memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
break;
case BUF_BLOCK_FILE_PAGE:
frame = bpage->zip.data;
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index d533cfad47a..b2ba3d2fae3 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -912,8 +912,7 @@ dict_index_contains_col_or_prefix(
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
if (dict_index_is_clust(index)) {
-
- return(TRUE);
+ return(!is_virtual);
}
if (is_virtual) {
diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc
index 230706976dc..bcaa5ae4a15 100644
--- a/storage/innobase/dict/dict0mem.cc
+++ b/storage/innobase/dict/dict0mem.cc
@@ -439,6 +439,9 @@ dict_mem_table_col_rename_low(
ut_ad(from_len <= NAME_LEN);
ut_ad(to_len <= NAME_LEN);
+ char from[NAME_LEN];
+ strncpy(from, s, NAME_LEN);
+
if (from_len == to_len) {
/* The easy case: simply replace the column name in
table->col_names. */
@@ -523,14 +526,54 @@ dict_mem_table_col_rename_low(
foreign = *it;
- for (unsigned f = 0; f < foreign->n_fields; f++) {
- /* These can point straight to
- table->col_names, because the foreign key
- constraints will be freed at the same time
- when the table object is freed. */
- foreign->foreign_col_names[f]
- = dict_index_get_nth_field(
- foreign->foreign_index, f)->name;
+ if (foreign->foreign_index == NULL) {
+ /* We may go here when we set foreign_key_checks to 0,
+ and then try to rename a column and modify the
+ corresponding foreign key constraint. The index
+ would have been dropped, so we have to find an equivalent
+ one */
+ for (unsigned f = 0; f < foreign->n_fields; f++) {
+ if (strcmp(foreign->foreign_col_names[f], from)
+ == 0) {
+
+ char** rc = const_cast<char**>(
+ foreign->foreign_col_names
+ + f);
+
+ if (to_len <= strlen(*rc)) {
+ memcpy(*rc, to, to_len + 1);
+ } else {
+ *rc = static_cast<char*>(
+ mem_heap_dup(
+ foreign->heap,
+ to,
+ to_len + 1));
+ }
+ }
+ }
+
+ dict_index_t* new_index = dict_foreign_find_index(
+ foreign->foreign_table, NULL,
+ foreign->foreign_col_names,
+ foreign->n_fields, NULL, true, false,
+ NULL, NULL, NULL);
+ /* There must be an equivalent index in this case. */
+ ut_ad(new_index != NULL);
+
+ foreign->foreign_index = new_index;
+
+ } else {
+
+ for (unsigned f = 0; f < foreign->n_fields; f++) {
+ /* These can point straight to
+ table->col_names, because the foreign key
+ constraints will be freed at the same time
+ when the table object is freed. */
+ foreign->foreign_col_names[f]
+ = dict_index_get_nth_field(
+ foreign->foreign_index,
+ f)->name;
+ }
}
}
@@ -540,6 +583,8 @@ dict_mem_table_col_rename_low(
foreign = *it;
+ ut_ad(foreign->referenced_index != NULL);
+
for (unsigned f = 0; f < foreign->n_fields; f++) {
/* foreign->referenced_col_names[] need to be
copies, because the constraint may become
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 2897d5f9be8..73132754fdf 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -1061,139 +1061,28 @@ fil_space_extend_must_retry(
const page_size_t pageSize(space->flags);
const ulint page_size = pageSize.physical();
-#ifdef _WIN32
- os_offset_t new_file_size =
- std::max(
- os_offset_t(size - file_start_page_no) * page_size,
- os_offset_t(FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE));
-
- /* os_file_change_size_win32() handles both compressed(sparse)
- and normal files correctly.
- It allocates physical storage for normal files and "virtual"
- storage for sparse ones.*/
- *success = os_file_change_size_win32(node->name,
- node->handle, new_file_size);
+ /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
+ fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.*/
+ os_offset_t new_size = std::max(
+ os_offset_t(size - file_start_page_no) * page_size,
+ os_offset_t(FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE));
+
+ *success = os_file_set_size(node->name, node->handle, new_size,
+ FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags));
+ os_has_said_disk_full = *success;
if (*success) {
last_page_no = size;
} else {
- ib::error() << "extending file '" << node->name
- << " to size " << new_file_size << " failed";
- }
-#else
- /* We will logically extend the file with ftruncate() if
- page_compression is enabled, because the file is expected to
- be sparse in that case. Make sure that ftruncate() can deal
- with large files. */
- const bool is_sparse = sizeof(off_t) >= 8
- && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags);
-
- if (is_sparse) {
- /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
- fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.
- Do not shrink short ROW_FORMAT=COMPRESSED files. */
- off_t s = std::max(off_t(size - file_start_page_no)
- * off_t(page_size),
- off_t(FIL_IBD_FILE_INITIAL_SIZE
- * UNIV_PAGE_SIZE));
- *success = !ftruncate(node->handle, s);
- if (!*success) {
- ib::error() << "ftruncate of file '" << node->name
- << "' from "
- << os_offset_t(last_page_no
- - file_start_page_no)
- * page_size << " to " << os_offset_t(s)
- << " bytes failed with " << errno;
- } else {
- last_page_no = size;
- }
- } else {
- const os_offset_t start_offset
- = os_offset_t(last_page_no - file_start_page_no)
- * page_size;
- const ulint n_pages = size - last_page_no;
- const os_offset_t len = os_offset_t(n_pages) * page_size;
-# ifdef HAVE_POSIX_FALLOCATE
- int err;
- do {
- err = posix_fallocate(node->handle, start_offset, len);
- } while (err == EINTR
- && srv_shutdown_state == SRV_SHUTDOWN_NONE);
-
- if (err != EINVAL) {
-
- *success = !err;
- if (!*success) {
- ib::error() << "extending file '" << node->name
- << "' from "
- << start_offset
- << " to " << len + start_offset
- << " bytes failed with: " << err;
- }
- } else
-# endif /* HAVE_POSIX_FALLOCATE */
- {
- /* Extend at most 1 megabyte pages at a time */
- ulint n_bytes = std::min(ulint(1) << 20, n_pages)
- * page_size;
- byte* buf2 = static_cast<byte*>(
- calloc(1, n_bytes + page_size));
- *success = buf2 != NULL;
- if (!buf2) {
- ib::error() << "Cannot allocate "
- << n_bytes + page_size
- << " bytes to extend file";
- }
- byte* const buf = static_cast<byte*>(
- ut_align(buf2, page_size));
- IORequest request(IORequest::WRITE);
-
-
- os_offset_t offset = start_offset;
- const os_offset_t end = start_offset + len;
- const bool read_only_mode = space->purpose
- == FIL_TYPE_TEMPORARY && srv_read_only_mode;
-
- while (*success && offset < end) {
- dberr_t err = os_aio(
- request, OS_AIO_SYNC, node->name,
- node->handle, buf, offset, n_bytes,
- read_only_mode, NULL, NULL);
-
- if (err != DB_SUCCESS) {
- *success = false;
- ib::error() << "writing zeroes to file '"
- << node->name << "' from "
- << offset << " to " << offset + n_bytes
- << " bytes failed with: "
- << ut_strerr(err);
- break;
- }
-
- offset += n_bytes;
-
- n_bytes = std::min(n_bytes,
- static_cast<ulint>(end - offset));
- }
-
- free(buf2);
- }
+ /* Let us measure the size of the file
+ to determine how much we were able to
+ extend it */
+ os_offset_t fsize = os_file_get_size(node->handle);
+ ut_a(fsize != os_offset_t(-1));
- os_has_said_disk_full = *success;
- if (*success) {
- last_page_no = size;
- } else {
- /* Let us measure the size of the file
- to determine how much we were able to
- extend it */
- os_offset_t fsize = os_file_get_size(node->handle);
- ut_a(fsize != os_offset_t(-1));
-
- last_page_no = ulint(fsize / page_size)
- + file_start_page_no;
- }
+ last_page_no = ulint(fsize / page_size)
+ + file_start_page_no;
}
-#endif
mutex_enter(&fil_system->mutex);
ut_a(node->being_extended);
@@ -1206,11 +1095,7 @@ fil_space_extend_must_retry(
const ulint pages_in_MiB = node->size
& ~((1 << (20 - UNIV_PAGE_SIZE_SHIFT)) - 1);
- fil_node_complete_io(node,
-#ifndef _WIN32
- !is_sparse ? IORequestWrite :
-#endif /* _WIN32 */
- IORequestRead);
+ fil_node_complete_io(node,IORequestRead);
/* Keep the last data file size info up to date, rounded to
full megabytes */
@@ -1333,6 +1218,7 @@ fil_mutex_enter_and_prepare_for_io(
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
count++;
+ mutex_enter(&fil_system->mutex);
continue;
}
}
@@ -3237,10 +3123,11 @@ fil_truncate_tablespace(
bool success = os_file_truncate(node->name, node->handle, 0);
if (success) {
- os_offset_t size = size_in_pages * UNIV_PAGE_SIZE;
+ os_offset_t size = os_offset_t(size_in_pages) * UNIV_PAGE_SIZE;
success = os_file_set_size(
- node->name, node->handle, size, srv_read_only_mode);
+ node->name, node->handle, size,
+ FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags));
if (success) {
space->stop_new_ops = false;
@@ -3835,72 +3722,17 @@ fil_ibd_create(
return(DB_ERROR);
}
- bool punch_hole = false;
+ const bool is_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(flags);
#ifdef _WIN32
-
- if (FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)) {
- punch_hole = os_file_set_sparse_win32(file);
- }
-
- success = os_file_change_size_win32(path, file, size * UNIV_PAGE_SIZE);
-
-#else
-
- success= false;
-#ifdef HAVE_POSIX_FALLOCATE
- /*
- Extend the file using posix_fallocate(). This is required by
- FusionIO HW/Firmware but should also be the prefered way to extend
- a file.
- */
- int ret;
- do {
- ret = posix_fallocate(file, 0, size * UNIV_PAGE_SIZE);
- } while (ret == EINTR
- && srv_shutdown_state == SRV_SHUTDOWN_NONE);
-
- if (ret == 0) {
- success = true;
- } else if (ret != EINVAL) {
- ib::error() <<
- "posix_fallocate(): Failed to preallocate"
- " data for file " << path
- << ", desired size "
- << size * UNIV_PAGE_SIZE
- << " Operating system error number " << ret
- << ". Check"
- " that the disk is not full or a disk quota"
- " exceeded. Some operating system error"
- " numbers are described at " REFMAN
- "operating-system-error-codes.html";
- }
-#endif /* HAVE_POSIX_FALLOCATE */
-
- if (!success) {
- success = os_file_set_size(
- path, file, size * UNIV_PAGE_SIZE, srv_read_only_mode);
- }
-
- /* Note: We are actually punching a hole, previous contents will
- be lost after this call, if it succeeds. In this case the file
- should be full of NULs. */
-
- punch_hole = os_is_sparse_file_supported(file);
-
- if (punch_hole) {
-
- dberr_t punch_err;
-
- punch_err = os_file_punch_hole(file, 0, size * UNIV_PAGE_SIZE);
-
- if (punch_err != DB_SUCCESS) {
- punch_hole = false;
- }
+ if (is_compressed) {
+ os_file_set_sparse_win32(file);
}
#endif
- ulint block_size = os_file_get_block_size(file, path);
+ success = os_file_set_size(
+ path, file,
+ os_offset_t(size) << UNIV_PAGE_SIZE_SHIFT, is_compressed);
if (!success) {
os_file_close(file);
@@ -3908,6 +3740,10 @@ fil_ibd_create(
return(DB_OUT_OF_FILE_SPACE);
}
+ bool punch_hole = os_is_sparse_file_supported(file);
+
+ ulint block_size = os_file_get_block_size(file, path);
+
/* We have to write the space id to the file immediately and flush the
file to disk. This is because in crash recovery we must be aware what
tablespaces exist and what are their space id's, so that we can apply
diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc
index cc156a5353a..c459c8296e0 100644
--- a/storage/innobase/fsp/fsp0sysspace.cc
+++ b/storage/innobase/fsp/fsp0sysspace.cc
@@ -410,8 +410,7 @@ SysTablespace::set_size(
bool success = os_file_set_size(
file.m_filepath, file.m_handle,
- static_cast<os_offset_t>(file.m_size << UNIV_PAGE_SIZE_SHIFT),
- m_ignore_read_only ? false : srv_read_only_mode);
+ static_cast<os_offset_t>(file.m_size) << UNIV_PAGE_SIZE_SHIFT);
if (success) {
ib::info() << "File '" << file.filepath() << "' size is now "
diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc
index 594f337c978..858d84f1a5e 100644
--- a/storage/innobase/fts/fts0que.cc
+++ b/storage/innobase/fts/fts0que.cc
@@ -149,13 +149,6 @@ struct fts_query_t {
bool multi_exist; /*!< multiple FTS_EXIST oper */
st_mysql_ftparser* parser; /*!< fts plugin parser */
-
- /** limit value for the fts query */
- ulonglong limit;
-
- /** number of docs fetched by query. This is to restrict the
- result with limit value */
- ulonglong n_docs;
};
/** For phrase matching, first we collect the documents and the positions
@@ -3228,11 +3221,6 @@ fts_query_filter_doc_ids(
ulint decoded = 0;
ib_rbt_t* doc_freqs = word_freq->doc_freqs;
- if (query->limit != ULONG_UNDEFINED
- && query->n_docs >= query->limit) {
- return(DB_SUCCESS);
- }
-
/* Decode the ilist and add the doc ids to the query doc_id set. */
while (decoded < len) {
ulint freq = 0;
@@ -3320,17 +3308,11 @@ fts_query_filter_doc_ids(
/* Add the word to the document's matched RB tree. */
fts_query_add_word_to_document(query, doc_id, word);
}
-
- if (query->limit != ULONG_UNDEFINED
- && query->limit <= ++query->n_docs) {
- goto func_exit;
- }
}
/* Some sanity checks. */
ut_a(doc_id == node->last_doc_id);
-func_exit:
if (query->total_size > fts_result_cache_limit) {
return(DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
} else {
@@ -3941,7 +3923,6 @@ fts_query_can_optimize(
@param[in] query_str FTS query
@param[in] query_len FTS query string len in bytes
@param[in,out] result result doc ids
-@param[in] limit limit value
@return DB_SUCCESS if successful otherwise error code */
dberr_t
fts_query(
@@ -3950,8 +3931,7 @@ fts_query(
uint flags,
const byte* query_str,
ulint query_len,
- fts_result_t** result,
- ulonglong limit)
+ fts_result_t** result)
{
fts_query_t query;
dberr_t error = DB_SUCCESS;
@@ -4013,10 +3993,6 @@ fts_query(
query.total_docs = dict_table_get_n_rows(index->table);
- query.limit = limit;
-
- query.n_docs = 0;
-
query.fts_common_table.suffix = "DELETED";
/* Read the deleted doc_ids, we need these for filtering. */
@@ -4078,19 +4054,6 @@ fts_query(
fts_result_cache_limit = 2048;
);
- /* Optimisation is allowed for limit value
- when
- i) No ranking involved
- ii) Only FTS Union operations involved. */
- if (query.limit != ULONG_UNDEFINED
- && !fts_ast_node_check_union(ast)) {
- query.limit = ULONG_UNDEFINED;
- }
-
- DBUG_EXECUTE_IF("fts_union_limit_off",
- query.limit = ULONG_UNDEFINED;
- );
-
/* Traverse the Abstract Syntax Tree (AST) and execute
the query. */
query.error = fts_ast_visit(
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 383f84d1dd1..1a370d7979d 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -6425,16 +6425,27 @@ ha_innobase::open(const char* name, int, uint)
ib_table = open_dict_table(name, norm_name, is_part, ignore_err);
- uint n_fields = mysql_fields(table);
+ if (NULL == ib_table) {
- if (ib_table != NULL
- && ((!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID)
- && n_fields != dict_table_get_n_tot_u_cols(ib_table))
- || (DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID)
- && (n_fields != dict_table_get_n_tot_u_cols(ib_table) - 1)))) {
+ if (is_part) {
+ sql_print_error("Failed to open table %s.\n",
+ norm_name);
+ }
+no_such_table:
+ free_share(m_share);
+ set_my_errno(ENOENT);
+ DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
+ }
+
+ uint n_fields = mysql_fields(table);
+ uint n_cols = dict_table_get_n_user_cols(ib_table)
+ + dict_table_get_n_v_cols(ib_table)
+ - !!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID);
+
+ if (n_cols != n_fields) {
ib::warn() << "Table " << norm_name << " contains "
- << dict_table_get_n_tot_u_cols(ib_table) << " user"
+ << n_cols << " user"
" defined columns in InnoDB, but " << n_fields
<< " columns in MariaDB. Please check"
" INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and " REFMAN
@@ -6446,21 +6457,7 @@ ha_innobase::open(const char* name, int, uint)
ib_table->file_unreadable = true;
ib_table->corrupted = true;
dict_table_close(ib_table, FALSE, FALSE);
- ib_table = NULL;
- is_part = NULL;
- }
-
- if (NULL == ib_table) {
-
- if (is_part) {
- sql_print_error("Failed to open table %s.\n",
- norm_name);
- }
-
- free_share(m_share);
- set_my_errno(ENOENT);
-
- DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
+ goto no_such_table;
}
innobase_copy_frm_flags_from_table_share(ib_table, table->s);
@@ -8210,13 +8207,16 @@ no_icp:
} else {
ibool contain;
- if (innobase_is_v_fld(table->field[i])) {
- contain = dict_index_contains_col_or_prefix(
- index, num_v, true);
- } else {
+ if (!innobase_is_v_fld(table->field[i])) {
contain = dict_index_contains_col_or_prefix(
index, i - num_v,
false);
+ } else if (dict_index_is_clust(index)) {
+ num_v++;
+ continue;
+ } else {
+ contain = dict_index_contains_col_or_prefix(
+ index, num_v, true);
}
field = build_template_needs_field(
@@ -10540,10 +10540,8 @@ ha_innobase::ft_init_ext(
const byte* q = reinterpret_cast<const byte*>(
const_cast<char*>(query));
- // JAN: TODO: support for ft_init_ext_with_hints(), remove the line below
- m_prebuilt->m_fts_limit= ULONG_UNDEFINED;
- dberr_t error = fts_query(trx, index, flags, q, query_len, &result,
- m_prebuilt->m_fts_limit);
+ // FIXME: support ft_init_ext_with_hints(), pass LIMIT
+ dberr_t error = fts_query(trx, index, flags, q, query_len, &result);
if (error != DB_SUCCESS) {
my_error(convert_error_code_to_mysql(error, 0, NULL), MYF(0));
@@ -15077,7 +15075,7 @@ ha_innobase::optimize(
calls to OPTIMIZE, which is undesirable. */
/* TODO: Defragment is disabled for now */
- if (0) {
+ if (srv_defragment) {
int err;
err = defragment_table(m_prebuilt->table->name.m_name, NULL, false);
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index da8fc3ae672..69a6c1f31d0 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -570,6 +570,13 @@ ha_innobase::check_if_supported_inplace_alter(
{
DBUG_ENTER("check_if_supported_inplace_alter");
+ /* Before 10.2.2 information about virtual columns was not stored in
+ system tables. We need to do a full alter to rebuild proper 10.2.2+
+ metadata with the information about virtual columns */
+ if (table->s->mysql_version < 100202 && table->s->virtual_fields) {
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
if (high_level_read_only
|| srv_sys_space.created_new_raw()
|| srv_force_recovery) {
@@ -1069,8 +1076,15 @@ ha_innobase::check_if_supported_inplace_alter(
/* Compute the DEFAULT values of non-constant columns
(VCOL_SESSION_FUNC | VCOL_TIME_FUNC). */
- (*af)->set_default();
- goto next_column;
+ switch ((*af)->set_default()) {
+ case 0: /* OK */
+ case 3: /* DATETIME to TIME or DATE conversion */
+ goto next_column;
+ case -1: /* OOM, or GEOMETRY type mismatch */
+ case 1: /* A number adjusted to the min/max value */
+ case 2: /* String truncation, or conversion problem */
+ break;
+ }
}
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index e62a5e90ce2..f0948fdaebf 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -661,7 +661,6 @@ btr_store_big_rec_extern_fields(
btr_pcur_t* pcur, /*!< in/out: a persistent cursor. if
btr_mtr is restarted, then this can
be repositioned. */
- const upd_t* upd, /*!< in: update vector */
ulint* offsets, /*!< in/out: rec_get_offsets() on
pcur. the "external storage" flags
in offsets will correctly correspond
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index d6fe41670d4..a0fd78e4e0d 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -808,14 +808,6 @@ dict_table_get_n_user_cols(
/*=======================*/
const dict_table_t* table) /*!< in: table */
MY_ATTRIBUTE((warn_unused_result));
-/** Gets the number of user-defined virtual and non-virtual columns in a table
-in the dictionary cache.
-@param[in] table table
-@return number of user-defined (e.g., not ROW_ID) columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_tot_u_cols(
- const dict_table_t* table);
/********************************************************************//**
Gets the number of all non-virtual columns (also system) in a table
in the dictionary cache.
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
index b4e7d3e34c7..76e0c287444 100644
--- a/storage/innobase/include/dict0dict.ic
+++ b/storage/innobase/include/dict0dict.ic
@@ -400,22 +400,6 @@ dict_table_get_n_user_cols(
return(table->n_cols - dict_table_get_n_sys_cols(table));
}
-/** Gets the number of user-defined virtual and non-virtual columns in a table
-in the dictionary cache.
-@param[in] table table
-@return number of user-defined (e.g., not ROW_ID) columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_tot_u_cols(
- const dict_table_t* table)
-{
- ut_ad(table);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(dict_table_get_n_user_cols(table)
- + dict_table_get_n_v_cols(table));
-}
-
/********************************************************************//**
Gets the number of all non-virtual columns (also system) in a table
in the dictionary cache.
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index f1d53165cdd..30b8b66b83b 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -579,7 +579,6 @@ fts_commit(
@param[in] query_str FTS query
@param[in] query_len FTS query string len in bytes
@param[in,out] result result doc ids
-@param[in] limit limit value
@return DB_SUCCESS if successful otherwise error code */
dberr_t
fts_query(
@@ -588,8 +587,7 @@ fts_query(
uint flags,
const byte* query_str,
ulint query_len,
- fts_result_t** result,
- ulonglong limit)
+ fts_result_t** result)
MY_ATTRIBUTE((warn_unused_result));
/******************************************************************//**
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index 1b90ea8d7e7..c0806ad2977 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -1232,19 +1232,27 @@ os_file_get_size(
os_file_t file)
MY_ATTRIBUTE((warn_unused_result));
-/** Write the specified number of zeros to a newly created file.
-@param[in] name name of the file or path as a null-terminated
- string
-@param[in] file handle to a file
-@param[in] size file size
-@param[in] read_only Enable read-only checks if true
-@return true if success */
+/** Extend a file.
+
+On Windows, extending a file allocates blocks for the file,
+unless the file is sparse.
+
+On Unix, we will extend the file with ftruncate() if the
+file needs to be sparse. Otherwise posix_fallocate() is used
+when available, and if not, binary zeroes are added to the end
+of the file.
+
+@param[in] name file name
+@param[in] file file handle
+@param[in] size desired file size
+@param[in] is_sparse whether to create a sparse file (no preallocation)
+@return whether the operation succeeded */
bool
os_file_set_size(
const char* name,
os_file_t file,
os_offset_t size,
- bool read_only)
+ bool is_sparse = false)
MY_ATTRIBUTE((warn_unused_result));
/** Truncates a file at its current position.
@@ -1575,8 +1583,10 @@ os_file_set_umask(ulint umask);
Make file sparse, on Windows.
@param[in] file file handle
+@param[in] is_sparse if true, make file sparse,
+ otherwise "unsparse" the file
@return true on success, false on error */
-bool os_file_set_sparse_win32(os_file_t file);
+bool os_file_set_sparse_win32(os_file_t file, bool is_sparse = true);
/**
Changes file size on Windows
diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
index 8d3752974a6..a7a55d202e8 100644
--- a/storage/innobase/include/row0mysql.h
+++ b/storage/innobase/include/row0mysql.h
@@ -844,9 +844,6 @@ struct row_prebuilt_t {
/** The MySQL table object */
TABLE* m_mysql_table;
-
- /** limit value to avoid fts result overflow */
- ulonglong m_fts_limit;
};
/** Callback for row_mysql_sys_index_iterate() */
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index a51520e881c..e24aa89f046 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -508,10 +508,12 @@ enum srv_operation_mode {
SRV_OPERATION_NORMAL,
/** Mariabackup taking a backup */
SRV_OPERATION_BACKUP,
- /** Mariabackup restoring a backup */
+ /** Mariabackup restoring a backup for subsequent --copy-back */
SRV_OPERATION_RESTORE,
/** Mariabackup restoring the incremental part of a backup */
- SRV_OPERATION_RESTORE_DELTA
+ SRV_OPERATION_RESTORE_DELTA,
+ /** Mariabackup restoring a backup for subsequent --export */
+ SRV_OPERATION_RESTORE_EXPORT
};
/** Current mode of operation */
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 0141d8105ed..4853304e791 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -6614,15 +6614,15 @@ lock_validate()
Release both mutexes during the validation check. */
for (ulint i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
- const lock_t* lock;
ib_uint64_t limit = 0;
- while ((lock = lock_rec_validate(i, &limit)) != 0) {
-
- ulint space = lock->un_member.rec_lock.space;
- ulint page_no = lock->un_member.rec_lock.page_no;
-
- pages.insert(std::make_pair(space, page_no));
+ while (const lock_t* lock = lock_rec_validate(i, &limit)) {
+ if (lock_rec_find_set_bit(lock) == ULINT_UNDEFINED) {
+ /* The lock bitmap is empty; ignore it. */
+ continue;
+ }
+ const lock_rec_t& l = lock->un_member.rec_lock;
+ pages.insert(std::make_pair(l.space, l.page_no));
}
}
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index fd0940b08df..a91b62d11d9 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -1922,7 +1922,8 @@ void
recv_apply_hashed_log_recs(bool last_batch)
{
ut_ad(srv_operation == SRV_OPERATION_NORMAL
- || srv_operation == SRV_OPERATION_RESTORE);
+ || srv_operation == SRV_OPERATION_RESTORE
+ || srv_operation == SRV_OPERATION_RESTORE_EXPORT);
mutex_enter(&recv_sys->mutex);
@@ -1941,7 +1942,8 @@ recv_apply_hashed_log_recs(bool last_batch)
ut_ad(!last_batch == log_mutex_own());
recv_no_ibuf_operations = !last_batch
- || srv_operation == SRV_OPERATION_RESTORE;
+ || srv_operation == SRV_OPERATION_RESTORE
+ || srv_operation == SRV_OPERATION_RESTORE_EXPORT;
ut_d(recv_no_log_write = recv_no_ibuf_operations);
@@ -2960,7 +2962,8 @@ static
dberr_t
recv_init_missing_space(dberr_t err, const recv_spaces_t::const_iterator& i)
{
- if (srv_operation == SRV_OPERATION_RESTORE) {
+ if (srv_operation == SRV_OPERATION_RESTORE
+ || srv_operation == SRV_OPERATION_RESTORE_EXPORT) {
ib::warn() << "Tablespace " << i->first << " was not"
" found at " << i->second.name << " when"
" restoring a (partial?) backup. All redo log"
@@ -3118,7 +3121,8 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
dberr_t err = DB_SUCCESS;
ut_ad(srv_operation == SRV_OPERATION_NORMAL
- || srv_operation == SRV_OPERATION_RESTORE);
+ || srv_operation == SRV_OPERATION_RESTORE
+ || srv_operation == SRV_OPERATION_RESTORE_EXPORT);
/* Initialize red-black tree for fast insertions into the
flush_list during recovery process. */
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index c894a3c15ab..b5c6381537e 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -4743,11 +4743,20 @@ Sets a sparse flag on Windows file.
@param[in] file file handle
@return true on success, false on error
*/
-bool os_file_set_sparse_win32(os_file_t file)
+#include <versionhelpers.h>
+bool os_file_set_sparse_win32(os_file_t file, bool is_sparse)
{
-
+ if (!is_sparse && !IsWindows8OrGreater()) {
+ /* Cannot unset the sparse flag on older Windows.
+ Before Windows 8 this is documented to produce unpredictable results
+ if there are unallocated ranges in the file. */
+ return false;
+ }
DWORD temp;
- return os_win32_device_io_control(file, FSCTL_SET_SPARSE, 0, 0, 0, 0,&temp);
+ FILE_SET_SPARSE_BUFFER sparse_buffer;
+ sparse_buffer.SetSparse = is_sparse;
+ return os_win32_device_io_control(file,
+ FSCTL_SET_SPARSE, &sparse_buffer, sizeof(sparse_buffer), 0, 0,&temp);
}
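For reference, the same FSCTL_SET_SPARSE call reduced to a freestanding sketch against the raw Win32 API; set_sparse is a hypothetical name, and the os_* wrapper plus error reporting used by the patch are omitted.

// Minimal sketch: toggle the sparse attribute on an open file handle.
#include <windows.h>

static bool set_sparse(HANDLE h, bool sparse)
{
	FILE_SET_SPARSE_BUFFER buf;
	buf.SetSparse = sparse ? TRUE : FALSE;
	DWORD bytes_returned = 0;
	// FSCTL_SET_SPARSE with an explicit input buffer can both set and
	// clear the attribute, which is what the patch relies on.
	return DeviceIoControl(h, FSCTL_SET_SPARSE,
			       &buf, sizeof(buf),
			       NULL, 0, &bytes_returned, NULL) != 0;
}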
@@ -5319,23 +5328,73 @@ short_warning:
#endif /* _WIN32 */
-/** Write the specified number of zeros to a newly created file.
-@param[in] name name of the file or path as a null-terminated
- string
-@param[in] file handle to a file
-@param[in] size file size
-@param[in] read_only Enable read-only checks if true
-@return true if success */
+/** Extend a file.
+
+On Windows, extending a file allocates blocks for the file,
+unless the file is sparse.
+
+On Unix, we will extend the file with ftruncate() if the
+file needs to be sparse. Otherwise posix_fallocate() is used
+when available, and if not, binary zeroes are added to the end
+of the file.
+
+@param[in] name file name
+@param[in] file file handle
+@param[in] size desired file size
+@param[in] is_sparse whether to create a sparse file (no preallocation)
+@return whether the operation succeeded */
bool
os_file_set_size(
const char* name,
os_file_t file,
os_offset_t size,
- bool read_only)
+ bool is_sparse)
{
#ifdef _WIN32
+ /* On Windows, changing the file size works as expected for both
+ sparse and normal files.
+
+ However, 10.2 up until 10.2.9 made every InnoDB file sparse,
+ causing NTFS fragmentation issues (MDEV-13941). We try to undo
+ the damage and unsparse the file. */
+
+ if (!is_sparse && os_is_sparse_file_supported(file)) {
+ if (!os_file_set_sparse_win32(file, false))
+ /* Unsparsing the file failed. Fall back to writing binary
+ zeros, to avoid even higher fragmentation. */
+ goto fallback;
+ }
+
return os_file_change_size_win32(name, file, size);
-#endif
+
+fallback:
+#else
+ if (is_sparse) {
+ bool success = !ftruncate(file, size);
+ if (!success) {
+ ib::error() << "ftruncate of file " << name <<
+ " to " << size << " bytes failed with error " << errno;
+ }
+ return(success);
+ }
+
+# ifdef HAVE_POSIX_FALLOCATE
+ int err;
+ do {
+ err = posix_fallocate(file, 0, size);
+ } while (err == EINTR
+ && srv_shutdown_state == SRV_SHUTDOWN_NONE);
+
+ if (err) {
+ ib::error() <<
+ "preallocating " << size << " bytes for" <<
+ "file " << name << "failed with error " << err;
+ }
+ errno = err;
+ return(!err);
+# endif /* HAVE_POSIX_FALLOCATE */
+#endif /* _WIN32 */
+
/* Write up to 1 megabyte at a time. */
ulint buf_size = ut_min(
static_cast<ulint>(64),
@@ -5353,13 +5412,14 @@ os_file_set_size(
/* Write buffer full of zeros */
memset(buf, 0, buf_size);
- if (size >= (os_offset_t) 100 << 20) {
+ os_offset_t current_size = os_file_get_size(file);
+ bool write_progress_info =
+ (size - current_size >= (os_offset_t) 100 << 20);
+ if (write_progress_info) {
ib::info() << "Progress in MB:";
}
- os_offset_t current_size = 0;
-
while (current_size < size) {
ulint n_bytes;
@@ -5382,8 +5442,9 @@ os_file_set_size(
}
/* Print about progress for each 100 MB written */
- if ((current_size + n_bytes) / (100 << 20)
- != current_size / (100 << 20)) {
+ if (write_progress_info &&
+ ((current_size + n_bytes) / (100 << 20)
+ != current_size / (100 << 20))) {
fprintf(stderr, " %lu00",
(ulong) ((current_size + n_bytes)
@@ -5393,7 +5454,7 @@ os_file_set_size(
current_size += n_bytes;
}
- if (size >= (os_offset_t) 100 << 20) {
+ if (write_progress_info) {
fprintf(stderr, "\n");
}
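The Unix strategy described in the comment at the top of os_file_set_size() can be summarised by the following standalone sketch, assuming HAVE_POSIX_FALLOCATE is provided by the build; extend_file is a hypothetical name and the srv_shutdown_state check and ib:: logging are left out.

// Sketch: ftruncate() for sparse files, posix_fallocate() (retried on EINTR)
// otherwise, and a zero-fill loop as the last resort.
#include <algorithm>
#include <cerrno>
#include <fcntl.h>
#include <unistd.h>
#include <vector>

static bool extend_file(int fd, off_t size, bool sparse)
{
	if (sparse) {
		/* Just set the length; no blocks are allocated. */
		return ftruncate(fd, size) == 0;
	}
#ifdef HAVE_POSIX_FALLOCATE
	int err;
	do {
		err = posix_fallocate(fd, 0, size);	/* allocate real blocks */
	} while (err == EINTR);
	return err == 0;
#else
	/* Fallback: append binary zeroes until the desired size is reached. */
	std::vector<char> buf(1 << 16, 0);
	off_t current = lseek(fd, 0, SEEK_END);
	while (current < size) {
		size_t n = static_cast<size_t>(
			std::min<off_t>(size - current, (off_t) buf.size()));
		if (write(fd, buf.data(), n) != (ssize_t) n) {
			return false;
		}
		current += (off_t) n;
	}
	return true;
#endif
}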
@@ -5578,10 +5639,11 @@ os_is_sparse_file_supported(os_file_t fh)
);
#ifdef _WIN32
- BY_HANDLE_FILE_INFORMATION info;
- if (GetFileInformationByHandle(fh,&info)) {
- if (info.dwFileAttributes != INVALID_FILE_ATTRIBUTES) {
- return (info.dwFileAttributes & FILE_ATTRIBUTE_SPARSE_FILE) != 0;
+ FILE_ATTRIBUTE_TAG_INFO info;
+ if (GetFileInformationByHandleEx(fh, FileAttributeTagInfo,
+ &info, (DWORD)sizeof(info))) {
+ if (info.FileAttributes != INVALID_FILE_ATTRIBUTES) {
+ return (info.FileAttributes & FILE_ATTRIBUTE_SPARSE_FILE) != 0;
}
}
return false;
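The new sparse-file query above, stripped down to a freestanding sketch; is_sparse_file is a hypothetical name and the POSIX branch of os_is_sparse_file_supported() is omitted.

// Sketch: ask NTFS whether the handle refers to a sparse file.
#include <windows.h>

static bool is_sparse_file(HANDLE h)
{
	FILE_ATTRIBUTE_TAG_INFO info;
	if (GetFileInformationByHandleEx(h, FileAttributeTagInfo,
					 &info, (DWORD) sizeof(info))) {
		return (info.FileAttributes & FILE_ATTRIBUTE_SPARSE_FILE) != 0;
	}
	return false;
}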
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index 9b9d19ae960..0818585b00c 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -398,7 +398,7 @@ row_ins_clust_index_entry_by_modify(
DEBUG_SYNC_C("before_row_ins_upd_extern");
err = btr_store_big_rec_extern_fields(
- pcur, update, *offsets, big_rec, mtr,
+ pcur, *offsets, big_rec, mtr,
BTR_STORE_INSERT_UPDATE);
DEBUG_SYNC_C("after_row_ins_upd_extern");
dtuple_big_rec_free(big_rec);
@@ -2502,7 +2502,7 @@ row_ins_index_entry_big_rec(
DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern");
error = btr_store_big_rec_extern_fields(
- &pcur, 0, offsets, big_rec, &mtr, BTR_STORE_INSERT);
+ &pcur, offsets, big_rec, &mtr, BTR_STORE_INSERT);
DEBUG_SYNC_C_IF_THD(thd, "after_row_ins_extern");
if (error == DB_SUCCESS
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 747959fcde5..3fc7deab4ae 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -2307,7 +2307,7 @@ func_exit_committed:
if (big_rec) {
if (error == DB_SUCCESS) {
error = btr_store_big_rec_extern_fields(
- &pcur, update, cur_offsets, big_rec, &mtr,
+ &pcur, cur_offsets, big_rec, &mtr,
BTR_STORE_UPDATE);
}
diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc
index d78c363ff73..8a67290b070 100644
--- a/storage/innobase/row/row0upd.cc
+++ b/storage/innobase/row/row0upd.cc
@@ -2895,8 +2895,7 @@ row_upd_clust_rec(
DEBUG_SYNC_C("before_row_upd_extern");
err = btr_store_big_rec_extern_fields(
- pcur, node->update, offsets, big_rec, mtr,
- BTR_STORE_UPDATE);
+ pcur, offsets, big_rec, mtr, BTR_STORE_UPDATE);
DEBUG_SYNC_C("after_row_upd_extern");
}
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index d7e1e062d7a..d6dd5805186 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -378,8 +378,7 @@ create_log_file(
ib::info() << "Setting log file " << name << " size to "
<< srv_log_file_size << " bytes";
- ret = os_file_set_size(name, *file, srv_log_file_size,
- srv_read_only_mode);
+ ret = os_file_set_size(name, *file, srv_log_file_size);
if (!ret) {
ib::error() << "Cannot set log file " << name << " size to "
<< srv_log_file_size << " bytes";
@@ -398,13 +397,14 @@ create_log_file(
/** Delete all log files.
@param[in,out] logfilename buffer for log file name
@param[in] dirnamelen length of the directory path
-@param[in] n_files number of files to delete */
+@param[in] n_files number of files to delete
+@param[in] i first file to delete */
static
void
-delete_log_files(char* logfilename, size_t dirnamelen, unsigned n_files)
+delete_log_files(char* logfilename, size_t dirnamelen, uint n_files, uint i=0)
{
/* Remove any old log files. */
- for (unsigned i = 0; i < n_files; i++) {
+ for (; i < n_files; i++) {
sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
/* Ignore errors about non-existent files or files
@@ -658,8 +658,7 @@ srv_undo_tablespace_create(
<< "wait...";
ret = os_file_set_size(
- name, fh, size << UNIV_PAGE_SIZE_SHIFT,
- srv_read_only_mode);
+ name, fh, os_offset_t(size) << UNIV_PAGE_SIZE_SHIFT);
if (!ret) {
ib::info() << "Error in creating " << name
@@ -913,6 +912,7 @@ srv_undo_tablespaces_init(bool create_new_db)
}
/* fall through */
case SRV_OPERATION_RESTORE:
+ case SRV_OPERATION_RESTORE_EXPORT:
ut_ad(!create_new_db);
/* Check if any of the UNDO tablespace needs fix-up because
@@ -1323,6 +1323,7 @@ srv_shutdown_all_bg_threads()
break;
case SRV_OPERATION_NORMAL:
case SRV_OPERATION_RESTORE:
+ case SRV_OPERATION_RESTORE_EXPORT:
if (!buf_page_cleaner_is_active
&& os_aio_all_slots_free()) {
os_aio_wake_all_threads_at_shutdown();
@@ -1494,7 +1495,8 @@ innobase_start_or_create_for_mysql()
unsigned i = 0;
ut_ad(srv_operation == SRV_OPERATION_NORMAL
- || srv_operation == SRV_OPERATION_RESTORE);
+ || srv_operation == SRV_OPERATION_RESTORE
+ || srv_operation == SRV_OPERATION_RESTORE_EXPORT);
if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) {
srv_read_only_mode = true;
@@ -1984,7 +1986,9 @@ innobase_start_or_create_for_mysql()
if (err == DB_NOT_FOUND) {
if (i == 0) {
if (srv_operation
- == SRV_OPERATION_RESTORE) {
+ == SRV_OPERATION_RESTORE
+ || srv_operation
+ == SRV_OPERATION_RESTORE_EXPORT) {
return(DB_SUCCESS);
}
if (flushed_lsn
@@ -2048,6 +2052,26 @@ innobase_start_or_create_for_mysql()
}
if (i == 0) {
+ if (size == 0
+ && (srv_operation
+ == SRV_OPERATION_RESTORE
+ || srv_operation
+ == SRV_OPERATION_RESTORE_EXPORT)) {
+ /* Tolerate an empty ib_logfile0
+ from a previous run of
+ mariabackup --prepare. */
+ return(DB_SUCCESS);
+ }
+ /* The first log file must consist of
+ at least the following 512-byte pages:
+ header, checkpoint page 1, empty,
+ checkpoint page 2, redo log page(s) */
+ if (size <= OS_FILE_LOG_BLOCK_SIZE * 4) {
+ ib::error() << "Log file "
+ << logfilename << " size "
+ << size << " is too small";
+ return(srv_init_abort(DB_ERROR));
+ }
srv_log_file_size = size;
} else if (size != srv_log_file_size) {
@@ -2314,11 +2338,13 @@ files_checked:
recv_recovery_from_checkpoint_finish();
- if (srv_operation == SRV_OPERATION_RESTORE) {
+ if (srv_operation == SRV_OPERATION_RESTORE
+ || srv_operation == SRV_OPERATION_RESTORE_EXPORT) {
/* After applying the redo log from
SRV_OPERATION_BACKUP, flush the changes
- to the data files and delete the log file.
- No further change to InnoDB files is needed. */
+ to the data files and truncate or delete the log.
+ Unless --export is specified, no further change to
+ InnoDB files is needed. */
ut_ad(!srv_force_recovery);
ut_ad(srv_n_log_files_found <= 1);
ut_ad(recv_no_log_write);
@@ -2328,8 +2354,18 @@ files_checked:
fil_close_log_files(true);
log_group_close_all();
if (err == DB_SUCCESS) {
+ bool trunc = srv_operation
+ == SRV_OPERATION_RESTORE;
+ /* Delete subsequent log files. */
delete_log_files(logfilename, dirnamelen,
- srv_n_log_files_found);
+ srv_n_log_files_found, trunc);
+ if (trunc) {
+ /* Truncate the first log file. */
+ strcpy(logfilename + dirnamelen,
+ "ib_logfile0");
+ FILE* f = fopen(logfilename, "w");
+ if (f != NULL) fclose(f);
+ }
}
return(err);
}
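The log cleanup performed after a restore, condensed into an illustrative sketch: for a plain --prepare (SRV_OPERATION_RESTORE) the first log file is kept but truncated, while for --export all log files are deleted. The function name and parameters here are hypothetical.

// Sketch of the post-restore redo log cleanup.
#include <cstdio>
#include <string>

static void cleanup_restored_logs(const std::string& dir, unsigned n_files,
				  bool keep_truncated_first)
{
	/* Delete ib_logfile1 .. ib_logfile<n-1>; optionally keep ib_logfile0. */
	for (unsigned i = keep_truncated_first ? 1 : 0; i < n_files; i++) {
		std::remove((dir + "ib_logfile" + std::to_string(i)).c_str());
	}
	if (keep_truncated_first) {
		/* Truncate ib_logfile0 to zero length. */
		if (FILE* f = std::fopen((dir + "ib_logfile0").c_str(), "w")) {
			std::fclose(f);
		}
	}
}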
@@ -2794,6 +2830,7 @@ innodb_shutdown()
case SRV_OPERATION_BACKUP:
case SRV_OPERATION_RESTORE:
case SRV_OPERATION_RESTORE_DELTA:
+ case SRV_OPERATION_RESTORE_EXPORT:
fil_close_all_files();
break;
case SRV_OPERATION_NORMAL:
diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc
index 6322b14335f..37b64910713 100644
--- a/storage/innobase/sync/sync0rw.cc
+++ b/storage/innobase/sync/sync0rw.cc
@@ -84,10 +84,15 @@ lock_word < -(X_LOCK_DECR + X_LOCK_HALF_DECR):
2 - (lock_word + X_LOCK_DECR + X_LOCK_HALF_DECR)
LOCK COMPATIBILITY MATRIX
- S SX X
- S + + -
- SX + - -
- X - - -
+
+ | S|SX| X|
+ --+--+--+--+
+ S| +| +| -|
+ --+--+--+--+
+ SX| +| -| -|
+ --+--+--+--+
+ X| -| -| -|
+ --+--+--+--+
The lock_word is always read and updated atomically and consistently, so that
it always represents the state of the lock, and the state of the lock changes
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 2408e4bdaf4..ffba8f314fb 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -620,6 +620,7 @@ trx_free_prepared(
&& trx->is_recovered
&& (!srv_was_started
|| srv_operation == SRV_OPERATION_RESTORE
+ || srv_operation == SRV_OPERATION_RESTORE_EXPORT
|| srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO)));
ut_a(trx->magic_n == TRX_MAGIC_N);
diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt
index 6db82119b45..39553566062 100644
--- a/storage/rocksdb/CMakeLists.txt
+++ b/storage/rocksdb/CMakeLists.txt
@@ -21,6 +21,7 @@ IF(HAVE_SCHED_GETCPU)
ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1 -DROCKSDB_SCHED_GETCPU_PRESENT)
ENDIF()
+
# We've had our builders hang during the build process. This prevents MariaRocks
# to be built on 32 bit intel OS kernels.
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "i[36]86")
@@ -79,6 +80,8 @@ SET(ROCKSDB_SE_SOURCES
ha_rocksdb.h
rdb_i_s.cc
rdb_i_s.h
+ rdb_io_watchdog.h
+ rdb_io_watchdog.cc
rdb_mutex_wrapper.cc
rdb_mutex_wrapper.h
rdb_index_merge.cc
@@ -96,6 +99,11 @@ SET(ROCKSDB_SE_SOURCES
rdb_psi.cc
)
+# MariaDB: the following is added in build_rocksdb.cmake, when appropriate:
+# This is a strong requirement coming from RocksDB. No conditional checks here.
+#ADD_DEFINITIONS(-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX
+#)
+
MYSQL_ADD_PLUGIN(rocksdb ${ROCKSDB_SE_SOURCES} STORAGE_ENGINE
MODULE_OUTPUT_NAME ha_rocksdb
COMPONENT rocksdb-engine)
@@ -105,11 +113,6 @@ IF(NOT TARGET rocksdb)
RETURN()
ENDIF()
-# MARIAROCKS-TODO: ???
-CHECK_FUNCTION_EXISTS(fallocate HAVE_FALLOCATE)
-IF(HAVE_FALLOCATE)
- ADD_DEFINITIONS(-DROCKSDB_FALLOCATE_PRESENT)
-ENDIF()
CHECK_CXX_SOURCE_COMPILES("
@@ -138,7 +141,6 @@ ADD_CONVENIENCE_LIBRARY(rocksdb_aux_lib
rdb_perf_context.h
rdb_sst_info.cc
rdb_sst_info.h
- rdb_io_watchdog.cc rdb_io_watchdog.h
rdb_buff.h
rdb_mariadb_port.h
)
@@ -169,7 +171,7 @@ IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
# (also had to add -frtti above, because something that event_listener.cc
# includes requires it. So, now everything in MariaRocks is compiled with
# -frtti)
- set_source_files_properties(event_listener.cc rdb_cf_options.cc
+ set_source_files_properties(event_listener.cc rdb_cf_options.cc rdb_sst_info.cc
PROPERTIES COMPILE_FLAGS -frtti)
ENDIF()
@@ -178,6 +180,12 @@ IF(HAVE_SCHED_GETCPU)
ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1)
ENDIF()
+IF (NOT "$ENV{WITH_TBB}" STREQUAL "")
+ SET(rocksdb_static_libs ${rocksdb_static_libs}
+ $ENV{WITH_TBB}/libtbb${PIC_EXT}.a)
+ ADD_DEFINITIONS(-DTBB)
+ENDIF()
+
#
# MariaDB: Dynamic plugin build is not suitable with unittest ATM
#
@@ -189,6 +197,7 @@ if (UNIX AND NOT APPLE)
SET(rocksdb_static_libs ${rocksdb_static_libs} "-lrt")
endif()
+
ADD_LIBRARY(rocksdb_tools STATIC
rocksdb/tools/ldb_tool.cc
rocksdb/tools/ldb_cmd.cc
diff --git a/storage/rocksdb/build_rocksdb.cmake b/storage/rocksdb/build_rocksdb.cmake
index f0d19dd019f..f7a2cdda46f 100644
--- a/storage/rocksdb/build_rocksdb.cmake
+++ b/storage/rocksdb/build_rocksdb.cmake
@@ -158,13 +158,13 @@ set(ROCKSDB_SOURCES
db/convenience.cc
db/db_filesnapshot.cc
db/db_impl.cc
- db/db_impl_write.cc
db/db_impl_compaction_flush.cc
- db/db_impl_files.cc
- db/db_impl_open.cc
db/db_impl_debug.cc
db/db_impl_experimental.cc
+ db/db_impl_files.cc
+ db/db_impl_open.cc
db/db_impl_readonly.cc
+ db/db_impl_write.cc
db/db_info_dumper.cc
db/db_iter.cc
db/dbformat.cc
@@ -204,9 +204,14 @@ set(ROCKSDB_SOURCES
env/mock_env.cc
memtable/alloc_tracker.cc
memtable/hash_cuckoo_rep.cc
+ memtable/hash_cuckoo_rep.cc
memtable/hash_linklist_rep.cc
+ memtable/hash_linklist_rep.cc
+ memtable/hash_skiplist_rep.cc
memtable/hash_skiplist_rep.cc
memtable/skiplistrep.cc
+ memtable/skiplistrep.cc
+ memtable/vectorrep.cc
memtable/vectorrep.cc
memtable/write_buffer_manager.cc
monitoring/histogram.cc
@@ -218,7 +223,6 @@ set(ROCKSDB_SOURCES
monitoring/statistics.cc
monitoring/thread_status_impl.cc
monitoring/thread_status_updater.cc
- monitoring/thread_status_updater_debug.cc
monitoring/thread_status_util.cc
monitoring/thread_status_util_debug.cc
options/cf_options.cc
@@ -248,7 +252,6 @@ set(ROCKSDB_SOURCES
table/iterator.cc
table/merging_iterator.cc
table/meta_blocks.cc
- table/mock_table.cc
table/partitioned_filter_block.cc
table/persistent_cache_helper.cc
table/plain_table_builder.cc
@@ -297,13 +300,6 @@ set(ROCKSDB_SOURCES
util/xxhash.cc
utilities/backupable/backupable_db.cc
utilities/blob_db/blob_db.cc
- utilities/blob_db/blob_db_impl.cc
- utilities/blob_db/blob_db_options_impl.cc
- utilities/blob_db/blob_dump_tool.cc
- utilities/blob_db/blob_file.cc
- utilities/blob_db/blob_log_format.cc
- utilities/blob_db/blob_log_reader.cc
- utilities/blob_db/blob_log_writer.cc
utilities/checkpoint/checkpoint_impl.cc
utilities/col_buf_decoder.cc
utilities/col_buf_encoder.cc
@@ -315,7 +311,6 @@ set(ROCKSDB_SOURCES
utilities/document/json_document.cc
utilities/document/json_document_builder.cc
utilities/env_mirror.cc
- utilities/env_timed.cc
utilities/geodb/geodb_impl.cc
utilities/leveldb_options/leveldb_options.cc
utilities/lua/rocks_lua_compaction_filter.cc
@@ -337,16 +332,17 @@ set(ROCKSDB_SOURCES
utilities/spatialdb/spatial_db.cc
utilities/table_properties_collectors/compact_on_deletion_collector.cc
utilities/transactions/optimistic_transaction_db_impl.cc
- utilities/transactions/optimistic_transaction_impl.cc
+ utilities/transactions/pessimistic_transaction.cc
+ utilities/transactions/pessimistic_transaction_db.cc
utilities/transactions/transaction_base.cc
- utilities/transactions/transaction_db_impl.cc
utilities/transactions/transaction_db_mutex_impl.cc
- utilities/transactions/transaction_impl.cc
utilities/transactions/transaction_lock_mgr.cc
utilities/transactions/transaction_util.cc
+ utilities/transactions/write_prepared_txn.cc
utilities/ttl/db_ttl_impl.cc
utilities/write_batch_with_index/write_batch_with_index.cc
utilities/write_batch_with_index/write_batch_with_index_internal.cc
+
)
if(WIN32)
@@ -382,3 +378,4 @@ target_link_libraries(rocksdblib ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set_target_properties(rocksdblib PROPERTIES COMPILE_FLAGS "-fPIC -fno-builtin-memcmp -frtti")
endif()
+
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 4915617e3aa..b28961197fa 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -43,6 +43,7 @@
#include "./debug_sync.h"
#include "./my_bit.h"
#include "./my_stacktrace.h"
+#include "./my_sys.h"
#include "./sql_audit.h"
#include "./sql_table.h"
#include "./sql_hset.h"
@@ -67,6 +68,7 @@
#include "rocksdb/utilities/checkpoint.h"
#include "rocksdb/utilities/convenience.h"
#include "rocksdb/utilities/memory_util.h"
+#include "rocksdb/utilities/sim_cache.h"
#include "util/stop_watch.h"
/* MyRocks includes */
@@ -114,6 +116,7 @@ namespace myrocks {
static st_global_stats global_stats;
static st_export_stats export_stats;
static st_memory_stats memory_stats;
+static st_io_stall_stats io_stall_stats;
const std::string DEFAULT_CF_NAME("default");
const std::string DEFAULT_SYSTEM_CF_NAME("__system__");
@@ -170,7 +173,6 @@ static std::shared_ptr<Rdb_tbl_prop_coll_factory> properties_collector_factory;
Rdb_dict_manager dict_manager;
Rdb_cf_manager cf_manager;
Rdb_ddl_manager ddl_manager;
-const char *m_mysql_gtid;
Rdb_binlog_manager binlog_manager;
#if !defined(_WIN32) && !defined(__APPLE__)
@@ -406,6 +408,10 @@ static void rocksdb_set_delayed_write_rate(THD *thd,
struct st_mysql_sys_var *var,
void *var_ptr, const void *save);
+static void rocksdb_set_max_latest_deadlocks(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save);
+
static void rdb_set_collation_exception_list(const char *exception_list);
static void rocksdb_set_collation_exception_list(THD *thd,
struct st_mysql_sys_var *var,
@@ -422,6 +428,10 @@ rocksdb_set_bulk_load(THD *thd,
struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
void *var_ptr, const void *save);
+static void rocksdb_set_bulk_load_allow_unsorted(
+ THD *thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
+ void *var_ptr, const void *save);
+
static void rocksdb_set_max_background_jobs(THD *thd,
struct st_mysql_sys_var *const var,
void *const var_ptr,
@@ -430,12 +440,15 @@ static void rocksdb_set_max_background_jobs(THD *thd,
// Options definitions
//////////////////////////////////////////////////////////////////////////////
static long long rocksdb_block_cache_size;
+static long long rocksdb_sim_cache_size;
+static my_bool rocksdb_use_clock_cache;
/* Use unsigned long long instead of uint64_t because of MySQL compatibility */
static unsigned long long // NOLINT(runtime/int)
rocksdb_rate_limiter_bytes_per_sec;
static unsigned long long // NOLINT(runtime/int)
rocksdb_sst_mgr_rate_bytes_per_sec;
static unsigned long long rocksdb_delayed_write_rate;
+static uint32_t rocksdb_max_latest_deadlocks;
static unsigned long // NOLINT(runtime/int)
rocksdb_persistent_cache_size_mb;
static ulong rocksdb_info_log_level;
@@ -445,6 +458,7 @@ static ulong rocksdb_index_type;
static uint32_t rocksdb_flush_log_at_trx_commit;
static uint32_t rocksdb_debug_optimizer_n_rows;
static my_bool rocksdb_force_compute_memtable_stats;
+static uint32_t rocksdb_force_compute_memtable_stats_cachetime;
static my_bool rocksdb_debug_optimizer_no_zero_cardinality;
static uint32_t rocksdb_wal_recovery_mode;
static uint32_t rocksdb_access_hint_on_compaction_start;
@@ -462,6 +476,7 @@ static my_bool rocksdb_enable_ttl_read_filtering = 1;
static int rocksdb_debug_ttl_rec_ts = 0;
static int rocksdb_debug_ttl_snapshot_ts = 0;
static int rocksdb_debug_ttl_read_filter_ts = 0;
+static my_bool rocksdb_debug_ttl_ignore_pk = 0;
static my_bool rocksdb_reset_stats = 0;
static uint32_t rocksdb_io_write_timeout_secs = 0;
static uint64_t rocksdb_number_stat_computes = 0;
@@ -474,6 +489,7 @@ static char *rocksdb_datadir;
static uint32_t rocksdb_table_stats_sampling_pct;
static my_bool rocksdb_enable_bulk_load_api = 1;
static my_bool rocksdb_print_snapshot_conflict_queries = 0;
+static my_bool rocksdb_large_prefix = 0;
char *compression_types_val=
const_cast<char*>(get_rocksdb_supported_compression_types());
@@ -489,6 +505,8 @@ static std::unique_ptr<rocksdb::DBOptions> rdb_init_rocksdb_db_options(void) {
o->info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL;
o->max_subcompactions = DEFAULT_SUBCOMPACTIONS;
+ o->concurrent_prepare = true;
+ o->manual_wal_flush = true;
return o;
}
@@ -583,9 +601,12 @@ const size_t RDB_DEFAULT_MERGE_BUF_SIZE = 64 * 1024 * 1024;
const size_t RDB_MIN_MERGE_BUF_SIZE = 100;
const size_t RDB_DEFAULT_MERGE_COMBINE_READ_SIZE = 1024 * 1024 * 1024;
const size_t RDB_MIN_MERGE_COMBINE_READ_SIZE = 100;
+const size_t RDB_DEFAULT_MERGE_TMP_FILE_REMOVAL_DELAY = 0;
+const size_t RDB_MIN_MERGE_TMP_FILE_REMOVAL_DELAY = 0;
const int64 RDB_DEFAULT_BLOCK_CACHE_SIZE = 512 * 1024 * 1024;
const int64 RDB_MIN_BLOCK_CACHE_SIZE = 1024;
const int RDB_MAX_CHECKSUMS_PCT = 100;
+const ulong RDB_DEADLOCK_DETECT_DEPTH = 50;
// TODO: 0 means don't wait at all, and we don't support it yet?
static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
@@ -596,6 +617,14 @@ static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
static MYSQL_THDVAR_BOOL(deadlock_detect, PLUGIN_VAR_RQCMDARG,
"Enables deadlock detection", nullptr, nullptr, FALSE);
+static MYSQL_THDVAR_ULONG(deadlock_detect_depth, PLUGIN_VAR_RQCMDARG,
+ "Number of transactions deadlock detection will "
+ "traverse through before assuming deadlock",
+ nullptr, nullptr,
+ /*default*/ RDB_DEADLOCK_DETECT_DEPTH,
+ /*min*/ 2,
+ /*max*/ ULONG_MAX, 0);
+
static MYSQL_THDVAR_BOOL(
trace_sst_api, PLUGIN_VAR_RQCMDARG,
"Generate trace output in the log for each call to the SstFileWriter",
@@ -607,6 +636,11 @@ static MYSQL_THDVAR_BOOL(
"unique_checks and enables rocksdb_commit_in_the_middle.",
nullptr, rocksdb_set_bulk_load, FALSE);
+static MYSQL_THDVAR_BOOL(bulk_load_allow_unsorted, PLUGIN_VAR_RQCMDARG,
+ "Allow unsorted input during bulk-load. "
+ "Can be changed only when bulk load is disabled.",
+ nullptr, rocksdb_set_bulk_load_allow_unsorted, FALSE);
+
static MYSQL_SYSVAR_BOOL(enable_bulk_load_api, rocksdb_enable_bulk_load_api,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Enables using SstFileWriter for bulk loading",
@@ -686,6 +720,18 @@ static MYSQL_THDVAR_ULONGLONG(
/* min (100B) */ RDB_MIN_MERGE_COMBINE_READ_SIZE,
/* max */ SIZE_T_MAX, 1);
+static MYSQL_THDVAR_ULONGLONG(
+ merge_tmp_file_removal_delay_ms, PLUGIN_VAR_RQCMDARG,
+ "Fast index creation creates a large tmp file on disk during index "
+ "creation. Removing this large file all at once when index creation is "
+ "complete can cause trim stalls on Flash. This variable specifies a "
+ "duration to sleep (in milliseconds) between calling chsize() to truncate "
+ "the file in chunks. The chunk size is the same as merge_buf_size.",
+ nullptr, nullptr,
+ /* default (0ms) */ RDB_DEFAULT_MERGE_TMP_FILE_REMOVAL_DELAY,
+ /* min (0ms) */ RDB_MIN_MERGE_TMP_FILE_REMOVAL_DELAY,
+ /* max */ SIZE_T_MAX, 1);
+
static MYSQL_SYSVAR_BOOL(
create_if_missing,
*reinterpret_cast<my_bool *>(&rocksdb_db_options->create_if_missing),
@@ -694,6 +740,20 @@ static MYSQL_SYSVAR_BOOL(
rocksdb_db_options->create_if_missing);
static MYSQL_SYSVAR_BOOL(
+ concurrent_prepare,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->concurrent_prepare),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::concurrent_prepare for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->concurrent_prepare);
+
+static MYSQL_SYSVAR_BOOL(
+ manual_wal_flush,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->manual_wal_flush),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::manual_wal_flush for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->manual_wal_flush);
+
+static MYSQL_SYSVAR_BOOL(
create_missing_column_families,
*reinterpret_cast<my_bool *>(
&rocksdb_db_options->create_missing_column_families),
@@ -736,6 +796,13 @@ static MYSQL_SYSVAR_ULONGLONG(delayed_write_rate, rocksdb_delayed_write_rate,
rocksdb_db_options->delayed_write_rate, 0,
UINT64_MAX, 0);
+static MYSQL_SYSVAR_UINT(max_latest_deadlocks, rocksdb_max_latest_deadlocks,
+ PLUGIN_VAR_RQCMDARG,
+ "Maximum number of recent "
+ "deadlocks to store",
+ nullptr, rocksdb_set_max_latest_deadlocks,
+ rocksdb::kInitialMaxDeadlocks, 0, UINT32_MAX, 0);
+
static MYSQL_SYSVAR_ENUM(
info_log_level, rocksdb_info_log_level, PLUGIN_VAR_RQCMDARG,
"Filter level for info logs to be written mysqld error log. "
@@ -1011,6 +1078,22 @@ static MYSQL_SYSVAR_LONGLONG(block_cache_size, rocksdb_block_cache_size,
/* max */ LONGLONG_MAX,
/* Block size */ RDB_MIN_BLOCK_CACHE_SIZE);
+static MYSQL_SYSVAR_LONGLONG(sim_cache_size, rocksdb_sim_cache_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Simulated cache size for RocksDB", nullptr,
+ nullptr,
+ /* default */ 0,
+ /* min */ 0,
+ /* max */ LONGLONG_MAX,
+ /* Block size */ 0);
+
+static MYSQL_SYSVAR_BOOL(
+ use_clock_cache,
+ rocksdb_use_clock_cache,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Use ClockCache instead of default LRUCache for RocksDB",
+ nullptr, nullptr, false);
+
static MYSQL_SYSVAR_BOOL(
cache_index_and_filter_blocks,
*reinterpret_cast<my_bool *>(
@@ -1094,17 +1177,26 @@ static MYSQL_SYSVAR_STR(override_cf_options, rocksdb_override_cf_options,
"");
static MYSQL_SYSVAR_STR(update_cf_options, rocksdb_update_cf_options,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
- //psergey-july-merge:TODO: need this: | PLUGIN_VAR_ALLOCATED,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC
+ /* psergey-merge: need this? : PLUGIN_VAR_ALLOCATED*/,
"Option updates per column family for RocksDB", nullptr,
rocksdb_set_update_cf_options, nullptr);
+enum rocksdb_flush_log_at_trx_commit_type : unsigned int {
+ FLUSH_LOG_NEVER = 0,
+ FLUSH_LOG_SYNC,
+ FLUSH_LOG_BACKGROUND,
+ FLUSH_LOG_MAX /* must be last */
+};
+
static MYSQL_SYSVAR_UINT(flush_log_at_trx_commit,
rocksdb_flush_log_at_trx_commit, PLUGIN_VAR_RQCMDARG,
"Sync on transaction commit. Similar to "
"innodb_flush_log_at_trx_commit. 1: sync on commit, "
"0,2: not sync on commit",
- nullptr, nullptr, 1, 0, 2, 0);
+ nullptr, nullptr, /* default */ FLUSH_LOG_SYNC,
+ /* min */ FLUSH_LOG_NEVER,
+ /* max */ FLUSH_LOG_BACKGROUND, 0);
static MYSQL_THDVAR_BOOL(write_disable_wal, PLUGIN_VAR_RQCMDARG,
"WriteOptions::disableWAL for RocksDB", nullptr,
@@ -1148,6 +1240,13 @@ static MYSQL_SYSVAR_BOOL(force_compute_memtable_stats,
"Force to always compute memtable stats",
nullptr, nullptr, TRUE);
+static MYSQL_SYSVAR_UINT(force_compute_memtable_stats_cachetime,
+ rocksdb_force_compute_memtable_stats_cachetime,
+ PLUGIN_VAR_RQCMDARG,
+ "Time in usecs to cache memtable estimates", nullptr,
+ nullptr, /* default */ 60 * 1000 * 1000,
+ /* min */ 0, /* max */ INT_MAX, 0);
+
static MYSQL_SYSVAR_BOOL(
debug_optimizer_no_zero_cardinality,
rocksdb_debug_optimizer_no_zero_cardinality, PLUGIN_VAR_RQCMDARG,
@@ -1215,6 +1314,12 @@ static MYSQL_SYSVAR_INT(
nullptr, nullptr, 0, /* min */ -3600, /* max */ 3600, 0);
static MYSQL_SYSVAR_BOOL(
+ debug_ttl_ignore_pk, rocksdb_debug_ttl_ignore_pk, PLUGIN_VAR_RQCMDARG,
+ "For debugging purposes only. If true, compaction filtering will not occur "
+ "on PK TTL data. This variable is a no-op in non-debug builds.",
+ nullptr, nullptr, FALSE);
+
+static MYSQL_SYSVAR_BOOL(
reset_stats, rocksdb_reset_stats, PLUGIN_VAR_RQCMDARG,
"Reset the RocksDB internal statistics without restarting the DB.", nullptr,
rocksdb_set_reset_stats, FALSE);
@@ -1376,15 +1481,23 @@ static MYSQL_SYSVAR_UINT(
RDB_DEFAULT_TBL_STATS_SAMPLE_PCT, /* everything */ 0,
/* max */ RDB_TBL_STATS_SAMPLE_PCT_MAX, 0);
+static MYSQL_SYSVAR_BOOL(
+ large_prefix, rocksdb_large_prefix, PLUGIN_VAR_RQCMDARG,
+ "Support large index prefix length of 3072 bytes. If off, the maximum "
+ "index prefix length is 767.",
+ nullptr, nullptr, FALSE);
+
static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100;
static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(lock_wait_timeout),
MYSQL_SYSVAR(deadlock_detect),
+ MYSQL_SYSVAR(deadlock_detect_depth),
MYSQL_SYSVAR(max_row_locks),
MYSQL_SYSVAR(write_batch_max_bytes),
MYSQL_SYSVAR(lock_scanned_rows),
MYSQL_SYSVAR(bulk_load),
+ MYSQL_SYSVAR(bulk_load_allow_unsorted),
MYSQL_SYSVAR(skip_unique_check_tables),
MYSQL_SYSVAR(trace_sst_api),
MYSQL_SYSVAR(commit_in_the_middle),
@@ -1395,15 +1508,19 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(enable_bulk_load_api),
MYSQL_SYSVAR(tmpdir),
MYSQL_SYSVAR(merge_combine_read_size),
+ MYSQL_SYSVAR(merge_tmp_file_removal_delay_ms),
MYSQL_SYSVAR(skip_bloom_filter_on_read),
MYSQL_SYSVAR(create_if_missing),
+ MYSQL_SYSVAR(concurrent_prepare),
+ MYSQL_SYSVAR(manual_wal_flush),
MYSQL_SYSVAR(create_missing_column_families),
MYSQL_SYSVAR(error_if_exists),
MYSQL_SYSVAR(paranoid_checks),
MYSQL_SYSVAR(rate_limiter_bytes_per_sec),
MYSQL_SYSVAR(sst_mgr_rate_bytes_per_sec),
MYSQL_SYSVAR(delayed_write_rate),
+ MYSQL_SYSVAR(max_latest_deadlocks),
MYSQL_SYSVAR(info_log_level),
MYSQL_SYSVAR(max_open_files),
MYSQL_SYSVAR(max_total_wal_size),
@@ -1443,6 +1560,8 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(enable_write_thread_adaptive_yield),
MYSQL_SYSVAR(block_cache_size),
+ MYSQL_SYSVAR(sim_cache_size),
+ MYSQL_SYSVAR(use_clock_cache),
MYSQL_SYSVAR(cache_index_and_filter_blocks),
MYSQL_SYSVAR(pin_l0_filter_and_index_blocks_in_cache),
MYSQL_SYSVAR(index_type),
@@ -1468,6 +1587,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(force_index_records_in_range),
MYSQL_SYSVAR(debug_optimizer_n_rows),
MYSQL_SYSVAR(force_compute_memtable_stats),
+ MYSQL_SYSVAR(force_compute_memtable_stats_cachetime),
MYSQL_SYSVAR(debug_optimizer_no_zero_cardinality),
MYSQL_SYSVAR(compact_cf),
@@ -1484,6 +1604,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(debug_ttl_rec_ts),
MYSQL_SYSVAR(debug_ttl_snapshot_ts),
MYSQL_SYSVAR(debug_ttl_read_filter_ts),
+ MYSQL_SYSVAR(debug_ttl_ignore_pk),
MYSQL_SYSVAR(reset_stats),
MYSQL_SYSVAR(io_write_timeout),
MYSQL_SYSVAR(flush_memtable_on_analyze),
@@ -1506,13 +1627,15 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(validate_tables),
MYSQL_SYSVAR(table_stats_sampling_pct),
+
+ MYSQL_SYSVAR(large_prefix),
nullptr};
static rocksdb::WriteOptions
rdb_get_rocksdb_write_options(my_core::THD *const thd) {
rocksdb::WriteOptions opt;
- opt.sync = (rocksdb_flush_log_at_trx_commit == 1);
+ opt.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
opt.disableWAL = THDVAR(thd, write_disable_wal);
opt.ignore_missing_column_families =
THDVAR(thd, write_ignore_missing_column_families);
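The intent of the new flush_log_at_trx_commit enum is the mapping used in rdb_get_rocksdb_write_options() above: only the sync mode makes each commit fsync the WAL. A minimal sketch (make_write_options is a hypothetical name):

// Sketch: map the sysvar value onto rocksdb::WriteOptions.
#include "rocksdb/options.h"

static rocksdb::WriteOptions make_write_options(unsigned flush_mode)
{
	rocksdb::WriteOptions opt;
	// 1 == FLUSH_LOG_SYNC in the enum added by this patch; 0 and 2 rely on
	// background / periodic WAL flushing instead of syncing per commit.
	opt.sync = (flush_mode == 1);
	return opt;
}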
@@ -1795,6 +1918,13 @@ public:
}
}
+ void update_bytes_written(ulonglong bytes_written) {
+ if (m_tbl_io_perf != nullptr) {
+ m_tbl_io_perf->update_bytes_written(rocksdb_perf_context_level(m_thd),
+ bytes_written);
+ }
+ }
+
void set_params(int timeout_sec_arg, int max_row_locks_arg) {
m_timeout_sec = timeout_sec_arg;
m_max_row_locks = max_row_locks_arg;
@@ -2292,9 +2422,10 @@ public:
tx_opts.set_snapshot = false;
tx_opts.lock_timeout = rdb_convert_sec_to_ms(m_timeout_sec);
tx_opts.deadlock_detect = THDVAR(m_thd, deadlock_detect);
+ tx_opts.deadlock_detect_depth = THDVAR(m_thd, deadlock_detect_depth);
tx_opts.max_write_batch_size = THDVAR(m_thd, write_batch_max_bytes);
- write_opts.sync = (rocksdb_flush_log_at_trx_commit == 1);
+ write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
write_opts.disableWAL = THDVAR(m_thd, write_disable_wal);
write_opts.ignore_missing_column_families =
THDVAR(m_thd, write_ignore_missing_column_families);
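The new deadlock_detect_depth THDVAR feeds the RocksDB transaction options shown above; a minimal sketch of the same wiring outside the handler (make_txn_options is hypothetical, the values are illustrative):

// Sketch: enable deadlock detection with a bounded search depth.
#include "rocksdb/utilities/transaction_db.h"

static rocksdb::TransactionOptions make_txn_options()
{
	rocksdb::TransactionOptions tx_opts;
	tx_opts.deadlock_detect = true;		// enable wait-for cycle detection
	tx_opts.deadlock_detect_depth = 50;	// RDB_DEADLOCK_DETECT_DEPTH default
	return tx_opts;
}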
@@ -2513,7 +2644,7 @@ public:
void start_tx() override {
reset();
- write_opts.sync = (rocksdb_flush_log_at_trx_commit == 1);
+ write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
write_opts.disableWAL = THDVAR(m_thd, write_disable_wal);
write_opts.ignore_missing_column_families =
THDVAR(m_thd, write_ignore_missing_column_families);
@@ -2557,14 +2688,21 @@ namespace {
class Rdb_perf_context_guard {
Rdb_io_perf m_io_perf;
- THD *m_thd;
+ Rdb_io_perf *m_io_perf_ptr;
+ Rdb_transaction *m_tx;
+ uint m_level;
-public:
+ public:
Rdb_perf_context_guard(const Rdb_perf_context_guard &) = delete;
Rdb_perf_context_guard &operator=(const Rdb_perf_context_guard &) = delete;
- explicit Rdb_perf_context_guard(THD *const thd) : m_thd(thd) {
- Rdb_transaction *&tx = get_tx_from_thd(m_thd);
+ explicit Rdb_perf_context_guard(Rdb_io_perf *io_perf, uint level)
+ : m_io_perf_ptr(io_perf), m_tx(nullptr), m_level(level) {
+ m_io_perf_ptr->start(m_level);
+ }
+
+ explicit Rdb_perf_context_guard(Rdb_transaction *tx, uint level)
+ : m_io_perf_ptr(nullptr), m_tx(tx), m_level(level) {
/*
if perf_context information is already being recorded, this becomes a
no-op
@@ -2575,9 +2713,10 @@ public:
}
~Rdb_perf_context_guard() {
- Rdb_transaction *&tx = get_tx_from_thd(m_thd);
- if (tx != nullptr) {
- tx->io_perf_end_and_record();
+ if (m_tx != nullptr) {
+ m_tx->io_perf_end_and_record();
+ } else if (m_io_perf_ptr != nullptr) {
+ m_io_perf_ptr->end_and_record(m_level);
}
}
};
@@ -2667,8 +2806,17 @@ static std::string rdb_xid_to_string(const XID &src) {
*/
static bool rocksdb_flush_wal(handlerton* hton __attribute__((__unused__)))
DBUG_ASSERT(rdb != nullptr);
- rocksdb_wal_group_syncs++;
- const rocksdb::Status s = rdb->SyncWAL();
+
+ rocksdb::Status s;
+ /*
+ target_lsn is set to 0 when MySQL wants to sync the WAL files
+ */
+ if (target_lsn == 0 || rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
+ rocksdb_wal_group_syncs++;
+ s = rdb->FlushWAL(target_lsn == 0 ||
+ rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
+ }
+
if (!s.ok()) {
rdb_log_status_error(s);
return HA_EXIT_FAILURE;
@@ -2713,8 +2861,9 @@ static int rocksdb_prepare(handlerton* hton, THD* thd, bool prepare_tx)
if (!tx->prepare(rdb_xid_to_string(xid))) {
return HA_EXIT_FAILURE;
}
- if (thd->durability_property == HA_IGNORE_DURABILITY
-#ifdef MARIAROCKS_NOT_YET
+ if (thd->durability_property == HA_IGNORE_DURABILITY )
+#ifdef MARIAROCKS_NOT_YET
+ (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER)) {
&&
THDVAR(thd, flush_log_at_trx_commit)) {
#endif
@@ -2889,12 +3038,12 @@ static int rocksdb_commit(handlerton* hton, THD* thd, bool commit_tx)
rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true);
- /* this will trigger saving of perf_context information */
- Rdb_perf_context_guard guard(thd);
-
/* note: h->external_lock(F_UNLCK) is called after this function is called) */
Rdb_transaction *&tx = get_tx_from_thd(thd);
+ /* this will trigger saving of perf_context information */
+ Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
+
if (tx != nullptr) {
if (commit_tx || (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT |
OPTION_BEGIN))) {
@@ -2931,8 +3080,8 @@ static int rocksdb_commit(handlerton* hton, THD* thd, bool commit_tx)
static int rocksdb_rollback(handlerton *const hton, THD *const thd,
bool rollback_tx) {
- Rdb_perf_context_guard guard(thd);
Rdb_transaction *&tx = get_tx_from_thd(thd);
+ Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
if (tx != nullptr) {
if (rollback_tx) {
@@ -3055,7 +3204,82 @@ private:
"=========================================\n";
}
-public:
+ static std::string get_dlock_txn_info(const rocksdb::DeadlockInfo &txn,
+ const GL_INDEX_ID &gl_index_id,
+ bool is_last_path = false) {
+ std::string txn_data;
+
+ /* extract table name and index names using the index id */
+ std::string table_name = ddl_manager.safe_get_table_name(gl_index_id);
+ if (table_name.empty()) {
+ table_name =
+ "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id);
+ }
+ auto kd = ddl_manager.safe_find(gl_index_id);
+ std::string idx_name =
+ (kd) ? kd->get_name()
+ : "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id);
+
+ /* get the name of the column family */
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(txn.m_cf_id);
+ std::string cf_name = cfh->GetName();
+
+ txn_data += format_string(
+ "TRANSACTIONID: %u\n"
+ "COLUMN FAMILY NAME: %s\n"
+ "WAITING KEY: %s\n"
+ "LOCK TYPE: %s\n"
+ "INDEX NAME: %s\n"
+ "TABLE NAME: %s\n",
+ txn.m_txn_id, cf_name.c_str(),
+ rdb_hexdump(txn.m_waiting_key.c_str(), txn.m_waiting_key.length())
+ .c_str(),
+ txn.m_exclusive ? "EXCLUSIVE" : "SHARED", idx_name.c_str(),
+ table_name.c_str());
+ if (!is_last_path) {
+ txn_data += "---------------WAITING FOR---------------\n";
+ }
+ return txn_data;
+ }
+
+ static std::string
+ get_dlock_path_info(const rocksdb::DeadlockPath &path_entry) {
+ std::string path_data;
+ if (path_entry.limit_exceeded) {
+ path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n";
+ } else {
+ path_data += "\n*** DEADLOCK PATH\n"
+ "=========================================\n";
+ for (auto it = path_entry.path.begin(); it != path_entry.path.end();
+ it++) {
+ auto txn = *it;
+ const GL_INDEX_ID gl_index_id = {
+ txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(
+ txn.m_waiting_key.c_str()))};
+ path_data += get_dlock_txn_info(txn, gl_index_id);
+ }
+
+ DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty());
+ /* print the first txn in the path to display the full deadlock cycle */
+ if (!path_entry.path.empty() && !path_entry.limit_exceeded) {
+ auto txn = path_entry.path[0];
+ const GL_INDEX_ID gl_index_id = {
+ txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(
+ txn.m_waiting_key.c_str()))};
+ path_data += get_dlock_txn_info(txn, gl_index_id, true);
+
+ /* prints the txn id of the transaction that caused the deadlock */
+ auto deadlocking_txn = *(path_entry.path.end() - 1);
+ path_data +=
+ format_string("\n--------TRANSACTIONID: %u GOT DEADLOCK---------\n",
+ deadlocking_txn.m_txn_id);
+ }
+ }
+
+ return path_data;
+ }
+
+ public:
Rdb_snapshot_status() : m_data(get_header()) {}
std::string getResult() { return m_data + get_footer(); }
@@ -3080,11 +3304,20 @@ public:
"%s\n"
"lock count %llu, write count %llu\n"
"insert count %llu, update count %llu, delete count %llu\n",
- curr_time - snapshot_timestamp, buffer, tx->get_lock_count(),
+ (longlong)(curr_time - snapshot_timestamp), buffer, tx->get_lock_count(),
tx->get_write_count(), tx->get_insert_count(), tx->get_update_count(),
tx->get_delete_count());
}
}
+
+ void populate_deadlock_buffer() {
+ auto dlock_buffer = rdb->GetDeadlockInfoBuffer();
+ m_data += "----------LATEST DETECTED DEADLOCKS----------\n";
+
+ for (auto path_entry : dlock_buffer) {
+ m_data += get_dlock_path_info(path_entry);
+ }
+ }
};
/**
@@ -3184,10 +3417,10 @@ static bool rocksdb_show_snapshot_status(handlerton *const hton, THD *const thd,
Rdb_snapshot_status showStatus;
Rdb_transaction::walk_tx_list(&showStatus);
+ showStatus.populate_deadlock_buffer();
/* Send the result data back to MySQL */
- return print_stats(thd, "SNAPSHOTS", "rocksdb", showStatus.getResult(),
- stat_print);
+ return print_stats(thd, "rocksdb", "", showStatus.getResult(), stat_print);
}
#endif
@@ -3310,7 +3543,6 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd,
str.clear();
rocksdb::MemoryUtil::GetApproximateMemoryUsageByType(dbs, cache_set,
&temp_usage_by_type);
-
snprintf(buf, sizeof(buf), "\nMemTable Total: %llu",
(ulonglong)temp_usage_by_type[rocksdb::MemoryUtil::kMemTableTotal]);
str.append(buf);
@@ -3327,7 +3559,7 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd,
(ulonglong)internal_cache_count * kDefaultInternalCacheSize);
str.append(buf);
res |= print_stats(thd, "MEMORY_STATS", "rocksdb", str, stat_print);
-
+#ifdef MARIAROCKS_NOT_YET
/* Show the background thread status */
std::vector<rocksdb::ThreadStatus> thread_list;
rocksdb::Status s = rdb->GetEnv()->GetThreadList(&thread_list);
@@ -3364,6 +3596,8 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd,
str, stat_print);
}
}
+#endif
+
#ifdef MARIAROCKS_NOT_YET
} else if (stat_type == HA_ENGINE_TRX) {
/* Handle the SHOW ENGINE ROCKSDB TRANSACTION STATUS command */
@@ -3413,8 +3647,6 @@ static int rocksdb_start_tx_and_assign_read_view(
user for whom the transaction should
be committed */
{
- Rdb_perf_context_guard guard(thd);
-
ulong const tx_isolation = my_core::thd_tx_isolation(thd);
if (tx_isolation != ISO_REPEATABLE_READ) {
@@ -3433,6 +3665,8 @@ static int rocksdb_start_tx_and_assign_read_view(
mysql_mutex_assert_owner(&LOCK_commit_ordered);
Rdb_transaction *const tx = get_or_create_tx(thd);
+ Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
+
DBUG_ASSERT(!tx->has_snapshot());
tx->set_tx_read_only(true);
rocksdb_register_tx(hton, thd, tx);
@@ -3476,6 +3710,7 @@ static void rocksdb_update_table_stats(
int n_lock_wait, int n_lock_wait_timeout, int n_lock_deadlock,
const char *engine)) {
my_io_perf_t io_perf_read;
+ my_io_perf_t io_perf_write;
my_io_perf_t io_perf;
page_stats_t page_stats;
comp_stats_t comp_stats;
@@ -3490,6 +3725,7 @@ static void rocksdb_update_table_stats(
memset(&io_perf, 0, sizeof(io_perf));
memset(&page_stats, 0, sizeof(page_stats));
memset(&comp_stats, 0, sizeof(comp_stats));
+ memset(&io_perf_write, 0, sizeof(io_perf_write));
tablenames = rdb_open_tables.get_table_names();
@@ -3522,6 +3758,8 @@ static void rocksdb_update_table_stats(
io_perf_read.bytes = table_handler->m_io_perf_read.bytes.load();
io_perf_read.requests = table_handler->m_io_perf_read.requests.load();
+ io_perf_write.bytes = table_handler->m_io_perf_write.bytes.load();
+ io_perf_write.requests = table_handler->m_io_perf_write.requests.load();
lock_wait_timeout_stats = table_handler->m_lock_wait_timeout_counter.load();
deadlock_stats = table_handler->m_deadlock_counter.load();
@@ -3549,9 +3787,10 @@ static void rocksdb_update_table_stats(
sizeof(dbname_sys));
my_core::filename_to_tablename(tablename.c_str(), tablename_sys,
sizeof(tablename_sys));
- (*cb)(dbname_sys, tablename_sys, is_partition, &io_perf_read, &io_perf,
- &io_perf, &io_perf, &io_perf, &page_stats, &comp_stats, 0,
- lock_wait_timeout_stats, deadlock_stats, rocksdb_hton_name);
+ (*cb)(dbname_sys, tablename_sys, is_partition, &io_perf_read,
+ &io_perf_write, &io_perf, &io_perf, &io_perf, &page_stats,
+ &comp_stats, 0, lock_wait_timeout_stats, deadlock_stats,
+ rocksdb_hton_name);
}
}
#endif
@@ -3767,8 +4006,18 @@ static int rocksdb_init_func(void *const p) {
(rocksdb::BlockBasedTableOptions::IndexType)rocksdb_index_type;
if (!rocksdb_tbl_options->no_block_cache) {
- rocksdb_tbl_options->block_cache =
- rocksdb::NewLRUCache(rocksdb_block_cache_size);
+ std::shared_ptr<rocksdb::Cache> block_cache = rocksdb_use_clock_cache
+ ? rocksdb::NewClockCache(rocksdb_block_cache_size)
+ : rocksdb::NewLRUCache(rocksdb_block_cache_size);
+ if (rocksdb_sim_cache_size > 0) {
+ // Simulated cache enabled
+ // Wrap block cache inside a simulated cache and pass it to RocksDB
+ rocksdb_tbl_options->block_cache =
+ rocksdb::NewSimCache(block_cache, rocksdb_sim_cache_size, 6);
+ } else {
+ // Pass block cache to RocksDB
+ rocksdb_tbl_options->block_cache = block_cache;
+ }
}
// Using newer BlockBasedTable format version for better compression
// and better memory allocation.
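Condensed sketch of the block cache wiring added above (make_block_cache is a hypothetical name, the sysvars are assumed to have the same meanings); note that NewClockCache() may return nullptr when the clock cache is not compiled in, which a real caller should guard against.

// Sketch: choose LRU or clock cache, optionally wrapped in a SimCache.
#include "rocksdb/cache.h"
#include "rocksdb/utilities/sim_cache.h"

static std::shared_ptr<rocksdb::Cache>
make_block_cache(long long cache_size, long long sim_cache_size, bool use_clock)
{
	std::shared_ptr<rocksdb::Cache> cache = use_clock
		? rocksdb::NewClockCache(cache_size)
		: rocksdb::NewLRUCache(cache_size);
	if (sim_cache_size > 0) {
		// Wrap the real cache so hit/miss statistics for a simulated
		// capacity can be collected alongside it.
		cache = rocksdb::NewSimCache(cache, sim_cache_size,
					     /* num_shard_bits */ 6);
	}
	return cache;
}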
@@ -4163,6 +4412,7 @@ Rdb_open_tables_map::get_table_handler(const char *const table_name) {
thr_lock_init(&table_handler->m_thr_lock);
#ifdef MARIAROCKS_NOT_YET
table_handler->m_io_perf_read.init();
+ table_handler->m_io_perf_write.init();
#endif
}
DBUG_ASSERT(table_handler->m_ref_count >= 0);
@@ -4420,11 +4670,11 @@ bool ha_rocksdb::init_with_fields() {
rows within a transaction, etc, because the compaction filter ignores
snapshots when filtering keys.
*/
-bool ha_rocksdb::should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val,
+bool ha_rocksdb::should_hide_ttl_rec(const Rdb_key_def &kd,
+ const rocksdb::Slice &ttl_rec_val,
const int64_t curr_ts) {
- DBUG_ASSERT(m_pk_descr != nullptr);
- DBUG_ASSERT(m_pk_descr->has_ttl());
- DBUG_ASSERT(m_pk_descr->m_ttl_rec_offset != UINT_MAX);
+ DBUG_ASSERT(kd.has_ttl());
+ DBUG_ASSERT(kd.m_ttl_rec_offset != UINT_MAX);
/*
Curr_ts can only be 0 if there are no snapshots open.
@@ -4450,7 +4700,7 @@ bool ha_rocksdb::should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val,
Find where the 8-byte ttl is for each record in this index.
*/
uint64 ts;
- if (!reader.read(m_pk_descr->m_ttl_rec_offset) || reader.read_uint64(&ts)) {
+ if (!reader.read(kd.m_ttl_rec_offset) || reader.read_uint64(&ts)) {
/*
This condition should never be reached since all TTL records have an
8 byte ttl field in front. Don't filter the record out, and log an error.
@@ -4458,7 +4708,7 @@ bool ha_rocksdb::should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val,
std::string buf;
buf = rdb_hexdump(ttl_rec_val.data(), ttl_rec_val.size(),
RDB_MAX_HEXDUMP_LEN);
- const GL_INDEX_ID gl_index_id = m_pk_descr->get_gl_index_id();
+ const GL_INDEX_ID gl_index_id = kd.get_gl_index_id();
// NO_LINT_DEBUG
sql_print_error("Decoding ttl from PK value failed, "
"for index (%u,%u), val: %s",
@@ -4472,10 +4722,23 @@ bool ha_rocksdb::should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val,
#ifndef NDEBUG
read_filter_ts += rdb_dbug_set_ttl_read_filter_ts();
#endif
- return ts + m_pk_descr->m_ttl_duration + read_filter_ts <=
+ return ts + kd.m_ttl_duration + read_filter_ts <=
static_cast<uint64>(curr_ts);
}
+void ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd,
+ rocksdb::Iterator *const iter,
+ bool seek_backward) {
+ if (kd.has_ttl()) {
+ while (iter->Valid() &&
+ should_hide_ttl_rec(
+ kd, iter->value(),
+ get_or_create_tx(table->in_use)->m_snapshot_timestamp)) {
+ rocksdb_smart_next(seek_backward, iter);
+ }
+ }
+}
+
/**
Convert record from table->record[0] form into a form that can be written
into rocksdb.
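The TTL filtering above ultimately reduces to comparing the record's stored timestamp plus the index TTL against the snapshot time. A minimal sketch of that predicate (names are illustrative; the big-endian decode mirrors rdb_netbuf_to_uint64 and the debug read-filter offset is omitted):

// Sketch: decide whether an 8-byte TTL prefix marks the record as expired.
#include <cstddef>
#include <cstdint>

static bool ttl_record_expired(const unsigned char* ttl_bytes,  /* 8 bytes */
			       uint64_t ttl_duration, int64_t curr_ts)
{
	uint64_t ts = 0;
	for (size_t i = 0; i < 8; i++) {	/* network byte order */
		ts = (ts << 8) | ttl_bytes[i];
	}
	return ts + ttl_duration <= static_cast<uint64_t>(curr_ts);
}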
@@ -4501,6 +4764,7 @@ int ha_rocksdb::convert_record_to_storage_format(
if (has_ttl) {
/* If it's a TTL record, reserve space for 8 byte TTL value in front. */
m_storage_record.fill(ROCKSDB_SIZEOF_TTL_RECORD + m_null_bytes_in_rec, 0);
+ m_ttl_bytes_updated = false;
/*
If the TTL is contained within the key, we use the offset to find the
@@ -4531,6 +4795,8 @@ int ha_rocksdb::convert_record_to_storage_format(
rdb_netbuf_to_uint64(reinterpret_cast<const uchar *>(data)) +
rdb_dbug_set_ttl_rec_ts());
#endif
+ // Also store in m_ttl_bytes to propagate to update_sk
+ memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
} else if (!has_ttl_column) {
/*
For implicitly generated TTL records we need to copy over the old
@@ -4549,6 +4815,8 @@ int ha_rocksdb::convert_record_to_storage_format(
#endif
char *const data = const_cast<char *>(m_storage_record.ptr());
rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts);
+ // Also store in m_ttl_bytes to propagate to update_sk
+ memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
}
}
} else {
@@ -4630,6 +4898,15 @@ int ha_rocksdb::convert_record_to_storage_format(
ts += rdb_dbug_set_ttl_rec_ts();
#endif
rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts);
+
+ // If this is an update and the timestamp has been updated, take note
+ // so we can avoid updating SKs unnecessarily.
+ if (!row_info.old_pk_slice.empty()) {
+ m_ttl_bytes_updated =
+ memcmp(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
+ }
+ // Store timestamp in m_ttl_bytes to propagate to update_sk
+ memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
}
}
}
@@ -4907,8 +5184,9 @@ int ha_rocksdb::convert_record_from_storage_format(
}
if (m_maybe_unpack_info) {
- unpack_info = reader.read(RDB_UNPACK_HEADER_SIZE);
- if (!unpack_info || unpack_info[0] != RDB_UNPACK_DATA_TAG) {
+ unpack_info = reader.get_current_ptr();
+ if (!unpack_info || !Rdb_key_def::is_unpack_data_tag(unpack_info[0]) ||
+ !reader.read(Rdb_key_def::get_unpack_header_size(unpack_info[0]))) {
return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
@@ -4916,7 +5194,8 @@ int ha_rocksdb::convert_record_from_storage_format(
rdb_netbuf_to_uint16(reinterpret_cast<const uchar *>(unpack_info + 1));
unpack_slice = rocksdb::Slice(unpack_info, unpack_info_len);
- reader.read(unpack_info_len - RDB_UNPACK_HEADER_SIZE);
+ reader.read(unpack_info_len -
+ Rdb_key_def::get_unpack_header_size(unpack_info[0]));
}
int err = m_pk_descr->unpack_record(table, buf, &rowkey_slice,
@@ -5072,8 +5351,7 @@ void ha_rocksdb::setup_field_converters() {
If hidden pk exists, we skip this check since the field will never be
part of the hidden pk.
*/
- if (!has_hidden_pk(table) &&
- field->part_of_key.is_set(table->s->primary_key)) {
+ if (!has_hidden_pk(table)) {
KEY *const pk_info = &table->key_info[table->s->primary_key];
for (uint kp = 0; kp < pk_info->user_defined_key_parts; kp++) {
/* key_part->fieldnr is counted from 1 */
@@ -5276,7 +5554,10 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) {
my_core::thr_lock_data_init(&m_table_handler->m_thr_lock, &m_db_lock,
nullptr);
m_io_perf.init(&m_table_handler->m_table_perf_context,
- &m_table_handler->m_io_perf_read, &stats);
+ &m_table_handler->m_io_perf_read,
+ &m_table_handler->m_io_perf_write, &stats);
+ Rdb_perf_context_guard guard(&m_io_perf,
+ rocksdb_perf_context_level(ha_thd()));
std::string fullname;
err = rdb_normalize_tablename(name, &fullname);
@@ -5375,6 +5656,13 @@ int ha_rocksdb::close(void) {
m_table_handler = nullptr;
}
+ // These are needed to suppress valgrind errors in rocksdb.partition
+ m_storage_record.free();
+ m_last_rowkey.free();
+ m_sk_tails.free();
+ m_sk_tails_old.free();
+ m_pk_unpack_info.free();
+
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -5410,6 +5698,14 @@ static_assert((sizeof(rdb_error_messages) / sizeof(rdb_error_messages[0])) ==
((HA_ERR_ROCKSDB_LAST - HA_ERR_ROCKSDB_FIRST) + 1),
"Number of error messages doesn't match number of error codes");
+//psergey-merge: do we need this in MariaDB: we have get_error_messages
+//below...
+#if 0
+static const char *rdb_get_error_message(int nr) {
+ return rdb_error_messages[nr - HA_ERR_ROCKSDB_FIRST];
+}
+#endif
+
static const char **rdb_get_error_messages(int nr) { return rdb_error_messages; }
bool ha_rocksdb::get_error_message(const int error, String *const buf) {
@@ -5964,9 +6260,8 @@ int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i,
DBUG_RETURN(err);
}
- /* We don't currently support TTL on tables with secondary keys. */
- if (ttl_duration > 0 &&
- (table_arg->s->keys > 1 || is_hidden_pk(i, table_arg, tbl_def_arg))) {
+ /* We don't currently support TTL on tables with hidden primary keys. */
+ if (ttl_duration > 0 && is_hidden_pk(i, table_arg, tbl_def_arg)) {
my_error(ER_RDB_TTL_UNSUPPORTED, MYF(0));
DBUG_RETURN(HA_EXIT_FAILURE);
}
@@ -6005,6 +6300,12 @@ int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i,
kv_version = Rdb_key_def::PRIMARY_FORMAT_VERSION_UPDATE1;
});
+ DBUG_EXECUTE_IF("MYROCKS_NO_COVERED_BITMAP_FORMAT", {
+ if (index_type == Rdb_key_def::INDEX_TYPE_SECONDARY) {
+ kv_version = Rdb_key_def::SECONDARY_FORMAT_VERSION_UPDATE2;
+ }
+ });
+
uint32 index_flags = (ttl_duration > 0 ? Rdb_key_def::TTL_FLAG : 0);
uint32 ttl_rec_offset =
@@ -6366,7 +6667,7 @@ int ha_rocksdb::read_key_exact(const Rdb_key_def &kd,
from the POV of the current transaction. If it has, try going to the next
key.
*/
- if (kd.has_ttl() && should_hide_ttl_rec(iter->value(), ttl_filter_ts)) {
+ if (kd.has_ttl() && should_hide_ttl_rec(kd, iter->value(), ttl_filter_ts)) {
rocksdb_smart_next(kd.m_is_reverse_cf, iter);
continue;
}
@@ -6402,7 +6703,7 @@ int ha_rocksdb::read_before_key(const Rdb_key_def &kd,
if ((full_key_match &&
kd.value_matches_prefix(m_scan_it->key(), key_slice)) ||
(kd.has_ttl() &&
- should_hide_ttl_rec(m_scan_it->value(), ttl_filter_ts))) {
+ should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts))) {
rocksdb_smart_next(!kd.m_is_reverse_cf, m_scan_it);
continue;
}
@@ -6432,7 +6733,7 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd,
key.
*/
while (m_scan_it->Valid() && kd.has_ttl() &&
- should_hide_ttl_rec(m_scan_it->value(), ttl_filter_ts)) {
+ should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts)) {
rocksdb_smart_next(kd.m_is_reverse_cf, m_scan_it);
}
@@ -6590,17 +6891,21 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
int rc = 0;
uint pk_size;
- if (m_keyread_only && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) {
- /* Get the key columns and primary key value */
- const rocksdb::Slice &rkey = m_scan_it->key();
+ /* Get the key columns and primary key value */
+ const rocksdb::Slice &rkey = m_scan_it->key();
+ const rocksdb::Slice &value = m_scan_it->value();
+
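+ /*
+   The lookup is covered either when keyread-only mode is on, or when the
+   covered-column bitmap stored with the secondary key value shows that all
+   columns needed by this read can be unpacked from the key/value alone.
+ */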
+ bool covered_lookup =
+ m_keyread_only || kd.covers_lookup(table, &value, &m_lookup_bitmap);
+ if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) {
pk_size =
kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple);
- const rocksdb::Slice &value = m_scan_it->value();
if (pk_size == RDB_INVALID_KEY_LEN) {
rc = HA_ERR_ROCKSDB_CORRUPT_DATA;
} else {
rc = kd.unpack_record(table, buf, &rkey, &value,
m_verify_row_debug_checksums);
+ global_stats.covered_secondary_key_lookups.inc();
}
} else {
if (kd.m_is_reverse_cf)
@@ -6710,11 +7015,15 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) {
m_last_rowkey.copy((const char *)m_pk_packed_tuple, size,
&my_charset_bin);
- if (m_keyread_only && m_lock_rows == RDB_LOCK_NONE &&
+ rocksdb::Slice value = m_scan_it->value();
+ bool covered_lookup =
+ m_keyread_only || m_key_descr_arr[keyno]->covers_lookup(
+ table, &value, &m_lookup_bitmap);
+ if (covered_lookup && m_lock_rows == RDB_LOCK_NONE &&
!has_hidden_pk(table)) {
- rocksdb::Slice value = m_scan_it->value();
rc = m_key_descr_arr[keyno]->unpack_record(
table, buf, &key, &value, m_verify_row_debug_checksums);
+ global_stats.covered_secondary_key_lookups.inc();
} else {
rc = get_row_by_rowid(buf, m_pk_packed_tuple, size);
}
@@ -6823,6 +7132,7 @@ int ha_rocksdb::prepare_range_scan(const key_range *start_key,
HA_EXIT_SUCCESS OK
other HA_ERR error code (can be SE-specific)
*/
+
int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key,
key_part_map keypart_map,
enum ha_rkey_function find_flag) {
@@ -7039,6 +7349,8 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward,
const Rdb_key_def &kd = *m_key_descr_arr[active_index];
while (1) {
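+ // Skip over any TTL-expired records before evaluating the pushed index
+ // condition at the current position.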
+ rocksdb_skip_expired_records(kd, m_scan_it, !move_forward);
+
if (!m_scan_it->Valid()) {
table->status = STATUS_NOT_FOUND;
return HA_ERR_END_OF_FILE;
@@ -7068,10 +7380,7 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward,
const enum icp_result icp_status= handler_index_cond_check(this);
if (icp_status == ICP_NO_MATCH) {
- if (move_forward)
- m_scan_it->Next();
- else
- m_scan_it->Prev();
+ rocksdb_smart_next(!move_forward, m_scan_it);
continue; /* Get the next (or prev) index tuple */
}
else if (icp_status == ICP_OUT_OF_RANGE || icp_status == ICP_ABORTED_BY_USER) {
@@ -7402,7 +7711,8 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid,
if (found) {
/* If we found the record, but it's expired, pretend we didn't find it. */
if (!skip_ttl_check && m_pk_descr->has_ttl() &&
- should_hide_ttl_rec(rocksdb::Slice(&m_retrieved_record.front(),
+ should_hide_ttl_rec(*m_pk_descr,
+ rocksdb::Slice(&m_retrieved_record.front(),
m_retrieved_record.size()),
tx->m_snapshot_timestamp)) {
DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
@@ -7481,6 +7791,8 @@ int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) {
else
m_scan_it->Prev();
}
+ rocksdb_skip_expired_records(*m_key_descr_arr[active_index], m_scan_it,
+ !move_forward);
rc = find_icp_matching_index_rec(move_forward, buf);
if (!rc)
rc = secondary_index_read(active_index, buf);
@@ -7837,6 +8149,12 @@ bool ha_rocksdb::is_pk(const uint index, const TABLE *const table_arg,
is_hidden_pk(index, table_arg, tbl_def_arg);
}
+uint ha_rocksdb::max_supported_key_part_length() const {
+ DBUG_ENTER_FUNC();
+ DBUG_RETURN(rocksdb_large_prefix ? MAX_INDEX_COL_LEN_LARGE
+ : MAX_INDEX_COL_LEN_SMALL);
+}
+
const char *ha_rocksdb::get_key_name(const uint index,
const TABLE *const table_arg,
const Rdb_tbl_def *const tbl_def_arg) {
@@ -8219,7 +8537,8 @@ int ha_rocksdb::check_uniqueness_and_lock(
m_retrieved_record by check_and_lock_unique_pk().
*/
if (is_pk(key_id, table, m_tbl_def) && found && m_pk_descr->has_ttl() &&
- should_hide_ttl_rec(rocksdb::Slice(&m_retrieved_record.front(),
+ should_hide_ttl_rec(*m_pk_descr,
+ rocksdb::Slice(&m_retrieved_record.front(),
m_retrieved_record.size()),
(row_info.tx->m_snapshot_timestamp
? row_info.tx->m_snapshot_timestamp
@@ -8267,48 +8586,112 @@ int ha_rocksdb::check_duplicate_sk(const TABLE *table_arg,
int ha_rocksdb::bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd,
const rocksdb::Slice &key,
- const rocksdb::Slice &value) {
- rocksdb::ColumnFamilyHandle *const cf = kd.get_cf();
+ const rocksdb::Slice &value, bool sort) {
+ DBUG_ENTER_FUNC();
+
+ rocksdb::ColumnFamilyHandle *cf = kd.get_cf();
DBUG_ASSERT(cf != nullptr);
- if (m_sst_info == nullptr) {
- m_sst_info = std::make_shared<Rdb_sst_info>(
- rdb, m_table_handler->m_table_name, kd.get_name(), cf,
- *rocksdb_db_options, THDVAR(ha_thd(), trace_sst_api));
- tx->start_bulk_load(this);
- m_bulk_load_tx = tx;
- }
+ int res = HA_EXIT_SUCCESS;
- DBUG_ASSERT(m_sst_info != nullptr);
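+ /*
+   sort == true means the input rows are not guaranteed to arrive in key
+   order (rocksdb_bulk_load_allow_unsorted): buffer them in a per-index
+   external merge sort (m_key_merge) and write them to SST files in sorted
+   order in finalize_bulk_load().
+ */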
+ if (sort) {
+ GL_INDEX_ID kd_gl_id = kd.get_gl_index_id();
+ auto it = m_key_merge.find(kd_gl_id);
+ if (it == m_key_merge.end()) {
+ m_key_merge.emplace(
+ std::piecewise_construct, std::make_tuple(kd_gl_id),
+ std::make_tuple(
+ thd_rocksdb_tmpdir(), THDVAR(ha_thd(), merge_buf_size),
+ THDVAR(ha_thd(), merge_combine_read_size),
+ THDVAR(ha_thd(), merge_tmp_file_removal_delay_ms), cf));
+ it = m_key_merge.find(kd_gl_id);
+ if ((res = it->second.init()) != 0) {
+ DBUG_RETURN(res);
+ }
- return m_sst_info->put(key, value);
+ if (m_bulk_load_tx == nullptr) {
+ tx->start_bulk_load(this);
+ m_bulk_load_tx = tx;
+ }
+ }
+ res = it->second.add(key, value);
+ } else {
+ if (!m_sst_info) {
+ m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name,
+ kd.get_name(), cf, *rocksdb_db_options,
+ THDVAR(ha_thd(), trace_sst_api)));
+ tx->start_bulk_load(this);
+ m_bulk_load_tx = tx;
+ }
+
+ DBUG_ASSERT(m_sst_info);
+
+ res = m_sst_info->put(key, value);
+ }
+
+ DBUG_RETURN(res);
}
int ha_rocksdb::finalize_bulk_load() {
- int rc = 0;
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT_IMP(!m_key_merge.empty() || m_sst_info,
+ m_bulk_load_tx != nullptr);
/* Skip if there are no possible ongoing bulk loads */
- if (m_sst_info == nullptr && m_bulk_load_tx == nullptr) {
- return rc;
+ if (m_key_merge.empty() && !m_sst_info && m_bulk_load_tx == nullptr) {
+ DBUG_RETURN(HA_EXIT_SUCCESS);
}
+ int res = HA_EXIT_SUCCESS;
+
RDB_MUTEX_LOCK_CHECK(m_bulk_load_mutex);
- /*
- We need this check because it's possible that m_sst_info has been
- flushed and cleared by another thread by the time the mutex has been
- acquired.
- */
- if (m_sst_info != nullptr) {
- rc = m_sst_info->commit();
- m_sst_info = nullptr;
+ if (m_sst_info) {
+ res = m_sst_info->commit();
+ m_sst_info.reset();
+ }
+
+ if (!m_key_merge.empty()) {
+ rocksdb::Slice merge_key;
+ rocksdb::Slice merge_val;
+ for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) {
+ const std::string &index_name =
+ ddl_manager.safe_find(it->first)->get_name();
+ Rdb_index_merge &rdb_merge = it->second;
+ Rdb_sst_info sst_info(rdb, m_table_handler->m_table_name, index_name,
+ rdb_merge.get_cf(), *rocksdb_db_options,
+ THDVAR(ha_thd(), trace_sst_api));
+
+ while ((res = rdb_merge.next(&merge_key, &merge_val)) == 0) {
+ if ((res = sst_info.put(merge_key, merge_val)) != 0) {
+ break;
+ }
+ }
+ // res == -1 => finished ok; res > 0 => error
+ if (res <= 0) {
+ if ((res = sst_info.commit()) != 0) {
+ break;
+ }
+ }
+ }
+ m_key_merge.clear();
+
+ /*
+ Explicitly tell jemalloc to clean up any unused dirty pages at this point.
+ See https://reviews.facebook.net/D63723 for more details.
+ */
+ purge_all_jemalloc_arenas();
+ }
+
+ if (m_bulk_load_tx != nullptr) {
m_bulk_load_tx->end_bulk_load(this);
m_bulk_load_tx = nullptr;
}
RDB_MUTEX_UNLOCK_CHECK(m_bulk_load_mutex);
- return rc;
+ DBUG_RETURN(res);
}
int ha_rocksdb::update_pk(const Rdb_key_def &kd,
@@ -8316,6 +8699,7 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
const bool &pk_changed) {
const uint key_id = kd.get_keyno();
const bool hidden_pk = is_hidden_pk(key_id, table, m_tbl_def);
+ ulonglong bytes_written = 0;
/*
If the PK has changed, or if this PK uses single deletes and this is an
@@ -8330,6 +8714,8 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
if (!s.ok()) {
return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def,
m_table_handler);
+ } else {
+ bytes_written = row_info.old_pk_slice.size();
}
}
@@ -8350,7 +8736,8 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
/*
Write the primary key directly to an SST file using an SstFileWriter
*/
- rc = bulk_load_key(row_info.tx, kd, row_info.new_pk_slice, value_slice);
+ rc = bulk_load_key(row_info.tx, kd, row_info.new_pk_slice, value_slice,
+ THDVAR(table->in_use, bulk_load_allow_unsorted));
} else if (row_info.skip_unique_check || row_info.tx->m_ddl_transaction) {
/*
It is responsibility of the user to make sure that the data being
@@ -8372,6 +8759,10 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
}
}
+ if (rc == HA_EXIT_SUCCESS) {
+ row_info.tx->update_bytes_written(
+ bytes_written + row_info.new_pk_slice.size() + value_slice.size());
+ }
return rc;
}
@@ -8385,24 +8776,31 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
rocksdb::Slice old_key_slice;
const uint key_id = kd.get_keyno();
+
+ ulonglong bytes_written = 0;
+
/*
- Can skip updating this key if none of the key fields have changed.
+ Can skip updating this key if none of the key fields have changed and, if
+ this table has TTL, the TTL timestamp has not changed.
*/
- if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id)) {
+ if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id) &&
+ (!kd.has_ttl() || !m_ttl_bytes_updated)) {
return HA_EXIT_SUCCESS;
}
const bool store_row_debug_checksums = should_store_row_debug_checksums();
- new_packed_size = kd.pack_record(
- table_arg, m_pack_buffer, row_info.new_data, m_sk_packed_tuple,
- &m_sk_tails, store_row_debug_checksums, row_info.hidden_pk_id);
+ new_packed_size =
+ kd.pack_record(table_arg, m_pack_buffer, row_info.new_data,
+ m_sk_packed_tuple, &m_sk_tails, store_row_debug_checksums,
+ row_info.hidden_pk_id, 0, nullptr, nullptr, m_ttl_bytes);
if (row_info.old_data != nullptr) {
// The old value
old_packed_size = kd.pack_record(
table_arg, m_pack_buffer, row_info.old_data, m_sk_packed_tuple_old,
- &m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id);
+ &m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id, 0,
+ nullptr, nullptr, m_ttl_bytes);
/*
Check if we are going to write the same value. This can happen when
@@ -8420,6 +8818,7 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
*/
if (old_packed_size == new_packed_size &&
m_sk_tails_old.get_current_pos() == m_sk_tails.get_current_pos() &&
+ !(kd.has_ttl() && m_ttl_bytes_updated) &&
memcmp(m_sk_packed_tuple_old, m_sk_packed_tuple, old_packed_size) ==
0 &&
memcmp(m_sk_tails_old.ptr(), m_sk_tails.ptr(),
@@ -8438,6 +8837,8 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
row_info.tx->get_indexed_write_batch()->SingleDelete(kd.get_cf(),
old_key_slice);
+
+ bytes_written = old_key_slice.size();
}
new_key_slice = rocksdb::Slice(
@@ -8449,6 +8850,9 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
row_info.tx->get_indexed_write_batch()->Put(kd.get_cf(), new_key_slice,
new_value_slice);
+ row_info.tx->update_bytes_written(bytes_written + new_key_slice.size() +
+ new_value_slice.size());
+
return HA_EXIT_SUCCESS;
}
@@ -8456,14 +8860,19 @@ int ha_rocksdb::update_indexes(const struct update_row_info &row_info,
const bool &pk_changed) {
int rc;
+ // The PK must be updated first to pull out the TTL value.
+ rc = update_pk(*m_pk_descr, row_info, pk_changed);
+ if (rc != HA_EXIT_SUCCESS) {
+ return rc;
+ }
+
+ // Update the remaining indexes.
for (uint key_id = 0; key_id < m_tbl_def->m_key_count; key_id++) {
- const Rdb_key_def &kd = *m_key_descr_arr[key_id];
if (is_pk(key_id, table, m_tbl_def)) {
- rc = update_pk(kd, row_info, pk_changed);
- } else {
- rc = update_sk(table, kd, row_info);
+ continue;
}
+ rc = update_sk(table, *m_key_descr_arr[key_id], row_info);
if (rc != HA_EXIT_SUCCESS) {
return rc;
}
@@ -8741,7 +9150,8 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
DEBUG_SYNC(ha_thd(), "rocksdb_concurrent_delete");
if (m_pk_descr->has_ttl() &&
- should_hide_ttl_rec(m_scan_it->value(), tx->m_snapshot_timestamp)) {
+ should_hide_ttl_rec(*m_pk_descr, m_scan_it->value(),
+ tx->m_snapshot_timestamp)) {
continue;
}
@@ -8772,7 +9182,8 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
if (m_pk_descr->has_ttl() &&
should_hide_ttl_rec(
- value, get_or_create_tx(table->in_use)->m_snapshot_timestamp)) {
+ *m_pk_descr, value,
+ get_or_create_tx(table->in_use)->m_snapshot_timestamp)) {
continue;
}
@@ -8816,6 +9227,10 @@ int ha_rocksdb::index_init(uint idx, bool sorted) {
setup_read_decoders();
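+ // Compute which columns this index would have to cover for the current
+ // read set (not needed in keyread-only mode); freed again in index_end().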
+ if (!m_keyread_only) {
+ m_key_descr_arr[idx]->get_lookup_bitmap(table, &m_lookup_bitmap);
+ }
+
// If m_lock_rows is not RDB_LOCK_NONE then we will be doing a get_for_update
// when accessing the index, so don't acquire the snapshot right away.
// Otherwise acquire the snapshot immediately.
@@ -8835,6 +9250,8 @@ int ha_rocksdb::index_end() {
release_scan_iterator();
+ bitmap_free(&m_lookup_bitmap);
+
active_index = MAX_KEY;
in_range_check_pushed_down = FALSE;
@@ -8878,6 +9295,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
rocksdb::Slice key_slice(m_last_rowkey.ptr(), m_last_rowkey.length());
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ ulonglong bytes_written = 0;
const uint index = pk_index(table, m_tbl_def);
rocksdb::Status s =
@@ -8885,6 +9303,8 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
if (!s.ok()) {
DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def,
m_table_handler));
+ } else {
+ bytes_written = key_slice.size();
}
longlong hidden_pk_id = 0;
@@ -8907,6 +9327,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
/* Deleting on secondary key doesn't need any locks: */
tx->get_indexed_write_batch()->SingleDelete(kd.get_cf(),
secondary_key_slice);
+ bytes_written += secondary_key_slice.size();
}
}
@@ -8919,6 +9340,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
stats.rows_deleted++;
#endif
update_row_stats(ROWS_DELETED);
+ tx->update_bytes_written(bytes_written);
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -9005,13 +9427,37 @@ int ha_rocksdb::info(uint flag) {
stats.records+= sz/ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE;
stats.data_file_length+= sz;
}
- // Second, compute memtable stats
- uint64_t memtableCount;
- uint64_t memtableSize;
- rdb->GetApproximateMemTableStats(m_pk_descr->get_cf(), r,
- &memtableCount, &memtableSize);
- stats.records += memtableCount;
- stats.data_file_length += memtableSize;
+ // Second, compute memtable stats. This call is expensive, so cache
+ // values computed for some time.
+ uint64_t cachetime = rocksdb_force_compute_memtable_stats_cachetime;
+ uint64_t time = (cachetime == 0) ? 0 : my_interval_timer() / 1000;
+ if (cachetime == 0 ||
+ time > m_table_handler->m_mtcache_last_update + cachetime) {
+ uint64_t memtableCount;
+ uint64_t memtableSize;
+
+ rdb->GetApproximateMemTableStats(m_pk_descr->get_cf(), r,
+ &memtableCount, &memtableSize);
+
+ // Atomically update all of these fields at the same time
+ if (cachetime > 0) {
+ if (m_table_handler->m_mtcache_lock.fetch_add(
+ 1, std::memory_order_acquire) == 0) {
+ m_table_handler->m_mtcache_count = memtableCount;
+ m_table_handler->m_mtcache_size = memtableSize;
+ m_table_handler->m_mtcache_last_update = time;
+ }
+ m_table_handler->m_mtcache_lock.fetch_sub(1,
+ std::memory_order_release);
+ }
+
+ stats.records += memtableCount;
+ stats.data_file_length += memtableSize;
+ } else {
+ // Cached data is still valid, so use it instead
+ stats.records += m_table_handler->m_mtcache_count;
+ stats.data_file_length += m_table_handler->m_mtcache_size;
+ }
if (rocksdb_debug_optimizer_n_rows > 0)
stats.records = rocksdb_debug_optimizer_n_rows;
@@ -9691,6 +10137,7 @@ int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) {
char key_buf[MAX_KEY_LENGTH];
uint key_len;
+ ulonglong bytes_written = 0;
/*
Remove all records in each index.
(This is not crash-safe, but it doesn't matter, because bulk row
@@ -9722,11 +10169,13 @@ int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) {
return tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def,
m_table_handler);
}
-
+ bytes_written += key.size();
it->Next();
}
}
+ tx->update_bytes_written(bytes_written);
+
return HA_EXIT_SUCCESS;
}
@@ -9775,6 +10224,8 @@ int ha_rocksdb::rename_table(const char *const from, const char *const to) {
DBUG_RETURN(-1);
}
+ DBUG_EXECUTE_IF("gen_sql_table_name", to_str = to_str + "#sql-test";);
+
const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
rocksdb::WriteBatch *const batch = wb.get();
dict_manager.lock();
@@ -10486,7 +10937,6 @@ int ha_rocksdb::inplace_populate_sk(
const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes) {
DBUG_ENTER_FUNC();
int res = HA_EXIT_SUCCESS;
-
const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
rocksdb::WriteBatch *const batch = wb.get();
@@ -10542,7 +10992,7 @@ int ha_rocksdb::inplace_populate_sk(
open handlers have been closed at this point, and the one we're on is the
only one left.
*/
- if (m_sst_info != nullptr) {
+ if (m_sst_info) {
if ((res = finalize_bulk_load())) {
DBUG_RETURN(res);
}
@@ -10552,14 +11002,16 @@ int ha_rocksdb::inplace_populate_sk(
const ulonglong rdb_merge_buf_size = THDVAR(ha_thd(), merge_buf_size);
const ulonglong rdb_merge_combine_read_size =
THDVAR(ha_thd(), merge_combine_read_size);
+ const ulonglong rdb_merge_tmp_file_removal_delay =
+ THDVAR(ha_thd(), merge_tmp_file_removal_delay_ms);
for (const auto &index : indexes) {
- const rocksdb::Comparator *index_comp = index->get_cf()->GetComparator();
bool is_unique_index =
new_table_arg->key_info[index->get_keyno()].flags & HA_NOSAME;
- Rdb_index_merge rdb_merge(thd_rocksdb_tmpdir(), rdb_merge_buf_size,
- rdb_merge_combine_read_size, index_comp);
+ Rdb_index_merge rdb_merge(
+ thd_rocksdb_tmpdir(), rdb_merge_buf_size, rdb_merge_combine_read_size,
+ rdb_merge_tmp_file_removal_delay, index->get_cf());
if ((res = rdb_merge.init())) {
DBUG_RETURN(res);
@@ -10588,7 +11040,8 @@ int ha_rocksdb::inplace_populate_sk(
/* Create new secondary index entry */
const int new_packed_size = index->pack_record(
new_table_arg, m_pack_buffer, table->record[0], m_sk_packed_tuple,
- &m_sk_tails, should_store_row_debug_checksums(), hidden_pk_id);
+ &m_sk_tails, should_store_row_debug_checksums(), hidden_pk_id, 0,
+ nullptr, nullptr, m_ttl_bytes);
const rocksdb::Slice key = rocksdb::Slice(
reinterpret_cast<const char *>(m_sk_packed_tuple), new_packed_size);
@@ -10653,7 +11106,7 @@ int ha_rocksdb::inplace_populate_sk(
/*
Insert key and slice to SST via SSTFileWriter API.
*/
- if ((res = bulk_load_key(tx, *index, merge_key, merge_val))) {
+ if ((res = bulk_load_key(tx, *index, merge_key, merge_val, false))) {
break;
}
}
@@ -11006,6 +11459,9 @@ static void myrocks_update_status() {
export_stats.queries_point = global_stats.queries[QUERIES_POINT];
export_stats.queries_range = global_stats.queries[QUERIES_RANGE];
+
+ export_stats.covered_secondary_key_lookups =
+ global_stats.covered_secondary_key_lookups;
}
static void myrocks_update_memory_status() {
@@ -11049,6 +11505,9 @@ static SHOW_VAR myrocks_status_variables[] = {
SHOW_LONGLONG),
DEF_STATUS_VAR_FUNC("queries_range", &export_stats.queries_range,
SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("covered_secondary_key_lookups",
+ &export_stats.covered_secondary_key_lookups,
+ SHOW_LONGLONG),
{NullS, NullS, SHOW_LONG}};
@@ -11059,6 +11518,91 @@ static void show_myrocks_vars(THD *thd, SHOW_VAR *var, char *buff) {
var->value = reinterpret_cast<char *>(&myrocks_status_variables);
}
+static ulonglong
+io_stall_prop_value(const std::map<std::string, std::string> &props,
+ const std::string &key) {
+ std::map<std::string, std::string>::const_iterator iter =
+ props.find("io_stalls." + key);
+ if (iter != props.end()) {
+ return std::stoull(iter->second);
+ } else {
+ DBUG_PRINT("warning",
+ ("RocksDB GetMapPropery hasn't returned key=%s", key.c_str()));
+ DBUG_ASSERT(0);
+ return 0;
+ }
+}
+
+static void update_rocksdb_stall_status() {
+ st_io_stall_stats local_io_stall_stats;
+ for (const auto &cf_name : cf_manager.get_cf_names()) {
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name);
+ if (cfh == nullptr) {
+ continue;
+ }
+
+ std::map<std::string, std::string> props;
+ if (!rdb->GetMapProperty(cfh, "rocksdb.cfstats", &props)) {
+ continue;
+ }
+
+ local_io_stall_stats.level0_slowdown +=
+ io_stall_prop_value(props, "level0_slowdown");
+ local_io_stall_stats.level0_slowdown_with_compaction +=
+ io_stall_prop_value(props, "level0_slowdown_with_compaction");
+ local_io_stall_stats.level0_numfiles +=
+ io_stall_prop_value(props, "level0_numfiles");
+ local_io_stall_stats.level0_numfiles_with_compaction +=
+ io_stall_prop_value(props, "level0_numfiles_with_compaction");
+ local_io_stall_stats.stop_for_pending_compaction_bytes +=
+ io_stall_prop_value(props, "stop_for_pending_compaction_bytes");
+ local_io_stall_stats.slowdown_for_pending_compaction_bytes +=
+ io_stall_prop_value(props, "slowdown_for_pending_compaction_bytes");
+ local_io_stall_stats.memtable_compaction +=
+ io_stall_prop_value(props, "memtable_compaction");
+ local_io_stall_stats.memtable_slowdown +=
+ io_stall_prop_value(props, "memtable_slowdown");
+ local_io_stall_stats.total_stop += io_stall_prop_value(props, "total_stop");
+ local_io_stall_stats.total_slowdown +=
+ io_stall_prop_value(props, "total_slowdown");
+ }
+ io_stall_stats = local_io_stall_stats;
+}
+
+static SHOW_VAR rocksdb_stall_status_variables[] = {
+ DEF_STATUS_VAR_FUNC("l0_file_count_limit_slowdowns",
+ &io_stall_stats.level0_slowdown, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("locked_l0_file_count_limit_slowdowns",
+ &io_stall_stats.level0_slowdown_with_compaction,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("l0_file_count_limit_stops",
+ &io_stall_stats.level0_numfiles, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("locked_l0_file_count_limit_stops",
+ &io_stall_stats.level0_numfiles_with_compaction,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("pending_compaction_limit_stops",
+ &io_stall_stats.stop_for_pending_compaction_bytes,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("pending_compaction_limit_slowdowns",
+ &io_stall_stats.slowdown_for_pending_compaction_bytes,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("memtable_limit_stops",
+ &io_stall_stats.memtable_compaction, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("memtable_limit_slowdowns",
+ &io_stall_stats.memtable_slowdown, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("total_stops", &io_stall_stats.total_stop,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("total_slowdowns", &io_stall_stats.total_slowdown,
+ SHOW_LONGLONG),
+ // end of the array marker
+ {NullS, NullS, SHOW_LONG}};
+
+static void show_rocksdb_stall_vars(THD *thd, SHOW_VAR *var, char *buff) {
+ update_rocksdb_stall_status();
+ var->type = SHOW_ARRAY;
+ var->value = reinterpret_cast<char *>(&rocksdb_stall_status_variables);
+}
+
static SHOW_VAR rocksdb_status_vars[] = {
DEF_STATUS_VAR(block_cache_miss),
DEF_STATUS_VAR(block_cache_hit),
@@ -11126,7 +11670,12 @@ static SHOW_VAR rocksdb_status_vars[] = {
SHOW_LONGLONG),
DEF_STATUS_VAR_PTR("number_sst_entry_other", &rocksdb_num_sst_entry_other,
SHOW_LONGLONG),
+ // the variables generated by SHOW_FUNC are sorted only by prefix (first
+ // arg in the tuple below), so make sure it is unique to make sorting
+ // deterministic as quick sort is not stable
{"rocksdb", reinterpret_cast<char *>(&show_myrocks_vars), SHOW_FUNC},
+ {"rocksdb_stall", reinterpret_cast<char *>(&show_rocksdb_stall_vars),
+ SHOW_FUNC},
{NullS, NullS, SHOW_LONG}};
/*
@@ -11173,10 +11722,13 @@ void Rdb_background_thread::run() {
// pthread_cond_timedwait()) to wait on.
set_timespec(ts_next_sync, WAKE_UP_INTERVAL);
- // Flush the WAL.
- if (rdb && (rocksdb_flush_log_at_trx_commit == 2)) {
+ // Flush the WAL. Sync it for both background and never modes to copy
+ // InnoDB's behavior. For mode never, the WAL file isn't even written,
+ // whereas background writes to the WAL file but issues the syncs in a
+ // background thread.
+ if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC)) {
DBUG_ASSERT(!rocksdb_db_options->allow_mmap_writes);
- const rocksdb::Status s = rdb->SyncWAL();
+ const rocksdb::Status s = rdb->FlushWAL(true);
if (!s.ok()) {
rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD);
}
@@ -11272,6 +11824,7 @@ int rdb_dbug_set_ttl_snapshot_ts() { return rocksdb_debug_ttl_snapshot_ts; }
int rdb_dbug_set_ttl_read_filter_ts() {
return rocksdb_debug_ttl_read_filter_ts;
}
+bool rdb_dbug_set_ttl_ignore_pk() { return rocksdb_debug_ttl_ignore_pk; }
#endif
void rdb_update_global_stats(const operation_type &type, uint count,
@@ -11496,6 +12049,17 @@ void rocksdb_set_delayed_write_rate(THD *thd, struct st_mysql_sys_var *var,
RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
}
+void rocksdb_set_max_latest_deadlocks(THD *thd, struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save) {
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+ const uint32_t new_val = *static_cast<const uint32_t *>(save);
+ if (rocksdb_max_latest_deadlocks != new_val) {
+ rocksdb_max_latest_deadlocks = new_val;
+ rdb->SetDeadlockInfoBufferSize(rocksdb_max_latest_deadlocks);
+ }
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
void rdb_set_collation_exception_list(const char *const exception_list) {
DBUG_ASSERT(rdb_collation_exceptions != nullptr);
@@ -11545,6 +12109,18 @@ void rocksdb_set_bulk_load(THD *const thd, struct st_mysql_sys_var *const var
*static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save);
}
+void rocksdb_set_bulk_load_allow_unsorted(
+ THD *const thd,
+ struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr, const void *const save) {
+ if (THDVAR(thd, bulk_load)) {
+ my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), "SET",
+ "Cannot change this setting while bulk load is enabled");
+ } else {
+ *static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save);
+ }
+}
+
static void rocksdb_set_max_background_jobs(THD *thd,
struct st_mysql_sys_var *const var,
void *const var_ptr,
@@ -11739,6 +12315,19 @@ double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) {
} // namespace myrocks
+/**
+ Construct and emit duplicate key error message using information
+ from table's record buffer.
+
+ @sa print_keydup_error(table, key, msg, errflag, thd, org_table_name).
+*/
+
+void print_keydup_error(TABLE *table, KEY *key, myf errflag,
+ const THD *thd, const char *org_table_name)
+{
+ print_keydup_error(table, key, ER(ER_DUP_ENTRY_WITH_KEY_NAME), errflag);
+}
+
/*
Register the storage engine plugin outside of myrocks namespace
so that mysql_declare_plugin does not get confused when it does
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 4cbee5cb64d..2949f6d6588 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -22,6 +22,7 @@
/* C++ standard header files */
#include <set>
#include <string>
+#include <unordered_map>
#include <unordered_set>
#include <vector>
@@ -250,6 +251,12 @@ enum collations_used {
#define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong)
/*
+ Maximum index prefix length in bytes.
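+  MAX_INDEX_COL_LEN_LARGE applies when rocksdb_large_prefix is enabled,
+  MAX_INDEX_COL_LEN_SMALL otherwise (see max_supported_key_part_length()).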
+*/
+#define MAX_INDEX_COL_LEN_LARGE 3072
+#define MAX_INDEX_COL_LEN_SMALL 767
+
+/*
MyRocks specific error codes. NB! Please make sure that you will update
HA_ERR_ROCKSDB_LAST when adding new ones. Also update the strings in
rdb_error_messages to include any new error messages.
@@ -307,7 +314,14 @@ struct Rdb_table_handler {
/* Stores cumulative table statistics */
my_io_perf_atomic_t m_io_perf_read;
+ my_io_perf_atomic_t m_io_perf_write;
Rdb_atomic_perf_counters m_table_perf_context;
+
+ /* Stores cached memtable estimate statistics */
+ std::atomic_uint m_mtcache_lock;
+ uint64_t m_mtcache_count;
+ uint64_t m_mtcache_size;
+ uint64_t m_mtcache_last_update;
};
class Rdb_key_def;
@@ -374,6 +388,8 @@ struct st_global_stats {
ib_counter_t<ulonglong, 64, RDB_INDEXER> system_rows[ROWS_MAX];
ib_counter_t<ulonglong, 64, RDB_INDEXER> queries[QUERIES_MAX];
+
+ ib_counter_t<ulonglong, 64, RDB_INDEXER> covered_secondary_key_lookups;
};
/* Struct used for exporting status to MySQL */
@@ -393,6 +409,8 @@ struct st_export_stats {
ulonglong queries_point;
ulonglong queries_range;
+
+ ulonglong covered_secondary_key_lookups;
};
/* Struct used for exporting RocksDB memory status */
@@ -401,6 +419,27 @@ struct st_memory_stats {
ulonglong memtable_unflushed;
};
+/* Struct used for exporting RocksDB IO stalls stats */
+struct st_io_stall_stats {
+ ulonglong level0_slowdown;
+ ulonglong level0_slowdown_with_compaction;
+ ulonglong level0_numfiles;
+ ulonglong level0_numfiles_with_compaction;
+ ulonglong stop_for_pending_compaction_bytes;
+ ulonglong slowdown_for_pending_compaction_bytes;
+ ulonglong memtable_compaction;
+ ulonglong memtable_slowdown;
+ ulonglong total_stop;
+ ulonglong total_slowdown;
+
+ st_io_stall_stats()
+ : level0_slowdown(0), level0_slowdown_with_compaction(0),
+ level0_numfiles(0), level0_numfiles_with_compaction(0),
+ stop_for_pending_compaction_bytes(0),
+ slowdown_for_pending_compaction_bytes(0), memtable_compaction(0),
+ memtable_slowdown(0), total_stop(0), total_slowdown(0) {}
+};
+
} // namespace myrocks
#include "./rdb_buff.h"
@@ -509,6 +548,12 @@ class ha_rocksdb : public my_core::handler {
Pointer to the original TTL timestamp value (8 bytes) during UPDATE.
*/
char m_ttl_bytes[ROCKSDB_SIZEOF_TTL_RECORD];
+ /*
+ The TTL timestamp value can change if the explicit TTL column is
+ updated. If we detect this when updating the PK, we indicate it here so
+ we know we must always update any SKs.
+ */
+ bool m_ttl_bytes_updated;
/* rowkey of the last record we've read, in StorageFormat. */
String m_last_rowkey;
@@ -555,7 +600,9 @@ class ha_rocksdb : public my_core::handler {
bool m_update_scope_is_valid;
/* SST information used for bulk loading the primary key */
- std::shared_ptr<Rdb_sst_info> m_sst_info;
+ std::unique_ptr<Rdb_sst_info> m_sst_info;
+ /* External merge sorts for bulk load: key ID -> merge sort instance */
+ std::unordered_map<GL_INDEX_ID, Rdb_index_merge> m_key_merge;
Rdb_transaction *m_bulk_load_tx;
/* Mutex to protect finalizing bulk load */
mysql_mutex_t m_bulk_load_mutex;
@@ -653,6 +700,13 @@ class ha_rocksdb : public my_core::handler {
void setup_read_decoders();
/*
+ For the active index, indicates which columns the index must cover for
+ the current lookup to be index-only (covered). If the bitmap field is
+ null, this index does not cover the current lookup for any record.
+ */
+ MY_BITMAP m_lookup_bitmap = {nullptr, nullptr, nullptr, 0, 0};
+
+ /*
Number of bytes in on-disk (storage) record format that are used for
storing SQL NULL flags.
*/
@@ -889,11 +943,7 @@ public:
DBUG_RETURN(MAX_REF_PARTS);
}
- uint max_supported_key_part_length() const override {
- DBUG_ENTER_FUNC();
-
- DBUG_RETURN(2048);
- }
+ uint max_supported_key_part_length() const override;
/** @brief
unireg.cc will call this to make sure that the storage engine can handle
@@ -1072,9 +1122,13 @@ private:
rocksdb::Slice *const packed_rec)
MY_ATTRIBUTE((__nonnull__));
- bool should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val,
+ bool should_hide_ttl_rec(const Rdb_key_def &kd,
+ const rocksdb::Slice &ttl_rec_val,
const int64_t curr_ts)
MY_ATTRIBUTE((__warn_unused_result__));
+ void rocksdb_skip_expired_records(const Rdb_key_def &kd,
+ rocksdb::Iterator *const iter,
+ bool seek_backward);
int index_first_intern(uchar *buf)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
@@ -1107,8 +1161,10 @@ private:
struct unique_sk_buf_info *sk_info)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
int bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd,
- const rocksdb::Slice &key, const rocksdb::Slice &value)
+ const rocksdb::Slice &key, const rocksdb::Slice &value,
+ bool sort)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ void update_bytes_written(ulonglong bytes_written);
int update_pk(const Rdb_key_def &kd, const struct update_row_info &row_info,
const bool &pk_changed) MY_ATTRIBUTE((__warn_unused_result__));
int update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
diff --git a/storage/rocksdb/ha_rocksdb_proto.h b/storage/rocksdb/ha_rocksdb_proto.h
index e465ed7cf25..85c3968cc99 100644
--- a/storage/rocksdb/ha_rocksdb_proto.h
+++ b/storage/rocksdb/ha_rocksdb_proto.h
@@ -77,6 +77,7 @@ bool rdb_is_ttl_read_filtering_enabled();
int rdb_dbug_set_ttl_rec_ts();
int rdb_dbug_set_ttl_snapshot_ts();
int rdb_dbug_set_ttl_read_filter_ts();
+bool rdb_dbug_set_ttl_ignore_pk();
#endif
enum operation_type : int;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc
new file mode 100644
index 00000000000..7adca5d7cf2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc
@@ -0,0 +1,174 @@
+###############################################################################
+# Common test file for high priority DDL
+###############################################################################
+
+
+create user test_user1@localhost;
+grant all on test to test_user1@localhost;
+create user test_user2@localhost;
+grant all on test to test_user2@localhost;
+
+# Default values
+--let $con_block = con1
+--let $con_kill = default
+--let $should_kill = 1
+--let $recreate_table = 1
+--let $throw_error = 1
+
+##
+## killing conflicting shared locks by alter table
+##
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = alter table t1 modify i bigint;
+--let $high_priority_cmd = alter high_priority table t1 modify i bigint;
+
+--source include/ddl_high_priority_module.inc
+
+##
+## killing conflicting shared lock in a transaction
+## transaction will rollback
+##
+
+--let $blocking_sql = begin; insert into t1 values (4); select i from t1;
+--let $cmd = alter table t1 rename t1_new;
+--let $high_priority_cmd = alter high_priority table t1 rename t1_new;
+
+--source include/ddl_high_priority_module.inc
+
+select * from t1_new;
+drop table t1_new;
+
+##
+## simulate conflicting DDL which will not be killed
+##
+
+# Simulate conflicting DDL
+# This will hold MDL_SHARED_NO_READ_WRITE, which may be upgraded to an
+# exclusive lock in order to run DDLs like ALTER TABLE.
+# The upgradable/exclusive lock should not be killed.
+
+--let $should_kill = 0
+
+--let $blocking_sql = lock tables t1 write;
+--let $cmd = drop table t1;
+--let $high_priority_cmd = drop high_priority table t1;
+
+--source include/ddl_high_priority_module.inc
+
+# restore $should_kill
+--let $should_kill = 1
+
+##
+## killing conflicting transaction by drop table DDL
+##
+
+--let $blocking_sql = lock tables t1 read; begin; insert into t1 values (4);
+--let $cmd = drop table t1;
+--let $high_priority_cmd = drop high_priority table t1;
+
+--source include/ddl_high_priority_module.inc
+
+##
+## no effect for regular users
+##
+
+connect (con2,localhost,test_user2,,test,,);
+# $con_kill is regular user
+--let $con_kill = con2
+--let $should_kill = 0
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = alter table t1 modify i bigint;
+--let $high_priority_cmd = alter high_priority table t1 modify i bigint;
+
+--source include/ddl_high_priority_module.inc
+
+disconnect con2;
+
+# restore $con_kill
+--let $con_kill = default
+# restore $should_kill
+--let $should_kill = 1
+
+##
+## create/drop index
+##
+
+# create index
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = create index idx1 on t1 (i);
+--let $high_priority_cmd = create high_priority index idx1 on t1 (i);
+
+--source include/ddl_high_priority_module.inc
+
+# drop index (use the previously created table)
+--let $recreate_table = 0
+
+--let $cmd = drop index idx1 on t1;
+--let $high_priority_cmd = drop high_priority index idx1 on t1;
+
+--source include/ddl_high_priority_module.inc
+
+# restore $recreate_table
+--let $recreate_table = 1
+
+##
+## high_priority truncate table
+##
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = truncate t1;
+--let $high_priority_cmd = truncate high_priority t1;
+
+--source include/ddl_high_priority_module.inc
+
+##
+## high_priority create/drop trigger
+##
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+--let $high_priority_cmd = create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+
+--source include/ddl_high_priority_module.inc
+
+# drop trigger (use the previously created table)
+--let $recreate_table = 0
+
+--let $cmd = drop trigger ins_sum;
+--let $high_priority_cmd = drop high_priority trigger ins_sum;
+
+--source include/ddl_high_priority_module.inc
+
+# restore $recreate_table
+--let $recreate_table = 1
+
+##
+## high_priority optimize table
+##
+## "optimize table" doesn't throw errors. It catches all errors, and
+## returns a result set in a table
+##
+
+--let $throw_error = 0
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = optimize table t1;
+--let $high_priority_cmd = optimize high_priority table t1;
+
+--source include/ddl_high_priority_module.inc
+
+# restore throw_error
+--let $throw_error = 1
+
+##
+## clean up
+##
+
+drop user test_user1@localhost;
+drop user test_user2@localhost;
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc
new file mode 100644
index 00000000000..ffbdc306455
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc
@@ -0,0 +1,141 @@
+###############################################################################
+# This file plays as a function/module for ddl_high_priority test
+#
+# Usage: set the following variables before including
+#
+# $use_sys_var: whether to use the sys var or the HIGH_PRIORITY syntax to trigger high_priority
+# value: 0/1
+#
+# $con_block: a blocking connection
+# value: con1/con2/default
+#
+# $con_kill: a connection that will attempt to kill $con_block
+# value: con1/con2/default
+#
+# $cmd: a regular command to evaluate (to use with sys var)
+# value: sql command
+#
+# $high_priority_cmd: a high_priority command to evaluate
+# value: sql command
+#
+# $should_kill: whether $con_block is expected to be killed or not
+# value: 0/1
+#
+# $recreate_table: Should recreate the test table or not
+# value: 0/1
+#
+# $throw_error: whether a command will throw lock_wait_timeout error.
+# Note, optimize table catches all errors.
+# value: 0/1
+###############################################################################
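+#
+# Illustrative invocation (parameter values borrowed from
+# ddl_high_priority.inc; $blocking_sql is the statement $con_block runs to
+# take the conflicting lock):
+#
+#   --let $use_sys_var = 0
+#   --let $con_block = con1
+#   --let $con_kill = default
+#   --let $blocking_sql = lock tables t1 read;
+#   --let $cmd = alter table t1 modify i bigint;
+#   --let $high_priority_cmd = alter high_priority table t1 modify i bigint;
+#   --let $should_kill = 1
+#   --let $recreate_table = 1
+#   --let $throw_error = 1
+#   --source include/ddl_high_priority_module.inc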
+
+##
+## Print out the parameters of the test set
+## (useful for debugging)
+##
+--echo
+--echo ## Test parameters:
+--echo ## use_sys_var = $use_sys_var
+--echo ## con_block = $con_block
+--echo ## con_kill = $con_kill
+--echo ## cmd = $cmd
+--echo ## high_priority_cmd = $high_priority_cmd
+--echo ## should_kill = $should_kill
+--echo ## recreate_table = $recreate_table
+--echo ## throw_error = $throw_error
+--echo
+
+
+##
+## Setup
+##
+
+connection default;
+
+# create con1
+connect (con1,localhost,test_user1,,test,,);
+
+if ($recreate_table) {
+ # create t1
+ --disable_warnings
+ drop table if exists t1;
+ --enable_warnings
+ create table t1 (i int);
+ show create table t1;
+ insert into t1 values (1), (2), (3);
+}
+
+##
+## Testing
+##
+
+--echo connection: $con_block
+--connection $con_block
+--eval $blocking_sql
+
+--echo connection: $con_kill
+--connection $con_kill
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+
+describe t1;
+
+--echo connection: default (for show processlist)
+connection default;
+--echo # both $con_block and $con_kill exist
+--replace_column 1 <Id> 3 <Host> 5 <Command> 6 <Time> 7 <State> 8 <Info> 9 <RExam> 10 <RSent> 11 <TID>
+show processlist;
+
+--echo connection: $con_kill
+--connection $con_kill
+
+# command will fail without high_priority
+if ($throw_error) {
+ --error ER_LOCK_WAIT_TIMEOUT
+ --eval $cmd
+}
+
+if (!$throw_error) {
+ --eval $cmd
+}
+
+if ($use_sys_var) {
+ set high_priority_ddl = 1;
+ select @@high_priority_ddl;
+
+ # non-supported command will timeout
+ --error ER_LOCK_WAIT_TIMEOUT
+ lock tables t1 write;
+
+ if (!$should_kill) {
+ # regular user ddl will fail regardless of high_priority_ddl being on
+ --error ER_LOCK_WAIT_TIMEOUT
+ --eval $cmd
+ }
+
+ if ($should_kill) {
+ --eval $cmd
+ }
+
+ # reset high_priority_ddl
+ set high_priority_ddl = 0;
+}
+
+if (!$use_sys_var) {
+ if (!$should_kill) {
+ # regular user ddl will fail regardless of high_priority being on
+ --error ER_LOCK_WAIT_TIMEOUT
+ --eval $high_priority_cmd
+ }
+
+ if ($should_kill) {
+ --eval $high_priority_cmd
+ }
+}
+
+--echo connection: default (for show processlist)
+connection default;
+--replace_column 1 <Id> 3 <Host> 5 <Command> 6 <Time> 7 <State> 8 <Info> 9 <RExam> 10 <RSent> 11 <TID>
+show processlist;
+
+disconnect con1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc b/storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc
new file mode 100644
index 00000000000..e96eb573c1f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc
@@ -0,0 +1,21 @@
+#
+# A helper include file for prefix index index-only query tests
+#
+# Parameters:
+# $prefix_index_check_title - title of the test
+# $prefix_index_check_query - test query
+# $prefix_index_check_read_avoided_delta - expected change of
+# 'rocksdb_covered_secondary_key_lookups' status variable
+# value after running the query
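+#
+# Illustrative usage (mirrors the rocksdb.covered_unpack_info_format test;
+# adjust the include path to how your test sources suite includes):
+#   --let $prefix_index_check_title = Eligible for optimization, access via fake_id only
+#   --let $prefix_index_check_query = SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031
+#   --let $prefix_index_check_read_avoided_delta = 2
+#   --source include/prefix_index_only_query_check.inc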
+
+--let $show_count_statement = show status like 'rocksdb_covered_secondary_key_lookups'
+
+--echo # $prefix_index_check_title
+--let $base_count = query_get_value($show_count_statement, Value, 1)
+
+--eval $prefix_index_check_query
+
+--let $count = query_get_value($show_count_statement, Value, 1)
+--let $assert_text= $prefix_index_check_title: $prefix_index_check_read_avoided_delta rocksdb_covered_secondary_key_lookups
+--let $assert_cond= $count - $base_count = $prefix_index_check_read_avoided_delta
+--source include/assert.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/my.cnf b/storage/rocksdb/mysql-test/rocksdb/my.cnf
index 2beaf514cee..6f1722addc5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/my.cnf
+++ b/storage/rocksdb/mysql-test/rocksdb/my.cnf
@@ -10,8 +10,11 @@ explicit-defaults-for-timestamp=1
loose-rocksdb_lock_wait_timeout=1
loose-rocksdb_strict_collation_check=0
+# MariaDB: speed up the tests:
loose-rocksdb-flush-log-at-trx-commit=0
+loose-rocksdb_force_compute_memtable_stats_cachetime=0
+
# The following is to get rid of the harmless
# "Deadlock found when trying to get lock" errors, see MDEV-12285.
log-warnings=1
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result b/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result
index 06452a5437f..7b15ed47d44 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result
@@ -4,20 +4,20 @@ CREATE DATABASE mysqlslap;
USE mysqlslap;
CREATE TABLE t1(id BIGINT AUTO_INCREMENT, value BIGINT, PRIMARY KEY(id)) ENGINE=rocksdb;
# 2PC enabled, MyRocks durability enabled
-SET GLOBAL rocksdb_enable_2pc=0;
+SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
## 2PC + durability + single thread
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 1000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c = 1000 then 'true' else 'false' end
-false
+true
## 2PC + durability + group commit
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true' else 'false' end
-false
+true
# 2PC enabled, MyRocks durability disabled
-SET GLOBAL rocksdb_enable_2pc=0;
+SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
@@ -28,16 +28,16 @@ select case when variable_value-@c = 0 then 'true' else 'false' end from informa
case when variable_value-@c = 0 then 'true' else 'false' end
true
# 2PC disabled, MyRocks durability enabled
-SET GLOBAL rocksdb_enable_2pc=1;
+SET GLOBAL rocksdb_enable_2pc=0;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c = 0 then 'true' else 'false' end
-false
+true
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c = 0 then 'true' else 'false' end
-false
+true
SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result
index b51680fa5fe..01fa9f1d35b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result
@@ -289,12 +289,16 @@ set global rocksdb_bulk_load=1;
connect con1,localhost,root,,;
# Switch to connection con1
connection con1;
-show global variables like 'rocksdb_bulk_load';
+show global variables like 'rocksdb_bulk_load%';
Variable_name Value
rocksdb_bulk_load ON
-show session variables like 'rocksdb_bulk_load';
+rocksdb_bulk_load_allow_unsorted OFF
+rocksdb_bulk_load_size 1000
+show session variables like 'rocksdb_bulk_load%';
Variable_name Value
rocksdb_bulk_load ON
+rocksdb_bulk_load_allow_unsorted OFF
+rocksdb_bulk_load_size 1000
CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
INSERT INTO t1 VALUES (1,1);
# Disconnecting on con1
@@ -341,10 +345,11 @@ SET @prior_rocksdb_strict_collation_check= @@rocksdb_strict_collation_check;
SET @prior_rocksdb_merge_buf_size = @@rocksdb_merge_buf_size;
SET global rocksdb_strict_collation_check = off;
SET session rocksdb_merge_combine_read_size = 566;
-SET session rocksdb_merge_buf_size = 336;
-show variables like '%rocksdb_bulk_load%';
+SET session rocksdb_merge_buf_size = 340;
+show variables like 'rocksdb_bulk_load%';
Variable_name Value
rocksdb_bulk_load OFF
+rocksdb_bulk_load_allow_unsorted OFF
rocksdb_bulk_load_size 1000
CREATE TABLE t1 (a VARCHAR(80)) ENGINE=RocksDB;
INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
index eced62bd043..f230b173892 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
@@ -26,4 +26,30 @@ select @@rocksdb_bulk_load;
@@rocksdb_bulk_load
0
call mtr.add_suppression('finalizing last SST file while setting bulk loading variable');
+TRUNCATE TABLE t1;
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(100);
+INSERT INTO t1 VALUES(101);
+INSERT INTO t1 VALUES(99);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+pk
+99
+100
+101
+TRUNCATE TABLE t1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(201);
+INSERT INTO t1 VALUES(200);
+INSERT INTO t1 VALUES(202);
+INSERT INTO t1 VALUES(201);
+ERROR 23000: Failed to insert the record: the key already exists
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+pk
+200
+201
+202
+SET rocksdb_bulk_load_allow_unsorted=DEFAULT;
DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
new file mode 100644
index 00000000000..2a7c7bd69fd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
@@ -0,0 +1,103 @@
+DROP TABLE IF EXISTS t1;
+SET rocksdb_bulk_load_size=3;
+SET rocksdb_bulk_load_allow_unsorted=1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+SET rocksdb_bulk_load=1;
+SELECT * FROM t1;
+a b
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+a b
+-3 5
+-1 3
+2 0
+4 -2
+6 -4
+DROP TABLE t1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1", KEY(b));
+SET rocksdb_bulk_load=1;
+SELECT * FROM t1;
+a b
+6 -4
+4 -2
+2 0
+-1 3
+-3 5
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t2 VALUES (1,1);
+SELECT * FROM t1;
+a b
+1 1
+INSERT INTO t1 VALUES (2,2);
+SELECT * FROM t2;
+a b
+1 1
+SELECT * FROM t1;
+a b
+1 1
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+a b
+1 1
+2 2
+DROP TABLE t1, t2;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "cf1");
+CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+PARTITION BY KEY() PARTITIONS 4;
+connect other,localhost,root,,;
+set session transaction isolation level repeatable read;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 0
+start transaction with consistent snapshot;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 1
+connection default;
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+LOAD DATA INFILE <input_file> INTO TABLE t2;
+LOAD DATA INFILE <input_file> INTO TABLE t3;
+set rocksdb_bulk_load=0;
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+ANALYZE TABLE t1, t2, t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+select count(a) from t1;
+count(a)
+5000000
+select count(b) from t1;
+count(b)
+5000000
+select count(a) from t2;
+count(a)
+5000000
+select count(b) from t2;
+count(b)
+5000000
+select count(a) from t3;
+count(a)
+5000000
+select count(b) from t3;
+count(b)
+5000000
+DROP TABLE t1, t2, t3;
+SET rocksdb_bulk_load_allow_unsorted=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result
new file mode 100644
index 00000000000..d8e5b92e897
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result
@@ -0,0 +1,4 @@
+SET rocksdb_bulk_load=1;
+SET rocksdb_bulk_load_allow_unsorted=1;
+ERROR HY000: Error when executing command SET: Cannot change this setting while bulk load is enabled
+SET rocksdb_bulk_load=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result b/storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result
new file mode 100644
index 00000000000..d9d29e6ac69
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result
@@ -0,0 +1,10 @@
+DROP TABLE IF EXISTS stats_test_table;
+CREATE TABLE stats_test_table (a INT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB;
+SET GLOBAL rocksdb_perf_context_level=3;
+INSERT INTO stats_test_table VALUES (7,1);
+INSERT INTO stats_test_table VALUES (2,2);
+SELECT io_write_bytes > 0 FROM INFORMATION_SCHEMA.TABLE_STATISTICS WHERE TABLE_NAME = "stats_test_table";
+io_write_bytes > 0
+1
+DROP TABLE stats_test_table;
+SET GLOBAL rocksdb_perf_context_level=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result b/storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result
new file mode 100644
index 00000000000..195215331b8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result
@@ -0,0 +1,73 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+id INT,
+fake_id INT,
+bigfield VARCHAR(4096),
+PRIMARY KEY (id),
+KEY bf (bigfield(32)),
+KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+(8, 1008, REPEAT('b', 8)),
+(24, 1024, REPEAT('c', 24)),
+(31, 1031, REPEAT('d', 31)),
+(32, 1032, REPEAT('x', 32)),
+(33, 1033, REPEAT('y', 33)),
+(128, 1128, REPEAT('z', 128));
+SELECT * FROM t1;
+id fake_id bigfield
+1 1001 a
+8 1008 bbbbbbbb
+24 1024 cccccccccccccccccccccccc
+31 1031 ddddddddddddddddddddddddddddddd
+32 1032 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+33 1033 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+128 1128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+# Eligible for optimization, access via fake_id only
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031;
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [Eligible for optimization, access via fake_id only: 2 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization, access via fake_id of big row.
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033;
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization, access via fake_id of big row.: 0 rocksdb_covered_secondary_key_lookups]
+DROP TABLE t1;
+set session debug= '+d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+CREATE TABLE t1 (
+id INT,
+fake_id INT,
+bigfield VARCHAR(4096),
+PRIMARY KEY (id),
+KEY bf (bigfield(32)),
+KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+set session debug= '-d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+(8, 1008, REPEAT('b', 8)),
+(24, 1024, REPEAT('c', 24)),
+(31, 1031, REPEAT('d', 31)),
+(32, 1032, REPEAT('x', 32)),
+(33, 1033, REPEAT('y', 33)),
+(128, 1128, REPEAT('z', 128));
+SELECT * FROM t1;
+id fake_id bigfield
+1 1001 a
+8 1008 bbbbbbbb
+24 1024 cccccccccccccccccccccccc
+31 1031 ddddddddddddddddddddddddddddddd
+32 1032 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+33 1033 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+128 1128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+# No longer eligible for optimization since no covered bitmap was stored.
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031;
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [No longer eligible for optimization since no covered bitmap was stored.: 0 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization.
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033;
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization.: 0 rocksdb_covered_secondary_key_lookups]
+DROP TABLE t1;
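The assert.inc messages above count covered secondary-key lookups through the rocksdb_covered_secondary_key_lookups status counter, so the same effect can be reproduced by hand. A rough sketch, assuming the counter is sampled before and after the probe query in the way the asserts do:

    SHOW GLOBAL STATUS LIKE 'rocksdb_covered_secondary_key_lookups';
    SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031;
    SHOW GLOBAL STATUS LIKE 'rocksdb_covered_secondary_key_lookups';
    # the difference between the two readings is the number of lookups served
    # entirely from the secondary key: 2 when the covered bitmap format is
    # present, 0 when MYROCKS_NO_COVERED_BITMAP_FORMAT suppressed it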
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result b/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result
new file mode 100644
index 00000000000..39130475349
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result
@@ -0,0 +1,1009 @@
+##
+## Using the system variable high_priority_ddl"
+##
+create user test_user1@localhost;
+grant all on test to test_user1@localhost;
+create user test_user2@localhost;
+grant all on test to test_user2@localhost;
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter table t1 modify i bigint;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 rename t1_new;
+## high_priority_cmd = alter high_priority table t1 rename t1_new;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+begin; insert into t1 values (4); select i from t1;;
+i
+1
+2
+3
+4
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+alter table t1 rename t1_new;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter table t1 rename t1_new;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+select * from t1_new;
+i
+1
+2
+3
+drop table t1_new;
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 write;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read; begin; insert into t1 values (4);;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop table t1;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = con2
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: con2
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and con2 exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: con2
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = create index idx1 on t1 (i);
+## high_priority_cmd = create high_priority index idx1 on t1 (i);
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+create index idx1 on t1 (i);;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create index idx1 on t1 (i);;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop index idx1 on t1;
+## high_priority_cmd = drop high_priority index idx1 on t1;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES MUL NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop index idx1 on t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop index idx1 on t1;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = truncate t1;
+## high_priority_cmd = truncate high_priority t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+truncate t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+truncate t1;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## high_priority_cmd = create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop trigger ins_sum;
+## high_priority_cmd = drop high_priority trigger ins_sum;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop trigger ins_sum;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop trigger ins_sum;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = optimize table t1;
+## high_priority_cmd = optimize high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 0
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+optimize table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize Error Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+test.t1 optimize status Operation failed
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+optimize table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+drop user test_user1@localhost;
+drop user test_user2@localhost;
+drop table if exists t1;
+##
+## Using HIGH_PRIORITY syntax
+##
+create user test_user1@localhost;
+grant all on test to test_user1@localhost;
+create user test_user2@localhost;
+grant all on test to test_user2@localhost;
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter high_priority table t1 modify i bigint;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 rename t1_new;
+## high_priority_cmd = alter high_priority table t1 rename t1_new;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+begin; insert into t1 values (4); select i from t1;;
+i
+1
+2
+3
+4
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+alter table t1 rename t1_new;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter high_priority table t1 rename t1_new;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+select * from t1_new;
+i
+1
+2
+3
+drop table t1_new;
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 write;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read; begin; insert into t1 values (4);;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority table t1;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = con2
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: con2
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and con2 exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: con2
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter high_priority table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = create index idx1 on t1 (i);
+## high_priority_cmd = create high_priority index idx1 on t1 (i);
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+create index idx1 on t1 (i);;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create high_priority index idx1 on t1 (i);;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop index idx1 on t1;
+## high_priority_cmd = drop high_priority index idx1 on t1;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES MUL NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop index idx1 on t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority index idx1 on t1;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = truncate t1;
+## high_priority_cmd = truncate high_priority t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+truncate t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+truncate high_priority t1;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## high_priority_cmd = create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop trigger ins_sum;
+## high_priority_cmd = drop high_priority trigger ins_sum;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop trigger ins_sum;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority trigger ins_sum;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = optimize table t1;
+## high_priority_cmd = optimize high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 0
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+optimize table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize Error Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+test.t1 optimize status Operation failed
+optimize high_priority table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+drop user test_user1@localhost;
+drop user test_user2@localhost;
+drop table if exists t1;
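The two halves of this result file exercise the same blocked-DDL scenarios through the session variable and through the dedicated keyword. A condensed sketch of both forms, assuming the syntax echoed in the test-parameter blocks above:

    # via the session variable
    SET high_priority_ddl = 1;
    ALTER TABLE t1 MODIFY i BIGINT;            # connections holding conflicting locks are killed
    SET high_priority_ddl = 0;
    # via the HIGH_PRIORITY keyword
    ALTER HIGH_PRIORITY TABLE t1 MODIFY i BIGINT;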
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result
new file mode 100644
index 00000000000..d7cb89becb7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result
@@ -0,0 +1,490 @@
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+# Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+create table t (i int primary key) engine=rocksdb;
+insert into t values (1), (2), (3);
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #1
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #2
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 10;
+Deadlock #3
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 1;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set rocksdb_deadlock_detect_depth = 2;
+Deadlock #4
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+begin;
+select * from t where i=3 for update;
+i
+3
+select * from t where i=2 for update;
+select * from t where i=3 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+3
+rollback;
+i
+2
+rollback;
+set global rocksdb_max_latest_deadlocks = 5;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+-------DEADLOCK EXCEEDED MAX DEPTH-------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #5
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+begin;
+select * from t where i=3 lock in share mode;
+i
+3
+select * from t where i=100 for update;
+i
+select * from t where i=101 for update;
+i
+select * from t where i=2 for update;
+select * from t where i=3 lock in share mode;
+i
+3
+select * from t where i=200 for update;
+i
+select * from t where i=201 for update;
+i
+select * from t where i=1 lock in share mode;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: SHARED
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+-------DEADLOCK EXCEEDED MAX DEPTH-------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: NOT FOUND; IDX_ID
+TABLE NAME: NOT FOUND; IDX_ID
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: SHARED
+INDEX NAME: NOT FOUND; IDX_ID
+TABLE NAME: NOT FOUND; IDX_ID
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: NOT FOUND; IDX_ID
+TABLE NAME: NOT FOUND; IDX_ID
+
+--------TXN_ID GOT DEADLOCK---------
+
+-------DEADLOCK EXCEEDED MAX DEPTH-------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 0;
+# Clears deadlock buffer of any existent deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
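Taken together, this result file exercises three knobs: rocksdb_deadlock_detect enables detection, rocksdb_max_latest_deadlocks bounds (and, when set to 0, clears) the buffer of recorded deadlock paths, and the session variable rocksdb_deadlock_detect_depth caps the cycle length before a path is reported as exceeding the maximum depth. A minimal sketch of the setup and inspection steps used around the deadlock scenarios:

    SET GLOBAL rocksdb_deadlock_detect = ON;
    SET GLOBAL rocksdb_lock_wait_timeout = 10000;
    SET GLOBAL rocksdb_max_latest_deadlocks = 0;    # clears previously recorded deadlocks
    SET GLOBAL rocksdb_max_latest_deadlocks = 10;   # keep up to 10 recorded paths
    SHOW ENGINE ROCKSDB TRANSACTION STATUS;         # deadlock paths appear in this output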
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result b/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result
index 49df0b28bdf..dba49d8ff41 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result
@@ -5,6 +5,9 @@ DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t3;
DROP TABLE IF EXISTS t4;
DROP TABLE IF EXISTS t5;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+call mtr.add_suppression("LibRocksDB");
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result b/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result
index 44668847649..fad2939d206 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result
@@ -5,6 +5,8 @@ DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t3;
DROP TABLE IF EXISTS t4;
DROP TABLE IF EXISTS t5;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result b/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result
index e5237fe9b1e..7a33fa83cb4 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result
@@ -1,6 +1,8 @@
call mtr.add_suppression("Column family 'cf1' not found");
call mtr.add_suppression("Column family 'rev:cf2' not found");
DROP TABLE IF EXISTS t1;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result b/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result
index 954335debf2..b4cebb08bb1 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result
@@ -178,16 +178,20 @@ id1 id2 id3
9 17 9
DROP TABLE t1;
DROP TABLE t2;
+set global rocksdb_large_prefix=1;
CREATE TABLE t1 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
PRIMARY KEY (id1, id2, id3),
UNIQUE KEY (id3, id1)) ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
+set global rocksdb_large_prefix=1;
CREATE TABLE t2 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
PRIMARY KEY (id1, id2, id3),
UNIQUE KEY (id3, id1) COMMENT 'rev:cf') ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
INSERT INTO t1 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 9;
SELECT * FROM t1 WHERE id1 = 1;
id1 id2 id3
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result b/storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result
new file mode 100644
index 00000000000..963f9706ee8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result
@@ -0,0 +1,80 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+id INT,
+fake_id INT,
+bigfield VARCHAR(4096),
+PRIMARY KEY (id),
+KEY bf (bigfield(32)),
+KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+(8, 1008, REPEAT('b', 8)),
+(24, 1024, REPEAT('c', 24)),
+(31, 1031, REPEAT('d', 31)),
+(32, 1032, REPEAT('x', 32)),
+(33, 1033, REPEAT('y', 33)),
+(128, 1128, REPEAT('z', 128));
+SELECT * FROM t1;
+id fake_id bigfield
+1 1001 a
+8 1008 bbbbbbbb
+24 1024 cccccccccccccccccccccccc
+31 1031 ddddddddddddddddddddddddddddddd
+32 1032 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+33 1033 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+128 1128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+# Baseline sanity check
+no-op query
+no-op query
+include/assert.inc [Baseline sanity check: 0 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [Eligible for optimization.: 2 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization, access via fake_id only
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [Eligible for optimization, access via fake_id only: 2 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization, access via fake_id of big row.
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization, access via fake_id of big row.: 0 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+32 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+include/assert.inc [Eligible for optimization.: 1 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization.
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization.: 0 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+8 bbbbbbbb
+include/assert.inc [Eligible for optimization.: 2 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+24 cccccccccccccccccccccccc
+include/assert.inc [Eligible for optimization.: 2 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization.
+id bigfield
+128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+include/assert.inc [Not eligible for optimization.: 0 rocksdb_covered_secondary_key_lookups]
+#
+# Test that multi-byte charsets are handled correctly
+#
+# Charset record obviously shorter than the prefix
+a b
+1 a
+include/assert.inc [Charset record obviously shorter than the prefix: 2 rocksdb_covered_secondary_key_lookups]
+# Charset record shorter than prefix
+a b
+2 cc
+include/assert.inc [Charset record shorter than prefix: 2 rocksdb_covered_secondary_key_lookups]
+# Charset record with glyphs shorter than prefix
+a b
+3 ŽŽ
+include/assert.inc [Charset record with glyphs shorter than prefix: 1 rocksdb_covered_secondary_key_lookups]
+# Charset record longer than prefix
+a b
+4 žžžž
+include/assert.inc [Charset record longer than prefix: 0 rocksdb_covered_secondary_key_lookups]
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
index 2a1fcd78126..a39f2d8c0d6 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
@@ -9,9 +9,9 @@ KEY (x)) ENGINE = ROCKSDB;
SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%';
TABLE_SCHEMA TABLE_NAME PARTITION_NAME INDEX_NAME INDEX_TYPE KV_FORMAT_VERSION CF
test is_ddl_t1 NULL PRIMARY 1 13 default
-test is_ddl_t1 NULL j 2 12 default
-test is_ddl_t1 NULL k 2 12 kl_cf
+test is_ddl_t1 NULL j 2 13 default
+test is_ddl_t1 NULL k 2 13 kl_cf
test is_ddl_t2 NULL PRIMARY 1 13 zy_cf
-test is_ddl_t2 NULL x 2 12 default
+test is_ddl_t2 NULL x 2 13 default
DROP TABLE is_ddl_t1;
DROP TABLE is_ddl_t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index.result b/storage/rocksdb/mysql-test/rocksdb/r/index.result
index 99390c8ceb2..0920d0e01b2 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/index.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index.result
@@ -40,6 +40,33 @@ t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
t1 1 a 1 a A # NULL NULL YES LSMTREE simple index on a
ALTER TABLE t1 DROP KEY a;
DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(768))
+) ENGINE=rocksdb;
+Warnings:
+Note 1071 Specified key was too long; max key length is 767 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3073))
+) ENGINE=rocksdb;
+Warnings:
+Note 1071 Specified key was too long; max key length is 3072 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=DEFAULT;
#
# Issue #376: MyRocks: ORDER BY optimizer is unable to use the index extension
#
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result b/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result
index 66481f81c67..1f4c88e5a33 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result
@@ -46,3 +46,26 @@ SHOW KEYS IN t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
t1 0 PRIMARY 1 b A # NULL NULL LSMTREE
DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+PRIMARY KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+PRIMARY KEY (a(768))
+) ENGINE=rocksdb;
+ERROR 42000: Specified key was too long; max key length is 767 bytes
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+PRIMARY KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+PRIMARY KEY (a(3073))
+) ENGINE=rocksdb;
+ERROR 42000: Specified key was too long; max key length is 3072 bytes
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result b/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result
index a604663954b..9eaab9f53aa 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result
@@ -40,3 +40,30 @@ t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
t1 1 a 1 a A # NULL NULL YES LSMTREE simple index on a
ALTER TABLE t1 DROP KEY a;
DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(768))
+) ENGINE=rocksdb;
+Warnings:
+Note 1071 Specified key was too long; max key length is 767 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3073))
+) ENGINE=rocksdb;
+Warnings:
+Note 1071 Specified key was too long; max key length is 3072 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result b/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result
index ae99badff14..bc1a96fa726 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result
@@ -40,3 +40,30 @@ t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
t1 1 a 1 a A # NULL NULL YES LSMTREE simple index on a
ALTER TABLE t1 DROP KEY a;
DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(768))
+) ENGINE=rocksdb;
+Warnings:
+Note 1071 Specified key was too long; max key length is 767 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3073))
+) ENGINE=rocksdb;
+Warnings:
+Note 1071 Specified key was too long; max key length is 3072 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result b/storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result
index 83db308627d..d4d211b9288 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result
@@ -17,7 +17,7 @@ id val1 val2
2 2 2
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -25,6 +25,7 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
SNAPSHOTS
---------
LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
@@ -48,7 +49,7 @@ id val1 val2
DELETE FROM t1 WHERE id=30;
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -61,6 +62,7 @@ MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
SHOW ENGINE rocksdb TRANSACTION STATUS
lock count 8, write count 4
insert count 2, update count 1, delete count 1
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
@@ -68,7 +70,7 @@ END OF ROCKSDB TRANSACTION MONITOR OUTPUT
ROLLBACK;
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -76,6 +78,7 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
SNAPSHOTS
---------
LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
@@ -84,7 +87,7 @@ START TRANSACTION;
INSERT INTO t1 VALUES(40,40,40);
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -97,6 +100,7 @@ MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
SHOW ENGINE rocksdb TRANSACTION STATUS
lock count 2, write count 1
insert count 1, update count 0, delete count 0
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
@@ -104,7 +108,7 @@ END OF ROCKSDB TRANSACTION MONITOR OUTPUT
COMMIT;
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -112,6 +116,7 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
SNAPSHOTS
---------
LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
@@ -133,7 +138,7 @@ UPDATE t2 SET value=3 WHERE id2=2;
DELETE FROM t2 WHERE id1=10;
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -146,6 +151,7 @@ MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
SHOW ENGINE rocksdb TRANSACTION STATUS
lock count 9, write count 7
insert count 2, update count 1, delete count 1
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result b/storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result
new file mode 100644
index 00000000000..86ba6d923a8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result
@@ -0,0 +1,37 @@
+DROP TABLE IF EXISTS T1;
+CREATE TABLE T1 (
+P1 VARCHAR(64), P2 VARCHAR(64), P3 VARCHAR(64), P4 VARCHAR(64),
+S1 VARCHAR(64), S2 VARCHAR(64), S3 VARCHAR(64), S4 VARCHAR(64),
+S5 VARCHAR(64), S6 VARCHAR(64), S7 VARCHAR(64), S8 VARCHAR(64),
+S9 VARCHAR(64), S10 VARCHAR(64), S11 VARCHAR(64), S12 VARCHAR(64),
+S13 VARCHAR(64), S14 VARCHAR(64), S15 VARCHAR(64), S16 VARCHAR(64),
+PRIMARY KEY (P1(8), P2(8), P3(8), P4(8)),
+KEY SK (S1(8), S2(8), S3(8), S4(8),
+S5(8), S6(8), S7(8), S8(8),
+S9(8), S10(8), S11(8), S12(8),
+S13(8), S14(8), S15(8), S16(8))
+) ENGINE=rocksdb;
+INSERT INTO T1 VALUES ('1', '2', '3', '4',
+'5', '6', '7', '8',
+'9', '10', '11', '12',
+'13', '14', '15', '16',
+'17', '18', '19', '20');
+SELECT * FROM T1;
+P1 P2 P3 P4 S1 S2 S3 S4 S5 S6 S7 S8 S9 S10 S11 S12 S13 S14 S15 S16
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+# Not eligible for optimization, shorter than prefix length.
+SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5';
+P4 S2
+4 6
+include/assert.inc [Not eligible for optimization, shorter than prefix length.: 0 rocksdb_covered_secondary_key_lookups]
+DELETE FROM T1;
+INSERT INTO T1 VALUES ('100000000', '200000000', '300000000', '400000000',
+'500000000', '600000000', '700000000', '800000000',
+'900000000', '100000000', '110000000', '120000000',
+'130000000', '140000000', '150000000', '160000000',
+'170000000', '180000000', '190000000', '200000000');
+# Not eligible for optimization, longer than prefix length.
+SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5';
+P4 S2
+include/assert.inc [Not eligible for optimization, longer than prefix length.: 0 rocksdb_covered_secondary_key_lookups]
+DROP TABLE T1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result b/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result
index 2e8610d43bd..6586b92d129 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result
@@ -127,6 +127,8 @@ AND STAT_TYPE in ('INTERNAL_KEY_SKIPPED_COUNT', 'INTERNAL_DELETE_SKIPPED_COUNT')
TABLE_SCHEMA TABLE_NAME PARTITION_NAME STAT_TYPE VALUE
test t1 NULL INTERNAL_KEY_SKIPPED_COUNT 10
test t1 NULL INTERNAL_DELETE_SKIPPED_COUNT 0
+set @tmp_flush_log= @@rocksdb_flush_log_at_trx_commit;
+set global rocksdb_flush_log_at_trx_commit=1;
BEGIN;
INSERT INTO t2 VALUES (1), (2);
INSERT INTO t2 VALUES (3), (4);
@@ -158,3 +160,4 @@ true
DROP TABLE t1;
DROP TABLE t2;
SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level;
+set global rocksdb_flush_log_at_trx_commit= @tmp_flush_log;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
index 46acd055421..b3dba1612a5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
@@ -881,6 +881,7 @@ rocksdb_block_restart_interval 16
rocksdb_block_size 4096
rocksdb_block_size_deviation 10
rocksdb_bulk_load OFF
+rocksdb_bulk_load_allow_unsorted OFF
rocksdb_bulk_load_size 1000
rocksdb_bytes_per_sync 0
rocksdb_cache_index_and_filter_blocks ON
@@ -893,13 +894,16 @@ rocksdb_compaction_sequential_deletes 0
rocksdb_compaction_sequential_deletes_count_sd OFF
rocksdb_compaction_sequential_deletes_file_size 0
rocksdb_compaction_sequential_deletes_window 0
+rocksdb_concurrent_prepare ON
rocksdb_create_checkpoint
rocksdb_create_if_missing ON
rocksdb_create_missing_column_families OFF
rocksdb_datadir ./.rocksdb
rocksdb_db_write_buffer_size 0
rocksdb_deadlock_detect OFF
+rocksdb_deadlock_detect_depth 50
rocksdb_debug_optimizer_no_zero_cardinality ON
+rocksdb_debug_ttl_ignore_pk OFF
rocksdb_debug_ttl_read_filter_ts 0
rocksdb_debug_ttl_rec_ts 0
rocksdb_debug_ttl_snapshot_ts 0
@@ -916,6 +920,7 @@ rocksdb_error_if_exists OFF
rocksdb_flush_log_at_trx_commit 0
rocksdb_flush_memtable_on_analyze ON
rocksdb_force_compute_memtable_stats ON
+rocksdb_force_compute_memtable_stats_cachetime 0
rocksdb_force_flush_memtable_and_lzero_now OFF
rocksdb_force_flush_memtable_now OFF
rocksdb_force_index_records_in_range 0
@@ -925,12 +930,15 @@ rocksdb_info_log_level error_level
rocksdb_io_write_timeout 0
rocksdb_is_fd_close_on_exec ON
rocksdb_keep_log_file_num 1000
+rocksdb_large_prefix OFF
rocksdb_lock_scanned_rows OFF
rocksdb_lock_wait_timeout 1
rocksdb_log_file_time_to_roll 0
rocksdb_manifest_preallocation_size 4194304
+rocksdb_manual_wal_flush ON
rocksdb_master_skip_tx_api OFF
rocksdb_max_background_jobs 2
+rocksdb_max_latest_deadlocks 5
rocksdb_max_log_file_size 0
rocksdb_max_manifest_file_size 18446744073709551615
rocksdb_max_open_files -1
@@ -939,6 +947,7 @@ rocksdb_max_subcompactions 1
rocksdb_max_total_wal_size 0
rocksdb_merge_buf_size 67108864
rocksdb_merge_combine_read_size 1073741824
+rocksdb_merge_tmp_file_removal_delay_ms 0
rocksdb_new_table_reader_for_compaction_inputs OFF
rocksdb_no_block_cache OFF
rocksdb_override_cf_options
@@ -955,6 +964,7 @@ rocksdb_records_in_range 50
rocksdb_reset_stats OFF
rocksdb_seconds_between_stat_computes 3600
rocksdb_signal_drop_index_thread OFF
+rocksdb_sim_cache_size 0
rocksdb_skip_bloom_filter_on_read OFF
rocksdb_skip_fill_cache OFF
rocksdb_skip_unique_check_tables .*
@@ -970,6 +980,7 @@ rocksdb_trace_sst_api OFF
rocksdb_unsafe_for_binlog OFF
rocksdb_update_cf_options
rocksdb_use_adaptive_mutex OFF
+rocksdb_use_clock_cache OFF
rocksdb_use_direct_io_for_flush_and_compaction OFF
rocksdb_use_direct_reads OFF
rocksdb_use_fsync OFF
@@ -1340,7 +1351,6 @@ insert into t1 select (@a:=@a+1), 1234 from information_schema.session_variables
set @tmp1= @@rocksdb_max_row_locks;
set rocksdb_max_row_locks= 20;
update t1 set a=a+10;
-ERROR HY000: Status error 10 received from RocksDB: Operation aborted: Failed to acquire lock due to max_num_locks limit
DROP TABLE t1;
#
# Test AUTO_INCREMENT behavior problem,
@@ -1452,7 +1462,6 @@ Rocksdb_rows_read #
Rocksdb_rows_updated #
Rocksdb_rows_deleted_blind #
rocksdb_rows_expired #
-Rocksdb_system_rows_deleted #
Rocksdb_system_rows_inserted #
Rocksdb_system_rows_read #
Rocksdb_system_rows_updated #
@@ -1462,6 +1471,11 @@ rocksdb_memtable_unflushed #
rocksdb_queries_point #
rocksdb_queries_range #
Rocksdb_block_cache_hit #
+rocksdb_memtable_total #
+rocksdb_memtable_unflushed #
+rocksdb_queries_point #
+rocksdb_queries_range #
+rocksdb_covered_secondary_key_lookups #
Rocksdb_block_cache_index_hit #
Rocksdb_block_cache_index_miss #
Rocksdb_block_cache_miss #
@@ -1513,6 +1527,16 @@ Rocksdb_write_other #
Rocksdb_write_self #
Rocksdb_write_timedout #
Rocksdb_write_wal #
+rocksdb_stall_l0_file_count_limit_slowdowns #
+rocksdb_stall_locked_l0_file_count_limit_slowdowns #
+rocksdb_stall_l0_file_count_limit_stops #
+rocksdb_stall_locked_l0_file_count_limit_stops #
+rocksdb_stall_pending_compaction_limit_stops #
+rocksdb_stall_pending_compaction_limit_slowdowns #
+rocksdb_stall_memtable_limit_stops #
+rocksdb_stall_memtable_limit_slowdowns #
+rocksdb_stall_total_stops #
+rocksdb_stall_total_slowdowns #
rocksdb_stall_micros #
select VARIABLE_NAME from INFORMATION_SCHEMA.global_status where VARIABLE_NAME LIKE 'rocksdb%';
VARIABLE_NAME
@@ -1530,6 +1554,7 @@ ROCKSDB_MEMTABLE_TOTAL
ROCKSDB_MEMTABLE_UNFLUSHED
ROCKSDB_QUERIES_POINT
ROCKSDB_QUERIES_RANGE
+ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS
ROCKSDB_BLOCK_CACHE_ADD
ROCKSDB_BLOCK_CACHE_DATA_HIT
ROCKSDB_BLOCK_CACHE_DATA_MISS
@@ -1580,6 +1605,16 @@ ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS
ROCKSDB_NUMBER_SUPERVERSION_RELEASES
ROCKSDB_RATE_LIMIT_DELAY_MILLIS
ROCKSDB_SNAPSHOT_CONFLICT_ERRORS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_MEMTABLE_LIMIT_STOPS
+ROCKSDB_STALL_MEMTABLE_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_TOTAL_STOPS
+ROCKSDB_STALL_TOTAL_SLOWDOWNS
ROCKSDB_STALL_MICROS
ROCKSDB_WAL_BYTES
ROCKSDB_WAL_GROUP_SYNCS
@@ -1606,6 +1641,7 @@ ROCKSDB_MEMTABLE_TOTAL
ROCKSDB_MEMTABLE_UNFLUSHED
ROCKSDB_QUERIES_POINT
ROCKSDB_QUERIES_RANGE
+ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS
ROCKSDB_BLOCK_CACHE_ADD
ROCKSDB_BLOCK_CACHE_DATA_HIT
ROCKSDB_BLOCK_CACHE_DATA_MISS
@@ -1656,6 +1692,16 @@ ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS
ROCKSDB_NUMBER_SUPERVERSION_RELEASES
ROCKSDB_RATE_LIMIT_DELAY_MILLIS
ROCKSDB_SNAPSHOT_CONFLICT_ERRORS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_MEMTABLE_LIMIT_STOPS
+ROCKSDB_STALL_MEMTABLE_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_TOTAL_STOPS
+ROCKSDB_STALL_TOTAL_SLOWDOWNS
ROCKSDB_STALL_MICROS
ROCKSDB_WAL_BYTES
ROCKSDB_WAL_GROUP_SYNCS
@@ -2155,7 +2201,9 @@ SET @old_mode = @@sql_mode;
SET sql_mode = 'strict_all_tables';
create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(c, b(255))) engine=rocksdb;
drop table t1;
+set global rocksdb_large_prefix=1;
create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(b(1255))) engine=rocksdb;
+set global rocksdb_large_prefix=0;
insert into t1 values (1, '1abcde', '1abcde'), (2, '2abcde', '2abcde'), (3, '3abcde', '3abcde');
select * from t1;
a b c
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
index d1072eee4ad..1bcd3692b4a 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
@@ -7,17 +7,8 @@ CREATE TABLE t2 (j INT, PRIMARY KEY (j) COMMENT 'rev:cf_t2') ENGINE = ROCKSDB;
CREATE TABLE t3 (k INT, PRIMARY KEY (k) COMMENT 'cf_t1') ENGINE = ROCKSDB;
CREATE TABLE t4 (l INT, PRIMARY KEY (l) COMMENT 'cf_t4') ENGINE = ROCKSDB
PARTITION BY KEY(l) PARTITIONS 4;
-SET GLOBAL rocksdb_force_flush_memtable_now=1;
-SHOW ENGINE rocksdb STATUS;
-Type Name Status
-STATISTICS # #
-DBSTATS # #
-CF_COMPACTION # #
-CF_COMPACTION # #
-CF_COMPACTION # #
-CF_COMPACTION # #
-MEMORY_STATS # #
-BG_THREADS # #
+SET @save.rocksdb_max_background_jobs= @@global.rocksdb_max_background_jobs;
+SET GLOBAL rocksdb_max_background_jobs= 1;
INSERT INTO t1 VALUES (1), (2), (3);
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -30,6 +21,19 @@ INSERT INTO t4 VALUES (1), (2), (3), (4), (5);
SELECT COUNT(*) FROM t4;
COUNT(*)
5
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+SET GLOBAL rocksdb_compact_cf="cf_t1";
+SHOW ENGINE rocksdb STATUS;
+Type Name Status
+STATISTICS # #
+DBSTATS # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+MEMORY_STATS # #
+BG_THREADS # #
+BG_THREADS # #
SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_CFSTATS;
CF_NAME STAT_TYPE VALUE
__system__ NUM_IMMUTABLE_MEM_TABLE #
@@ -356,7 +360,7 @@ Type Name Status
SHOW ENGINE ALL MUTEX;
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -364,6 +368,7 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
SNAPSHOTS
---------
LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
@@ -371,7 +376,7 @@ END OF ROCKSDB TRANSACTION MONITOR OUTPUT
START TRANSACTION WITH CONSISTENT SNAPSHOT;
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -384,8 +389,10 @@ MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
SHOW ENGINE rocksdb TRANSACTION STATUS
lock count 0, write count 0
insert count 0, update count 0, delete count 0
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
ROLLBACK;
+SET GLOBAL rocksdb_max_background_jobs= @save.rocksdb_max_background_jobs;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result
new file mode 100644
index 00000000000..7642dcda43f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result
@@ -0,0 +1,4 @@
+CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB;
+set session debug_dbug="+d,gen_sql_table_name";
+rename table t1 to t2;
+set session debug_dbug= "-d,gen_sql_table_name";
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result b/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result
index 813f651be62..eda560fefdb 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result
@@ -1,6 +1,8 @@
call mtr.add_suppression("Column family 'cf1' not found");
call mtr.add_suppression("Column family 'rev:cf2' not found");
DROP TABLE IF EXISTS t1;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result
index 79ed7ec0396..2903e9aa7eb 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result
@@ -337,9 +337,7 @@ b INT
) ENGINE=rocksdb
COMMENT='ttl_duration=100;';
ALTER TABLE t1 DROP PRIMARY KEY;
-ERROR HY000: TTL support is currently disabled when table has secondary indexes or hidden PK.
-ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
-ERROR HY000: TTL support is currently disabled when table has secondary indexes or hidden PK.
+ERROR HY000: TTL support is currently disabled when table has a hidden PK.
DROP TABLE t1;
CREATE TABLE t1 (
a INT PRIMARY KEY,
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result
new file mode 100644
index 00000000000..1f748a3841a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result
@@ -0,0 +1,709 @@
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a,c),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int,
+c int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a),
+KEY kbc (b, c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, NULL, NULL, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, NULL, NULL, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`),
+KEY kbc (`b`, `c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'efghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5);
+INSERT INTO t1 values (2, 4, 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a, ts),
+KEY kt (ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=ts;';
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_snapshot_ts = -10;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 10;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a, ts),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`, `ts`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('de', 'fghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT NOT NULL,
+b varbinary(64) NOT NULL,
+c varbinary(256) NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+value mediumblob NOT NULL,
+PRIMARY KEY (b,a,c),
+KEY kb (b)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=10;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values (2, 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (3, 'i', 'j', UNIX_TIMESTAMP(), 'k');
+INSERT INTO t1 values (4, 'm', 'n', UNIX_TIMESTAMP(), 'o');
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+2
+3
+4
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+3
+4
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts bigint(20),
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+ERROR HY000: TTL column (ts) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts int,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+ERROR HY000: TTL column (ts) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=abc;';
+ERROR HY000: TTL duration (abc) in MyRocks must be an unsigned non-null 64-bit integer.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=abc;';
+ERROR HY000: TTL column (abc) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_col=abc;';
+ERROR HY000: TTL column (abc) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=500;';
+INSERT INTO t1 values (1, 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+INSERT INTO t1 values (1, 1);
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+set global rocksdb_debug_ttl_rec_ts = -300;
+ALTER TABLE t1 COMMENT = 'ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` int(11) NOT NULL,
+ PRIMARY KEY (`a`),
+ KEY `kb` (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COMMENT='ttl_duration=1'
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+ALTER TABLE t1 DROP PRIMARY KEY;
+ERROR HY000: TTL support is currently disabled when table has a hidden PK.
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t1 VALUES (2,2);
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(b);
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int,
+PRIMARY KEY (a,b),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='asdadfasdfsadfadf ;ttl_duration=1; asfasdfasdfadfa';
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+ALTER TABLE t1 COMMENT = 'adsf;;ttl_duration=5;asfasdfa;ttl_col=a;asdfasdf;';
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 0);
+INSERT INTO t1 values (3, 0);
+INSERT INTO t1 values (5, 0);
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (7, 0);
+INSERT INTO t1 values (9, 0);
+set global rocksdb_debug_ttl_rec_ts = 0;
+UPDATE t1 SET a=a+1;
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+10 0
+2 0
+4 0
+6 0
+8 0
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT * FROM t1;
+a b
+10 0
+8 0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT,
+b bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+5
+7
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 1);
+INSERT INTO t1 values (3, 1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_enable_ttl=0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_enable_ttl=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+6
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT,
+b bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a, b),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+5
+7
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT,
+b bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a, b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT a FROM t1;
+a
+1
+3
+5
+7
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT a FROM t1;
+a
+1
+3
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+CREATE INDEX kb on t1 (b);
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', 'd');
+INSERT INTO t1 values ('d', 'e', 'f', 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+CREATE INDEX kb on t1 (b);
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`, `ts`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+CREATE INDEX kb on t1 (b);
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result
new file mode 100644
index 00000000000..90de5447891
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result
@@ -0,0 +1,510 @@
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+2
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b BIGINT UNSIGNED NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=10;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+# 1 should be hidden
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+a
+2
+3
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+2
+3
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+# none should be hidden yet, compaction runs but records aren't expired
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+a
+2
+3
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+2
+3
+# all should be hidden now, even though compaction hasn't run again
+set global rocksdb_debug_ttl_read_filter_ts = -310;
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+a
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+# should return nothing.
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+set global rocksdb_enable_ttl_read_filtering=0;
+# should return everything
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+3 3
+5 5
+7 7
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+3 3
+5 5
+7 7
+set global rocksdb_enable_ttl_read_filtering=1;
+# should return nothing.
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+DROP TABLE t1;
+# Read filtering index scan tests (None of these queries should return any results)
+CREATE TABLE t1 (
+a int,
+b int,
+c int,
+PRIMARY KEY (a,b,c),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (0,0,0);
+INSERT INTO t1 values (0,0,1);
+INSERT INTO t1 values (0,1,0);
+INSERT INTO t1 values (0,1,1);
+INSERT INTO t1 values (1,1,2);
+INSERT INTO t1 values (1,2,1);
+INSERT INTO t1 values (1,2,2);
+INSERT INTO t1 values (1,2,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 AND b=2 AND c=2;
+a b c
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 AND b=2 AND c=2;
+a b c
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a = 1;
+a b c
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a = 1;
+a b c
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 3;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 3;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 2 AND b = 1 AND c < 3;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 2 AND b = 1 AND c < 3;
+max(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a >= 1;
+min(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a >= 1;
+min(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a > 1;
+min(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a > 1;
+min(a)
+NULL
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 and b in (1) order by c desc;
+a b c
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 and b in (1) order by c desc;
+a b c
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a <=10;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a <=10;
+max(a)
+NULL
+SELECT a FROM t1 FORCE INDEX (PRIMARY) WHERE a > 0 and a <= 2;
+a
+SELECT a FROM t1 FORCE INDEX (kb) WHERE a > 0 and a <= 2;
+a
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+0
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+8
+DROP TABLE t1;
+# Attempt to update expired value, should filter out
+set global rocksdb_force_flush_memtable_now=1;
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a
+SELECT * FROM t1;
+a
+# No error is thrown here, under the hood index_next_with_direction is
+# filtering out the record from being seen in the first place.
+UPDATE t1 set a = 1;
+DROP TABLE t1;
+# Ensure no rows can disappear in the middle of long-running transactions
+# Also ensure repeatable-read works as expected
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+INSERT INTO t1 values (1, 1);
+connection con1;
+# Creating Snapshot (start transaction)
+BEGIN;
+# Nothing filtered out here
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+# Switching to connection 2
+connection con2;
+# compaction doesn't do anything since con1 snapshot is still open
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+# read filtered out, because on a different connection, on
+# this connection the records have 'expired' already so they are filtered out
+# even though they have not yet been removed by compaction
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+# Switching to connection 1
+connection con1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+UPDATE t1 set a = a + 1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+2 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+2 1
+COMMIT;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+DROP TABLE t1;
+disconnect con1;
+disconnect con2;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+# On Connection 1
+connection con1;
+# Creating Snapshot (start transaction)
+BEGIN;
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+# On Connection 2
+connection con2;
+set global rocksdb_debug_ttl_rec_ts = -2;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+# On Connection 1
+connection con1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+# On Connection 2
+connection con2;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+set global rocksdb_enable_ttl_read_filtering=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+3 3
+5 5
+7 7
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+3 3
+5 5
+7 7
+set global rocksdb_enable_ttl_read_filtering=1;
+disconnect con2;
+disconnect con1;
+connection default;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+b int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 VALUES (1, 1, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (2, 2, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (3, 3, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (4, 4, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (5, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (6, 6, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (7, 7, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (8, 8, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (9, 9, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (10, 10, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+# None are expired
+SELECT a, b FROM t1 FORCE INDEX (kb);
+a b
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+10 10
+set global rocksdb_debug_ttl_rec_ts = -100;
+UPDATE t1 SET ts=(UNIX_TIMESTAMP()+1) WHERE a IN (4, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+# 4 and 7 should be gone
+SELECT a, b FROM t1 FORCE INDEX (kb);
+a b
+1 1
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+10 10
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+PRIMARY KEY (c1, c2),
+KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (2,2,'b');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (3,3,'c');
+INSERT INTO t1 values (4,4,'d');
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (5,5,'e');
+INSERT INTO t1 values (6,6,'f');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (7,7,'g');
+INSERT INTO t1 values (8,8,'h');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+3 3 c
+4 4 d
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+3 3 c
+4 4 d
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE c1 > 5;
+c1 c2 name
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE c2 > 5;
+c1 c2 name
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE 3 < c1 AND c1 < 6;
+c1 c2 name
+4 4 d
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE 3 < c2 AND c2 < 6;
+c1 c2 name
+4 4 d
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+b int,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1800;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (1,1);
+INSERT INTO t1 values (2,2);
+INSERT INTO t1 values (7,7);
+INSERT INTO t1 values (10,10);
+INSERT INTO t1 values (11,11);
+INSERT INTO t1 values (12,12);
+set global rocksdb_debug_ttl_rec_ts = 450;
+INSERT INTO t1 values (3,3);
+INSERT INTO t1 values (4,4);
+INSERT INTO t1 values (8,8);
+INSERT INTO t1 values (16,16);
+INSERT INTO t1 values (17,17);
+INSERT INTO t1 values (18,18);
+set global rocksdb_debug_ttl_rec_ts = 900;
+INSERT INTO t1 values (5,5);
+INSERT INTO t1 values (6,6);
+INSERT INTO t1 values (9,9);
+INSERT INTO t1 values (13,13);
+INSERT INTO t1 values (14,14);
+INSERT INTO t1 values (15,15);
+set global rocksdb_debug_ttl_rec_ts = 0;
+# Should see everything
+SELECT * FROM t1;
+a b
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+10 10
+11 11
+12 12
+13 13
+14 14
+15 15
+16 16
+17 17
+18 18
+# Should have no records from the first group
+set global rocksdb_debug_ttl_read_filter_ts = -1800;
+SELECT * FROM t1;
+a b
+3 3
+4 4
+5 5
+6 6
+8 8
+9 9
+13 13
+14 14
+15 15
+16 16
+17 17
+18 18
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a > 5 AND a < 15;
+a b
+6 6
+8 8
+9 9
+13 13
+14 14
+# Should only have records from the last group
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 450;
+SELECT * FROM t1;
+a b
+5 5
+6 6
+9 9
+13 13
+14 14
+15 15
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a < 10;
+a b
+5 5
+6 6
+9 9
+# Should be empty
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 900;
+SELECT * FROM t1;
+a b
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result
new file mode 100644
index 00000000000..e4c361576f5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result
@@ -0,0 +1,82 @@
+CREATE TABLE t1 (
+a int NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b) COMMENT 'kb',
+KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='kb';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kb);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kc);
+a b c
+3 3 3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b) COMMENT 'kb',
+KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kb);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kc);
+a b c
+3 3 3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b) COMMENT 'kb',
+KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_compact_cf='kb';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kb);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kc);
+a b c
+3 3 3
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result
new file mode 100644
index 00000000000..713c7e92fa8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result
@@ -0,0 +1,389 @@
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+PRIMARY KEY (`c1`),
+KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (7, 7);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=my_custom_cf;custom_p2_cfname=baz',
+KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;custom_p1_ttl_duration=7;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (4,4,'aaaa');
+INSERT INTO t1 values (7,7,'aaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (2,2,'aa');
+INSERT INTO t1 values (3,3,'aaa');
+INSERT INTO t1 values (5,5,'aaaaa');
+INSERT INTO t1 values (6,6,'aaaaaa');
+INSERT INTO t1 values (8,8,'aaaaaaaa');
+INSERT INTO t1 values (9,9,'aaaaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+1 1 a
+2 2 aa
+3 3 aaa
+4 4 aaaa
+5 5 aaaaa
+6 6 aaaaaa
+7 7 aaaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+1 1 a
+2 2 aa
+3 3 aaa
+4 4 aaaa
+5 5 aaaaa
+6 6 aaaaaa
+7 7 aaaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'foo';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+3 3 aaa
+6 6 aaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+3 3 aaa
+6 6 aaaaaa
+9 9 aaaaaaaaa
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 2, 3),
+PARTITION custom_p1 VALUES IN (4, 5, 6),
+PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+INSERT INTO t1 VALUES (4, 4, "four", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (6, 6, "six", null);
+INSERT INTO t1 VALUES (7, 7, "seven", null);
+INSERT INTO t1 VALUES (8, 8, "eight", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+set global rocksdb_debug_ttl_rec_ts = 600;
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `c1` int(11) NOT NULL,
+ `c2` int(11) NOT NULL,
+ `name` varchar(25) NOT NULL,
+ `event` date DEFAULT NULL,
+ PRIMARY KEY (`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+ KEY `kc2` (`c2`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COMMENT='custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;'
+ PARTITION BY LIST (`c1`)
+(PARTITION `custom_p0` VALUES IN (1,2,3) ENGINE = ROCKSDB,
+ PARTITION `custom_p1` VALUES IN (4,5,6) ENGINE = ROCKSDB,
+ PARTITION `custom_p2` VALUES IN (7,8,9) ENGINE = ROCKSDB)
+set global rocksdb_debug_ttl_snapshot_ts = 100;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+set global rocksdb_debug_ttl_snapshot_ts = 1200;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 BIGINT,
+c2 BIGINT UNSIGNED NOT NULL,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=1;custom_p1_ttl_duration=100;custom_p1_ttl_col=c2;custom_p2_ttl_duration=5000;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 2, 3),
+PARTITION custom_p1 VALUES IN (4, 5, 6),
+PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 VALUES (1, UNIX_TIMESTAMP(), "one", null);
+INSERT INTO t1 VALUES (2, UNIX_TIMESTAMP(), "two", null);
+INSERT INTO t1 VALUES (3, UNIX_TIMESTAMP(), "three", null);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 VALUES (4, UNIX_TIMESTAMP(), "four", null);
+INSERT INTO t1 VALUES (5, UNIX_TIMESTAMP(), "five", null);
+INSERT INTO t1 VALUES (6, UNIX_TIMESTAMP(), "six", null);
+INSERT INTO t1 VALUES (7, UNIX_TIMESTAMP(), "seven", null);
+INSERT INTO t1 VALUES (8, UNIX_TIMESTAMP(), "eight", null);
+INSERT INTO t1 VALUES (9, UNIX_TIMESTAMP(), "nine", null);
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'bar';
+set @@global.rocksdb_compact_cf = 'default';
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+c1
+4
+5
+6
+7
+8
+9
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+c1
+4
+5
+6
+7
+8
+9
+set global rocksdb_debug_ttl_snapshot_ts = 600;
+set @@global.rocksdb_compact_cf = 'bar';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+c1
+7
+8
+9
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+c1
+7
+8
+9
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+SELECT * FROM t1;
+c1 c2
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+CREATE INDEX kc2 on t1 (c2);
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='foo';
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
index 9eed611a970..ee23446eec0 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
@@ -2,35 +2,26 @@ SET GLOBAL rocksdb_write_disable_wal=false;
SET GLOBAL rocksdb_write_ignore_missing_column_families=true;
create table aaa (id int primary key, i int) engine rocksdb;
set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit;
-SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(1,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
-0
-insert aaa(id, i) values(2,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-variable_value-@a
-0
-insert aaa(id, i) values(3,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-variable_value-@a
-0
-SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
-insert aaa(id, i) values(4,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-variable_value-@a
1
-insert aaa(id, i) values(5,1);
+insert aaa(id, i) values(2,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
2
-insert aaa(id, i) values(6,1);
+insert aaa(id, i) values(3,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
3
+SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+insert aaa(id, i) values(4,1);
SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
-insert aaa(id, i) values(7,1);
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+insert aaa(id, i) values(5,1);
truncate table aaa;
drop table aaa;
set @@global.rocksdb_flush_log_at_trx_commit=@save_rocksdb_flush_log_at_trx_commit;
diff --git a/storage/rocksdb/mysql-test/rocksdb/suite.pm b/storage/rocksdb/mysql-test/rocksdb/suite.pm
index 79c630f87f1..133d9344414 100644
--- a/storage/rocksdb/mysql-test/rocksdb/suite.pm
+++ b/storage/rocksdb/mysql-test/rocksdb/suite.pm
@@ -1,5 +1,7 @@
package My::Suite::Rocksdb;
+use My::SysInfo;
+
#
# Note: ../rocksdb_sys_vars/suite.pm file has a similar
# function. If you modify this file, consider modifying that one, too.
@@ -21,5 +23,7 @@ $ENV{MARIAROCKS_SST_DUMP}="$sst_dump";
# Temporarily disable testing under valgrind, due to MDEV-12439
return "RocksDB tests disabled under valgrind" if ($::opt_valgrind);
+return "Temporarily disabled on Windows due to MDEV-13852" if (My::SysInfo::IS_WINDOWS);
+
bless { };
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test b/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test
index 009fd7beaf2..923c9db17f2 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test
@@ -13,7 +13,7 @@ USE mysqlslap;
CREATE TABLE t1(id BIGINT AUTO_INCREMENT, value BIGINT, PRIMARY KEY(id)) ENGINE=rocksdb;
--echo # 2PC enabled, MyRocks durability enabled
-SET GLOBAL rocksdb_enable_2pc=0;
+SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
--echo ## 2PC + durability + single thread
@@ -28,7 +28,7 @@ select case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true'
--echo # 2PC enabled, MyRocks durability disabled
-SET GLOBAL rocksdb_enable_2pc=0;
+SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
@@ -41,7 +41,7 @@ select case when variable_value-@c = 0 then 'true' else 'false' end from informa
--echo # 2PC disabled, MyRocks durability enabled
-SET GLOBAL rocksdb_enable_2pc=1;
+SET GLOBAL rocksdb_enable_2pc=0;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test
index 7c1af1e38df..c1a91c2a5a2 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test
@@ -190,8 +190,8 @@ connect (con1,localhost,root,,);
--echo # Switch to connection con1
connection con1;
-show global variables like 'rocksdb_bulk_load';
-show session variables like 'rocksdb_bulk_load';
+show global variables like 'rocksdb_bulk_load%';
+show session variables like 'rocksdb_bulk_load%';
CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
@@ -259,9 +259,9 @@ SET @prior_rocksdb_merge_buf_size = @@rocksdb_merge_buf_size;
SET global rocksdb_strict_collation_check = off;
SET session rocksdb_merge_combine_read_size = 566;
-SET session rocksdb_merge_buf_size = 336;
+SET session rocksdb_merge_buf_size = 340;
-show variables like '%rocksdb_bulk_load%';
+show variables like 'rocksdb_bulk_load%';
CREATE TABLE t1 (a VARCHAR(80)) ENGINE=RocksDB;
INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
index bb3d8164200..4a4c42d1fcd 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
@@ -1,5 +1,6 @@
--source include/have_rocksdb.inc
+### Bulk load ###
CREATE TABLE t1(pk INT, PRIMARY KEY(pk));
# Make sure we get an error with out of order keys during bulk load
@@ -33,4 +34,29 @@ select @@rocksdb_bulk_load;
call mtr.add_suppression('finalizing last SST file while setting bulk loading variable');
+TRUNCATE TABLE t1;
+
+### Bulk load with unsorted PKs ###
+SET rocksdb_bulk_load_allow_unsorted=1;
+
+# We should not get an error with out of order PKs
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(100);
+INSERT INTO t1 VALUES(101);
+INSERT INTO t1 VALUES(99);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+TRUNCATE TABLE t1;
+
+# We should get an error with duplicate PKs in the same bulk load
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(201);
+INSERT INTO t1 VALUES(200);
+INSERT INTO t1 VALUES(202);
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES(201);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+
+SET rocksdb_bulk_load_allow_unsorted=DEFAULT;
DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test
new file mode 100644
index 00000000000..9d9433eaafa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test
@@ -0,0 +1,137 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+--let pk_cf=cf1
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+SET rocksdb_bulk_load_size=3;
+SET rocksdb_bulk_load_allow_unsorted=1;
+
+### Test individual INSERTs ###
+
+# A table with only a PK won't have rows until the bulk load is finished
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
+SET rocksdb_bulk_load=1;
+--disable_query_log
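+# Insert rows whose PK values alternate above and below 1 (2, -1, 4, -3, 6),
+# so they arrive in unsorted order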
+let $sign = 1;
+let $max = 5;
+let $i = 1;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t1 VALUES ($a, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+SELECT * FROM t1;
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# A table with a PK and a SK shows rows immediately
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf", KEY(b));
+SET rocksdb_bulk_load=1;
+--disable_query_log
+let $sign = 1;
+let $max = 5;
+let $i = 1;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t1 VALUES ($a, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+
+SELECT * FROM t1;
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+
+# Inserting into another table finishes the bulk load on the previous table
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
+eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
+
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t2 VALUES (1,1);
+SELECT * FROM t1;
+INSERT INTO t1 VALUES (2,2);
+SELECT * FROM t2;
+SELECT * FROM t1;
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+DROP TABLE t1, t2;
+
+### Test bulk load from a file ###
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
+eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "$pk_cf");
+eval CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ PARTITION BY KEY() PARTITIONS 4;
+
+--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")`
+# Create a text file with data to import into the tables.
+# PK and SK values are not in sorted order
+--let ROCKSDB_INFILE = $file
+perl;
+my $fn = $ENV{'ROCKSDB_INFILE'};
+open(my $fh, '>', $fn) || die "perl open($fn): $!";
+my $max = 5000000;
+my $sign = 1;
+for (my $ii = 0; $ii < $max; $ii++)
+{
+ my $a = 1 + $sign * $ii;
+ my $b = 1 - $sign * $ii;
+ print $fh "$a\t$b\n";
+}
+close($fh);
+EOF
+--file_exists $file
+
+# Make sure a snapshot held by another user doesn't block the bulk load
+connect (other,localhost,root,,);
+set session transaction isolation level repeatable read;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+start transaction with consistent snapshot;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+
+connection default;
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+--disable_query_log
+--echo LOAD DATA INFILE <input_file> INTO TABLE t1;
+eval LOAD DATA INFILE '$file' INTO TABLE t1;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t2;
+eval LOAD DATA INFILE '$file' INTO TABLE t2;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t3;
+eval LOAD DATA INFILE '$file' INTO TABLE t3;
+--enable_query_log
+set rocksdb_bulk_load=0;
+
+--remove_file $file
+
+# Make sure row count index stats are correct
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+ANALYZE TABLE t1, t2, t3;
+
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+# Make sure all the data is there.
+select count(a) from t1;
+select count(b) from t1;
+select count(a) from t2;
+select count(b) from t2;
+select count(a) from t3;
+select count(b) from t3;
+
+DROP TABLE t1, t2, t3;
+SET rocksdb_bulk_load_allow_unsorted=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test
new file mode 100644
index 00000000000..eee4f713a9b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+--source include/not_debug.inc
+
+# Cannot change unsorted input preference during bulk load
+SET rocksdb_bulk_load=1;
+--error ER_ERROR_WHEN_EXECUTING_COMMAND
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test b/storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test
new file mode 100644
index 00000000000..f0361707355
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test
@@ -0,0 +1,22 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS stats_test_table;
+--enable_warnings
+
+# Create the table
+CREATE TABLE stats_test_table (a INT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB;
+
+# Ensure appropriate perf_context_level is set
+SET GLOBAL rocksdb_perf_context_level=3;
+
+# Insert some values
+INSERT INTO stats_test_table VALUES (7,1);
+INSERT INTO stats_test_table VALUES (2,2);
+
+# Verify the bytes written are updated in the table stats
+SELECT io_write_bytes > 0 FROM INFORMATION_SCHEMA.TABLE_STATISTICS WHERE TABLE_NAME = "stats_test_table";
+
+# Cleanup
+DROP TABLE stats_test_table;
+SET GLOBAL rocksdb_perf_context_level=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test b/storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test
new file mode 100644
index 00000000000..ccd91a61ff0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test
@@ -0,0 +1,79 @@
+# Disabled until the covered unpack_info format is enabled in prod
+--source include/have_debug.inc
+--source include/not_debug.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+#
+# Normal case
+#
+
+CREATE TABLE t1 (
+ id INT,
+ fake_id INT,
+ bigfield VARCHAR(4096),
+ PRIMARY KEY (id),
+ KEY bf (bigfield(32)),
+ KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+ (8, 1008, REPEAT('b', 8)),
+ (24, 1024, REPEAT('c', 24)),
+ (31, 1031, REPEAT('d', 31)),
+ (32, 1032, REPEAT('x', 32)),
+ (33, 1033, REPEAT('y', 33)),
+ (128, 1128, REPEAT('z', 128));
+
+SELECT * FROM t1;
+
+--let $prefix_index_check_title= Eligible for optimization, access via fake_id only
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization, access via fake_id of big row.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
+
+#
+# Create same table with older format to test compatibility
+#
+
+set session debug= '+d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+CREATE TABLE t1 (
+ id INT,
+ fake_id INT,
+ bigfield VARCHAR(4096),
+ PRIMARY KEY (id),
+ KEY bf (bigfield(32)),
+ KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+set session debug= '-d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+ (8, 1008, REPEAT('b', 8)),
+ (24, 1024, REPEAT('c', 24)),
+ (31, 1031, REPEAT('d', 31)),
+ (32, 1032, REPEAT('x', 32)),
+ (33, 1033, REPEAT('y', 33)),
+ (128, 1128, REPEAT('z', 128));
+
+SELECT * FROM t1;
+
+--let $prefix_index_check_title= No longer eligible for optimization since no covered bitmap was stored.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test b/storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test
new file mode 100644
index 00000000000..01bb5cfa4ed
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test
@@ -0,0 +1,18 @@
+# Test high priority DDL
+# There are two modes of high priority DDL:
+# 1. Through the system variable high_priority_ddl
+# 2. Through syntax: CREATE/ALTER/DROP/OPTIMIZE HIGH_PRIORITY ...
+
+--source include/have_rocksdb.inc
+
+--echo ##
+--echo ## Using the system variable high_priority_ddl
+--echo ##
+--let $use_sys_var = 1;
+--source include/ddl_high_priority.inc
+
+--echo ##
+--echo ## Using HIGH_PRIORITY syntax
+--echo ##
+--let $use_sys_var = 0;
+--source include/ddl_high_priority.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test
new file mode 100644
index 00000000000..d2abcb3b63b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test
@@ -0,0 +1,153 @@
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+--echo # Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+let $engine = rocksdb;
+
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+let $con1= `SELECT CONNECTION_ID()`;
+
+connect (con2,localhost,root,,);
+let $con2= `SELECT CONNECTION_ID()`;
+
+connect (con3,localhost,root,,);
+let $con3= `SELECT CONNECTION_ID()`;
+
+connection default;
+eval create table t (i int primary key) engine=$engine;
+insert into t values (1), (2), (3);
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+echo Deadlock #1;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+echo Deadlock #2;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 10;
+
+echo Deadlock #3;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 1;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+connection con3;
+set rocksdb_deadlock_detect_depth = 2;
+
+echo Deadlock #4;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 for update;
+
+connection con1;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+send select * from t where i=3 for update;
+
+connection con3;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con2 and waiting_key != "";
+--source include/wait_condition.inc
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 for update;
+rollback;
+
+connection con2;
+reap;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection default;
+set global rocksdb_max_latest_deadlocks = 5;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+echo Deadlock #5;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 lock in share mode;
+
+connection con1;
+select * from t where i=100 for update;
+select * from t where i=101 for update;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+select * from t where i=3 lock in share mode;
+select * from t where i=200 for update;
+select * from t where i=201 for update;
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 lock in share mode;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection con3;
+rollback;
+
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+disconnect con1;
+disconnect con2;
+disconnect con3;
+
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 0;
+--echo # Clears deadlock buffer of any existent deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/
+show engine rocksdb transaction status;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/disabled.def b/storage/rocksdb/mysql-test/rocksdb/t/disabled.def
index 842b85f87c9..6a352e9511b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/disabled.def
+++ b/storage/rocksdb/mysql-test/rocksdb/t/disabled.def
@@ -1,3 +1,8 @@
+##
+## See also: storage/rocksdb/mysql-test/rocksdb/suite.pm - tests on Windows
+## and under valgrind are disabled from there.
+##
+
cons_snapshot_serializable : Consistent read does not work on serializable
level_read_uncommitted : Not supported
level_serializable: Not supported
@@ -75,3 +80,7 @@ type_float : MDEV-12474 - Fails in fulltest
native_procedure : Not supported in MariaDB
+ddl_high_priority: Needs fractional @@lock_wait_timeout
+deadlock_tracking : Needs SHOW ENGINE ROCKSDB TRANSACTION STATUS
+bytes_written: Needs I_S.TABLE_STATISTICS.IO_WRITE_BYTES
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test b/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test
index 09725adc558..0d3dee5ab17 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test
@@ -10,6 +10,10 @@ DROP TABLE IF EXISTS t4;
DROP TABLE IF EXISTS t5;
--enable_warnings
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+call mtr.add_suppression("LibRocksDB");
+
# Start from clean slate
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
@@ -107,5 +111,8 @@ let $wait_condition = select count(*) = 0
where TYPE = 'DDL_DROP_INDEX_ONGOING';
--source include/wait_condition.inc
+## Upstream has removed the following:
+--disable_parsing
+--enable_parsing
# Cleanup
drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test b/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test
index 3991a8bd00d..14d856cc0c5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test
@@ -11,6 +11,9 @@ DROP TABLE IF EXISTS t4;
DROP TABLE IF EXISTS t5;
--enable_warnings
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+
# Start from clean slate
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc b/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc
index 7a643d9a720..1a044384a45 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc
@@ -7,6 +7,9 @@ call mtr.add_suppression("Column family 'rev:cf2' not found");
DROP TABLE IF EXISTS t1;
--enable_warnings
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+
# Start from clean slate
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test b/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test
index a9a09d44e01..b4a0c9e5e96 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test
@@ -22,17 +22,21 @@ CREATE TABLE t2 (id1 INT, id2 INT, id3 INT,
DROP TABLE t1;
DROP TABLE t2;
+set global rocksdb_large_prefix=1;
CREATE TABLE t1 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
PRIMARY KEY (id1, id2, id3),
UNIQUE KEY (id3, id1)) ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
+set global rocksdb_large_prefix=1;
CREATE TABLE t2 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
PRIMARY KEY (id1, id2, id3),
UNIQUE KEY (id3, id1) COMMENT 'rev:cf') ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
--source include/dup_key_update.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test b/storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test
new file mode 100644
index 00000000000..0b6cf42aff0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test
@@ -0,0 +1,120 @@
+# Disabled until the covered unpack_info format is enabled in prod
+--source include/have_debug.inc
+--source include/not_debug.inc
+
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (
+ id INT,
+ fake_id INT,
+ bigfield VARCHAR(4096),
+ PRIMARY KEY (id),
+ KEY bf (bigfield(32)),
+ KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+ (8, 1008, REPEAT('b', 8)),
+ (24, 1024, REPEAT('c', 24)),
+ (31, 1031, REPEAT('d', 31)),
+ (32, 1032, REPEAT('x', 32)),
+ (33, 1033, REPEAT('y', 33)),
+ (128, 1128, REPEAT('z', 128));
+
+SELECT * FROM t1;
+
+--disable_query_log
+
+--let $prefix_index_check_title= Baseline sanity check
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT "no-op query"
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('d', 31)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization, access via fake_id only
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization, access via fake_id of big row.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+# The secondary_index_read call isn't covered because the next record in the
+# index has a bigfield value of length 33, so only one of the two lookups is
+# covered here.
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 1
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('x', 32)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('y', 33)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('b', 8)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('c', 24)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('z', 128)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
+
+--echo #
+--echo # Test that multi-byte charsets are handled correctly
+--echo #
+
+SET NAMES utf8;
+
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b VARCHAR(30) CHARACTER SET utf8 collate utf8_bin,
+ KEY k (b(2))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 VALUES
+ (1, 'a'),
+ (2, 'cc'),
+ (3, 'ŽŽ'),
+ (4, 'žžžž');
+
+--let $prefix_index_check_title= Charset record obviously shorter than the prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "a"
+--let $prefix_index_check_read_avoided_delta= 2
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Charset record shorter than prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "cc"
+--let $prefix_index_check_read_avoided_delta= 2
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Charset record with glyphs shorter than prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "ŽŽ"
+--let $prefix_index_check_read_avoided_delta= 1
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Charset record longer than prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "žžžž"
+--let $prefix_index_check_read_avoided_delta= 0
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index.inc b/storage/rocksdb/mysql-test/rocksdb/t/index.inc
index 6b4e4ff233b..8b000a255b3 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/index.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index.inc
@@ -119,3 +119,37 @@ DROP TABLE t1;
--enable_parsing
+#
+# Test index prefix length limits.
+#
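+# With rocksdb_large_prefix=0 the longest allowed key prefix is 767 bytes;
+# with rocksdb_large_prefix=1 the limit is 3072 bytes.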
+set global rocksdb_large_prefix=0;
+
+CREATE TABLE t1 (
+ a BLOB(1024),
+ KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+# Should display warning
+CREATE TABLE t1 (
+ a BLOB(1024),
+ KEY (a(768))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+set global rocksdb_large_prefix=1;
+
+CREATE TABLE t1 (
+ a BLOB(4096),
+ KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+# Should display warning
+CREATE TABLE t1 (
+ a BLOB(4096),
+ KEY (a(3073))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test b/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test
index 3abd2dd05fe..5dcfbaa8d3b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test
@@ -62,3 +62,35 @@ ALTER TABLE t1 ADD CONSTRAINT PRIMARY KEY pk (a);
SHOW KEYS IN t1;
DROP TABLE t1;
+#
+# Test index prefix length limits.
+#
+set global rocksdb_large_prefix=0;
+
+CREATE TABLE t1 (
+ a BLOB(1024),
+ PRIMARY KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+--error ER_TOO_LONG_KEY
+CREATE TABLE t1 (
+ a BLOB(1024),
+ PRIMARY KEY (a(768))
+) ENGINE=rocksdb;
+
+set global rocksdb_large_prefix=1;
+
+CREATE TABLE t1 (
+ a BLOB(4096),
+ PRIMARY KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+--error ER_TOO_LONG_KEY
+CREATE TABLE t1 (
+ a BLOB(4096),
+ PRIMARY KEY (a(3073))
+) ENGINE=rocksdb;
+
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test b/storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test
new file mode 100644
index 00000000000..cb65cf91f05
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test
@@ -0,0 +1,49 @@
+# Disabled until the covered unpack_info format is enabled in prod
+--source include/have_debug.inc
+--source include/not_debug.inc
+
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS T1;
+--enable_warnings
+
+CREATE TABLE T1 (
+ P1 VARCHAR(64), P2 VARCHAR(64), P3 VARCHAR(64), P4 VARCHAR(64),
+ S1 VARCHAR(64), S2 VARCHAR(64), S3 VARCHAR(64), S4 VARCHAR(64),
+ S5 VARCHAR(64), S6 VARCHAR(64), S7 VARCHAR(64), S8 VARCHAR(64),
+ S9 VARCHAR(64), S10 VARCHAR(64), S11 VARCHAR(64), S12 VARCHAR(64),
+ S13 VARCHAR(64), S14 VARCHAR(64), S15 VARCHAR(64), S16 VARCHAR(64),
+ PRIMARY KEY (P1(8), P2(8), P3(8), P4(8)),
+ KEY SK (S1(8), S2(8), S3(8), S4(8),
+ S5(8), S6(8), S7(8), S8(8),
+ S9(8), S10(8), S11(8), S12(8),
+ S13(8), S14(8), S15(8), S16(8))
+) ENGINE=rocksdb;
+
+INSERT INTO T1 VALUES ('1', '2', '3', '4',
+ '5', '6', '7', '8',
+ '9', '10', '11', '12',
+ '13', '14', '15', '16',
+ '17', '18', '19', '20');
+
+SELECT * FROM T1;
+
+--let $prefix_index_check_title= Not eligible for optimization, shorter than prefix length.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5'
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DELETE FROM T1;
+INSERT INTO T1 VALUES ('100000000', '200000000', '300000000', '400000000',
+ '500000000', '600000000', '700000000', '800000000',
+ '900000000', '100000000', '110000000', '120000000',
+ '130000000', '140000000', '150000000', '160000000',
+ '170000000', '180000000', '190000000', '200000000');
+
+--let $prefix_index_check_title= Not eligible for optimization, longer than prefix length.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5'
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE T1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/perf_context.test b/storage/rocksdb/mysql-test/rocksdb/t/perf_context.test
index 4290811e868..ee41324a34d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/perf_context.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/perf_context.test
@@ -56,6 +56,8 @@ AND STAT_TYPE in ('INTERNAL_KEY_SKIPPED_COUNT', 'INTERNAL_DELETE_SKIPPED_COUNT')
# Statistics for multi-statement transactions cannot be attributed to
# individual tables but should show up in global perf context stats
+set @tmp_flush_log= @@rocksdb_flush_log_at_trx_commit;
+set global rocksdb_flush_log_at_trx_commit=1;
BEGIN;
INSERT INTO t2 VALUES (1), (2);
@@ -90,3 +92,5 @@ SELECT CASE WHEN @b - @a > 0 THEN 'true' ELSE 'false' END;
DROP TABLE t1;
DROP TABLE t2;
SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level;
+set global rocksdb_flush_log_at_trx_commit= @tmp_flush_log;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
index a090a523c41..0089b62e0b9 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
@@ -1663,7 +1663,9 @@ SET @old_mode = @@sql_mode;
SET sql_mode = 'strict_all_tables';
create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(c, b(255))) engine=rocksdb;
drop table t1;
+set global rocksdb_large_prefix=1;
create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(b(1255))) engine=rocksdb;
+set global rocksdb_large_prefix=0;
insert into t1 values (1, '1abcde', '1abcde'), (2, '2abcde', '2abcde'), (3, '3abcde', '3abcde');
select * from t1;
--replace_column 9 #
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test b/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test
index 5a9c80188a9..06f64ddb3fa 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test
@@ -26,10 +26,8 @@ CREATE TABLE t3 (k INT, PRIMARY KEY (k) COMMENT 'cf_t1') ENGINE = ROCKSDB;
CREATE TABLE t4 (l INT, PRIMARY KEY (l) COMMENT 'cf_t4') ENGINE = ROCKSDB
PARTITION BY KEY(l) PARTITIONS 4;
-SET GLOBAL rocksdb_force_flush_memtable_now=1;
-
---replace_column 2 # 3 #
-SHOW ENGINE rocksdb STATUS;
+SET @save.rocksdb_max_background_jobs= @@global.rocksdb_max_background_jobs;
+SET GLOBAL rocksdb_max_background_jobs= 1;
INSERT INTO t1 VALUES (1), (2), (3);
SELECT COUNT(*) FROM t1;
@@ -40,6 +38,11 @@ SELECT COUNT(*) FROM t2;
INSERT INTO t4 VALUES (1), (2), (3), (4), (5);
SELECT COUNT(*) FROM t4;
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+SET GLOBAL rocksdb_compact_cf="cf_t1";
+--replace_column 2 # 3 #
+SHOW ENGINE rocksdb STATUS;
+
# Fetch data from information schema as well
--replace_column 3 #
SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_CFSTATS;
@@ -83,3 +86,5 @@ SHOW ENGINE rocksdb TRANSACTION STATUS;
ROLLBACK;
+# Restore old values
+SET GLOBAL rocksdb_max_background_jobs= @save.rocksdb_max_background_jobs;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test
new file mode 100644
index 00000000000..945b0079cce
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test
@@ -0,0 +1,39 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+# Write file to make mysql-test-run.pl expect the "crash", but don't restart the
+# server until it is told to
+--let $_server_id= `SELECT @@server_id`
+--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect
+
+CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB;
+
+# Create a .frm file without a matching table
+--exec cp $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1.frm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1#sql-test.frm
+
+# Restart the server with a .frm file that exists but whose table is not registered in RocksDB
+--exec echo "wait" >$_expect_file_name
+shutdown_server 10;
+--exec echo "restart" >$_expect_file_name
+--sleep 5
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+
+# This will append '#sql-test' to the end of the new name
+set session debug_dbug="+d,gen_sql_table_name";
+rename table t1 to t2;
+set session debug_dbug= "-d,gen_sql_table_name";
+
+# Remove the corresponding .frm files
+--remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t1*.frm
+--remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t2*.frm
+
+# Restart the server with a table that is registered in RocksDB but does not have a .frm file
+--exec echo "wait" >$_expect_file_name
+shutdown_server 10;
+--exec echo "restart" >$_expect_file_name
+--sleep 5
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test
index 833b6fdd91a..38bfb2eef8f 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test
@@ -374,8 +374,6 @@ COMMENT='ttl_duration=100;';
--error ER_RDB_TTL_UNSUPPORTED
ALTER TABLE t1 DROP PRIMARY KEY;
---error ER_RDB_TTL_UNSUPPORTED
-ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt
new file mode 100644
index 00000000000..b991f718a33
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_enable_ttl_read_filtering=0
+--rocksdb_default_cf_options=disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test
new file mode 100644
index 00000000000..fb439e109e7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test
@@ -0,0 +1,780 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
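+# rocksdb_debug_ttl_rec_ts shifts the TTL timestamp stored with newly written
+# rows by the given number of seconds; a negative value makes the rows look
+# already expired.
+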
+# Basic TTL test, pk ignored, no sk
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# no rows should be filtered
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# Basic TTL test
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# column before TTL in value
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# multi-part PK w/ TTL
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# nullable column(s) before TTL
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int,
+ c int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a),
+ KEY kbc (b, c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, NULL, NULL, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, NULL, NULL, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+DROP TABLE t1;
+
+# variable len columns + null column(s) before TTL
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`),
+KEY kbc (`b`, `c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'efghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+DROP TABLE t1;
+
+# TTL implicitly generated (no ttl column)
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5);
+INSERT INTO t1 values (2, 4, 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# TTL field as the PK
+CREATE TABLE t1 (
+ a int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a, ts),
+ KEY kt (ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=ts;';
+
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+
+set global rocksdb_debug_ttl_snapshot_ts = -10;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
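+# rocksdb_debug_ttl_snapshot_ts in effect offsets the time the compaction
+# filter treats as 'now': -10 pretends compaction runs 10 seconds earlier, so
+# nothing has expired yet; +10 below pretends it runs 10 seconds later.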
+# should all still be there..
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 10;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+DROP TABLE t1;
+
+# TTL field inside multi-part pk
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a, ts),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL field inside key with variable length things..
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`, `ts`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('de', 'fghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL test where we compact (values still exist), simulate time passing via
+# rocksdb_debug_ttl_snapshot_ts, then compact again; the values should now be gone.
+CREATE TABLE t1 (
+a INT NOT NULL,
+b varbinary(64) NOT NULL,
+c varbinary(256) NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+value mediumblob NOT NULL,
+PRIMARY KEY (b,a,c),
+KEY kb (b)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=10;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values (2, 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (3, 'i', 'j', UNIX_TIMESTAMP(), 'k');
+INSERT INTO t1 values (4, 'm', 'n', UNIX_TIMESTAMP(), 'o');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+# Nothing should get removed here.
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+# 1 and 2 should get removed here.
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+# 3 and 4 should get removed here.
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# TTL field with nullable ttl column (should fail)
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts bigint(20),
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+# TTL field with a non 8-byte integer column (should fail)
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts int,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+# TTL duration as some random garbage value
+--error ER_RDB_TTL_DURATION_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=abc;';
+
+# TTL col is some column outside of the table
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=abc;';
+
+# TTL col must have accompanying duration
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_col=abc;';
+
+# Make sure it doesn't filter out things early
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=500;';
+
+INSERT INTO t1 values (1, 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Testing altering table comment with updated TTL duration
+# This should trigger a rebuild of the table
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+INSERT INTO t1 values (1, 1);
+SELECT * FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+ALTER TABLE t1 COMMENT = 'ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Hidden PK is not supported with TTL, so dropping the PK must fail
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT,
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+--error ER_RDB_TTL_UNSUPPORTED
+ALTER TABLE t1 DROP PRIMARY KEY;
+
+DROP TABLE t1;
+
+# Test replacing PK, ttl should still work after
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t1 VALUES (2,2);
+
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(b);
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+
+--sorted_result
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+--sorted_result
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# Make sure table comment filled with other text before/after will work
+# (basically, it needs semicolon before and after)
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int,
+ PRIMARY KEY (a,b),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='asdadfasdfsadfadf ;ttl_duration=1; asfasdfasdfadfa';
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+ALTER TABLE t1 COMMENT = 'adsf;;ttl_duration=5;asfasdfa;ttl_col=a;asdfasdf;';
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+
+# nothing removed here
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+# all removed here
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# Test to make sure that TTL retains original timestamp during update
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 0);
+INSERT INTO t1 values (3, 0);
+INSERT INTO t1 values (5, 0);
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (7, 0);
+INSERT INTO t1 values (9, 0);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+UPDATE t1 SET a=a+1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# 1,3,5 should be dropped
+--sorted_result
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# Test behaviour on update with a TTL column; the TTL time can be updated here.
+CREATE TABLE t1 (
+ a INT,
+ b bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# 5 and 7 should be gone here
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Test rows expired stat variable and disable ttl variable
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 1);
+INSERT INTO t1 values (3, 1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_enable_ttl=0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
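+# With rocksdb_enable_ttl=0 the compaction filter leaves expired records in
+# place; the rocksdb_rows_expired baseline is sampled after that first
+# compaction, so the difference printed below should cover only the rows
+# expired once TTL is re-enabled and the column family is compacted again.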
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_enable_ttl=1;
+set global rocksdb_compact_cf='default';
+
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Table with TTL won't increment rows expired when no records have been
+# compacted
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+DROP TABLE t1;
+
+# Test update on TTL column in pk
+CREATE TABLE t1 (
+ a INT,
+ b bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a, b),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# 5 and 7 should be gone here
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Test behaviour on update with a TTL column inside the PK and no secondary key.
+CREATE TABLE t1 (
+ a INT,
+ b bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a, b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT a FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# 5 and 7 should be gone here
+--sorted_result
+SELECT a FROM t1;
+DROP TABLE t1;
+
+# Add index inplace
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# nothing filtered out
+SELECT COUNT(*) FROM t1;
+
+CREATE INDEX kb on t1 (b);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Add index inplace, implicit TTL
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', 'd');
+INSERT INTO t1 values ('d', 'e', 'f', 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# nothing filtered out
+SELECT COUNT(*) FROM t1;
+
+CREATE INDEX kb on t1 (b);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Add index inplace, TTL column in PK
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`, `ts`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# nothing filtered out
+SELECT COUNT(*) FROM t1;
+
+CREATE INDEX kb on t1 (b);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt
new file mode 100644
index 00000000000..aefc2f5da34
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test
new file mode 100644
index 00000000000..d6be7d95f8d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test
@@ -0,0 +1,500 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# The purpose of read filtering for tables with TTL is to ensure that, during a
+# transaction, a key which has already expired but has not yet been removed by
+# compaction is not returned to the user.
+#
+# Without this the user could be hit with problems such as rows disappearing
+# within a transaction, because the compaction filter ignores snapshots when
+# filtering keys.
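+#
+# Put differently: expiry is enforced consistently at read time, so what a
+# transaction sees does not depend on whether compaction happened to run in the
+# meantime; the scenarios below exercise this for point reads, index scans and
+# long-running transactions.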
+
+# Basic read filtering test
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+DROP TABLE t1;
+
+# Test that some rows are hidden but others aren't...
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b BIGINT UNSIGNED NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=10;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+
+--echo # 1 should be hidden
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+--echo # none should be hidden yet, compaction runs but records aren't expired
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+--echo # all should be hidden now, even though compaction hasn't run again
+set global rocksdb_debug_ttl_read_filter_ts = -310;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+set global rocksdb_debug_ttl_read_filter_ts = 0;
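+# (rocksdb_debug_ttl_read_filter_ts shifts the timestamp the read filter
+# compares records against, which is why the rows above can be hidden without
+# running another compaction.)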
+
+DROP TABLE t1;
+
+# Test the filtering code explicitly.
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--echo # should return nothing.
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+# disable filtering
+set global rocksdb_enable_ttl_read_filtering=0;
+
+--echo # should return everything
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+# enable filtering
+set global rocksdb_enable_ttl_read_filtering=1;
+
+--echo # should return nothing.
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+--echo # Read filtering index scan tests (None of these queries should return any results)
+CREATE TABLE t1 (
+ a int,
+ b int,
+ c int,
+ PRIMARY KEY (a,b,c),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (0,0,0);
+INSERT INTO t1 values (0,0,1);
+INSERT INTO t1 values (0,1,0);
+INSERT INTO t1 values (0,1,1);
+INSERT INTO t1 values (1,1,2);
+INSERT INTO t1 values (1,2,1);
+INSERT INTO t1 values (1,2,2);
+INSERT INTO t1 values (1,2,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+set global rocksdb_force_flush_memtable_now=1;
+
+# HA_READ_KEY_EXACT, using full key
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 AND b=2 AND c=2;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 AND b=2 AND c=2;
+
+# HA_READ_KEY_EXACT, not using full key
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a = 1;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a = 1;
+
+# HA_READ_BEFORE_KEY, not using full key
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 3;
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 3;
+
+# HA_READ_BEFORE_KEY, using full key
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 2 AND b = 1 AND c < 3;
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 2 AND b = 1 AND c < 3;
+
+# HA_READ_KEY_OR_NEXT
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a >= 1;
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a >= 1;
+
+# HA_READ_AFTER_KEY, /* Find next rec. after key-record */
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a > 1;
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a > 1;
+
+# HA_READ_PREFIX_LAST, /* Last key with the same prefix */
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 and b in (1) order by c desc;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 and b in (1) order by c desc;
+
+# HA_READ_PREFIX_LAST_OR_PREV, /* Last or prev key with the same prefix */
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a <=10;
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a <=10;
+
+# Need to test read_range_first(); it calls into read_range_next() and uses
+# compare_keys() to see if it's out of range
+SELECT a FROM t1 FORCE INDEX (PRIMARY) WHERE a > 0 and a <= 2;
+SELECT a FROM t1 FORCE INDEX (kb) WHERE a > 0 and a <= 2;
+
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+DROP TABLE t1;
+
+--echo # Attempt to update an expired value; it should be filtered out
+set global rocksdb_force_flush_memtable_now=1;
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1;
+
+--echo # No error is thrown here; under the hood index_next_with_direction
+--echo # filters the record out before the UPDATE ever sees it.
+UPDATE t1 set a = 1;
+DROP TABLE t1;
+
+--echo # Ensure no rows can disappear in the middle of long-running transactions
+--echo # Also ensure repeatable-read works as expected
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+INSERT INTO t1 values (1, 1);
+
+connection con1;
+--echo # Creating Snapshot (start transaction)
+BEGIN;
+
+# We need the below snippet in case establishing con1 took an arbitrary
+# amount of time. See https://github.com/facebook/mysql-5.6/pull/617#discussion_r120525391.
+--disable_query_log
+--let $snapshot_size= `SELECT COUNT(*) FROM t1`
+--let $i= 0
+while ($snapshot_size != 1)
+{
+ if ($i == 1000)
+ {
+ --die Your testing host is too slow for reasonable TTL testing
+ }
+
+ $i++;
+ ROLLBACK;
+ INSERT INTO t1 values (1,1);
+ BEGIN;
+ --let $snapshot_size= `SELECT COUNT(*) FROM t1`
+}
+--enable_query_log
+
+--echo # Nothing filtered out here
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+--sleep 5
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb); # <= shouldn't be filtered out here
+
+--echo # Switching to connection 2
+connection con2;
+--echo # compaction doesn't do anything since con1 snapshot is still open
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+--echo # Reads are filtered out here because this is a different connection: on
+--echo # this connection the records have already 'expired', so they are hidden
+--echo # even though compaction has not yet removed them
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+--echo # Switching to connection 1
+connection con1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb); # <= shouldn't be filtered out here
+
+UPDATE t1 set a = a + 1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb); # <= shouldn't be filtered out here
+
+COMMIT;
+
+# <= filtered out here because time has passed.
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+disconnect con1;
+disconnect con2;
+
+# transaction 1: create a snapshot and select * => returns nothing.
+# transaction 2: insert into table, flush
+# transaction 1: select * => returns nothing, but the snapshot should prevent
+#   the compaction code from removing the rows, no matter what the ttl duration is.
+# transaction 2: select * -> sees nothing; disable filter, select * -> sees
+#   everything; enable filter, select * -> sees nothing.
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+--echo # On Connection 1
+connection con1;
+--echo # Creating Snapshot (start transaction)
+BEGIN;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+# Sleep 5 secs after creating the snapshot; this ensures any records created
+# after this point can't be removed by compaction until the snapshot is released.
+--sleep 5
+
+--echo # On Connection 2
+connection con2;
+set global rocksdb_debug_ttl_rec_ts = -2;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
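+# The rows above are already expired (written with rec_ts = -2 and a 1-second
+# TTL), but con1's open snapshot should prevent this compaction from dropping
+# them; con1 still won't see them because read filtering hides expired keys.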
+
+--echo # On Connection 1
+connection con1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+--echo # On Connection 2
+connection con2;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+set global rocksdb_enable_ttl_read_filtering=0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+set global rocksdb_enable_ttl_read_filtering=1;
+
+disconnect con2;
+disconnect con1;
+connection default;
+
+DROP TABLE t1;
+# Wait till we reach the initial number of concurrent sessions
+--source include/wait_until_count_sessions.inc
+
+# Test that index_next_with_direction skips records properly
+CREATE TABLE t1 (
+ a int,
+ b int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 VALUES (1, 1, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (2, 2, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (3, 3, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (4, 4, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (5, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (6, 6, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (7, 7, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (8, 8, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (9, 9, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (10, 10, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+--echo # None are expired
+SELECT a, b FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+UPDATE t1 SET ts=(UNIX_TIMESTAMP()+1) WHERE a IN (4, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+--echo # 4 and 7 should be gone
+SELECT a, b FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# Test range scans with various conditionals
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ PRIMARY KEY (c1, c2),
+ KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (2,2,'b');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (3,3,'c');
+INSERT INTO t1 values (4,4,'d');
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (5,5,'e');
+INSERT INTO t1 values (6,6,'f');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (7,7,'g');
+INSERT INTO t1 values (8,8,'h');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE c1 > 5;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE c2 > 5;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE 3 < c1 AND c1 < 6;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE 3 < c2 AND c2 < 6;
+
+DROP TABLE t1;
+
+# Test range scans with varying expirations
+CREATE TABLE t1 (
+ a int,
+ b int,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1800;';
+
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (1,1);
+INSERT INTO t1 values (2,2);
+INSERT INTO t1 values (7,7);
+INSERT INTO t1 values (10,10);
+INSERT INTO t1 values (11,11);
+INSERT INTO t1 values (12,12);
+set global rocksdb_debug_ttl_rec_ts = 450;
+INSERT INTO t1 values (3,3);
+INSERT INTO t1 values (4,4);
+INSERT INTO t1 values (8,8);
+INSERT INTO t1 values (16,16);
+INSERT INTO t1 values (17,17);
+INSERT INTO t1 values (18,18);
+set global rocksdb_debug_ttl_rec_ts = 900;
+INSERT INTO t1 values (5,5);
+INSERT INTO t1 values (6,6);
+INSERT INTO t1 values (9,9);
+INSERT INTO t1 values (13,13);
+INSERT INTO t1 values (14,14);
+INSERT INTO t1 values (15,15);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--echo # Should see everything
+SELECT * FROM t1;
+
+--echo # Should have no records from the first group
+set global rocksdb_debug_ttl_read_filter_ts = -1800;
+SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a > 5 AND a < 15;
+
+--echo # Should only have records from the last group
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 450;
+SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a < 10;
+
+--echo # Should be empty
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 900;
+SELECT * FROM t1;
+
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test
new file mode 100644
index 00000000000..4f9788ce33c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test
@@ -0,0 +1,87 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# Multiple indexes, trigger compaction on sk and check consistency
+CREATE TABLE t1 (
+ a int NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b) COMMENT 'kb',
+ KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='kb';
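+# Only the 'kb' column family is compacted, so expired entries may still be
+# physically present under the PK and 'kc'; read filtering is what should keep
+# the three reads below consistent with each other.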
+
+# Results should be consistent
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (kb);
+SELECT * FROM t1 FORCE INDEX (kc);
+
+DROP TABLE t1;
+
+# Trigger compaction on pk and check consistency
+CREATE TABLE t1 (
+ a int NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b) COMMENT 'kb',
+ KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# Results should be consistent
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (kb);
+SELECT * FROM t1 FORCE INDEX (kc);
+
+DROP TABLE t1;
+
+# Trigger compaction on pk and sk and check consistency
+CREATE TABLE t1 (
+ a int NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b) COMMENT 'kb',
+ KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_compact_cf='kb';
+
+# Results should be consistent
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (kb);
+SELECT * FROM t1 FORCE INDEX (kc);
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt
new file mode 100644
index 00000000000..10a88c30361
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt
@@ -0,0 +1 @@
+--rocksdb_enable_ttl_read_filtering=0
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test
new file mode 100644
index 00000000000..c10c83f9f9d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test
@@ -0,0 +1,300 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+#
+# Create a table with multiple partitions, but don't specify in the comment that
+# per-partition column families (CFs) should be created. Expect the default CF
+# to be used and no new one to be created.
+#
+# In addition, specify TTL on one of the partitions. Insert a few rows into all
+# the partitions and verify after compaction that the rows inside the partition
+# with TTL have disappeared.
+#
+# Add secondary keys to all tables to test compatibility.
+#
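+# Partition-level settings follow the same comment syntax used elsewhere in
+# these tests: per-partition column families go in the key comment as
+# 'custom_<partition>_cfname=<cf>;', while 'custom_<partition>_ttl_duration=N;'
+# and 'custom_<partition>_ttl_col=<col>;' go in the table comment.
+#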
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ PRIMARY KEY (`c1`),
+ KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (7, 7);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# 1,4, and 7 should be gone
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+DROP TABLE t1;
+
+#
+# Create a table with multiple partitions and request for separate CF to be
+# created per every partition. As a result we expect three different CF-s to be
+# created.
+#
+# In addition, specify TTL on some of the partitions. Insert a few rows into all
+# the partitions and verify after compaction that the rows inside the partitions
+# with TTL have disappeared.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=my_custom_cf;custom_p2_cfname=baz',
+ KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;custom_p1_ttl_duration=7;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (4,4,'aaaa');
+INSERT INTO t1 values (7,7,'aaaaaaa');
+
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (2,2,'aa');
+INSERT INTO t1 values (3,3,'aaa');
+INSERT INTO t1 values (5,5,'aaaaa');
+INSERT INTO t1 values (6,6,'aaaaaa');
+INSERT INTO t1 values (8,8,'aaaaaaaa');
+INSERT INTO t1 values (9,9,'aaaaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'foo';
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+# Now 2,5,8 should be removed (this verifies that TTL only operates on the
+# particular CF).
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+DROP TABLE t1;
+
+#
+# Create a table with per-partition CFs/TTL and verify that ALTER TABLE with
+# DROP PRIMARY KEY, ADD PRIMARY KEY works for that scenario and that data is
+# persisted/filtered as expected.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+ KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 2, 3),
+ PARTITION custom_p1 VALUES IN (4, 5, 6),
+ PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+
+INSERT INTO t1 VALUES (4, 4, "four", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (6, 6, "six", null);
+
+INSERT INTO t1 VALUES (7, 7, "seven", null);
+INSERT INTO t1 VALUES (8, 8, "eight", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+# TTL should be reset after alter table
+set global rocksdb_debug_ttl_rec_ts = 600;
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+
+# ...so nothing should be gone here
+set global rocksdb_debug_ttl_snapshot_ts = 100;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'baz';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_debug_ttl_snapshot_ts = 1200;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+DROP TABLE t1;
+
+#
+# Create a table with non-partitioned TTL duration, with partitioned TTL
+# columns
+#
+# In this case the table-level TTL duration applies to every partition unless it
+# is overridden: custom_p1 overrides both the duration and the TTL column, and
+# custom_p2 overrides only the duration.
+#
+CREATE TABLE t1 (
+ c1 BIGINT,
+ c2 BIGINT UNSIGNED NOT NULL,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+ KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=1;custom_p1_ttl_duration=100;custom_p1_ttl_col=c2;custom_p2_ttl_duration=5000;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 2, 3),
+ PARTITION custom_p1 VALUES IN (4, 5, 6),
+ PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 VALUES (1, UNIX_TIMESTAMP(), "one", null);
+INSERT INTO t1 VALUES (2, UNIX_TIMESTAMP(), "two", null);
+INSERT INTO t1 VALUES (3, UNIX_TIMESTAMP(), "three", null);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+INSERT INTO t1 VALUES (4, UNIX_TIMESTAMP(), "four", null);
+INSERT INTO t1 VALUES (5, UNIX_TIMESTAMP(), "five", null);
+INSERT INTO t1 VALUES (6, UNIX_TIMESTAMP(), "six", null);
+
+INSERT INTO t1 VALUES (7, UNIX_TIMESTAMP(), "seven", null);
+INSERT INTO t1 VALUES (8, UNIX_TIMESTAMP(), "eight", null);
+INSERT INTO t1 VALUES (9, UNIX_TIMESTAMP(), "nine", null);
+
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'bar';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+
+# here we expect only 1,2,3 to be gone, ttl implicit.
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+
+# here we expect only 4,5,6 to be gone, ttl based on column c2.
+set global rocksdb_debug_ttl_snapshot_ts = 600;
+set @@global.rocksdb_compact_cf = 'bar';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+
+# at this point only 7,8,9 should be left..
+DROP TABLE t1;
+
+#
+# Add index inplace
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+
+# Nothing filtered out
+--sorted_result
+SELECT * FROM t1;
+
+CREATE INDEX kc2 on t1 (c2);
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='foo';
+set global rocksdb_compact_cf='default';
+
+# 1,4, and 7 should be gone
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
index cc7c0502bc0..7a053c659b2 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
@@ -31,7 +31,6 @@ select plugin_name, plugin_type from information_schema.plugins where plugin_nam
--echo #
--echo # Now, repeat the same with another set of invalid arguments
--echo #
-
--let $_mysqld_option=--rocksdb_use_direct_io_for_flush_and_compaction=1 --rocksdb_allow_mmap_writes=1 --default-storage-engine=myisam
--source include/restart_mysqld_with_option.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
index ca953afd65d..c20bb1fc89c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
@@ -6,8 +6,8 @@ SET GLOBAL rocksdb_write_ignore_missing_column_families=true;
create table aaa (id int primary key, i int) engine rocksdb;
set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit;
-SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
-sleep 30;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
+--exec sleep 5
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(1,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
@@ -16,19 +16,22 @@ select variable_value-@a from information_schema.global_status where variable_na
insert aaa(id, i) values(3,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
+--exec sleep 5
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(4,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-insert aaa(id, i) values(5,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-insert aaa(id, i) values(6,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+
+let $status_var=rocksdb_wal_synced;
+let $status_var_value=`select @a+1`;
+source include/wait_for_status_var.inc;
SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
-insert aaa(id, i) values(7,1);
+--exec sleep 5
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+insert aaa(id, i) values(5,1);
let $status_var=rocksdb_wal_synced;
-let $status_var_value=`select @a+4`;
+let $status_var_value=`select @a+1`;
source include/wait_for_status_var.inc;
truncate table aaa;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result
index 7a7400f17e1..493107ec071 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result
@@ -1,4 +1,5 @@
DROP TABLE IF EXISTS t1;
+SET GLOBAL MAX_BINLOG_SIZE = 4096;
SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
create table t1 (a int primary key, b int, c varchar(255)) engine=rocksdb;
'con1'
@@ -7,12 +8,10 @@ SET DEBUG_SYNC='rocksdb.prepared SIGNAL parked WAIT_FOR go';
insert into t1 values (1, 1, "iamtheogthealphaandomega");;
'con2'
insert into t1 values (2, 1, "i_am_just_here_to_trigger_a_flush");
-SET GLOBAL ROCKSDB_ENABLE_2PC = OFF;
-SET GLOBAL ROCKSDB_WRITE_SYNC = OFF;
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 0;
SET GLOBAL SYNC_BINLOG = 0;
SET DEBUG_SYNC='now WAIT_FOR parked';
-SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
-SET GLOBAL ROCKSDB_WRITE_SYNC = ON;
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 2;
SET GLOBAL SYNC_BINLOG = 1;
insert into t1 values (1000000, 1, "i_am_just_here_to_trigger_a_flush");
SET DEBUG_SYNC='now SIGNAL go';
@@ -23,5 +22,5 @@ a b c
1 1 iamtheogthealphaandomega
select count(*) from t1;
count(*)
-1000000
+4096
drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result
new file mode 100644
index 00000000000..8a1fd1b94e0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result
@@ -0,0 +1,39 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+include/rpl_connect.inc [creating slave_block]
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection slave
+select * from t1;
+i
+1
+2
+3
+connection slave_block
+lock tables t1 read;
+connection master;
+create high_priority index idx1 on t1 (i);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL,
+ KEY `idx1` (`i`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+connection slave;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL,
+ KEY `idx1` (`i`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+drop table t1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
index 0f68de04712..6143824eea6 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
@@ -3,6 +3,11 @@
--source include/have_debug.inc
--source include/have_debug_sync.inc
--source include/big_test.inc
+# The test involves a crash which does not seem to be handled well by
+# mysql-test/lib/My/SafeProcess/my_safe_process under valgrind, as it hangs
+# forever. The test is not meant to verify memory leaks, so not much coverage
+# is lost by not running it under valgrind.
+--source include/not_valgrind.inc
--exec echo > $MYSQLTEST_VARDIR/log/mysqld.1.err
@@ -10,16 +15,18 @@
DROP TABLE IF EXISTS t1;
--enable_warnings
+# Set it to the minimum so that we can make the binlog rotate with a few inserts
+SET GLOBAL MAX_BINLOG_SIZE = 4096;
SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
create table t1 (a int primary key, b int, c varchar(255)) engine=rocksdb;
connect (con1, localhost, root,,);
connect (con2, localhost, root,,);
-# On connection one we insert a row and pause after commit marker is written to WAL.
-# Connection two then inserts many rows. After connection two
-# completes connection one continues only to crash before commit but after
-# binlog write. On crash recovery we see that connection one's value
+# On connection one we insert a row and pause after prepare marker is written to
+# WAL. Connection two then inserts many rows to rotate the binlog. After
+# connection two completes, connection one continues only to crash before commit
+# but after binlog write. On crash recovery we see that connection one's value
# has been recovered and commited
connection con1;
--echo 'con1'
@@ -35,14 +42,14 @@ insert into t1 values (2, 1, "i_am_just_here_to_trigger_a_flush");
# Disable 2PC and syncing for faster inserting of dummy rows
# These rows only purpose is to rotate the binlog
-SET GLOBAL ROCKSDB_ENABLE_2PC = OFF;
-SET GLOBAL ROCKSDB_WRITE_SYNC = OFF;
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 0;
SET GLOBAL SYNC_BINLOG = 0;
SET DEBUG_SYNC='now WAIT_FOR parked';
--disable_query_log
--let $pk= 3
-while ($pk < 1000000) {
+# binlog size is 4096 bytes, so with that many insertions it will definitely rotate
+while ($pk < 4096) {
eval insert into t1 values ($pk, 1, "foobardatagoesheresothatmorelogsrollwhichiswhatwewant");
--inc $pk
}
@@ -50,18 +57,16 @@ while ($pk < 1000000) {
# re-enable 2PC an syncing then write to trigger a flush
# before we trigger the crash to simulate full-durability
-SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
-SET GLOBAL ROCKSDB_WRITE_SYNC = ON;
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 2;
SET GLOBAL SYNC_BINLOG = 1;
insert into t1 values (1000000, 1, "i_am_just_here_to_trigger_a_flush");
SET DEBUG_SYNC='now SIGNAL go';
-
+--source include/wait_until_disconnected.inc
--enable_reconnect
--source include/wait_until_connected_again.inc
-
---exec sleep 60
+--disable_reconnect
--exec python suite/rocksdb/t/check_log_for_xa.py $MYSQLTEST_VARDIR/log/mysqld.1.err commit,prepare,rollback
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test
new file mode 100644
index 00000000000..7cf4a4d32b5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test
@@ -0,0 +1,2 @@
+--source include/have_rocksdb.inc
+--source include/rpl_ddl_high_priority.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf b/storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf
index 1e9b0a9d3bb..20c2d025e0c 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf
@@ -8,3 +8,4 @@ default-storage-engine=rocksdb
sql-mode=NO_ENGINE_SUBSTITUTION
explicit-defaults-for-timestamp=1
loose-rocksdb_lock_wait_timeout=1
+loose-rocksdb_force_compute_memtable_stats_cachetime=0
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result
new file mode 100644
index 00000000000..a59ba561181
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 1"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 1;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 0"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 0;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to on"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = on;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 1"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 1;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 0"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 0;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to on"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = on;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 'aaa'"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 'bbb'"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = @start_global_value;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = @start_session_value;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result
new file mode 100644
index 00000000000..11d4f2363f6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_CONCURRENT_PREPARE;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_CONCURRENT_PREPARE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_CONCURRENT_PREPARE = 444;
+ERROR HY000: Variable 'rocksdb_concurrent_prepare' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result
new file mode 100644
index 00000000000..6f05268745d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result
@@ -0,0 +1,79 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(2);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+SET @start_global_value = @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+SELECT @start_global_value;
+@start_global_value
+50
+SET @start_session_value = @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+SELECT @start_session_value;
+@start_session_value
+50
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to 100"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = 100;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to 2"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = 2;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+2
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH to 100"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = 100;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+100
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+"Trying to set variable @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH to 2"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = 2;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+2
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to 'aaa'"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to '123'"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = '123';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = @start_global_value;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = @start_session_value;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result
new file mode 100644
index 00000000000..1d8eb721c1b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 1"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 1;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 0"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 0;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to on"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = on;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@session.ROCKSDB_DEBUG_TTL_IGNORE_PK to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_DEBUG_TTL_IGNORE_PK = 444;
+ERROR HY000: Variable 'rocksdb_debug_ttl_ignore_pk' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 'aaa'"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 'bbb'"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = @start_global_value;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result
index a1c4d3caaa4..90fd829e7c3 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result
@@ -1,7 +1,7 @@
DROP TABLE IF EXISTS t;
CREATE TABLE t (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
INSERT INTO t (a,b) VALUES (1,'bar'),(2,'foo');
-SET @ORIG_PAUSE_BACKGROUND_WORK = @@rocksdb_force_compute_memtable_stats;
+SET @ORIG_FORCE_COMPUTE_MEMTABLE_STATS = @@rocksdb_force_compute_memtable_stats;
set global rocksdb_force_flush_memtable_now = true;
INSERT INTO t (a,b) VALUES (3,'dead'),(4,'beef'),(5,'a'),(6,'bbb'),(7,'c'),(8,'d');
set global rocksdb_force_compute_memtable_stats=0;
@@ -12,4 +12,4 @@ select case when @ROWS_INCLUDE_MEMTABLE-@ROWS_EXCLUDE_MEMTABLE > 0 then 'true' e
case when @ROWS_INCLUDE_MEMTABLE-@ROWS_EXCLUDE_MEMTABLE > 0 then 'true' else 'false' end
true
DROP TABLE t;
-set global rocksdb_force_compute_memtable_stats = @ORIG_PAUSE_BACKGROUND_WORK;
+set global rocksdb_force_compute_memtable_stats = @ORIG_FORCE_COMPUTE_MEMTABLE_STATS;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result
new file mode 100644
index 00000000000..50e06b5bacb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result
@@ -0,0 +1,68 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(1024 * 1024 * 1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 0"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 0;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 1"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 1;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 1024"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 1024;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+1024
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 1073741824"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 1073741824;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+1073741824
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@session.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 444;
+ERROR HY000: Variable 'rocksdb_force_compute_memtable_stats_cachetime' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 'aaa'"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = @start_global_value;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result
new file mode 100644
index 00000000000..89697683d1c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_LARGE_PREFIX;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 1"
+SET @@global.ROCKSDB_LARGE_PREFIX = 1;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LARGE_PREFIX = DEFAULT;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 0"
+SET @@global.ROCKSDB_LARGE_PREFIX = 0;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LARGE_PREFIX = DEFAULT;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to on"
+SET @@global.ROCKSDB_LARGE_PREFIX = on;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LARGE_PREFIX = DEFAULT;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@session.ROCKSDB_LARGE_PREFIX to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_LARGE_PREFIX = 444;
+ERROR HY000: Variable 'rocksdb_large_prefix' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 'aaa'"
+SET @@global.ROCKSDB_LARGE_PREFIX = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 'bbb'"
+SET @@global.ROCKSDB_LARGE_PREFIX = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+SET @@global.ROCKSDB_LARGE_PREFIX = @start_global_value;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result
new file mode 100644
index 00000000000..9b176263a23
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_MANUAL_WAL_FLUSH;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_MANUAL_WAL_FLUSH to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_MANUAL_WAL_FLUSH = 444;
+ERROR HY000: Variable 'rocksdb_manual_wal_flush' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result
new file mode 100644
index 00000000000..74dbdb4288c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result
@@ -0,0 +1,53 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+SET @start_global_value = @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+SELECT @start_global_value;
+@start_global_value
+5
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to 100"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = 100;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to 1"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = 1;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+"Trying to set variable @@session.ROCKSDB_MAX_LATEST_DEADLOCKS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_MAX_LATEST_DEADLOCKS = 444;
+ERROR HY000: Variable 'rocksdb_max_latest_deadlocks' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to 'aaa'"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to '123'"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = '123';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = @start_global_value;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result
new file mode 100644
index 00000000000..277de716d70
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result
@@ -0,0 +1,93 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('on');
+SET @start_global_value = @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 1"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 1;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 0"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 0;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 1"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 1;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 0"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 0;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 'aaa'"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 'bbb'"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to on"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = on;
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = @start_global_value;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = @start_session_value;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+set session rocksdb_merge_buf_size=250;
+set session rocksdb_merge_combine_read_size=1000;
+set session rocksdb_merge_tmp_file_removal_delay_ms=1000;
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+include/assert.inc [Alter should have taken at least 10 seconds]
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result
new file mode 100644
index 00000000000..ef4c619457b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_SIM_CACHE_SIZE;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_SIM_CACHE_SIZE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_SIM_CACHE_SIZE = 444;
+ERROR HY000: Variable 'rocksdb_sim_cache_size' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result
new file mode 100644
index 00000000000..f23d1889027
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result
@@ -0,0 +1,19 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_USE_CLOCK_CACHE;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_USE_CLOCK_CACHE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_USE_CLOCK_CACHE = 444;
+ERROR HY000: Variable 'rocksdb_use_clock_cache' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test
new file mode 100644
index 00000000000..e57396e0fdc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+--let $read_only=0
+--let $session=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test
new file mode 100644
index 00000000000..451653fe769
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_CONCURRENT_PREPARE
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test
new file mode 100644
index 00000000000..cab72a11e18
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(2);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+
+--let $sys_var=ROCKSDB_DEADLOCK_DETECT_DEPTH
+--let $read_only=0
+--let $session=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test
new file mode 100644
index 00000000000..8ad071e131b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_DEBUG_TTL_IGNORE_PK
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test
index 3a0d7f63938..318ae1ee598 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test
@@ -6,7 +6,7 @@ DROP TABLE IF EXISTS t;
CREATE TABLE t (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
INSERT INTO t (a,b) VALUES (1,'bar'),(2,'foo');
-SET @ORIG_PAUSE_BACKGROUND_WORK = @@rocksdb_force_compute_memtable_stats;
+SET @ORIG_FORCE_COMPUTE_MEMTABLE_STATS = @@rocksdb_force_compute_memtable_stats;
set global rocksdb_force_flush_memtable_now = true;
INSERT INTO t (a,b) VALUES (3,'dead'),(4,'beef'),(5,'a'),(6,'bbb'),(7,'c'),(8,'d');
@@ -20,4 +20,4 @@ SELECT TABLE_ROWS INTO @ROWS_INCLUDE_MEMTABLE FROM information_schema.TABLES WHE
select case when @ROWS_INCLUDE_MEMTABLE-@ROWS_EXCLUDE_MEMTABLE > 0 then 'true' else 'false' end;
DROP TABLE t;
-set global rocksdb_force_compute_memtable_stats = @ORIG_PAUSE_BACKGROUND_WORK;
+set global rocksdb_force_compute_memtable_stats = @ORIG_FORCE_COMPUTE_MEMTABLE_STATS;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test
new file mode 100644
index 00000000000..20180ec16a9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(1024 * 1024 * 1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test
new file mode 100644
index 00000000000..5ed3c74131e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_LARGE_PREFIX
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test
new file mode 100644
index 00000000000..3e01722d5ea
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_MANUAL_WAL_FLUSH
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test
new file mode 100644
index 00000000000..9917ec31d9b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+
+--let $sys_var=ROCKSDB_MAX_LATEST_DEADLOCKS
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test
new file mode 100644
index 00000000000..03cc0b11d8c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test
@@ -0,0 +1,49 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('on');
+
+--let $sys_var=ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+--let $read_only=0
+--let $session=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
+set session rocksdb_merge_buf_size=250;
+set session rocksdb_merge_combine_read_size=1000;
+set session rocksdb_merge_tmp_file_removal_delay_ms=1000;
+
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+
+--disable_query_log
+let $max = 100;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, FLOOR(RAND() * 100));
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+let $start= `SELECT UNIX_TIMESTAMP()`;
+# this should take a lot longer than normal because each merge file deletion
+# will sleep for 1 sec. There should be about 13 buffers,
+# so it should take at least 13 secs
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+let $stop_exec= `SELECT UNIX_TIMESTAMP()`;
+
+let $time_diff= `SELECT ($stop_exec - $start)`;
+let $assert_text= Alter should have taken at least 10 seconds;
+let $assert_cond= $time_diff >= 10;
+source include/assert.inc;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test
new file mode 100644
index 00000000000..a82d50e1d03
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_SIM_CACHE_SIZE
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test
new file mode 100644
index 00000000000..d3e7b652039
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_USE_CLOCK_CACHE
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/rdb_buff.h b/storage/rocksdb/rdb_buff.h
index 87cfbb54e61..c9647707232 100644
--- a/storage/rocksdb/rdb_buff.h
+++ b/storage/rocksdb/rdb_buff.h
@@ -379,6 +379,22 @@ public:
DBUG_ASSERT(pos < get_current_pos() && (pos + 1) < get_current_pos());
rdb_netbuf_store_uint16(m_data.data() + pos, new_val);
}
+
+ void truncate(const size_t &pos) {
+ DBUG_ASSERT(pos < m_data.size());
+ m_data.resize(pos);
+ }
+
+ void allocate(const size_t &len, const uchar &val = 0) {
+ DBUG_ASSERT(len > 0);
+ m_data.resize(m_data.size() + len, val);
+ }
+
+ /*
+    An awful hack to deallocate the buffer without relying on the destructor.
+ This is needed to suppress valgrind errors in rocksdb.partition
+ */
+ void free() { std::vector<uchar>().swap(m_data); }
};
/*
diff --git a/storage/rocksdb/rdb_compact_filter.h b/storage/rocksdb/rdb_compact_filter.h
index ea08ac458a6..9bc7ece6e7a 100644
--- a/storage/rocksdb/rdb_compact_filter.h
+++ b/storage/rocksdb/rdb_compact_filter.h
@@ -139,6 +139,14 @@ public:
gl_index_id.cf_id, gl_index_id.index_id);
}
+#ifndef NDEBUG
+ if (rdb_dbug_set_ttl_ignore_pk() &&
+ index_info.m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY) {
+ *ttl_duration = 0;
+ return;
+ }
+#endif
+
*ttl_duration = index_info.m_ttl_duration;
if (Rdb_key_def::has_index_flag(index_info.m_index_flags,
Rdb_key_def::TTL_FLAG)) {
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index 54cb5e5d402..6de2fee3dae 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -36,9 +36,12 @@
#include <vector>
/* MySQL header files */
+#include "./field.h"
#include "./key.h"
#include "./m_ctype.h"
#include "./my_bit.h"
+#include "./my_bitmap.h"
+#include "./sql_table.h"
/* MyRocks header files */
#include "./ha_rocksdb_proto.h"
@@ -53,20 +56,16 @@ void get_mem_comparable_space(const CHARSET_INFO *cs,
size_t *mb_len);
/*
- MariaDB's stand-in for Field::check_field_name_match that facebook/mysql-5.6
- uses.
-
- They have that function because of their JSON support, where "a.b.c" is a
- sub-field of "a.b".
+  MariaDB's replacement for FB/MySQL Field::check_field_name_match:
*/
-
-static bool check_field_name_match(Field *field, const char *name)
+inline bool field_check_field_name_match(Field *field, const char *name)
{
return (0 == my_strcasecmp(system_charset_info,
field->field_name.str,
name));
}
+
/*
Rdb_key_def class implementation
*/
@@ -92,6 +91,14 @@ Rdb_key_def::Rdb_key_def(uint indexnr_arg, uint keyno_arg,
{
mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
rdb_netbuf_store_index(m_index_number_storage_form, m_index_number);
+ m_total_index_flags_length =
+ calculate_index_flag_offset(m_index_flags_bitmap, MAX_FLAG);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_SECONDARY &&
+ m_kv_format_version <= SECONDARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_PRIMARY &&
+ m_kv_format_version <= PRIMARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
DBUG_ASSERT(m_cf_handle != nullptr);
}
@@ -109,6 +116,14 @@ Rdb_key_def::Rdb_key_def(const Rdb_key_def &k)
m_maxlength(k.m_maxlength) {
mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
rdb_netbuf_store_index(m_index_number_storage_form, m_index_number);
+ m_total_index_flags_length =
+ calculate_index_flag_offset(m_index_flags_bitmap, MAX_FLAG);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_SECONDARY &&
+ m_kv_format_version <= SECONDARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_PRIMARY &&
+ m_kv_format_version <= PRIMARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
if (k.m_pack_info) {
const size_t size = sizeof(Rdb_field_packing) * k.m_key_parts;
m_pack_info =
@@ -285,7 +300,7 @@ void Rdb_key_def::setup(const TABLE *const tbl,
the offset of the TTL key part here.
*/
if (!m_ttl_column.empty() &&
- check_field_name_match(field, m_ttl_column.c_str())) {
+ field_check_field_name_match(field, m_ttl_column.c_str())) {
DBUG_ASSERT(field->real_type() == MYSQL_TYPE_LONGLONG);
DBUG_ASSERT(field->key_type() == HA_KEYTYPE_ULONGLONG);
DBUG_ASSERT(!field->real_maybe_null());
@@ -405,7 +420,7 @@ uint Rdb_key_def::extract_ttl_col(const TABLE *const table_arg,
if (skip_checks) {
for (uint i = 0; i < table_arg->s->fields; i++) {
Field *const field = table_arg->field[i];
- if (check_field_name_match(field, ttl_col_str.c_str())) {
+ if (field_check_field_name_match(field, ttl_col_str.c_str())) {
*ttl_column = ttl_col_str;
*ttl_field_offset = i;
}
@@ -418,7 +433,7 @@ uint Rdb_key_def::extract_ttl_col(const TABLE *const table_arg,
bool found = false;
for (uint i = 0; i < table_arg->s->fields; i++) {
Field *const field = table_arg->field[i];
- if (check_field_name_match(field, ttl_col_str.c_str()) &&
+ if (field_check_field_name_match(field, ttl_col_str.c_str()) &&
field->real_type() == MYSQL_TYPE_LONGLONG &&
field->key_type() == HA_KEYTYPE_ULONGLONG &&
!field->real_maybe_null()) {
@@ -774,11 +789,14 @@ uint Rdb_key_def::pack_index_tuple(TABLE *const tbl, uchar *const pack_buffer,
*/
bool Rdb_key_def::unpack_info_has_checksum(const rocksdb::Slice &unpack_info) {
- const uchar *ptr = (const uchar *)unpack_info.data();
size_t size = unpack_info.size();
+ if (size == 0) {
+ return false;
+ }
+ const uchar *ptr = (const uchar *)unpack_info.data();
// Skip unpack info if present.
- if (size >= RDB_UNPACK_HEADER_SIZE && ptr[0] == RDB_UNPACK_DATA_TAG) {
+ if (is_unpack_data_tag(ptr[0]) && size >= get_unpack_header_size(ptr[0])) {
const uint16 skip_len = rdb_netbuf_to_uint16(ptr + 1);
SHIP_ASSERT(size >= skip_len);
@@ -808,6 +826,120 @@ int Rdb_key_def::successor(uchar *const packed_tuple, const uint &len) {
return changed;
}
+static const std::map<char, size_t> UNPACK_HEADER_SIZES = {
+ {RDB_UNPACK_DATA_TAG, RDB_UNPACK_HEADER_SIZE},
+ {RDB_UNPACK_COVERED_DATA_TAG, RDB_UNPACK_COVERED_HEADER_SIZE}};
+
+/*
+ @return The length in bytes of the header specified by the given tag
+*/
+size_t Rdb_key_def::get_unpack_header_size(char tag) {
+ DBUG_ASSERT(is_unpack_data_tag(tag));
+ return UNPACK_HEADER_SIZES.at(tag);
+}
+
+/*
+ Get a bitmap indicating which varchar columns must be covered for this
+ lookup to be covered. If the bitmap is a subset of the covered bitmap, then
+ the lookup is covered. If it can already be determined that the lookup is
+ not covered, map->bitmap will be set to null.
+ */
+void Rdb_key_def::get_lookup_bitmap(const TABLE *table, MY_BITMAP *map) const {
+ DBUG_ASSERT(map->bitmap == nullptr);
+ bitmap_init(map, nullptr, MAX_REF_PARTS, false);
+ uint curr_bitmap_pos = 0;
+
+ // Indicates which columns in the read set might be covered.
+ MY_BITMAP maybe_covered_bitmap;
+ bitmap_init(&maybe_covered_bitmap, nullptr, table->read_set->n_bits, false);
+
+ for (uint i = 0; i < m_key_parts; i++) {
+ if (table_has_hidden_pk(table) && i + 1 == m_key_parts) {
+ continue;
+ }
+
+ Field *const field = m_pack_info[i].get_field_in_table(table);
+
+ // Columns which are always covered are not stored in the covered bitmap so
+ // we can ignore them here too.
+ if (m_pack_info[i].m_covered &&
+ bitmap_is_set(table->read_set, field->field_index)) {
+ bitmap_set_bit(&maybe_covered_bitmap, field->field_index);
+ continue;
+ }
+
+ switch (field->real_type()) {
+ // This type may be covered depending on the record. If it was requested,
+ // we require the covered bitmap to have this bit set.
+ case MYSQL_TYPE_VARCHAR:
+ if (curr_bitmap_pos < MAX_REF_PARTS) {
+ if (bitmap_is_set(table->read_set, field->field_index)) {
+ bitmap_set_bit(map, curr_bitmap_pos);
+ bitmap_set_bit(&maybe_covered_bitmap, field->field_index);
+ }
+ curr_bitmap_pos++;
+ } else {
+ bitmap_free(&maybe_covered_bitmap);
+ bitmap_free(map);
+ return;
+ }
+ break;
+ // This column is a type which is never covered. If it was requested, we
+ // know this lookup will never be covered.
+ default:
+ if (bitmap_is_set(table->read_set, field->field_index)) {
+ bitmap_free(&maybe_covered_bitmap);
+ bitmap_free(map);
+ return;
+ }
+ break;
+ }
+ }
+
+ // If there are columns which are not covered in the read set, the lookup
+ // can't be covered.
+ if (!bitmap_cmp(table->read_set, &maybe_covered_bitmap)) {
+ bitmap_free(map);
+ }
+ bitmap_free(&maybe_covered_bitmap);
+}
+
+/*
+ Return true if for this secondary index
+ - All of the requested columns are in the index
+  - All values for columns that are indexed only by a prefix are no longer
+    than the prefix
+ */
+bool Rdb_key_def::covers_lookup(TABLE *const table,
+ const rocksdb::Slice *const unpack_info,
+ const MY_BITMAP *const lookup_bitmap) const {
+ DBUG_ASSERT(lookup_bitmap != nullptr);
+ if (!use_covered_bitmap_format() || lookup_bitmap->bitmap == nullptr) {
+ return false;
+ }
+
+ Rdb_string_reader unp_reader = Rdb_string_reader::read_or_empty(unpack_info);
+
+ // Check if this unpack_info has a covered_bitmap
+ const char *unpack_header = unp_reader.get_current_ptr();
+ const bool has_covered_unpack_info =
+ unp_reader.remaining_bytes() &&
+ unpack_header[0] == RDB_UNPACK_COVERED_DATA_TAG;
+ if (!has_covered_unpack_info ||
+ !unp_reader.read(RDB_UNPACK_COVERED_HEADER_SIZE)) {
+ return false;
+ }
+
+ MY_BITMAP covered_bitmap;
+ my_bitmap_map covered_bits;
+ bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
+ covered_bits = rdb_netbuf_to_uint16((const uchar *)unpack_header +
+ sizeof(RDB_UNPACK_COVERED_DATA_TAG) +
+ RDB_UNPACK_COVERED_DATA_LEN_SIZE);
+
+ return bitmap_is_subset(lookup_bitmap, &covered_bitmap);
+}
+
uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info,
uchar *tuple, uchar *const packed_tuple,
uchar *const pack_buffer,
@@ -872,14 +1004,12 @@ uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info,
Length of the packed tuple
*/
-uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
- const uchar *const record,
- uchar *const packed_tuple,
- Rdb_string_writer *const unpack_info,
- const bool &should_store_row_debug_checksums,
- const longlong &hidden_pk_id, uint n_key_parts,
- uint *const n_null_fields,
- uint *const ttl_pk_offset) const {
+uint Rdb_key_def::pack_record(
+ const TABLE *const tbl, uchar *const pack_buffer, const uchar *const record,
+ uchar *const packed_tuple, Rdb_string_writer *const unpack_info,
+ const bool &should_store_row_debug_checksums, const longlong &hidden_pk_id,
+ uint n_key_parts, uint *const n_null_fields, uint *const ttl_pk_offset,
+ const char *const ttl_bytes) const {
DBUG_ASSERT(tbl != nullptr);
DBUG_ASSERT(pack_buffer != nullptr);
DBUG_ASSERT(record != nullptr);
@@ -890,7 +1020,9 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
(m_index_type == INDEX_TYPE_SECONDARY));
uchar *tuple = packed_tuple;
+ size_t unpack_start_pos = size_t(-1);
size_t unpack_len_pos = size_t(-1);
+ size_t covered_bitmap_pos = size_t(-1);
const bool hidden_pk_exists = table_has_hidden_pk(tbl);
rdb_netbuf_store_index(tuple, m_index_number);
@@ -912,14 +1044,57 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
if (n_null_fields)
*n_null_fields = 0;
+ // Check if we need a covered bitmap. If it is certain that all key parts are
+ // covering, we don't need one.
+ bool store_covered_bitmap = false;
+ if (unpack_info && use_covered_bitmap_format()) {
+ for (uint i = 0; i < n_key_parts; i++) {
+ if (!m_pack_info[i].m_covered) {
+ store_covered_bitmap = true;
+ break;
+ }
+ }
+ }
+
+ const char tag =
+ store_covered_bitmap ? RDB_UNPACK_COVERED_DATA_TAG : RDB_UNPACK_DATA_TAG;
+
if (unpack_info) {
unpack_info->clear();
- unpack_info->write_uint8(RDB_UNPACK_DATA_TAG);
+
+ if (m_index_type == INDEX_TYPE_SECONDARY &&
+ m_total_index_flags_length > 0) {
+ // Reserve space for index flag fields
+ unpack_info->allocate(m_total_index_flags_length);
+
+ // Insert TTL timestamp
+ if (has_ttl() && ttl_bytes) {
+ write_index_flag_field(unpack_info,
+ reinterpret_cast<const uchar *const>(ttl_bytes),
+ Rdb_key_def::TTL_FLAG);
+ }
+ }
+
+ unpack_start_pos = unpack_info->get_current_pos();
+ unpack_info->write_uint8(tag);
unpack_len_pos = unpack_info->get_current_pos();
// we don't know the total length yet, so write a zero
unpack_info->write_uint16(0);
+
+ if (store_covered_bitmap) {
+      // Reserve two bytes for the covered bitmap. For key parts which are
+      // not always covering, this stores whether or not they are covering
+      // for this record.
+ covered_bitmap_pos = unpack_info->get_current_pos();
+ unpack_info->write_uint16(0);
+ }
}
+ MY_BITMAP covered_bitmap;
+ my_bitmap_map covered_bits;
+ uint curr_bitmap_pos = 0;
+ bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
+
for (uint i = 0; i < n_key_parts; i++) {
// Fill hidden pk id into the last key part for secondary keys for tables
// with no pk
@@ -938,7 +1113,7 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
// Save the ttl duration offset in the key so we can store it in front of
// the record later.
if (ttl_pk_offset && m_ttl_duration > 0 && i == m_ttl_pk_key_part_offset) {
- DBUG_ASSERT(check_field_name_match(field, m_ttl_column.c_str()));
+ DBUG_ASSERT(field_check_field_name_match(field, m_ttl_column.c_str()));
DBUG_ASSERT(field->real_type() == MYSQL_TYPE_LONGLONG);
DBUG_ASSERT(field->key_type() == HA_KEYTYPE_ULONGLONG);
DBUG_ASSERT(!field->real_maybe_null());
@@ -953,6 +1128,25 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
tuple = pack_field(field, &m_pack_info[i], tuple, packed_tuple, pack_buffer,
unpack_info, n_null_fields);
+ // If this key part is a prefix of a VARCHAR field, check if it's covered.
+ if (store_covered_bitmap && field->real_type() == MYSQL_TYPE_VARCHAR &&
+ !m_pack_info[i].m_covered && curr_bitmap_pos < MAX_REF_PARTS) {
+ size_t data_length = field->data_length();
+ uint16 key_length;
+ if (m_pk_part_no[i] == (uint)-1) {
+ key_length = tbl->key_info[get_keyno()].key_part[i].length;
+ } else {
+ key_length =
+ tbl->key_info[tbl->s->primary_key].key_part[m_pk_part_no[i]].length;
+ }
+
+ if (m_pack_info[i].m_unpack_func != nullptr &&
+ data_length <= key_length) {
+ bitmap_set_bit(&covered_bitmap, curr_bitmap_pos);
+ }
+ curr_bitmap_pos++;
+ }
+
// Restore field->ptr and field->null_ptr
field->move_field(tbl->record[0] + field_offset,
maybe_null ? tbl->record[0] + null_offset : nullptr,
@@ -960,7 +1154,7 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
}
if (unpack_info) {
- const size_t len = unpack_info->get_current_pos();
+ const size_t len = unpack_info->get_current_pos() - unpack_start_pos;
DBUG_ASSERT(len <= std::numeric_limits<uint16_t>::max());
// Don't store the unpack_info if it has only the header (that is, there's
@@ -968,9 +1162,12 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
// Primary Keys are special: for them, store the unpack_info even if it's
// empty (provided m_maybe_unpack_info==true, see
// ha_rocksdb::convert_record_to_storage_format)
- if (len == RDB_UNPACK_HEADER_SIZE &&
- m_index_type != Rdb_key_def::INDEX_TYPE_PRIMARY) {
- unpack_info->clear();
+ if (m_index_type == Rdb_key_def::INDEX_TYPE_SECONDARY) {
+ if (len == get_unpack_header_size(tag) && !covered_bits) {
+ unpack_info->truncate(unpack_start_pos);
+ } else if (store_covered_bitmap) {
+ unpack_info->write_uint16_at(covered_bitmap_pos, covered_bits);
+ }
} else {
unpack_info->write_uint16_at(unpack_len_pos, len);
}
@@ -1216,11 +1413,30 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
// For secondary keys, we expect the value field to contain unpack data and
// checksum data in that order. One or both can be missing, but they cannot
// be reordered.
+ const char *unpack_header = unp_reader.get_current_ptr();
const bool has_unpack_info =
- unp_reader.remaining_bytes() &&
- *unp_reader.get_current_ptr() == RDB_UNPACK_DATA_TAG;
- if (has_unpack_info && !unp_reader.read(RDB_UNPACK_HEADER_SIZE)) {
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ unp_reader.remaining_bytes() && is_unpack_data_tag(unpack_header[0]);
+ if (has_unpack_info) {
+ if ((m_index_type == INDEX_TYPE_SECONDARY &&
+ m_total_index_flags_length > 0 &&
+ !unp_reader.read(m_total_index_flags_length)) ||
+ !unp_reader.read(get_unpack_header_size(unpack_header[0]))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ }
+
+ // Read the covered bitmap
+ MY_BITMAP covered_bitmap;
+ my_bitmap_map covered_bits;
+ uint curr_bitmap_pos = 0;
+ bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
+
+ const bool has_covered_bitmap =
+ has_unpack_info && (unpack_header[0] == RDB_UNPACK_COVERED_DATA_TAG);
+ if (has_covered_bitmap) {
+ covered_bits = rdb_netbuf_to_uint16((const uchar *)unpack_header +
+ sizeof(RDB_UNPACK_COVERED_DATA_TAG) +
+ RDB_UNPACK_COVERED_DATA_LEN_SIZE);
}
for (uint i = 0; i < m_key_parts; i++) {
@@ -1241,7 +1457,13 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
Field *const field = fpi->get_field_in_table(table);
- if (fpi->m_unpack_func) {
+ bool covered_column = true;
+ if (has_covered_bitmap && field->real_type() == MYSQL_TYPE_VARCHAR &&
+ !m_pack_info[i].m_covered) {
+ covered_column = curr_bitmap_pos < MAX_REF_PARTS &&
+ bitmap_is_set(&covered_bitmap, curr_bitmap_pos++);
+ }
+ if (fpi->m_unpack_func && covered_column) {
/* It is possible to unpack this column. Do it. */
uint field_offset = field->ptr - table->record[0];
@@ -2835,6 +3057,8 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
m_skip_func = &Rdb_key_def::skip_max_length;
m_pack_func = &Rdb_key_def::pack_with_make_sort_key;
+ m_covered = false;
+
switch (type) {
case MYSQL_TYPE_LONGLONG:
case MYSQL_TYPE_LONG:
@@ -2842,14 +3066,17 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
case MYSQL_TYPE_SHORT:
case MYSQL_TYPE_TINY:
m_unpack_func = &Rdb_key_def::unpack_integer;
+ m_covered = true;
return true;
case MYSQL_TYPE_DOUBLE:
m_unpack_func = &Rdb_key_def::unpack_double;
+ m_covered = true;
return true;
case MYSQL_TYPE_FLOAT:
m_unpack_func = &Rdb_key_def::unpack_float;
+ m_covered = true;
return true;
case MYSQL_TYPE_NEWDECIMAL:
@@ -2867,6 +3094,7 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
case MYSQL_TYPE_YEAR: /* YEAR is packed with Field_tiny::make_sort_key */
/* Everything that comes here is packed with just a memcpy(). */
m_unpack_func = &Rdb_key_def::unpack_binary_str;
+ m_covered = true;
return true;
case MYSQL_TYPE_NEWDATE:
@@ -2876,6 +3104,7 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
and little-endian)
*/
m_unpack_func = &Rdb_key_def::unpack_newdate;
+ m_covered = true;
return true;
case MYSQL_TYPE_TINY_BLOB:
case MYSQL_TYPE_MEDIUM_BLOB:
@@ -3044,27 +3273,36 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
}
}
- // Make an adjustment: unpacking partially covered columns is not
- // possible. field->table is populated when called through
- // Rdb_key_def::setup, but not during ha_rocksdb::index_flags.
+ // Make an adjustment: if this column is partially covered, tell the SQL
+ // layer we can't do index-only scans. Later when we perform an index read,
+ // we'll check on a record-by-record basis if we can do an index-only scan
+ // or not.
+ uint field_length;
if (field->table) {
- // Get the original Field object and compare lengths. If this key part is
- // a prefix of a column, then we can't do index-only scans.
- if (field->table->field[field->field_index]->field_length != key_length) {
- m_unpack_func = nullptr;
- m_make_unpack_info_func = nullptr;
- m_unpack_info_stores_value = true;
- res = false;
- }
+ field_length = field->table->field[field->field_index]->field_length;
} else {
- if (field->field_length != key_length) {
+ field_length = field->field_length;
+ }
+
+ if (field_length != key_length) {
+ res = false;
+ // If this index doesn't support covered bitmaps, then we won't know
+ // during a read if the column is actually covered or not. If so, we need
+ // to assume the column isn't covered and skip it during unpacking.
+ //
+ // If key_descr == NULL, then this is a dummy field and we probably don't
+ // need to perform this step. However, to preserve the behavior before
+ // this change, we'll only skip this step if we have an index which
+ // supports covered bitmaps.
+ if (!key_descr || !key_descr->use_covered_bitmap_format()) {
m_unpack_func = nullptr;
m_make_unpack_info_func = nullptr;
m_unpack_info_stores_value = true;
- res = false;
}
}
}
+
+ m_covered = res;
return res;
}
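The rule the reworked setup() applies above boils down to: a key part is always covered only when the indexed length equals the column's full length; a shorter (prefix) key part may still be covered for individual records, which is what the per-record bitmap handles. A small hedged sketch of that decision with made-up lengths:

    #include <cstdio>

    // Sketch only: can this key part be unpacked for every record
    // (always covered), given the indexed length vs. the declared length?
    static bool always_covered(unsigned key_length, unsigned field_length) {
      return key_length == field_length;
    }

    int main() {
      // Column indexed in full: always covered.
      std::printf("full column:   %d\n", always_covered(100, 100));
      // Column indexed by a 20-byte prefix: covered only per record, when the
      // stored value fits within the prefix.
      std::printf("prefix column: %d\n", always_covered(20, 100));
      return 0;
    }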
@@ -3180,18 +3418,19 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
// Length that each index flag takes inside the record.
// Each index in the array maps to the enum INDEX_FLAG
-static const std::array<int, 1> index_flag_lengths = {
+static const std::array<uint, 1> index_flag_lengths = {
{ROCKSDB_SIZEOF_TTL_RECORD}};
-
bool Rdb_key_def::has_index_flag(uint32 index_flags, enum INDEX_FLAG flag) {
return flag & index_flags;
}
uint32 Rdb_key_def::calculate_index_flag_offset(uint32 index_flags,
- enum INDEX_FLAG flag) {
+ enum INDEX_FLAG flag,
+ uint *const length) {
- DBUG_ASSERT(Rdb_key_def::has_index_flag(index_flags, flag));
+ DBUG_ASSERT_IMP(flag != MAX_FLAG,
+ Rdb_key_def::has_index_flag(index_flags, flag));
uint offset = 0;
for (size_t bit = 0; bit < sizeof(index_flags) * CHAR_BIT; ++bit) {
@@ -3199,6 +3438,9 @@ uint32 Rdb_key_def::calculate_index_flag_offset(uint32 index_flags,
/* Exit once we've reached the proper flag */
if (flag & mask) {
+ if (length != nullptr) {
+ *length = index_flag_lengths[bit];
+ }
break;
}
@@ -3210,6 +3452,15 @@ uint32 Rdb_key_def::calculate_index_flag_offset(uint32 index_flags,
return offset;
}
+void Rdb_key_def::write_index_flag_field(Rdb_string_writer *const buf,
+ const uchar *const val,
+ enum INDEX_FLAG flag) const {
+ uint len;
+ uint offset = calculate_index_flag_offset(m_index_flags_bitmap, flag, &len);
+ DBUG_ASSERT(offset + len <= buf->get_current_pos());
+ memcpy(buf->ptr() + offset, val, len);
+}
+
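The offset and length reported by calculate_index_flag_offset() appear to be computed by walking the flag bits in order and summing the lengths of every lower-order flag present in the bitmap; write_index_flag_field() then copies the value into that slot. A standalone sketch of that accumulation with a hypothetical two-flag layout (the real index_flag_lengths array currently has a single 8-byte TTL entry):

    #include <cstdint>
    #include <cstdio>

    // Hypothetical per-flag field lengths, indexed by bit position.
    static const unsigned flag_lengths[] = {8, 4};

    // Byte offset of 'flag' (a single bit) inside the index-flag area:
    // accumulate the lengths of every lower-order flag that is set.
    static unsigned flag_offset(uint32_t flags_bitmap, uint32_t flag,
                                unsigned *length_out) {
      unsigned offset = 0;
      for (unsigned bit = 0; bit < 2; bit++) {
        const uint32_t mask = 1u << bit;
        if (flag & mask) {
          if (length_out) *length_out = flag_lengths[bit];
          break;
        }
        if (flags_bitmap & mask) offset += flag_lengths[bit];
      }
      return offset;
    }

    int main() {
      unsigned len = 0;
      // Both flags present; the second field starts after the 8-byte first one.
      std::printf("offset=%u len=%u\n", flag_offset(0x3, 0x2, &len), len);
      return 0;
    }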
void Rdb_tbl_def::check_if_is_mysql_system_table() {
static const char *const system_dbs[] = {
"mysql", "performance_schema", "information_schema",
@@ -3308,10 +3559,12 @@ struct Rdb_validate_tbls : public Rdb_tables_scanner {
int Rdb_validate_tbls::add_table(Rdb_tbl_def *tdef) {
DBUG_ASSERT(tdef != nullptr);
- /* Add the database/table into the list */
- bool is_partition = tdef->base_partition().size() != 0;
- m_list[tdef->base_dbname()].insert(
- tbl_info_t(tdef->base_tablename(), is_partition));
+  /* Add the database/table to the list if it is not a temp table */
+ if (tdef->base_tablename().find(tmp_file_prefix) == std::string::npos) {
+ bool is_partition = tdef->base_partition().size() != 0;
+ m_list[tdef->base_dbname()].insert(
+ tbl_info_t(tdef->base_tablename(), is_partition));
+ }
return HA_EXIT_SUCCESS;
}
@@ -3394,9 +3647,9 @@ bool Rdb_validate_tbls::scan_for_frms(const std::string &datadir,
/* Scan through the files in the directory */
struct fileinfo *file_info = dir_info->dir_entry;
for (uint ii = 0; ii < dir_info->number_of_files; ii++, file_info++) {
- /* Find .frm files that are not temp files (those that start with '#') */
+ /* Find .frm files that are not temp files (those that contain '#sql') */
const char *ext = strrchr(file_info->name, '.');
- if (ext != nullptr && !is_prefix(file_info->name, tmp_file_prefix) &&
+ if (ext != nullptr && strstr(file_info->name, tmp_file_prefix) == nullptr &&
strcmp(ext, ".frm") == 0) {
std::string tablename =
std::string(file_info->name, ext - file_info->name);
@@ -3723,6 +3976,20 @@ Rdb_ddl_manager::find(GL_INDEX_ID gl_index_id) {
return empty;
}
+// This method returns the name of the table based on an index id. It acquires
+// a read lock on m_rwlock.
+const std::string
+Rdb_ddl_manager::safe_get_table_name(const GL_INDEX_ID &gl_index_id) {
+ std::string ret;
+ mysql_rwlock_rdlock(&m_rwlock);
+ auto it = m_index_num_to_keydef.find(gl_index_id);
+ if (it != m_index_num_to_keydef.end()) {
+ ret = it->second.first;
+ }
+ mysql_rwlock_unlock(&m_rwlock);
+ return ret;
+}
+
void Rdb_ddl_manager::set_stats(
const std::unordered_map<GL_INDEX_ID, Rdb_index_stats> &stats) {
mysql_rwlock_wrlock(&m_rwlock);
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index 9c7cd956eb3..41747eb885c 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -16,6 +16,7 @@
#pragma once
/* C++ standard header files */
+#include <cstdlib>
#include <algorithm>
#include <atomic>
#include <map>
@@ -109,8 +110,8 @@ const size_t RDB_CHECKSUM_CHUNK_SIZE = 2 * RDB_CHECKSUM_SIZE + 1;
const char RDB_CHECKSUM_DATA_TAG = 0x01;
/*
- Unpack data is variable length. It is a 1 tag-byte plus a
- two byte length field. The length field includes the header as well.
+ Unpack data is variable length. The header is 1 tag-byte plus a two byte
+ length field. The length field includes the header as well.
*/
const char RDB_UNPACK_DATA_TAG = 0x02;
const size_t RDB_UNPACK_DATA_LEN_SIZE = sizeof(uint16_t);
@@ -118,6 +119,17 @@ const size_t RDB_UNPACK_HEADER_SIZE =
sizeof(RDB_UNPACK_DATA_TAG) + RDB_UNPACK_DATA_LEN_SIZE;
/*
+ This header format is 1 tag-byte plus a two byte length field plus a two byte
+ covered bitmap. The length field includes the header size.
+*/
+const char RDB_UNPACK_COVERED_DATA_TAG = 0x03;
+const size_t RDB_UNPACK_COVERED_DATA_LEN_SIZE = sizeof(uint16_t);
+const size_t RDB_COVERED_BITMAP_SIZE = sizeof(uint16_t);
+const size_t RDB_UNPACK_COVERED_HEADER_SIZE =
+ sizeof(RDB_UNPACK_COVERED_DATA_TAG) + RDB_UNPACK_COVERED_DATA_LEN_SIZE +
+ RDB_COVERED_BITMAP_SIZE;
+
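For orientation, the covered-format header declared above works out to five bytes in front of the per-field unpack data: one tag byte, a two-byte total length, and a two-byte covered bitmap. A trivial standalone check of that arithmetic, mirroring the constants locally rather than including the real header:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // Local mirrors of the constants above, for illustration only.
      const std::size_t tag_size    = sizeof(char);      // RDB_UNPACK_COVERED_DATA_TAG
      const std::size_t len_size    = sizeof(uint16_t);  // RDB_UNPACK_COVERED_DATA_LEN_SIZE
      const std::size_t bitmap_size = sizeof(uint16_t);  // RDB_COVERED_BITMAP_SIZE
      std::printf("covered header size: %zu\n", tag_size + len_size + bitmap_size);
      return 0;  // prints 5
    }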
+/*
Data dictionary index info field sizes.
*/
const size_t RDB_SIZEOF_INDEX_INFO_VERSION = sizeof(uint16);
@@ -190,7 +202,8 @@ public:
const bool &should_store_row_debug_checksums,
const longlong &hidden_pk_id = 0, uint n_key_parts = 0,
uint *const n_null_fields = nullptr,
- uint *const ttl_pk_offset = nullptr) const;
+ uint *const ttl_pk_offset = nullptr,
+ const char *const ttl_bytes = nullptr) const;
/* Pack the hidden primary key into mem-comparable form. */
uint pack_hidden_pk(const longlong &hidden_pk_id,
uchar *const packed_tuple) const;
@@ -248,6 +261,17 @@ public:
return true;
}
+ void get_lookup_bitmap(const TABLE *table, MY_BITMAP *map) const;
+
+ bool covers_lookup(TABLE *const table,
+ const rocksdb::Slice *const unpack_info,
+ const MY_BITMAP *const map) const;
+
+ inline bool use_covered_bitmap_format() const {
+ return m_index_type == INDEX_TYPE_SECONDARY &&
+ m_kv_format_version >= SECONDARY_FORMAT_VERSION_UPDATE3;
+ }
+
/*
Return true if the passed mem-comparable key
- is from this index, and
@@ -305,6 +329,8 @@ public:
return m_prefix_extractor.get();
}
+ static size_t get_unpack_header_size(char tag);
+
Rdb_key_def &operator=(const Rdb_key_def &) = delete;
Rdb_key_def(const Rdb_key_def &k);
Rdb_key_def(uint indexnr_arg, uint keyno_arg,
@@ -428,7 +454,16 @@ public:
// an inefficiency where data that was a multiple of 8 bytes in length
// had an extra 9 bytes of encoded data.
SECONDARY_FORMAT_VERSION_UPDATE2 = 12,
- SECONDARY_FORMAT_VERSION_LATEST = SECONDARY_FORMAT_VERSION_UPDATE2,
+ // This change includes support for TTL
+ // - This means that when TTL is specified for the table an 8-byte TTL
+ // field is prepended in front of each value.
+ SECONDARY_FORMAT_VERSION_TTL = 13,
+ SECONDARY_FORMAT_VERSION_LATEST = SECONDARY_FORMAT_VERSION_TTL,
+ // This change includes support for covering SK lookups for varchars. A
+ // 2-byte bitmap is added after the tag-byte to unpack_info only for
+ // records which have covered varchar columns. Currently waiting before
+ // enabling in prod.
+ SECONDARY_FORMAT_VERSION_UPDATE3 = 65535,
};
void setup(const TABLE *const table, const Rdb_tbl_def *const tbl_def);
@@ -444,7 +479,11 @@ public:
static bool has_index_flag(uint32 index_flags, enum INDEX_FLAG flag);
static uint32 calculate_index_flag_offset(uint32 index_flags,
- enum INDEX_FLAG flag);
+ enum INDEX_FLAG flag,
+ uint *const field_length = nullptr);
+ void write_index_flag_field(Rdb_string_writer *const buf,
+ const uchar *const val,
+ enum INDEX_FLAG flag) const;
static const std::string
gen_qualifier_for_table(const char *const qualifier,
@@ -597,6 +636,10 @@ public:
SECONDARY_FORMAT_VERSION_UPDATE2);
}
+ static inline bool is_unpack_data_tag(char c) {
+ return c == RDB_UNPACK_DATA_TAG || c == RDB_UNPACK_COVERED_DATA_TAG;
+ }
+
private:
#ifndef DBUG_OFF
inline bool is_storage_available(const int &offset, const int &needed) const {
@@ -643,6 +686,11 @@ public:
uint32 m_index_flags_bitmap;
/*
+ How much space in bytes the index flag fields occupy.
+ */
+ uint32 m_total_index_flags_length;
+
+ /*
Offset in the records where the 8-byte TTL is stored (UINT_MAX if no TTL)
*/
uint32 m_ttl_rec_offset;
@@ -760,6 +808,13 @@ public:
// spaces in the unpack_info
bool m_unpack_info_uses_two_bytes;
+ /*
+ True implies that an index-only read is always possible for this field.
+ False means an index-only read may be possible depending on the record and
+ field type.
+ */
+ bool m_covered;
+
const std::vector<uchar> *space_xfrm;
size_t space_xfrm_len;
size_t space_mb_len;
@@ -1037,6 +1092,8 @@ public:
return m_sequence.get_and_update_next_number(dict);
}
+ const std::string safe_get_table_name(const GL_INDEX_ID &gl_index_id);
+
/* Walk the data dictionary */
int scan_for_tables(Rdb_tables_scanner *tables_scanner);
diff --git a/storage/rocksdb/rdb_i_s.cc b/storage/rocksdb/rdb_i_s.cc
index e89ff6eedb8..346a2994ec1 100644
--- a/storage/rocksdb/rdb_i_s.cc
+++ b/storage/rocksdb/rdb_i_s.cc
@@ -41,6 +41,7 @@
#include "./rdb_cf_manager.h"
#include "./rdb_datadic.h"
#include "./rdb_utils.h"
+#include "./rdb_mariadb_server_port.h"
#include "./rdb_mariadb_port.h"
@@ -849,17 +850,22 @@ static int rdb_i_s_compact_stats_fill_table(
continue;
}
- std::map<std::string, double> props;
+ std::map<std::string, std::string> props;
bool bool_ret MY_ATTRIBUTE((__unused__));
bool_ret = rdb->GetMapProperty(cfh, "rocksdb.cfstats", &props);
DBUG_ASSERT(bool_ret);
+ const std::string prop_name_prefix = "compaction.";
for (auto const &prop_ent : props) {
std::string prop_name = prop_ent.first;
- double value = prop_ent.second;
- std::size_t del_pos = prop_name.find('.');
+ if (prop_name.find(prop_name_prefix) != 0) {
+ continue;
+ }
+ std::string value = prop_ent.second;
+ std::size_t del_pos = prop_name.find('.', prop_name_prefix.size());
DBUG_ASSERT(del_pos != std::string::npos);
- std::string level_str = prop_name.substr(0, del_pos);
+ std::string level_str = prop_name.substr(
+ prop_name_prefix.size(), del_pos - prop_name_prefix.size());
std::string type_str = prop_name.substr(del_pos + 1);
Field **field = tables->table->field;
@@ -868,7 +874,7 @@ static int rdb_i_s_compact_stats_fill_table(
field[0]->store(cf_name.c_str(), cf_name.size(), system_charset_info);
field[1]->store(level_str.c_str(), level_str.size(), system_charset_info);
field[2]->store(type_str.c_str(), type_str.size(), system_charset_info);
- field[3]->store(value, true);
+ field[3]->store(std::stod(value));
ret |= static_cast<int>(
my_core::schema_table_store_record(thd, tables->table));
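The rewritten loop above filters the rocksdb.cfstats map down to keys in the "compaction." namespace and splits what remains into a level and a metric name. A standalone sketch of that split on a made-up key of the expected shape:

    #include <cstddef>
    #include <cstdio>
    #include <string>

    int main() {
      // Hypothetical key of the shape the loop above expects:
      // "compaction.<level>.<metric>"
      const std::string prefix = "compaction.";
      const std::string prop_name = "compaction.L0.SizeBytes";

      if (prop_name.find(prefix) != 0) return 0;  // skip non-compaction entries

      // Split at the first '.' after the prefix.
      const std::size_t del_pos = prop_name.find('.', prefix.size());
      const std::string level = prop_name.substr(prefix.size(),
                                                 del_pos - prefix.size());
      const std::string type = prop_name.substr(del_pos + 1);
      std::printf("level=%s type=%s\n", level.c_str(), type.c_str());
      return 0;  // prints level=L0 type=SizeBytes
    }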
diff --git a/storage/rocksdb/rdb_index_merge.cc b/storage/rocksdb/rdb_index_merge.cc
index c31d0864b7b..44cec7c2a9c 100644
--- a/storage/rocksdb/rdb_index_merge.cc
+++ b/storage/rocksdb/rdb_index_merge.cc
@@ -31,17 +31,38 @@ namespace myrocks {
Rdb_index_merge::Rdb_index_merge(const char *const tmpfile_path,
const ulonglong &merge_buf_size,
const ulonglong &merge_combine_read_size,
- const rocksdb::Comparator *const comparator)
+ const ulonglong &merge_tmp_file_removal_delay,
+ rocksdb::ColumnFamilyHandle *cf)
: m_tmpfile_path(tmpfile_path), m_merge_buf_size(merge_buf_size),
m_merge_combine_read_size(merge_combine_read_size),
- m_comparator(comparator), m_rec_buf_unsorted(nullptr),
- m_output_buf(nullptr) {}
+ m_merge_tmp_file_removal_delay(merge_tmp_file_removal_delay),
+ m_cf_handle(cf), m_rec_buf_unsorted(nullptr), m_output_buf(nullptr) {}
Rdb_index_merge::~Rdb_index_merge() {
/*
- Close tmp file, we don't need to worry about deletion, mysql handles it.
+ If merge_tmp_file_removal_delay is set, sleep between calls to chsize.
+
+ This helps mitigate potential trim stalls on flash when large files are
+ being deleted too quickly.
+ */
+ if (m_merge_tmp_file_removal_delay > 0) {
+ uint64 curr_size = m_merge_buf_size * m_merge_file.m_num_sort_buffers;
+ for (uint i = 0; i < m_merge_file.m_num_sort_buffers; i++) {
+ if (my_chsize(m_merge_file.m_fd, curr_size, 0, MYF(MY_WME))) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error truncating file during fast index creation.");
+ }
+
+ my_sleep(m_merge_tmp_file_removal_delay * 1000);
+ curr_size -= m_merge_buf_size;
+ }
+ }
+
+ /*
+ Close file descriptor, we don't need to worry about deletion,
+ mysql handles it.
*/
- my_close(m_merge_file.fd, MYF(MY_WME));
+ my_close(m_merge_file.m_fd, MYF(MY_WME));
}
int Rdb_index_merge::init() {
@@ -75,7 +96,7 @@ int Rdb_index_merge::init() {
Create a merge file in the given location.
*/
int Rdb_index_merge::merge_file_create() {
- DBUG_ASSERT(m_merge_file.fd == -1);
+ DBUG_ASSERT(m_merge_file.m_fd == -1);
int fd;
#ifdef MARIAROCKS_NOT_YET // mysql_tmpfile_path use
@@ -89,11 +110,13 @@ int Rdb_index_merge::merge_file_create() {
fd = mysql_tmpfile("myrocks");
#endif
if (fd < 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("Failed to create temp file during fast index creation.");
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
- m_merge_file.fd = fd;
- m_merge_file.num_sort_buffers = 0;
+ m_merge_file.m_fd = fd;
+ m_merge_file.m_num_sort_buffers = 0;
return HA_EXIT_SUCCESS;
}
@@ -114,10 +137,10 @@ int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) {
out to disk in sorted order using offset tree.
*/
const uint total_offset = RDB_MERGE_CHUNK_LEN +
- m_rec_buf_unsorted->curr_offset +
+ m_rec_buf_unsorted->m_curr_offset +
RDB_MERGE_KEY_DELIMITER + RDB_MERGE_VAL_DELIMITER +
key.size() + val.size();
- if (total_offset >= m_rec_buf_unsorted->total_size) {
+ if (total_offset >= m_rec_buf_unsorted->m_total_size) {
/*
If the offset tree is empty here, that means that the proposed key to
add is too large for the buffer.
@@ -136,7 +159,7 @@ int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) {
}
}
- const ulonglong rec_offset = m_rec_buf_unsorted->curr_offset;
+ const ulonglong rec_offset = m_rec_buf_unsorted->m_curr_offset;
/*
Store key and value in temporary unsorted in memory buffer pointed to by
@@ -145,8 +168,15 @@ int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) {
m_rec_buf_unsorted->store_key_value(key, val);
/* Find sort order of the new record */
- m_offset_tree.emplace(m_rec_buf_unsorted->block.get() + rec_offset,
- m_comparator);
+ auto res =
+ m_offset_tree.emplace(m_rec_buf_unsorted->m_block.get() + rec_offset,
+ m_cf_handle->GetComparator());
+ if (!res.second) {
+ my_printf_error(ER_DUP_ENTRY,
+ "Failed to insert the record: the key already exists",
+ MYF(0));
+ return ER_DUP_ENTRY;
+ }
return HA_EXIT_SUCCESS;
}
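The new error path in add() above leans on a standard container property: emplace on an ordered set returns a pair whose .second member reports whether an insertion actually happened, so a false value means the sort buffer already holds a record with an identical key. A minimal std::set illustration of the idiom:

    #include <cstdio>
    #include <set>

    int main() {
      std::set<int> keys;
      auto first = keys.emplace(42);   // inserted, first.second == true
      auto second = keys.emplace(42);  // duplicate, second.second == false
      std::printf("inserted: %d, duplicate rejected: %d\n",
                  (int)first.second, (int)!second.second);
      return 0;
    }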
@@ -155,33 +185,33 @@ int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) {
Sort + write merge buffer chunk out to disk.
*/
int Rdb_index_merge::merge_buf_write() {
- DBUG_ASSERT(m_merge_file.fd != -1);
+ DBUG_ASSERT(m_merge_file.m_fd != -1);
DBUG_ASSERT(m_rec_buf_unsorted != nullptr);
DBUG_ASSERT(m_output_buf != nullptr);
DBUG_ASSERT(!m_offset_tree.empty());
/* Write actual chunk size to first 8 bytes of the merge buffer */
- merge_store_uint64(m_output_buf->block.get(),
- m_rec_buf_unsorted->curr_offset + RDB_MERGE_CHUNK_LEN);
- m_output_buf->curr_offset += RDB_MERGE_CHUNK_LEN;
+ merge_store_uint64(m_output_buf->m_block.get(),
+ m_rec_buf_unsorted->m_curr_offset + RDB_MERGE_CHUNK_LEN);
+ m_output_buf->m_curr_offset += RDB_MERGE_CHUNK_LEN;
/*
Iterate through the offset tree. Should be ordered by the secondary key
at this point.
*/
for (const auto &rec : m_offset_tree) {
- DBUG_ASSERT(m_output_buf->curr_offset <= m_merge_buf_size);
+ DBUG_ASSERT(m_output_buf->m_curr_offset <= m_merge_buf_size);
/* Read record from offset (should never fail) */
rocksdb::Slice key;
rocksdb::Slice val;
- merge_read_rec(rec.block, &key, &val);
+ merge_read_rec(rec.m_block, &key, &val);
/* Store key and value into sorted output buffer */
m_output_buf->store_key_value(key, val);
}
- DBUG_ASSERT(m_output_buf->curr_offset <= m_output_buf->total_size);
+ DBUG_ASSERT(m_output_buf->m_curr_offset <= m_output_buf->m_total_size);
/*
Write output buffer to disk.
@@ -189,8 +219,9 @@ int Rdb_index_merge::merge_buf_write() {
Need to position cursor to the chunk it needs to be at on filesystem
then write into the respective merge buffer.
*/
- if (my_seek(m_merge_file.fd, m_merge_file.num_sort_buffers * m_merge_buf_size,
- SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) {
+ if (my_seek(m_merge_file.m_fd,
+ m_merge_file.m_num_sort_buffers * m_merge_buf_size, SEEK_SET,
+ MYF(0)) == MY_FILEPOS_ERROR) {
// NO_LINT_DEBUG
sql_print_error("Error seeking to location in merge file on disk.");
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
@@ -201,16 +232,16 @@ int Rdb_index_merge::merge_buf_write() {
cache can flush out all of the files at the same time, causing a write
burst.
*/
- if (my_write(m_merge_file.fd, m_output_buf->block.get(),
- m_output_buf->total_size, MYF(MY_WME | MY_NABP)) ||
- mysql_file_sync(m_merge_file.fd, MYF(MY_WME))) {
+ if (my_write(m_merge_file.m_fd, m_output_buf->m_block.get(),
+ m_output_buf->m_total_size, MYF(MY_WME | MY_NABP)) ||
+ mysql_file_sync(m_merge_file.m_fd, MYF(MY_WME))) {
// NO_LINT_DEBUG
sql_print_error("Error writing sorted merge buffer to disk.");
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
/* Increment merge file offset to track number of merge buffers written */
- m_merge_file.num_sort_buffers += 1;
+ m_merge_file.m_num_sort_buffers += 1;
/* Reset everything for next run */
merge_reset();
@@ -233,28 +264,29 @@ int Rdb_index_merge::merge_heap_prepare() {
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
- DBUG_ASSERT(m_merge_file.num_sort_buffers > 0);
+ DBUG_ASSERT(m_merge_file.m_num_sort_buffers > 0);
/*
For an n-way merge, we need to read chunks of each merge file
simultaneously.
*/
ulonglong chunk_size =
- m_merge_combine_read_size / m_merge_file.num_sort_buffers;
+ m_merge_combine_read_size / m_merge_file.m_num_sort_buffers;
if (chunk_size >= m_merge_buf_size) {
chunk_size = m_merge_buf_size;
}
/* Allocate buffers for each chunk */
- for (ulonglong i = 0; i < m_merge_file.num_sort_buffers; i++) {
- const auto entry = std::make_shared<merge_heap_entry>(m_comparator);
+ for (ulonglong i = 0; i < m_merge_file.m_num_sort_buffers; i++) {
+ const auto entry =
+ std::make_shared<merge_heap_entry>(m_cf_handle->GetComparator());
/*
Read chunk_size bytes from each chunk on disk, and place inside
respective chunk buffer.
*/
const size_t total_size =
- entry->prepare(m_merge_file.fd, i * m_merge_buf_size, chunk_size);
+ entry->prepare(m_merge_file.m_fd, i * m_merge_buf_size, chunk_size);
if (total_size == (size_t)-1) {
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
@@ -266,7 +298,7 @@ int Rdb_index_merge::merge_heap_prepare() {
}
/* Read the first record from each buffer to initially populate the heap */
- if (entry->read_rec(&entry->key, &entry->val)) {
+ if (entry->read_rec(&entry->m_key, &entry->m_val)) {
// NO_LINT_DEBUG
sql_print_error("Chunk size is too small to process merge.");
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
@@ -291,7 +323,7 @@ int Rdb_index_merge::next(rocksdb::Slice *const key,
If there are no sort buffer records (alters on empty tables),
also exit here.
*/
- if (m_merge_file.num_sort_buffers == 0) {
+ if (m_merge_file.m_num_sort_buffers == 0) {
if (m_offset_tree.empty()) {
return -1;
}
@@ -299,7 +331,7 @@ int Rdb_index_merge::next(rocksdb::Slice *const key,
const auto rec = m_offset_tree.begin();
/* Read record from offset */
- merge_read_rec(rec->block, key, val);
+ merge_read_rec(rec->m_block, key, val);
m_offset_tree.erase(rec);
return HA_EXIT_SUCCESS;
@@ -339,8 +371,8 @@ void Rdb_index_merge::merge_heap_top(rocksdb::Slice *const key,
DBUG_ASSERT(!m_merge_min_heap.empty());
const std::shared_ptr<merge_heap_entry> &entry = m_merge_min_heap.top();
- *key = entry->key;
- *val = entry->val;
+ *key = entry->m_key;
+ *val = entry->m_val;
}
/**
@@ -360,12 +392,12 @@ int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice *const key,
/*
We are finished w/ current chunk if:
- current_offset + disk_offset == total_size
+ current_offset + disk_offset == m_total_size
Return without adding entry back onto heap.
If heap is also empty, we must be finished with merge.
*/
- if (entry->chunk_info->is_chunk_finished()) {
+ if (entry->m_chunk_info->is_chunk_finished()) {
if (m_merge_min_heap.empty()) {
return -1;
}
@@ -377,19 +409,19 @@ int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice *const key,
/*
Make sure we haven't reached the end of the chunk.
*/
- DBUG_ASSERT(!entry->chunk_info->is_chunk_finished());
+ DBUG_ASSERT(!entry->m_chunk_info->is_chunk_finished());
/*
If merge_read_rec fails, it means that either the chunk was cut off
or we've reached the end of the respective chunk.
*/
- if (entry->read_rec(&entry->key, &entry->val)) {
- if (entry->read_next_chunk_from_disk(m_merge_file.fd)) {
+ if (entry->read_rec(&entry->m_key, &entry->m_val)) {
+ if (entry->read_next_chunk_from_disk(m_merge_file.m_fd)) {
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
/* Try reading record again, should never fail. */
- if (entry->read_rec(&entry->key, &entry->val)) {
+ if (entry->read_rec(&entry->m_key, &entry->m_val)) {
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
}
@@ -403,32 +435,33 @@ int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice *const key,
}
int Rdb_index_merge::merge_heap_entry::read_next_chunk_from_disk(File fd) {
- if (chunk_info->read_next_chunk_from_disk(fd)) {
+ if (m_chunk_info->read_next_chunk_from_disk(fd)) {
return HA_EXIT_FAILURE;
}
- block = chunk_info->block.get();
+ m_block = m_chunk_info->m_block.get();
return HA_EXIT_SUCCESS;
}
int Rdb_index_merge::merge_buf_info::read_next_chunk_from_disk(File fd) {
- disk_curr_offset += curr_offset;
+ m_disk_curr_offset += m_curr_offset;
- if (my_seek(fd, disk_curr_offset, SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) {
+ if (my_seek(fd, m_disk_curr_offset, SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) {
// NO_LINT_DEBUG
sql_print_error("Error seeking to location in merge file on disk.");
return HA_EXIT_FAILURE;
}
/* Overwrite the old block */
- const size_t bytes_read = my_read(fd, block.get(), block_len, MYF(MY_WME));
+ const size_t bytes_read =
+ my_read(fd, m_block.get(), m_block_len, MYF(MY_WME));
if (bytes_read == (size_t)-1) {
// NO_LINT_DEBUG
sql_print_error("Error reading merge file from disk.");
return HA_EXIT_FAILURE;
}
- curr_offset = 0;
+ m_curr_offset = 0;
return HA_EXIT_SUCCESS;
}
@@ -464,39 +497,39 @@ void Rdb_index_merge::read_slice(rocksdb::Slice *slice,
int Rdb_index_merge::merge_heap_entry::read_rec(rocksdb::Slice *const key,
rocksdb::Slice *const val) {
- const uchar *block_ptr = block;
- const auto orig_offset = chunk_info->curr_offset;
- const auto orig_block = block;
+ const uchar *block_ptr = m_block;
+ const auto orig_offset = m_chunk_info->m_curr_offset;
+ const auto orig_block = m_block;
/* Read key at block offset into key slice and the value into value slice*/
if (read_slice(key, &block_ptr) != 0) {
return HA_EXIT_FAILURE;
}
- chunk_info->curr_offset += (uintptr_t)block_ptr - (uintptr_t)block;
- block += (uintptr_t)block_ptr - (uintptr_t)block;
+ m_chunk_info->m_curr_offset += (uintptr_t)block_ptr - (uintptr_t)m_block;
+ m_block += (uintptr_t)block_ptr - (uintptr_t)m_block;
if (read_slice(val, &block_ptr) != 0) {
- chunk_info->curr_offset = orig_offset;
- block = orig_block;
+ m_chunk_info->m_curr_offset = orig_offset;
+ m_block = orig_block;
return HA_EXIT_FAILURE;
}
- chunk_info->curr_offset += (uintptr_t)block_ptr - (uintptr_t)block;
- block += (uintptr_t)block_ptr - (uintptr_t)block;
+ m_chunk_info->m_curr_offset += (uintptr_t)block_ptr - (uintptr_t)m_block;
+ m_block += (uintptr_t)block_ptr - (uintptr_t)m_block;
return HA_EXIT_SUCCESS;
}
int Rdb_index_merge::merge_heap_entry::read_slice(rocksdb::Slice *const slice,
const uchar **block_ptr) {
- if (!chunk_info->has_space(RDB_MERGE_REC_DELIMITER)) {
+ if (!m_chunk_info->has_space(RDB_MERGE_REC_DELIMITER)) {
return HA_EXIT_FAILURE;
}
uint64 slice_len;
merge_read_uint64(block_ptr, &slice_len);
- if (!chunk_info->has_space(RDB_MERGE_REC_DELIMITER + slice_len)) {
+ if (!m_chunk_info->has_space(RDB_MERGE_REC_DELIMITER + slice_len)) {
return HA_EXIT_FAILURE;
}
@@ -508,18 +541,18 @@ int Rdb_index_merge::merge_heap_entry::read_slice(rocksdb::Slice *const slice,
size_t Rdb_index_merge::merge_heap_entry::prepare(File fd, ulonglong f_offset,
ulonglong chunk_size) {
- chunk_info = std::make_shared<merge_buf_info>(chunk_size);
- const size_t res = chunk_info->prepare(fd, f_offset);
+ m_chunk_info = std::make_shared<merge_buf_info>(chunk_size);
+ const size_t res = m_chunk_info->prepare(fd, f_offset);
if (res != (size_t)-1) {
- block = chunk_info->block.get() + RDB_MERGE_CHUNK_LEN;
+ m_block = m_chunk_info->m_block.get() + RDB_MERGE_CHUNK_LEN;
}
return res;
}
size_t Rdb_index_merge::merge_buf_info::prepare(File fd, ulonglong f_offset) {
- disk_start_offset = f_offset;
- disk_curr_offset = f_offset;
+ m_disk_start_offset = f_offset;
+ m_disk_curr_offset = f_offset;
/*
Need to position cursor to the chunk it needs to be at on filesystem
@@ -531,7 +564,8 @@ size_t Rdb_index_merge::merge_buf_info::prepare(File fd, ulonglong f_offset) {
return (size_t)-1;
}
- const size_t bytes_read = my_read(fd, block.get(), total_size, MYF(MY_WME));
+ const size_t bytes_read =
+ my_read(fd, m_block.get(), m_total_size, MYF(MY_WME));
if (bytes_read == (size_t)-1) {
// NO_LINT_DEBUG
sql_print_error("Error reading merge file from disk.");
@@ -542,10 +576,10 @@ size_t Rdb_index_merge::merge_buf_info::prepare(File fd, ulonglong f_offset) {
Read the first 8 bytes of each chunk; this gives us the actual
size of each chunk.
*/
- const uchar *block_ptr = block.get();
- merge_read_uint64(&block_ptr, &total_size);
- curr_offset += RDB_MERGE_CHUNK_LEN;
- return total_size;
+ const uchar *block_ptr = m_block.get();
+ merge_read_uint64(&block_ptr, &m_total_size);
+ m_curr_offset += RDB_MERGE_CHUNK_LEN;
+ return m_total_size;
}
/* Store key and value w/ their respective delimiters at the given offset */
@@ -557,13 +591,13 @@ void Rdb_index_merge::merge_buf_info::store_key_value(
void Rdb_index_merge::merge_buf_info::store_slice(const rocksdb::Slice &slice) {
/* Store length delimiter */
- merge_store_uint64(&block[curr_offset], slice.size());
+ merge_store_uint64(&m_block[m_curr_offset], slice.size());
/* Store slice data */
- memcpy(&block[curr_offset + RDB_MERGE_REC_DELIMITER], slice.data(),
+ memcpy(&m_block[m_curr_offset + RDB_MERGE_REC_DELIMITER], slice.data(),
slice.size());
- curr_offset += slice.size() + RDB_MERGE_REC_DELIMITER;
+ m_curr_offset += slice.size() + RDB_MERGE_REC_DELIMITER;
}
void Rdb_index_merge::merge_reset() {
@@ -574,13 +608,13 @@ void Rdb_index_merge::merge_reset() {
m_offset_tree.clear();
/* Reset sort buffer block */
- if (m_rec_buf_unsorted && m_rec_buf_unsorted->block) {
- m_rec_buf_unsorted->curr_offset = 0;
+ if (m_rec_buf_unsorted && m_rec_buf_unsorted->m_block) {
+ m_rec_buf_unsorted->m_curr_offset = 0;
}
/* Reset output buf */
- if (m_output_buf && m_output_buf->block) {
- m_output_buf->curr_offset = 0;
+ if (m_output_buf && m_output_buf->m_block) {
+ m_output_buf->m_curr_offset = 0;
}
}
diff --git a/storage/rocksdb/rdb_index_merge.h b/storage/rocksdb/rdb_index_merge.h
index 9d1469fc34e..6e53663160a 100644
--- a/storage/rocksdb/rdb_index_merge.h
+++ b/storage/rocksdb/rdb_index_merge.h
@@ -49,23 +49,23 @@ class Rdb_index_merge {
Rdb_index_merge(const Rdb_index_merge &p) = delete;
Rdb_index_merge &operator=(const Rdb_index_merge &p) = delete;
-public:
+ public:
/* Information about temporary files used in external merge sort */
struct merge_file_info {
- File fd = -1; /* file descriptor */
- ulong num_sort_buffers; /* number of sort buffers in temp file */
+ File m_fd = -1; /* file descriptor */
+ ulong m_num_sort_buffers = 0; /* number of sort buffers in temp file */
};
/* Buffer for sorting in main memory. */
struct merge_buf_info {
/* heap memory allocated for main memory sort/merge */
- std::unique_ptr<uchar[]> block;
+ std::unique_ptr<uchar[]> m_block;
const ulonglong
- block_len; /* amount of data bytes allocated for block above */
- ulonglong curr_offset; /* offset of the record pointer for the block */
- ulonglong disk_start_offset; /* where the chunk starts on disk */
- ulonglong disk_curr_offset; /* current offset on disk */
- ulonglong total_size; /* total # of data bytes in chunk */
+ m_block_len; /* amount of data bytes allocated for block above */
+ ulonglong m_curr_offset; /* offset of the record pointer for the block */
+ ulonglong m_disk_start_offset; /* where the chunk starts on disk */
+ ulonglong m_disk_curr_offset; /* current offset on disk */
+ ulonglong m_total_size; /* total # of data bytes in chunk */
void store_key_value(const rocksdb::Slice &key, const rocksdb::Slice &val)
MY_ATTRIBUTE((__nonnull__));
@@ -78,32 +78,33 @@ public:
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
inline bool is_chunk_finished() const {
- return curr_offset + disk_curr_offset - disk_start_offset == total_size;
+ return m_curr_offset + m_disk_curr_offset - m_disk_start_offset ==
+ m_total_size;
}
inline bool has_space(uint64 needed) const {
- return curr_offset + needed <= block_len;
+ return m_curr_offset + needed <= m_block_len;
}
explicit merge_buf_info(const ulonglong merge_block_size)
- : block(nullptr), block_len(merge_block_size), curr_offset(0),
- disk_start_offset(0), disk_curr_offset(0),
- total_size(merge_block_size) {
+ : m_block(nullptr), m_block_len(merge_block_size), m_curr_offset(0),
+ m_disk_start_offset(0), m_disk_curr_offset(0),
+ m_total_size(merge_block_size) {
/* Will throw an exception if it runs out of memory here */
- block = std::unique_ptr<uchar[]>(new uchar[merge_block_size]);
+ m_block = std::unique_ptr<uchar[]>(new uchar[merge_block_size]);
/* Initialize entire buffer to 0 to avoid valgrind errors */
- memset(block.get(), 0, merge_block_size);
+ memset(m_block.get(), 0, merge_block_size);
}
};
/* Represents an entry in the heap during merge phase of external sort */
struct merge_heap_entry {
- std::shared_ptr<merge_buf_info> chunk_info; /* pointer to buffer info */
- uchar *block; /* pointer to heap memory where record is stored */
- const rocksdb::Comparator *const comparator;
- rocksdb::Slice key; /* current key pointed to by block ptr */
- rocksdb::Slice val;
+ std::shared_ptr<merge_buf_info> m_chunk_info; /* pointer to buffer info */
+ uchar *m_block; /* pointer to heap memory where record is stored */
+ const rocksdb::Comparator *const m_comparator;
+ rocksdb::Slice m_key; /* current key pointed to by block ptr */
+ rocksdb::Slice m_val;
size_t prepare(File fd, ulonglong f_offset, ulonglong chunk_size)
MY_ATTRIBUTE((__nonnull__));
@@ -118,35 +119,37 @@ public:
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
explicit merge_heap_entry(const rocksdb::Comparator *const comparator)
- : chunk_info(nullptr), block(nullptr), comparator(comparator) {}
+ : m_chunk_info(nullptr), m_block(nullptr), m_comparator(comparator) {}
};
struct merge_heap_comparator {
bool operator()(const std::shared_ptr<merge_heap_entry> &lhs,
const std::shared_ptr<merge_heap_entry> &rhs) {
- return lhs->comparator->Compare(rhs->key, lhs->key) < 0;
+ return lhs->m_comparator->Compare(rhs->m_key, lhs->m_key) < 0;
}
};
/* Represents a record in unsorted buffer */
struct merge_record {
- uchar *block; /* points to offset of key in sort buffer */
- const rocksdb::Comparator *const comparator;
+ uchar *m_block; /* points to offset of key in sort buffer */
+ const rocksdb::Comparator *const m_comparator;
bool operator<(const merge_record &record) const {
- return merge_record_compare(this->block, record.block, comparator) < 0;
+ return merge_record_compare(this->m_block, record.m_block, m_comparator) <
+ 0;
}
merge_record(uchar *const block,
const rocksdb::Comparator *const comparator)
- : block(block), comparator(comparator) {}
+ : m_block(block), m_comparator(comparator) {}
};
-private:
+ private:
const char *m_tmpfile_path;
const ulonglong m_merge_buf_size;
const ulonglong m_merge_combine_read_size;
- const rocksdb::Comparator *m_comparator;
+ const ulonglong m_merge_tmp_file_removal_delay;
+ rocksdb::ColumnFamilyHandle *m_cf_handle;
struct merge_file_info m_merge_file;
std::shared_ptr<merge_buf_info> m_rec_buf_unsorted;
std::shared_ptr<merge_buf_info> m_output_buf;
@@ -184,11 +187,12 @@ private:
void read_slice(rocksdb::Slice *slice, const uchar *block_ptr)
MY_ATTRIBUTE((__nonnull__));
-public:
+ public:
Rdb_index_merge(const char *const tmpfile_path,
const ulonglong &merge_buf_size,
const ulonglong &merge_combine_read_size,
- const rocksdb::Comparator *const comparator);
+ const ulonglong &merge_tmp_file_removal_delay,
+ rocksdb::ColumnFamilyHandle *cf);
~Rdb_index_merge();
int init() MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
@@ -213,6 +217,8 @@ public:
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
void merge_reset();
+
+ rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf_handle; }
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_mariadb_server_port.h b/storage/rocksdb/rdb_mariadb_server_port.h
index e424fbb91f8..fe963446ebb 100644
--- a/storage/rocksdb/rdb_mariadb_server_port.h
+++ b/storage/rocksdb/rdb_mariadb_server_port.h
@@ -70,4 +70,7 @@ class Regex_list_handler
void warn_about_bad_patterns(const Regex_list_handler* regex_list_handler,
const char *name);
+void print_keydup_error(TABLE *table, KEY *key, myf errflag,
+ const THD *thd, const char *org_table_name=NULL);
+
#endif
diff --git a/storage/rocksdb/rdb_perf_context.cc b/storage/rocksdb/rdb_perf_context.cc
index 8dd7b6ff0d5..d126d156314 100644
--- a/storage/rocksdb/rdb_perf_context.cc
+++ b/storage/rocksdb/rdb_perf_context.cc
@@ -179,6 +179,16 @@ bool Rdb_io_perf::start(const uint32_t perf_context_level) {
return true;
}
+void Rdb_io_perf::update_bytes_written(const uint32_t perf_context_level,
+ ulonglong bytes_written) {
+ const rocksdb::PerfLevel perf_level =
+ static_cast<rocksdb::PerfLevel>(perf_context_level);
+ if (perf_level != rocksdb::kDisable && m_shared_io_perf_write) {
+ io_write_bytes += bytes_written;
+ io_write_requests += 1;
+ }
+}
+
void Rdb_io_perf::end_and_record(const uint32_t perf_context_level) {
const rocksdb::PerfLevel perf_level =
static_cast<rocksdb::PerfLevel>(perf_context_level);
@@ -217,6 +227,18 @@ void Rdb_io_perf::end_and_record(const uint32_t perf_context_level) {
}
#ifdef MARIAROCKS_NOT_YET
+ if (m_shared_io_perf_write &&
+ (io_write_bytes != 0 || io_write_requests != 0)) {
+ my_io_perf_t io_perf_write;
+ io_perf_write.init();
+ io_perf_write.bytes = io_write_bytes;
+ io_perf_write.requests = io_write_requests;
+ m_shared_io_perf_write->sum(io_perf_write);
+ m_stats->table_io_perf_write.sum(io_perf_write);
+ io_write_bytes = 0;
+ io_write_requests = 0;
+ }
+
if (m_stats) {
if (rocksdb::get_perf_context()->internal_key_skipped_count != 0) {
m_stats->key_skipped +=
diff --git a/storage/rocksdb/rdb_perf_context.h b/storage/rocksdb/rdb_perf_context.h
index 9d580ff0b8a..f9b9fd48d3e 100644
--- a/storage/rocksdb/rdb_perf_context.h
+++ b/storage/rocksdb/rdb_perf_context.h
@@ -110,30 +110,42 @@ class Rdb_io_perf {
// Context management
Rdb_atomic_perf_counters *m_atomic_counters = nullptr;
my_io_perf_atomic_t *m_shared_io_perf_read = nullptr;
+ my_io_perf_atomic_t *m_shared_io_perf_write = nullptr;
ha_statistics *m_stats = nullptr;
-public:
+ uint64_t io_write_bytes;
+ uint64_t io_write_requests;
+
+ public:
Rdb_io_perf(const Rdb_io_perf &) = delete;
Rdb_io_perf &operator=(const Rdb_io_perf &) = delete;
void init(Rdb_atomic_perf_counters *const atomic_counters,
my_io_perf_atomic_t *const shared_io_perf_read,
+ my_io_perf_atomic_t *const shared_io_perf_write,
ha_statistics *const stats) {
DBUG_ASSERT(atomic_counters != nullptr);
DBUG_ASSERT(shared_io_perf_read != nullptr);
+ DBUG_ASSERT(shared_io_perf_write != nullptr);
DBUG_ASSERT(stats != nullptr);
m_atomic_counters = atomic_counters;
m_shared_io_perf_read = shared_io_perf_read;
+ m_shared_io_perf_write = shared_io_perf_write;
m_stats = stats;
+
+ io_write_bytes = 0;
+ io_write_requests = 0;
}
bool start(const uint32_t perf_context_level);
+ void update_bytes_written(const uint32_t perf_context_level,
+ ulonglong bytes_written);
void end_and_record(const uint32_t perf_context_level);
explicit Rdb_io_perf()
: m_atomic_counters(nullptr), m_shared_io_perf_read(nullptr),
- m_stats(nullptr) {}
+ m_stats(nullptr), io_write_bytes(0), io_write_requests(0) {}
};
} // namespace myrocks
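A hedged sketch of the accounting pattern behind the new write-side members: update_bytes_written() accumulates byte and request counts during an operation, and end_and_record() later folds them into the shared and per-table statistics and resets them. The struct below is a local stand-in with made-up names, not the real Rdb_io_perf API:

    #include <cstdint>
    #include <cstdio>

    struct WriteCounters {
      uint64_t bytes = 0;
      uint64_t requests = 0;

      // Called once per write batch.
      void update(uint64_t bytes_written) {
        bytes += bytes_written;
        requests += 1;
      }

      // Called when the perf context ends: flush into shared stats and reset.
      void end_and_record(uint64_t *shared_bytes, uint64_t *shared_requests) {
        *shared_bytes += bytes;
        *shared_requests += requests;
        bytes = 0;
        requests = 0;
      }
    };

    int main() {
      uint64_t table_bytes = 0, table_requests = 0;
      WriteCounters perf;
      perf.update(4096);
      perf.update(8192);
      perf.end_and_record(&table_bytes, &table_requests);
      std::printf("bytes=%llu requests=%llu\n",
                  (unsigned long long)table_bytes,
                  (unsigned long long)table_requests);
      return 0;
    }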
diff --git a/storage/rocksdb/rdb_utils.cc b/storage/rocksdb/rdb_utils.cc
index 6f14e5048dd..335676a6ba4 100644
--- a/storage/rocksdb/rdb_utils.cc
+++ b/storage/rocksdb/rdb_utils.cc
@@ -303,6 +303,18 @@ bool rdb_database_exists(const std::string &db_name) {
return true;
}
+void rdb_log_status_error(const rocksdb::Status &s, const char *msg) {
+ if (msg == nullptr) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: status error, code: %d, error message: %s",
+ s.code(), s.ToString().c_str());
+ return;
+ }
+
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: %s, Status Code: %d, Status: %s", msg, s.code(),
+ s.ToString().c_str());
+}
/*
@brief
@@ -340,18 +352,4 @@ const char *get_rocksdb_supported_compression_types()
return compression_methods_buf.c_str();
}
-void rdb_log_status_error(const rocksdb::Status &s, const char *msg) {
- if (msg == nullptr) {
- // NO_LINT_DEBUG
- sql_print_error("RocksDB: status error, code: %d, error message: %s",
- s.code(), s.ToString().c_str());
- return;
- }
-
- // NO_LINT_DEBUG
- sql_print_error("RocksDB: %s, Status Code: %d, Status: %s", msg, s.code(),
- s.ToString().c_str());
-}
-
-
} // namespace myrocks
diff --git a/storage/rocksdb/rocksdb b/storage/rocksdb/rocksdb
-Subproject 857e9960be8a5e4ee3aecfd7d27fe7b1ef6bb35
+Subproject 9a970c81af9807071bd690f4c808c5045866291
diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc
index db7e2172e93..528bfcb9f4a 100644
--- a/storage/xtradb/buf/buf0flu.cc
+++ b/storage/xtradb/buf/buf0flu.cc
@@ -757,7 +757,6 @@ buf_flush_update_zip_checksum(
srv_checksum_algorithm)));
mach_write_to_8(page + FIL_PAGE_LSN, lsn);
- memset(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
}
@@ -935,8 +934,6 @@ buf_flush_write_block_low(
bpage->newest_modification);
ut_a(page_zip_verify_checksum(frame, zip_size));
-
- memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
break;
case BUF_BLOCK_FILE_PAGE:
frame = bpage->zip.data;
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc
index a8752059b5b..3dd8aa8081b 100644
--- a/storage/xtradb/fil/fil0fil.cc
+++ b/storage/xtradb/fil/fil0fil.cc
@@ -1045,156 +1045,24 @@ fil_space_extend_must_retry(
page_size = UNIV_PAGE_SIZE;
}
-#ifdef _WIN32
- const ulint io_completion_type = OS_FILE_READ;
- /* Logically or physically extend the file with zero bytes,
- depending on whether it is sparse. */
+ /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
+ fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.*/
+ os_offset_t new_size = std::max(
+ os_offset_t(size - file_start_page_no) * page_size,
+ os_offset_t(FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE));
- /* FIXME: Call DeviceIoControl(node->handle, FSCTL_SET_SPARSE, ...)
- when opening a file when FSP_FLAGS_HAS_PAGE_COMPRESSION(). */
- {
- FILE_END_OF_FILE_INFO feof;
- /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
- fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.
- Do not shrink short ROW_FORMAT=COMPRESSED files. */
- feof.EndOfFile.QuadPart = std::max(
- os_offset_t(size - file_start_page_no) * page_size,
- os_offset_t(FIL_IBD_FILE_INITIAL_SIZE
- * UNIV_PAGE_SIZE));
- *success = SetFileInformationByHandle(node->handle,
- FileEndOfFileInfo,
- &feof, sizeof feof);
- if (!*success) {
- ib_logf(IB_LOG_LEVEL_ERROR, "extending file %s"
- " from " INT64PF
- " to " INT64PF " bytes failed with %u",
- node->name,
- os_offset_t(node->size) * page_size,
- feof.EndOfFile.QuadPart, GetLastError());
- } else {
- start_page_no = size;
- }
- }
-#else
- /* We will logically extend the file with ftruncate() if
- page_compression is enabled, because the file is expected to
- be sparse in that case. Make sure that ftruncate() can deal
- with large files. */
- const bool is_sparse = sizeof(off_t) >= 8
- && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags);
-
-# ifdef HAVE_POSIX_FALLOCATE
- /* We must complete the I/O request after invoking
- posix_fallocate() to avoid an assertion failure at shutdown.
- Because no actual writes were dispatched, a read operation
- will suffice. */
- const ulint io_completion_type = srv_use_posix_fallocate
- || is_sparse ? OS_FILE_READ : OS_FILE_WRITE;
-
- if (srv_use_posix_fallocate && !is_sparse) {
- const os_offset_t start_offset
- = os_offset_t(start_page_no - file_start_page_no)
- * page_size;
- const ulint n_pages = size - start_page_no;
- const os_offset_t len = os_offset_t(n_pages) * page_size;
-
- int err;
- do {
- err = posix_fallocate(node->handle, start_offset, len);
- } while (err == EINTR
- && srv_shutdown_state == SRV_SHUTDOWN_NONE);
-
- *success = !err;
- if (!*success) {
- ib_logf(IB_LOG_LEVEL_ERROR, "extending file %s"
- " from " INT64PF " to " INT64PF " bytes"
- " failed with error %d",
- node->name, start_offset, len + start_offset,
- err);
- }
-
- DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- *success = FALSE;
- os_has_said_disk_full = TRUE;);
-
- if (*success) {
- os_has_said_disk_full = FALSE;
- start_page_no = size;
- }
- } else
-# else
- const ulint io_completion_type = is_sparse
- ? OS_FILE_READ : OS_FILE_WRITE;
-# endif
- if (is_sparse) {
- /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
- fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.
- Do not shrink short ROW_FORMAT=COMPRESSED files. */
- off_t s = std::max(off_t(size - file_start_page_no)
- * off_t(page_size),
- off_t(FIL_IBD_FILE_INITIAL_SIZE
- * UNIV_PAGE_SIZE));
- *success = !ftruncate(node->handle, s);
- if (!*success) {
- ib_logf(IB_LOG_LEVEL_ERROR, "ftruncate of file %s"
- " from " INT64PF " to " INT64PF " bytes"
- " failed with error %d",
- node->name,
- os_offset_t(start_page_no - file_start_page_no)
- * page_size, os_offset_t(s), errno);
- } else {
- start_page_no = size;
- }
- } else {
- /* Extend at most 64 pages at a time */
- ulint buf_size = ut_min(64, size - start_page_no)
- * page_size;
- byte* buf2 = static_cast<byte*>(
- calloc(1, buf_size + page_size));
- *success = buf2 != NULL;
- if (!buf2) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Cannot allocate " ULINTPF
- " bytes to extend file",
- buf_size + page_size);
- }
- byte* const buf = static_cast<byte*>(
- ut_align(buf2, page_size));
-
- while (*success && start_page_no < size) {
- ulint n_pages
- = ut_min(buf_size / page_size,
- size - start_page_no);
-
- os_offset_t offset = static_cast<os_offset_t>(
- start_page_no - file_start_page_no)
- * page_size;
-
- *success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC,
- node->name, node->handle, buf,
- offset, page_size * n_pages,
- page_size, node, NULL,
- space->id, NULL, 0);
-
- DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- *success = FALSE;
- os_has_said_disk_full = TRUE;);
-
- if (*success) {
- os_has_said_disk_full = FALSE;
- }
- /* Let us measure the size of the file
- to determine how much we were able to
- extend it */
- os_offset_t fsize = os_file_get_size(node->handle);
- ut_a(fsize != os_offset_t(-1));
+ *success = os_file_set_size(node->name, node->handle, new_size,
+ FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags));
- start_page_no = ulint(fsize / page_size)
- + file_start_page_no;
- }
- free(buf2);
+ DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
+ *success = FALSE;
+ os_has_said_disk_full = TRUE;);
+
+ if (*success) {
+ os_has_said_disk_full = FALSE;
+ start_page_no = size;
}
-#endif
mutex_enter(&fil_system->mutex);
ut_a(node->being_extended);
@@ -1204,7 +1072,7 @@ fil_space_extend_must_retry(
space->size += file_size - node->size;
node->size = file_size;
- fil_node_complete_io(node, fil_system, io_completion_type);
+ fil_node_complete_io(node, fil_system, OS_FILE_READ);
node->being_extended = FALSE;
diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc
index f3c72576501..183f65bcbd8 100644
--- a/storage/xtradb/os/os0file.cc
+++ b/storage/xtradb/os/os0file.cc
@@ -2575,7 +2575,16 @@ os_file_get_size(
#endif /* __WIN__ */
}
-/** Set the size of a newly created file.
+/** Extend a file.
+
+On Windows, extending a file allocates blocks for the file,
+unless the file is sparse.
+
+On Unix, we will extend the file with ftruncate() if the
+file needs to be sparse. Otherwise posix_fallocate() is used
+when available, and if not, binary zeroes are added to the end
+of the file.
+
@param[in] name file name
@param[in] file file handle
@param[in] size desired file size
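The strategy described in the new comment above amounts to a three-way fallback. Below is a simplified, hedged sketch in plain POSIX terms; the real os_file_set_size() also handles Windows, checks the current file size first, respects srv_use_posix_fallocate, and writes in page-sized chunks:

    #include <fcntl.h>
    #include <unistd.h>
    #include <cstring>

    // Sketch only: grow 'fd' to 'size' bytes. 'sparse' corresponds to
    // page-compressed tablespaces, which are extended logically.
    static bool extend_file(int fd, off_t size, bool sparse) {
      if (sparse) {
        // Logical extension only; blocks are allocated lazily on write.
        return ftruncate(fd, size) == 0;
      }
      // Preferred path: ask the filesystem to reserve the blocks up front.
      if (posix_fallocate(fd, 0, size) == 0) {
        return true;
      }
      // Last resort: append binary zeroes until the target size is reached.
      char zeroes[4096];
      std::memset(zeroes, 0, sizeof zeroes);
      off_t current = lseek(fd, 0, SEEK_END);
      while (current < size) {
        const size_t chunk = size - current < (off_t)sizeof zeroes
                                 ? (size_t)(size - current)
                                 : sizeof zeroes;
        const ssize_t n = write(fd, zeroes, chunk);
        if (n <= 0) return false;
        current += n;
      }
      return true;
    }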
@@ -2626,15 +2635,21 @@ os_file_set_size(
"file %s failed with error %d",
size, name, err);
}
+ /* Set errno because posix_fallocate() does not do it.*/
+ errno = err;
return(!err);
}
# endif
+ os_offset_t current_size = os_file_get_size(file);
+
+ if (current_size >= size) {
+ return true;
+ }
+
/* Write up to 1 megabyte at a time. */
ulint buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
- os_offset_t current_size = 0;
-
byte* buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE));
if (!buf2) {
diff --git a/strings/json_lib.c b/strings/json_lib.c
index 3df2ecd4768..cf99afd6f7b 100644
--- a/strings/json_lib.c
+++ b/strings/json_lib.c
@@ -1043,7 +1043,7 @@ static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]=
/* PT */ { PS_OK, JE_SYN, PS_AST, PS_AR, JE_SYN, PS_KEY, JE_SYN, JE_SYN,
JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
-/* AR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, PS_PT, JE_SYN, PS_Z,
+/* AR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, PS_Z,
PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, PS_PT, JE_SYN, PS_Z,
diff --git a/unittest/mysys/ma_dyncol-t.c b/unittest/mysys/ma_dyncol-t.c
index 3b43c10a6a8..124f16e15be 100644
--- a/unittest/mysys/ma_dyncol-t.c
+++ b/unittest/mysys/ma_dyncol-t.c
@@ -124,7 +124,7 @@ void test_value_single_double(double num, const char *name)
if (mariadb_dyncol_get_num(&str, 1, &res))
goto err;
rc= (res.type == DYN_COL_DOUBLE) && (res.x.double_value == num);
- num= res.x.ulong_value;
+ num= res.x.double_value;
err:
ok(rc, "%s - %lf", name, num);
/* cleanup */
diff --git a/unittest/mysys/my_getopt-t.c b/unittest/mysys/my_getopt-t.c
index 39814d76690..3e16d79424e 100644
--- a/unittest/mysys/my_getopt-t.c
+++ b/unittest/mysys/my_getopt-t.c
@@ -72,7 +72,7 @@ void run(const char *arg, ...)
arg= va_arg(ap, char*);
}
va_end(ap);
- arg_c= arg_v - arg_s;
+ arg_c= (int)(arg_v - arg_s);
arg_v= arg_s;
res= handle_options(&arg_c, &arg_v, mopts_options, 0);
}