diff options
author | Sergei Petrunia <psergey@askmonty.org> | 2017-02-06 17:39:08 +0000 |
---|---|---|
committer | Sergei Petrunia <psergey@askmonty.org> | 2017-02-06 17:39:08 +0000 |
commit | 7468ccfadf27a0c5f87c8909ee8514bdd52dc4a7 (patch) | |
tree | b77bc71584e1fe8d727b04a1577dd612da7b9fdd | |
parent | cfb59f3196aac1b41cdda79952031dcc64042914 (diff) | |
download | mariadb-git-7468ccfadf27a0c5f87c8909ee8514bdd52dc4a7.tar.gz |
Copy of
commit d1bb19b8f751875472211312c8e810143a7ba4b6
Author: Manuel Ung <mung@fb.com>
Date: Fri Feb 3 11:50:34 2017 -0800
Add cardinality stats to information schema
Summary: This adds cardinality stats to the INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP table. These are the only user-collected properties from SST files that we do not yet expose, and they are useful for debugging cardinality bugs.
Reviewed By: hermanlee
Differential Revision: D4509156
fbshipit-source-id: 2d3918a
82 files changed, 9303 insertions, 10585 deletions
diff --git a/storage/rocksdb/event_listener.cc b/storage/rocksdb/event_listener.cc index e4338f07986..04c433acabe 100644 --- a/storage/rocksdb/event_listener.cc +++ b/storage/rocksdb/event_listener.cc @@ -22,8 +22,8 @@ #include <vector> /* MySQL includes */ -#include <mysql/plugin.h> #include <my_global.h> +#include <mysql/plugin.h> /* MyRocks includes */ #include "./ha_rocksdb.h" @@ -33,10 +33,8 @@ namespace myrocks { static std::vector<Rdb_index_stats> -extract_index_stats( - const std::vector<std::string>& files, - const rocksdb::TablePropertiesCollection& props -) { +extract_index_stats(const std::vector<std::string> &files, + const rocksdb::TablePropertiesCollection &props) { std::vector<Rdb_index_stats> ret; for (auto fn : files) { const auto it = props.find(fn); @@ -49,11 +47,10 @@ extract_index_stats( } void Rdb_event_listener::update_index_stats( - const rocksdb::TableProperties& props -) { + const rocksdb::TableProperties &props) { DBUG_ASSERT(m_ddl_manager != nullptr); const auto tbl_props = - std::make_shared<const rocksdb::TableProperties>(props); + std::make_shared<const rocksdb::TableProperties>(props); std::vector<Rdb_index_stats> stats; Rdb_tbl_prop_coll::read_stats_from_tbl_props(tbl_props, &stats); @@ -62,32 +59,26 @@ void Rdb_event_listener::update_index_stats( } void Rdb_event_listener::OnCompactionCompleted( - rocksdb::DB *db, - const rocksdb::CompactionJobInfo& ci -) { + rocksdb::DB *db, const rocksdb::CompactionJobInfo &ci) { DBUG_ASSERT(db != nullptr); DBUG_ASSERT(m_ddl_manager != nullptr); if (ci.status.ok()) { m_ddl_manager->adjust_stats( - extract_index_stats(ci.output_files, ci.table_properties), - extract_index_stats(ci.input_files, ci.table_properties)); + extract_index_stats(ci.output_files, ci.table_properties), + extract_index_stats(ci.input_files, ci.table_properties)); } } void Rdb_event_listener::OnFlushCompleted( - rocksdb::DB* db, - const rocksdb::FlushJobInfo& flush_job_info -) { + rocksdb::DB *db, const rocksdb::FlushJobInfo 
&flush_job_info) { DBUG_ASSERT(db != nullptr); update_index_stats(flush_job_info.table_properties); } void Rdb_event_listener::OnExternalFileIngested( - rocksdb::DB* db, - const rocksdb::ExternalFileIngestionInfo& info -) { + rocksdb::DB *db, const rocksdb::ExternalFileIngestionInfo &info) { DBUG_ASSERT(db != nullptr); update_index_stats(info.table_properties); } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/event_listener.h b/storage/rocksdb/event_listener.h index 8895da45c7b..d535031644b 100644 --- a/storage/rocksdb/event_listener.h +++ b/storage/rocksdb/event_listener.h @@ -21,29 +21,26 @@ namespace myrocks { class Rdb_ddl_manager; -class Rdb_event_listener : public rocksdb::EventListener -{ - public: - Rdb_event_listener(const Rdb_event_listener&) = delete; - Rdb_event_listener& operator=(const Rdb_event_listener&) = delete; - - explicit Rdb_event_listener(Rdb_ddl_manager* const ddl_manager) : - m_ddl_manager(ddl_manager) { - } - - void OnCompactionCompleted( - rocksdb::DB* db, const rocksdb::CompactionJobInfo& ci) override; - void OnFlushCompleted( - rocksdb::DB* db, const rocksdb::FlushJobInfo& flush_job_info) override; +class Rdb_event_listener : public rocksdb::EventListener { +public: + Rdb_event_listener(const Rdb_event_listener &) = delete; + Rdb_event_listener &operator=(const Rdb_event_listener &) = delete; + + explicit Rdb_event_listener(Rdb_ddl_manager *const ddl_manager) + : m_ddl_manager(ddl_manager) {} + + void OnCompactionCompleted(rocksdb::DB *db, + const rocksdb::CompactionJobInfo &ci) override; + void OnFlushCompleted(rocksdb::DB *db, + const rocksdb::FlushJobInfo &flush_job_info) override; void OnExternalFileIngested( - rocksdb::DB* db, const rocksdb::ExternalFileIngestionInfo& ingestion_info) - override; + rocksdb::DB *db, + const rocksdb::ExternalFileIngestionInfo &ingestion_info) override; - private: - Rdb_ddl_manager* m_ddl_manager; +private: + Rdb_ddl_manager *m_ddl_manager; - void update_index_stats( - 
const rocksdb::TableProperties& props); + void update_index_stats(const rocksdb::TableProperties &props); }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index a557f96b92f..59c6e1ab33b 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -15,7 +15,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation +#pragma implementation // gcc: Class implementation #endif #define MYSQL_SERVER 1 @@ -24,11 +24,11 @@ #include "./ha_rocksdb.h" /* C++ standard header files */ -#include <set> +#include <algorithm> #include <queue> +#include <set> #include <string> #include <vector> -#include <algorithm> /* MySQL includes */ #include "./debug_sync.h" @@ -36,12 +36,13 @@ #include "./my_stacktrace.h" #include "./sql_audit.h" #include "./sql_table.h" -#include <mysys_err.h> #include <mysql/psi/mysql_table.h> #include <mysql/thread_pool_priv.h> +#include <mysys_err.h> /* RocksDB includes */ #include "rocksdb/compaction_filter.h" +#include "rocksdb/persistent_cache.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/slice_transform.h" #include "rocksdb/utilities/checkpoint.h" @@ -61,8 +62,7 @@ #include "./rdb_threads.h" // Internal MySQL APIs not exposed in any header. -extern "C" -{ +extern "C" { /** Mark transaction to rollback and mark error as fatal to a sub-statement. 
@param thd Thread handle @@ -107,17 +107,15 @@ static handler *rocksdb_create_handler(my_core::handlerton *hton, my_core::TABLE_SHARE *table_arg, my_core::MEM_ROOT *mem_root); -bool can_use_bloom_filter(THD *thd, - const Rdb_key_def& kd, +bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, const rocksdb::Slice &eq_cond, - const bool use_all_keys, - bool is_ascending); + const bool use_all_keys, bool is_ascending); /////////////////////////////////////////////////////////// // Parameters and settings /////////////////////////////////////////////////////////// -static char * rocksdb_default_cf_options; -static char * rocksdb_override_cf_options; +static char *rocksdb_default_cf_options; +static char *rocksdb_override_cf_options; Rdb_cf_options rocksdb_cf_options_map; /////////////////////////////////////////////////////////// @@ -125,20 +123,18 @@ Rdb_cf_options rocksdb_cf_options_map; /////////////////////////////////////////////////////////// handlerton *rocksdb_hton; -rocksdb::TransactionDB *rdb= nullptr; +rocksdb::TransactionDB *rdb = nullptr; static std::shared_ptr<rocksdb::Statistics> rocksdb_stats; static std::unique_ptr<rocksdb::Env> flashcache_aware_env; -static std::shared_ptr<Rdb_tbl_prop_coll_factory> - properties_collector_factory; +static std::shared_ptr<Rdb_tbl_prop_coll_factory> properties_collector_factory; Rdb_dict_manager dict_manager; Rdb_cf_manager cf_manager; Rdb_ddl_manager ddl_manager; -const char* m_mysql_gtid; +const char *m_mysql_gtid; Rdb_binlog_manager binlog_manager; - /** MyRocks background thread control N.B. This is besides RocksDB's own background threads @@ -147,127 +143,107 @@ Rdb_binlog_manager binlog_manager; static Rdb_background_thread rdb_bg_thread; - // List of table names (using regex) that are exceptions to the strict // collation check requirement. 
Regex_list_handler *rdb_collation_exceptions; -static const char* const ERRSTR_ROLLBACK_ONLY - = "This transaction was rolled back and cannot be " +static const char *const ERRSTR_ROLLBACK_ONLY = + "This transaction was rolled back and cannot be " "committed. Only supported operation is to roll it back, " "so all pending changes will be discarded. " "Please restart another transaction."; - -static void -rocksdb_flush_all_memtables() -{ - const Rdb_cf_manager& cf_manager= rdb_get_cf_manager(); +static void rocksdb_flush_all_memtables() { + const Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); for (const auto &cf_handle : cf_manager.get_all_cf()) { rdb->Flush(rocksdb::FlushOptions(), cf_handle); } } -static void -rocksdb_compact_column_family_stub(THD* const thd, - struct st_mysql_sys_var* const var, - void* const var_ptr, - const void* const save) -{ -} +static void rocksdb_compact_column_family_stub( + THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr, + const void *const save) {} -static int -rocksdb_compact_column_family(THD* const thd, - struct st_mysql_sys_var* const var, - void* const var_ptr, - struct st_mysql_value* const value) -{ +static int rocksdb_compact_column_family(THD *const thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + struct st_mysql_value *const value) { char buff[STRING_BUFFER_USUAL_SIZE]; - int len = sizeof(buff); + int len = sizeof(buff); DBUG_ASSERT(value != nullptr); - if (const char* const cf = value->val_str(value, buff, &len)) { + if (const char *const cf = value->val_str(value, buff, &len)) { bool is_automatic; auto cfh = cf_manager.get_cf(cf, "", nullptr, &is_automatic); if (cfh != nullptr && rdb != nullptr) { - sql_print_information("RocksDB: Manual compaction of column family: %s\n", cf); + sql_print_information("RocksDB: Manual compaction of column family: %s\n", + cf); rdb->CompactRange(rocksdb::CompactRangeOptions(), cfh, nullptr, nullptr); } } - return 0; + return HA_EXIT_SUCCESS; } 
/////////////////////////////////////////////////////////// // Hash map: table name => open table handler /////////////////////////////////////////////////////////// -namespace // anonymous namespace = not visible outside this source file +namespace // anonymous namespace = not visible outside this source file { -const ulong TABLE_HASH_SIZE= 32; +const ulong TABLE_HASH_SIZE = 32; -struct Rdb_open_tables_map -{ +struct Rdb_open_tables_map { /* Hash table used to track the handlers of open tables */ - my_core::HASH m_hash; + my_core::HASH m_hash; /* The mutex used to protect the hash table */ mutable mysql_mutex_t m_mutex; - void init_hash(void) - { - (void) my_hash_init(&m_hash, my_core::system_charset_info, TABLE_HASH_SIZE, - 0, 0, - (my_hash_get_key) Rdb_open_tables_map::get_hash_key, - 0, 0); + void init_hash(void) { + (void)my_hash_init(&m_hash, my_core::system_charset_info, TABLE_HASH_SIZE, + 0, 0, (my_hash_get_key)Rdb_open_tables_map::get_hash_key, + 0, 0); } - void free_hash(void) - { - my_hash_free(&m_hash); - } + void free_hash(void) { my_hash_free(&m_hash); } - static uchar* get_hash_key(Rdb_table_handler* const table_handler, - size_t* const length, - my_bool not_used __attribute__((__unused__))); + static uchar *get_hash_key(Rdb_table_handler *const table_handler, + size_t *const length, + my_bool not_used MY_ATTRIBUTE((__unused__))); - Rdb_table_handler* get_table_handler(const char* const table_name); - void release_table_handler(Rdb_table_handler* const table_handler); + Rdb_table_handler *get_table_handler(const char *const table_name); + void release_table_handler(Rdb_table_handler *const table_handler); std::vector<std::string> get_table_names(void) const; }; -} // anonymous namespace +} // anonymous namespace static Rdb_open_tables_map rdb_open_tables; - -static std::string rdb_normalize_dir(std::string dir) -{ - while (dir.size() > 0 && dir.back() == '/') - { +static std::string rdb_normalize_dir(std::string dir) { + while (dir.size() > 0 && 
dir.back() == '/') { dir.resize(dir.size() - 1); } return dir; } - static int rocksdb_create_checkpoint( - THD* const thd __attribute__((__unused__)), - struct st_mysql_sys_var* const var __attribute__((__unused__)), - void* const save __attribute__((__unused__)), - struct st_mysql_value* const value) -{ + THD *const thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const save MY_ATTRIBUTE((__unused__)), + struct st_mysql_value *const value) { char buf[FN_REFLEN]; int len = sizeof(buf); - const char* const checkpoint_dir_raw= value->val_str(value, buf, &len); + const char *const checkpoint_dir_raw = value->val_str(value, buf, &len); if (checkpoint_dir_raw) { if (rdb != nullptr) { - std::string checkpoint_dir= rdb_normalize_dir(checkpoint_dir_raw); + std::string checkpoint_dir = rdb_normalize_dir(checkpoint_dir_raw); // NO_LINT_DEBUG sql_print_information("RocksDB: creating checkpoint in directory : %s\n", - checkpoint_dir.c_str()); - rocksdb::Checkpoint* checkpoint; + checkpoint_dir.c_str()); + rocksdb::Checkpoint *checkpoint; auto status = rocksdb::Checkpoint::Create(rdb, &checkpoint); if (status.ok()) { status = checkpoint->CreateCheckpoint(checkpoint_dir.c_str()); @@ -284,613 +260,612 @@ static int rocksdb_create_checkpoint( delete checkpoint; } else { const std::string err_text(status.ToString()); - my_printf_error(ER_UNKNOWN_ERROR, - "RocksDB: failed to initialize checkpoint. status %d %s\n", - MYF(0), status.code(), err_text.c_str()); + my_printf_error( + ER_UNKNOWN_ERROR, + "RocksDB: failed to initialize checkpoint. 
status %d %s\n", MYF(0), + status.code(), err_text.c_str()); } return status.code(); - } + } } return HA_ERR_INTERNAL_ERROR; } /* This method is needed to indicate that the ROCKSDB_CREATE_CHECKPOINT command is not read-only */ -static void -rocksdb_create_checkpoint_stub(THD* const thd, - struct st_mysql_sys_var* const var, - void* const var_ptr, - const void* const save) -{ -} - -static void -rocksdb_force_flush_memtable_now_stub(THD* const thd, - struct st_mysql_sys_var* const var, - void* const var_ptr, - const void* const save) -{ -} - -static int -rocksdb_force_flush_memtable_now(THD* const thd, - struct st_mysql_sys_var* const var, - void* const var_ptr, - struct st_mysql_value* const value) -{ +static void rocksdb_create_checkpoint_stub(THD *const thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save) {} + +static void rocksdb_force_flush_memtable_now_stub( + THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr, + const void *const save) {} + +static int rocksdb_force_flush_memtable_now( + THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr, + struct st_mysql_value *const value) { sql_print_information("RocksDB: Manual memtable flush\n"); rocksdb_flush_all_memtables(); - return 0; + return HA_EXIT_SUCCESS; } static void rocksdb_drop_index_wakeup_thread( - my_core::THD* const thd __attribute__((__unused__)), - struct st_mysql_sys_var* const var __attribute__((__unused__)), - void* const var_ptr __attribute__((__unused__)), - const void* const save); + my_core::THD *const thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save); -static my_bool rocksdb_pause_background_work= 0; +static my_bool rocksdb_pause_background_work = 0; static mysql_mutex_t rdb_sysvars_mutex; static void rocksdb_set_pause_background_work( - my_core::THD* const thd __attribute__((__unused__)), - 
struct st_mysql_sys_var* const var __attribute__((__unused__)), - void* const var_ptr __attribute__((__unused__)), - const void* const save) -{ + my_core::THD *const thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { mysql_mutex_lock(&rdb_sysvars_mutex); - const bool pause_requested= *static_cast<const bool*>(save); + const bool pause_requested = *static_cast<const bool *>(save); if (rocksdb_pause_background_work != pause_requested) { if (pause_requested) { rdb->PauseBackgroundWork(); } else { rdb->ContinueBackgroundWork(); } - rocksdb_pause_background_work= pause_requested; + rocksdb_pause_background_work = pause_requested; } mysql_mutex_unlock(&rdb_sysvars_mutex); } -static void -rocksdb_set_compaction_options(THD* thd, - struct st_mysql_sys_var* var, - void* var_ptr, - const void* save); +static void rocksdb_set_compaction_options(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, const void *save); -static void -rocksdb_set_table_stats_sampling_pct(THD* thd, - struct st_mysql_sys_var* var, - void* var_ptr, - const void* save); +static void rocksdb_set_table_stats_sampling_pct(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, + const void *save); -static void -rocksdb_set_rate_limiter_bytes_per_sec(THD* thd, - struct st_mysql_sys_var* var, - void* var_ptr, - const void* save); +static void rocksdb_set_rate_limiter_bytes_per_sec(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, + const void *save); static void rdb_set_collation_exception_list(const char *exception_list); -static void -rocksdb_set_collation_exception_list(THD* thd, - struct st_mysql_sys_var* var, - void* var_ptr, - const void* save); +static void rocksdb_set_collation_exception_list(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, + const void *save); static void -rocksdb_set_bulk_load(THD* thd, - struct st_mysql_sys_var* var 
__attribute__((__unused__)), - void* var_ptr, - const void* save); +rocksdb_set_bulk_load(THD *thd, + struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), + void *var_ptr, const void *save); + +static void rocksdb_set_max_background_compactions( + THD *thd, struct st_mysql_sys_var *const var, void *const var_ptr, + const void *const save); ////////////////////////////////////////////////////////////////////////////// // Options definitions ////////////////////////////////////////////////////////////////////////////// static long long rocksdb_block_cache_size; /* Use unsigned long long instead of uint64_t because of MySQL compatibility */ -static unsigned long long // NOLINT(runtime/int) +static unsigned long long // NOLINT(runtime/int) rocksdb_rate_limiter_bytes_per_sec; +static unsigned long // NOLINT(runtime/int) + rocksdb_persistent_cache_size; static uint64_t rocksdb_info_log_level; -static char * rocksdb_wal_dir; +static char *rocksdb_wal_dir; +static char *rocksdb_persistent_cache_path; static uint64_t rocksdb_index_type; static char rocksdb_background_sync; static uint32_t rocksdb_debug_optimizer_n_rows; static my_bool rocksdb_debug_optimizer_no_zero_cardinality; static uint32_t rocksdb_wal_recovery_mode; static uint32_t rocksdb_access_hint_on_compaction_start; -static char * rocksdb_compact_cf_name; -static char * rocksdb_checkpoint_name; +static char *rocksdb_compact_cf_name; +static char *rocksdb_checkpoint_name; static my_bool rocksdb_signal_drop_index_thread; -static my_bool rocksdb_strict_collation_check= 1; -static my_bool rocksdb_disable_2pc= 0; -static char * rocksdb_strict_collation_exceptions; -static my_bool rocksdb_collect_sst_properties= 1; -static my_bool rocksdb_force_flush_memtable_now_var= 0; -static uint64_t rocksdb_number_stat_computes= 0; -static uint32_t rocksdb_seconds_between_stat_computes= 3600; -static long long rocksdb_compaction_sequential_deletes= 0l; -static long long rocksdb_compaction_sequential_deletes_window= 0l; -static 
long long rocksdb_compaction_sequential_deletes_file_size= 0l; +static my_bool rocksdb_strict_collation_check = 1; +static my_bool rocksdb_enable_2pc = 0; +static char *rocksdb_strict_collation_exceptions; +static my_bool rocksdb_collect_sst_properties = 1; +static my_bool rocksdb_force_flush_memtable_now_var = 0; +static uint64_t rocksdb_number_stat_computes = 0; +static uint32_t rocksdb_seconds_between_stat_computes = 3600; +static long long rocksdb_compaction_sequential_deletes = 0l; +static long long rocksdb_compaction_sequential_deletes_window = 0l; +static long long rocksdb_compaction_sequential_deletes_file_size = 0l; static uint32_t rocksdb_validate_tables = 1; -static char * rocksdb_datadir; +static char *rocksdb_datadir; static uint32_t rocksdb_table_stats_sampling_pct; -static my_bool rocksdb_enable_bulk_load_api= 1; -static my_bool rpl_skip_tx_api_var= 0; -static my_bool rocksdb_print_snapshot_conflict_queries= 0; +static my_bool rocksdb_enable_bulk_load_api = 1; +static my_bool rocksdb_print_snapshot_conflict_queries = 0; std::atomic<uint64_t> rocksdb_snapshot_conflict_errors(0); std::atomic<uint64_t> rocksdb_wal_group_syncs(0); -static rocksdb::DBOptions rdb_init_rocksdb_db_options(void) -{ +static rocksdb::DBOptions rdb_init_rocksdb_db_options(void) { rocksdb::DBOptions o; - o.create_if_missing= true; + o.create_if_missing = true; o.listeners.push_back(std::make_shared<Rdb_event_listener>(&ddl_manager)); - o.info_log_level= rocksdb::InfoLogLevel::INFO_LEVEL; - o.max_subcompactions= DEFAULT_SUBCOMPACTIONS; + o.info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL; + o.max_subcompactions = DEFAULT_SUBCOMPACTIONS; return o; } -static rocksdb::DBOptions rocksdb_db_options= rdb_init_rocksdb_db_options(); +static rocksdb::DBOptions rocksdb_db_options = rdb_init_rocksdb_db_options(); static rocksdb::BlockBasedTableOptions rocksdb_tbl_options; static std::shared_ptr<rocksdb::RateLimiter> rocksdb_rate_limiter; /* This enum needs to be kept up to date with 
rocksdb::InfoLogLevel */ -static const char* info_log_level_names[] = { - "debug_level", - "info_level", - "warn_level", - "error_level", - "fatal_level", - NullS -}; +static const char *info_log_level_names[] = {"debug_level", "info_level", + "warn_level", "error_level", + "fatal_level", NullS}; static TYPELIB info_log_level_typelib = { - array_elements(info_log_level_names) - 1, - "info_log_level_typelib", - info_log_level_names, - nullptr -}; + array_elements(info_log_level_names) - 1, "info_log_level_typelib", + info_log_level_names, nullptr}; -static void -rocksdb_set_rocksdb_info_log_level(THD* const thd, - struct st_mysql_sys_var* const var, - void* const var_ptr, - const void* const save) -{ +static void rocksdb_set_rocksdb_info_log_level( + THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr, + const void *const save) { DBUG_ASSERT(save != nullptr); mysql_mutex_lock(&rdb_sysvars_mutex); - rocksdb_info_log_level = *static_cast<const uint64_t*>(save); + rocksdb_info_log_level = *static_cast<const uint64_t *>(save); rocksdb_db_options.info_log->SetInfoLogLevel( static_cast<const rocksdb::InfoLogLevel>(rocksdb_info_log_level)); mysql_mutex_unlock(&rdb_sysvars_mutex); } -static const char* index_type_names[] = { - "kBinarySearch", - "kHashSearch", - NullS -}; +static const char *index_type_names[] = {"kBinarySearch", "kHashSearch", NullS}; -static TYPELIB index_type_typelib = { - array_elements(index_type_names) - 1, - "index_type_typelib", - index_type_names, - nullptr -}; +static TYPELIB index_type_typelib = {array_elements(index_type_names) - 1, + "index_type_typelib", index_type_names, + nullptr}; + +const ulong RDB_MAX_LOCK_WAIT_SECONDS = 1024 * 1024 * 1024; +const ulong RDB_MAX_ROW_LOCKS = 1024 * 1024 * 1024; +const ulong RDB_DEFAULT_BULK_LOAD_SIZE = 1000; +const ulong RDB_MAX_BULK_LOAD_SIZE = 1024 * 1024 * 1024; +const size_t RDB_DEFAULT_MERGE_BUF_SIZE = 64 * 1024 * 1024; +const size_t RDB_MIN_MERGE_BUF_SIZE = 100; +const size_t 
RDB_DEFAULT_MERGE_COMBINE_READ_SIZE = 1024 * 1024 * 1024; +const size_t RDB_MIN_MERGE_COMBINE_READ_SIZE = 100; +const int64 RDB_DEFAULT_BLOCK_CACHE_SIZE = 512 * 1024 * 1024; +const int64 RDB_MIN_BLOCK_CACHE_SIZE = 1024; +const int RDB_MAX_CHECKSUMS_PCT = 100; -const ulong RDB_MAX_LOCK_WAIT_SECONDS= 1024*1024*1024; -const ulong RDB_MAX_ROW_LOCKS= 1024*1024*1024; -const ulong RDB_DEFAULT_BULK_LOAD_SIZE= 1000; -const ulong RDB_MAX_BULK_LOAD_SIZE= 1024*1024*1024; -const size_t RDB_DEFAULT_MERGE_BUF_SIZE= 64*1024*1024; -const size_t RDB_MIN_MERGE_BUF_SIZE= 100; -const size_t RDB_DEFAULT_MERGE_COMBINE_READ_SIZE= 1024*1024*1024; -const size_t RDB_MIN_MERGE_COMBINE_READ_SIZE= 100; -const int64 RDB_DEFAULT_BLOCK_CACHE_SIZE= 512*1024*1024; -const int64 RDB_MIN_BLOCK_CACHE_SIZE= 1024; -const int RDB_MAX_CHECKSUMS_PCT= 100; - -//TODO: 0 means don't wait at all, and we don't support it yet? +// TODO: 0 means don't wait at all, and we don't support it yet? static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG, - "Number of seconds to wait for lock", - nullptr, nullptr, /*default*/ 1, /*min*/ 1, - /*max*/ RDB_MAX_LOCK_WAIT_SECONDS, 0); + "Number of seconds to wait for lock", nullptr, + nullptr, /*default*/ 1, /*min*/ 1, + /*max*/ RDB_MAX_LOCK_WAIT_SECONDS, 0); static MYSQL_THDVAR_BOOL(deadlock_detect, PLUGIN_VAR_RQCMDARG, - "Enables deadlock detection", nullptr, nullptr, FALSE); - -static MYSQL_THDVAR_BOOL(trace_sst_api, PLUGIN_VAR_RQCMDARG, - "Generate trace output in the log for each call to the SstFileWriter", - nullptr, nullptr, FALSE); - -static MYSQL_THDVAR_BOOL(bulk_load, PLUGIN_VAR_RQCMDARG, - "Use bulk-load mode for inserts. 
This enables both " - "rocksdb_skip_unique_check and rocksdb_commit_in_the_middle.", - nullptr, rocksdb_set_bulk_load, FALSE); - -static MYSQL_SYSVAR_BOOL(enable_bulk_load_api, - rocksdb_enable_bulk_load_api, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Enables using SstFileWriter for bulk loading", - nullptr, nullptr, rocksdb_enable_bulk_load_api); - -static MYSQL_THDVAR_STR(tmpdir, - PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC, - "Directory for temporary files during DDL operations.", - nullptr, nullptr, ""); - -static MYSQL_THDVAR_STR(skip_unique_check_tables, - PLUGIN_VAR_RQCMDARG|PLUGIN_VAR_MEMALLOC, - "Skip unique constraint checking for the specified tables", nullptr, nullptr, - ".*"); - -static MYSQL_THDVAR_BOOL(skip_unique_check, PLUGIN_VAR_RQCMDARG, - "Skip unique constraint checking for all tables", nullptr, nullptr, FALSE); - -static MYSQL_THDVAR_BOOL(commit_in_the_middle, PLUGIN_VAR_RQCMDARG, - "Commit rows implicitly every rocksdb_bulk_load_size, on bulk load/insert, " - "update and delete", - nullptr, nullptr, FALSE); - -static MYSQL_THDVAR_STR(read_free_rpl_tables, - PLUGIN_VAR_RQCMDARG|PLUGIN_VAR_MEMALLOC, - "List of tables that will use read-free replication on the slave " - "(i.e. not lookup a row during replication)", nullptr, nullptr, ""); + "Enables deadlock detection", nullptr, nullptr, FALSE); -static MYSQL_SYSVAR_BOOL( - rpl_skip_tx_api, - rpl_skip_tx_api_var, - PLUGIN_VAR_RQCMDARG, - "Use write batches for replication thread instead of tx api", nullptr, - nullptr, FALSE); +static MYSQL_THDVAR_BOOL( + trace_sst_api, PLUGIN_VAR_RQCMDARG, + "Generate trace output in the log for each call to the SstFileWriter", + nullptr, nullptr, FALSE); + +static MYSQL_THDVAR_BOOL( + bulk_load, PLUGIN_VAR_RQCMDARG, + "Use bulk-load mode for inserts. 
This disables " + "unique_checks and enables rocksdb_commit_in_the_middle.", + nullptr, rocksdb_set_bulk_load, FALSE); + +static MYSQL_SYSVAR_BOOL(enable_bulk_load_api, rocksdb_enable_bulk_load_api, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Enables using SstFileWriter for bulk loading", + nullptr, nullptr, rocksdb_enable_bulk_load_api); + +static MYSQL_THDVAR_STR(tmpdir, PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC, + "Directory for temporary files during DDL operations.", + nullptr, nullptr, ""); + +static MYSQL_THDVAR_STR( + skip_unique_check_tables, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, + "Skip unique constraint checking for the specified tables", nullptr, + nullptr, ".*"); + +static MYSQL_THDVAR_BOOL( + commit_in_the_middle, PLUGIN_VAR_RQCMDARG, + "Commit rows implicitly every rocksdb_bulk_load_size, on bulk load/insert, " + "update and delete", + nullptr, nullptr, FALSE); + +static MYSQL_THDVAR_STR( + read_free_rpl_tables, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, + "List of tables that will use read-free replication on the slave " + "(i.e. 
not lookup a row during replication)", + nullptr, nullptr, ""); static MYSQL_THDVAR_BOOL(skip_bloom_filter_on_read, PLUGIN_VAR_RQCMDARG, - "Skip using bloom filter for reads", nullptr, nullptr, FALSE); + "Skip using bloom filter for reads", nullptr, nullptr, + FALSE); static MYSQL_THDVAR_ULONG(max_row_locks, PLUGIN_VAR_RQCMDARG, - "Maximum number of locks a transaction can have", - nullptr, nullptr, - /*default*/ RDB_MAX_ROW_LOCKS, - /*min*/ 1, - /*max*/ RDB_MAX_ROW_LOCKS, 0); + "Maximum number of locks a transaction can have", + nullptr, nullptr, + /*default*/ RDB_MAX_ROW_LOCKS, + /*min*/ 1, + /*max*/ RDB_MAX_ROW_LOCKS, 0); -static MYSQL_THDVAR_BOOL(lock_scanned_rows, PLUGIN_VAR_RQCMDARG, - "Take and hold locks on rows that are scanned but not updated", - nullptr, nullptr, FALSE); +static MYSQL_THDVAR_BOOL( + lock_scanned_rows, PLUGIN_VAR_RQCMDARG, + "Take and hold locks on rows that are scanned but not updated", nullptr, + nullptr, FALSE); static MYSQL_THDVAR_ULONG(bulk_load_size, PLUGIN_VAR_RQCMDARG, - "Max #records in a batch for bulk-load mode", - nullptr, nullptr, - /*default*/ RDB_DEFAULT_BULK_LOAD_SIZE, - /*min*/ 1, - /*max*/ RDB_MAX_BULK_LOAD_SIZE, 0); - -static MYSQL_THDVAR_ULONGLONG(merge_buf_size, PLUGIN_VAR_RQCMDARG, - "Size to allocate for merge sort buffers written out to disk " - "during inplace index creation.", - nullptr, nullptr, - /* default (64MB) */ RDB_DEFAULT_MERGE_BUF_SIZE, - /* min (100B) */ RDB_MIN_MERGE_BUF_SIZE, - /* max */ SIZE_T_MAX, 1); - -static MYSQL_THDVAR_ULONGLONG(merge_combine_read_size, PLUGIN_VAR_RQCMDARG, - "Size that we have to work with during combine (reading from disk) phase of " - "external sort during fast index creation.", - nullptr, nullptr, - /* default (1GB) */ RDB_DEFAULT_MERGE_COMBINE_READ_SIZE, - /* min (100B) */ RDB_MIN_MERGE_COMBINE_READ_SIZE, - /* max */ SIZE_T_MAX, 1); - -static MYSQL_SYSVAR_BOOL(create_if_missing, - *reinterpret_cast<my_bool*>(&rocksdb_db_options.create_if_missing), - PLUGIN_VAR_RQCMDARG | 
PLUGIN_VAR_READONLY, - "DBOptions::create_if_missing for RocksDB", - nullptr, nullptr, rocksdb_db_options.create_if_missing); - -static MYSQL_SYSVAR_BOOL(create_missing_column_families, - *reinterpret_cast<my_bool*>( - &rocksdb_db_options.create_missing_column_families), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::create_missing_column_families for RocksDB", - nullptr, nullptr, rocksdb_db_options.create_missing_column_families); - -static MYSQL_SYSVAR_BOOL(error_if_exists, - *reinterpret_cast<my_bool*>(&rocksdb_db_options.error_if_exists), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::error_if_exists for RocksDB", - nullptr, nullptr, rocksdb_db_options.error_if_exists); - -static MYSQL_SYSVAR_BOOL(paranoid_checks, - *reinterpret_cast<my_bool*>(&rocksdb_db_options.paranoid_checks), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::paranoid_checks for RocksDB", - nullptr, nullptr, rocksdb_db_options.paranoid_checks); - -static MYSQL_SYSVAR_ULONGLONG(rate_limiter_bytes_per_sec, - rocksdb_rate_limiter_bytes_per_sec, - PLUGIN_VAR_RQCMDARG, - "DBOptions::rate_limiter bytes_per_sec for RocksDB", - nullptr, rocksdb_set_rate_limiter_bytes_per_sec, /* default */ 0L, - /* min */ 0L, /* max */ MAX_RATE_LIMITER_BYTES_PER_SEC, 0); - -static MYSQL_SYSVAR_ENUM(info_log_level, - rocksdb_info_log_level, - PLUGIN_VAR_RQCMDARG, - "Filter level for info logs to be written mysqld error log. 
" - "Valid values include 'debug_level', 'info_level', 'warn_level'" - "'error_level' and 'fatal_level'.", - nullptr, rocksdb_set_rocksdb_info_log_level, - rocksdb::InfoLogLevel::ERROR_LEVEL, &info_log_level_typelib); - -static MYSQL_THDVAR_INT(perf_context_level, - PLUGIN_VAR_RQCMDARG, - "Perf Context Level for rocksdb internal timer stat collection", - nullptr, nullptr, - /* default */ rocksdb::PerfLevel::kUninitialized, - /* min */ rocksdb::PerfLevel::kUninitialized, - /* max */ rocksdb::PerfLevel::kOutOfBounds - 1, 0); - -static MYSQL_SYSVAR_UINT(wal_recovery_mode, - rocksdb_wal_recovery_mode, - PLUGIN_VAR_RQCMDARG, - "DBOptions::wal_recovery_mode for RocksDB", - nullptr, nullptr, - /* default */ (uint) rocksdb::WALRecoveryMode::kPointInTimeRecovery, - /* min */ (uint) rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords, - /* max */ (uint) rocksdb::WALRecoveryMode::kSkipAnyCorruptedRecords, 0); + "Max #records in a batch for bulk-load mode", nullptr, + nullptr, + /*default*/ RDB_DEFAULT_BULK_LOAD_SIZE, + /*min*/ 1, + /*max*/ RDB_MAX_BULK_LOAD_SIZE, 0); + +static MYSQL_THDVAR_ULONGLONG( + merge_buf_size, PLUGIN_VAR_RQCMDARG, + "Size to allocate for merge sort buffers written out to disk " + "during inplace index creation.", + nullptr, nullptr, + /* default (64MB) */ RDB_DEFAULT_MERGE_BUF_SIZE, + /* min (100B) */ RDB_MIN_MERGE_BUF_SIZE, + /* max */ SIZE_T_MAX, 1); + +static MYSQL_THDVAR_ULONGLONG( + merge_combine_read_size, PLUGIN_VAR_RQCMDARG, + "Size that we have to work with during combine (reading from disk) phase " + "of " + "external sort during fast index creation.", + nullptr, nullptr, + /* default (1GB) */ RDB_DEFAULT_MERGE_COMBINE_READ_SIZE, + /* min (100B) */ RDB_MIN_MERGE_COMBINE_READ_SIZE, + /* max */ SIZE_T_MAX, 1); + +static MYSQL_SYSVAR_BOOL( + create_if_missing, + *reinterpret_cast<my_bool *>(&rocksdb_db_options.create_if_missing), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::create_if_missing for RocksDB", nullptr, nullptr, + 
rocksdb_db_options.create_if_missing); + +static MYSQL_SYSVAR_BOOL( + create_missing_column_families, + *reinterpret_cast<my_bool *>( + &rocksdb_db_options.create_missing_column_families), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::create_missing_column_families for RocksDB", nullptr, nullptr, + rocksdb_db_options.create_missing_column_families); + +static MYSQL_SYSVAR_BOOL( + error_if_exists, + *reinterpret_cast<my_bool *>(&rocksdb_db_options.error_if_exists), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::error_if_exists for RocksDB", nullptr, nullptr, + rocksdb_db_options.error_if_exists); + +static MYSQL_SYSVAR_BOOL( + paranoid_checks, + *reinterpret_cast<my_bool *>(&rocksdb_db_options.paranoid_checks), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::paranoid_checks for RocksDB", nullptr, nullptr, + rocksdb_db_options.paranoid_checks); + +static MYSQL_SYSVAR_ULONGLONG( + rate_limiter_bytes_per_sec, rocksdb_rate_limiter_bytes_per_sec, + PLUGIN_VAR_RQCMDARG, "DBOptions::rate_limiter bytes_per_sec for RocksDB", + nullptr, rocksdb_set_rate_limiter_bytes_per_sec, /* default */ 0L, + /* min */ 0L, /* max */ MAX_RATE_LIMITER_BYTES_PER_SEC, 0); + +static MYSQL_SYSVAR_ENUM( + info_log_level, rocksdb_info_log_level, PLUGIN_VAR_RQCMDARG, + "Filter level for info logs to be written mysqld error log. 
" + "Valid values include 'debug_level', 'info_level', 'warn_level'" + "'error_level' and 'fatal_level'.", + nullptr, rocksdb_set_rocksdb_info_log_level, + rocksdb::InfoLogLevel::ERROR_LEVEL, &info_log_level_typelib); + +static MYSQL_THDVAR_INT( + perf_context_level, PLUGIN_VAR_RQCMDARG, + "Perf Context Level for rocksdb internal timer stat collection", nullptr, + nullptr, + /* default */ rocksdb::PerfLevel::kUninitialized, + /* min */ rocksdb::PerfLevel::kUninitialized, + /* max */ rocksdb::PerfLevel::kOutOfBounds - 1, 0); + +static MYSQL_SYSVAR_UINT( + wal_recovery_mode, rocksdb_wal_recovery_mode, PLUGIN_VAR_RQCMDARG, + "DBOptions::wal_recovery_mode for RocksDB", nullptr, nullptr, + /* default */ (uint)rocksdb::WALRecoveryMode::kPointInTimeRecovery, + /* min */ (uint)rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords, + /* max */ (uint)rocksdb::WALRecoveryMode::kSkipAnyCorruptedRecords, 0); static MYSQL_SYSVAR_ULONG(compaction_readahead_size, - rocksdb_db_options.compaction_readahead_size, - PLUGIN_VAR_RQCMDARG, - "DBOptions::compaction_readahead_size for RocksDB", - nullptr, nullptr, rocksdb_db_options.compaction_readahead_size, - /* min */ 0L, /* max */ ULONG_MAX, 0); - -static MYSQL_SYSVAR_BOOL(new_table_reader_for_compaction_inputs, - *reinterpret_cast<my_bool*> - (&rocksdb_db_options.new_table_reader_for_compaction_inputs), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::new_table_reader_for_compaction_inputs for RocksDB", - nullptr, nullptr, rocksdb_db_options.new_table_reader_for_compaction_inputs); - -static MYSQL_SYSVAR_UINT(access_hint_on_compaction_start, - rocksdb_access_hint_on_compaction_start, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::access_hint_on_compaction_start for RocksDB", - nullptr, nullptr, - /* default */ (uint) rocksdb::Options::AccessHint::NORMAL, - /* min */ (uint) rocksdb::Options::AccessHint::NONE, - /* max */ (uint) rocksdb::Options::AccessHint::WILLNEED, 0); - -static 
MYSQL_SYSVAR_BOOL(allow_concurrent_memtable_write, - *reinterpret_cast<my_bool*>( - &rocksdb_db_options.allow_concurrent_memtable_write), - PLUGIN_VAR_RQCMDARG, - "DBOptions::allow_concurrent_memtable_write for RocksDB", - nullptr, nullptr, false); - -static MYSQL_SYSVAR_BOOL(enable_write_thread_adaptive_yield, - *reinterpret_cast<my_bool*>( - &rocksdb_db_options.enable_write_thread_adaptive_yield), - PLUGIN_VAR_RQCMDARG, - "DBOptions::enable_write_thread_adaptive_yield for RocksDB", - nullptr, nullptr, false); - -static MYSQL_SYSVAR_INT(max_open_files, - rocksdb_db_options.max_open_files, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::max_open_files for RocksDB", - nullptr, nullptr, rocksdb_db_options.max_open_files, - /* min */ -1, /* max */ INT_MAX, 0); + rocksdb_db_options.compaction_readahead_size, + PLUGIN_VAR_RQCMDARG, + "DBOptions::compaction_readahead_size for RocksDB", + nullptr, nullptr, + rocksdb_db_options.compaction_readahead_size, + /* min */ 0L, /* max */ ULONG_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + new_table_reader_for_compaction_inputs, + *reinterpret_cast<my_bool *>( + &rocksdb_db_options.new_table_reader_for_compaction_inputs), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::new_table_reader_for_compaction_inputs for RocksDB", nullptr, + nullptr, rocksdb_db_options.new_table_reader_for_compaction_inputs); + +static MYSQL_SYSVAR_UINT( + access_hint_on_compaction_start, rocksdb_access_hint_on_compaction_start, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::access_hint_on_compaction_start for RocksDB", nullptr, nullptr, + /* default */ (uint)rocksdb::Options::AccessHint::NORMAL, + /* min */ (uint)rocksdb::Options::AccessHint::NONE, + /* max */ (uint)rocksdb::Options::AccessHint::WILLNEED, 0); + +static MYSQL_SYSVAR_BOOL( + allow_concurrent_memtable_write, + *reinterpret_cast<my_bool *>( + &rocksdb_db_options.allow_concurrent_memtable_write), + PLUGIN_VAR_RQCMDARG, + "DBOptions::allow_concurrent_memtable_write for 
RocksDB", nullptr, nullptr, + false); + +static MYSQL_SYSVAR_BOOL( + enable_write_thread_adaptive_yield, + *reinterpret_cast<my_bool *>( + &rocksdb_db_options.enable_write_thread_adaptive_yield), + PLUGIN_VAR_RQCMDARG, + "DBOptions::enable_write_thread_adaptive_yield for RocksDB", nullptr, + nullptr, false); + +static MYSQL_SYSVAR_INT(max_open_files, rocksdb_db_options.max_open_files, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::max_open_files for RocksDB", nullptr, + nullptr, rocksdb_db_options.max_open_files, + /* min */ -1, /* max */ INT_MAX, 0); static MYSQL_SYSVAR_ULONG(max_total_wal_size, - rocksdb_db_options.max_total_wal_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::max_total_wal_size for RocksDB", - nullptr, nullptr, rocksdb_db_options.max_total_wal_size, - /* min */ 0L, /* max */ LONG_MAX, 0); - -static MYSQL_SYSVAR_BOOL(disabledatasync, - *reinterpret_cast<my_bool*>(&rocksdb_db_options.disableDataSync), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::disableDataSync for RocksDB", - nullptr, nullptr, rocksdb_db_options.disableDataSync); - -static MYSQL_SYSVAR_BOOL(use_fsync, - *reinterpret_cast<my_bool*>(&rocksdb_db_options.use_fsync), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::use_fsync for RocksDB", - nullptr, nullptr, rocksdb_db_options.use_fsync); + rocksdb_db_options.max_total_wal_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::max_total_wal_size for RocksDB", nullptr, + nullptr, rocksdb_db_options.max_total_wal_size, + /* min */ 0L, /* max */ LONG_MAX, 0); -static MYSQL_SYSVAR_STR(wal_dir, rocksdb_wal_dir, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::wal_dir for RocksDB", - nullptr, nullptr, rocksdb_db_options.wal_dir.c_str()); +static MYSQL_SYSVAR_BOOL( + disabledatasync, + *reinterpret_cast<my_bool *>(&rocksdb_db_options.disableDataSync), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::disableDataSync for RocksDB", nullptr, nullptr, + 
rocksdb_db_options.disableDataSync); -static MYSQL_SYSVAR_ULONG(delete_obsolete_files_period_micros, - rocksdb_db_options.delete_obsolete_files_period_micros, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::delete_obsolete_files_period_micros for RocksDB", - nullptr, nullptr, rocksdb_db_options.delete_obsolete_files_period_micros, - /* min */ 0L, /* max */ LONG_MAX, 0); +static MYSQL_SYSVAR_BOOL( + use_fsync, *reinterpret_cast<my_bool *>(&rocksdb_db_options.use_fsync), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::use_fsync for RocksDB", nullptr, nullptr, + rocksdb_db_options.use_fsync); + +static MYSQL_SYSVAR_STR(wal_dir, rocksdb_wal_dir, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::wal_dir for RocksDB", nullptr, nullptr, + rocksdb_db_options.wal_dir.c_str()); + +static MYSQL_SYSVAR_STR( + persistent_cache_path, rocksdb_persistent_cache_path, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Path for BlockBasedTableOptions::persistent_cache for RocksDB", nullptr, + nullptr, ""); + +static MYSQL_SYSVAR_ULONG( + persistent_cache_size, rocksdb_persistent_cache_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Size of cache for BlockBasedTableOptions::persistent_cache for RocksDB", + nullptr, nullptr, rocksdb_persistent_cache_size, + /* min */ 0L, /* max */ ULONG_MAX, 0); + +static MYSQL_SYSVAR_ULONG( + delete_obsolete_files_period_micros, + rocksdb_db_options.delete_obsolete_files_period_micros, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::delete_obsolete_files_period_micros for RocksDB", nullptr, + nullptr, rocksdb_db_options.delete_obsolete_files_period_micros, + /* min */ 0L, /* max */ LONG_MAX, 0); static MYSQL_SYSVAR_INT(base_background_compactions, - rocksdb_db_options.base_background_compactions, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::base_background_compactions for RocksDB", - nullptr, nullptr, rocksdb_db_options.base_background_compactions, - /* min */ -1, /* max */ 
MAX_BACKGROUND_COMPACTIONS, 0); + rocksdb_db_options.base_background_compactions, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::base_background_compactions for RocksDB", + nullptr, nullptr, + rocksdb_db_options.base_background_compactions, + /* min */ -1, /* max */ MAX_BACKGROUND_COMPACTIONS, 0); static MYSQL_SYSVAR_INT(max_background_compactions, - rocksdb_db_options.max_background_compactions, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::max_background_compactions for RocksDB", - nullptr, nullptr, rocksdb_db_options.max_background_compactions, - /* min */ 1, /* max */ MAX_BACKGROUND_COMPACTIONS, 0); + rocksdb_db_options.max_background_compactions, + PLUGIN_VAR_RQCMDARG, + "DBOptions::max_background_compactions for RocksDB", + nullptr, rocksdb_set_max_background_compactions, + rocksdb_db_options.max_background_compactions, + /* min */ 1, /* max */ MAX_BACKGROUND_COMPACTIONS, 0); static MYSQL_SYSVAR_INT(max_background_flushes, - rocksdb_db_options.max_background_flushes, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::max_background_flushes for RocksDB", - nullptr, nullptr, rocksdb_db_options.max_background_flushes, - /* min */ 1, /* max */ MAX_BACKGROUND_FLUSHES, 0); + rocksdb_db_options.max_background_flushes, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::max_background_flushes for RocksDB", + nullptr, nullptr, + rocksdb_db_options.max_background_flushes, + /* min */ 1, /* max */ MAX_BACKGROUND_FLUSHES, 0); static MYSQL_SYSVAR_UINT(max_subcompactions, - rocksdb_db_options.max_subcompactions, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::max_subcompactions for RocksDB", - nullptr, nullptr, rocksdb_db_options.max_subcompactions, - /* min */ 1, /* max */ MAX_SUBCOMPACTIONS, 0); + rocksdb_db_options.max_subcompactions, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::max_subcompactions for RocksDB", nullptr, + nullptr, rocksdb_db_options.max_subcompactions, + /* min */ 1, /* max */ 
MAX_SUBCOMPACTIONS, 0); static MYSQL_SYSVAR_ULONG(max_log_file_size, - rocksdb_db_options.max_log_file_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::max_log_file_size for RocksDB", - nullptr, nullptr, rocksdb_db_options.max_log_file_size, - /* min */ 0L, /* max */ LONG_MAX, 0); + rocksdb_db_options.max_log_file_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::max_log_file_size for RocksDB", nullptr, + nullptr, rocksdb_db_options.max_log_file_size, + /* min */ 0L, /* max */ LONG_MAX, 0); static MYSQL_SYSVAR_ULONG(log_file_time_to_roll, - rocksdb_db_options.log_file_time_to_roll, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::log_file_time_to_roll for RocksDB", - nullptr, nullptr, rocksdb_db_options.log_file_time_to_roll, - /* min */ 0L, /* max */ LONG_MAX, 0); + rocksdb_db_options.log_file_time_to_roll, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::log_file_time_to_roll for RocksDB", + nullptr, nullptr, + rocksdb_db_options.log_file_time_to_roll, + /* min */ 0L, /* max */ LONG_MAX, 0); static MYSQL_SYSVAR_ULONG(keep_log_file_num, - rocksdb_db_options.keep_log_file_num, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::keep_log_file_num for RocksDB", - nullptr, nullptr, rocksdb_db_options.keep_log_file_num, - /* min */ 0L, /* max */ LONG_MAX, 0); + rocksdb_db_options.keep_log_file_num, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::keep_log_file_num for RocksDB", nullptr, + nullptr, rocksdb_db_options.keep_log_file_num, + /* min */ 0L, /* max */ LONG_MAX, 0); static MYSQL_SYSVAR_ULONG(max_manifest_file_size, - rocksdb_db_options.max_manifest_file_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::max_manifest_file_size for RocksDB", - nullptr, nullptr, rocksdb_db_options.max_manifest_file_size, - /* min */ 0L, /* max */ ULONG_MAX, 0); + rocksdb_db_options.max_manifest_file_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::max_manifest_file_size for RocksDB", 
+ nullptr, nullptr, + rocksdb_db_options.max_manifest_file_size, + /* min */ 0L, /* max */ ULONG_MAX, 0); static MYSQL_SYSVAR_INT(table_cache_numshardbits, - rocksdb_db_options.table_cache_numshardbits, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::table_cache_numshardbits for RocksDB", - nullptr, nullptr, rocksdb_db_options.table_cache_numshardbits, - /* min */ 0, /* max */ INT_MAX, 0); - -static MYSQL_SYSVAR_ULONG(wal_ttl_seconds, - rocksdb_db_options.WAL_ttl_seconds, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::WAL_ttl_seconds for RocksDB", - nullptr, nullptr, rocksdb_db_options.WAL_ttl_seconds, - /* min */ 0L, /* max */ LONG_MAX, 0); + rocksdb_db_options.table_cache_numshardbits, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::table_cache_numshardbits for RocksDB", + nullptr, nullptr, + rocksdb_db_options.table_cache_numshardbits, + /* min */ 0, /* max */ INT_MAX, 0); + +static MYSQL_SYSVAR_ULONG(wal_ttl_seconds, rocksdb_db_options.WAL_ttl_seconds, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::WAL_ttl_seconds for RocksDB", nullptr, + nullptr, rocksdb_db_options.WAL_ttl_seconds, + /* min */ 0L, /* max */ LONG_MAX, 0); static MYSQL_SYSVAR_ULONG(wal_size_limit_mb, - rocksdb_db_options.WAL_size_limit_MB, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::WAL_size_limit_MB for RocksDB", - nullptr, nullptr, rocksdb_db_options.WAL_size_limit_MB, - /* min */ 0L, /* max */ LONG_MAX, 0); + rocksdb_db_options.WAL_size_limit_MB, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::WAL_size_limit_MB for RocksDB", nullptr, + nullptr, rocksdb_db_options.WAL_size_limit_MB, + /* min */ 0L, /* max */ LONG_MAX, 0); static MYSQL_SYSVAR_ULONG(manifest_preallocation_size, - rocksdb_db_options.manifest_preallocation_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::manifest_preallocation_size for RocksDB", - nullptr, nullptr, rocksdb_db_options.manifest_preallocation_size, - /* min */ 0L, /* max */ 
LONG_MAX, 0); - -static MYSQL_SYSVAR_BOOL(use_direct_reads, - *reinterpret_cast<my_bool*>(&rocksdb_db_options.use_direct_reads), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::use_direct_reads for RocksDB", - nullptr, nullptr, rocksdb_db_options.use_direct_reads); - -static MYSQL_SYSVAR_BOOL(use_direct_writes, - *reinterpret_cast<my_bool*>(&rocksdb_db_options.use_direct_writes), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::use_direct_writes for RocksDB", - nullptr, nullptr, rocksdb_db_options.use_direct_writes); - -static MYSQL_SYSVAR_BOOL(allow_mmap_reads, - *reinterpret_cast<my_bool*>(&rocksdb_db_options.allow_mmap_reads), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::allow_mmap_reads for RocksDB", - nullptr, nullptr, rocksdb_db_options.allow_mmap_reads); - -static MYSQL_SYSVAR_BOOL(allow_mmap_writes, - *reinterpret_cast<my_bool*>(&rocksdb_db_options.allow_mmap_writes), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::allow_mmap_writes for RocksDB", - nullptr, nullptr, rocksdb_db_options.allow_mmap_writes); - -static MYSQL_SYSVAR_BOOL(is_fd_close_on_exec, - *reinterpret_cast<my_bool*>(&rocksdb_db_options.is_fd_close_on_exec), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::is_fd_close_on_exec for RocksDB", - nullptr, nullptr, rocksdb_db_options.is_fd_close_on_exec); + rocksdb_db_options.manifest_preallocation_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::manifest_preallocation_size for RocksDB", + nullptr, nullptr, + rocksdb_db_options.manifest_preallocation_size, + /* min */ 0L, /* max */ LONG_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + use_direct_reads, + *reinterpret_cast<my_bool *>(&rocksdb_db_options.use_direct_reads), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::use_direct_reads for RocksDB", nullptr, nullptr, + rocksdb_db_options.use_direct_reads); + +static MYSQL_SYSVAR_BOOL( + use_direct_writes, + *reinterpret_cast<my_bool *>(&rocksdb_db_options.use_direct_writes), + 
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::use_direct_writes for RocksDB", nullptr, nullptr, + rocksdb_db_options.use_direct_writes); + +static MYSQL_SYSVAR_BOOL( + allow_mmap_reads, + *reinterpret_cast<my_bool *>(&rocksdb_db_options.allow_mmap_reads), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::allow_mmap_reads for RocksDB", nullptr, nullptr, + rocksdb_db_options.allow_mmap_reads); + +static MYSQL_SYSVAR_BOOL( + allow_mmap_writes, + *reinterpret_cast<my_bool *>(&rocksdb_db_options.allow_mmap_writes), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::allow_mmap_writes for RocksDB", nullptr, nullptr, + rocksdb_db_options.allow_mmap_writes); + +static MYSQL_SYSVAR_BOOL( + is_fd_close_on_exec, + *reinterpret_cast<my_bool *>(&rocksdb_db_options.is_fd_close_on_exec), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::is_fd_close_on_exec for RocksDB", nullptr, nullptr, + rocksdb_db_options.is_fd_close_on_exec); static MYSQL_SYSVAR_UINT(stats_dump_period_sec, - rocksdb_db_options.stats_dump_period_sec, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::stats_dump_period_sec for RocksDB", - nullptr, nullptr, rocksdb_db_options.stats_dump_period_sec, - /* min */ 0, /* max */ INT_MAX, 0); - -static MYSQL_SYSVAR_BOOL(advise_random_on_open, - *reinterpret_cast<my_bool*>(&rocksdb_db_options.advise_random_on_open), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::advise_random_on_open for RocksDB", - nullptr, nullptr, rocksdb_db_options.advise_random_on_open); + rocksdb_db_options.stats_dump_period_sec, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::stats_dump_period_sec for RocksDB", + nullptr, nullptr, + rocksdb_db_options.stats_dump_period_sec, + /* min */ 0, /* max */ INT_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + advise_random_on_open, + *reinterpret_cast<my_bool *>(&rocksdb_db_options.advise_random_on_open), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::advise_random_on_open for RocksDB", 
nullptr, nullptr, + rocksdb_db_options.advise_random_on_open); static MYSQL_SYSVAR_ULONG(db_write_buffer_size, - rocksdb_db_options.db_write_buffer_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::db_write_buffer_size for RocksDB", - nullptr, nullptr, rocksdb_db_options.db_write_buffer_size, - /* min */ 0L, /* max */ LONG_MAX, 0); - -static MYSQL_SYSVAR_BOOL(use_adaptive_mutex, - *reinterpret_cast<my_bool*>(&rocksdb_db_options.use_adaptive_mutex), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::use_adaptive_mutex for RocksDB", - nullptr, nullptr, rocksdb_db_options.use_adaptive_mutex); - -static MYSQL_SYSVAR_ULONG(bytes_per_sync, - rocksdb_db_options.bytes_per_sync, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::bytes_per_sync for RocksDB", - nullptr, nullptr, rocksdb_db_options.bytes_per_sync, - /* min */ 0L, /* max */ LONG_MAX, 0); + rocksdb_db_options.db_write_buffer_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::db_write_buffer_size for RocksDB", + nullptr, nullptr, + rocksdb_db_options.db_write_buffer_size, + /* min */ 0L, /* max */ LONG_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + use_adaptive_mutex, + *reinterpret_cast<my_bool *>(&rocksdb_db_options.use_adaptive_mutex), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::use_adaptive_mutex for RocksDB", nullptr, nullptr, + rocksdb_db_options.use_adaptive_mutex); + +static MYSQL_SYSVAR_ULONG(bytes_per_sync, rocksdb_db_options.bytes_per_sync, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::bytes_per_sync for RocksDB", nullptr, + nullptr, rocksdb_db_options.bytes_per_sync, + /* min */ 0L, /* max */ LONG_MAX, 0); static MYSQL_SYSVAR_ULONG(wal_bytes_per_sync, - rocksdb_db_options.wal_bytes_per_sync, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::wal_bytes_per_sync for RocksDB", - nullptr, nullptr, rocksdb_db_options.wal_bytes_per_sync, - /* min */ 0L, /* max */ LONG_MAX, 0); - -static MYSQL_SYSVAR_BOOL(enable_thread_tracking, - 
*reinterpret_cast<my_bool*>(&rocksdb_db_options.enable_thread_tracking), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::enable_thread_tracking for RocksDB", - nullptr, nullptr, rocksdb_db_options.enable_thread_tracking); + rocksdb_db_options.wal_bytes_per_sync, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::wal_bytes_per_sync for RocksDB", nullptr, + nullptr, rocksdb_db_options.wal_bytes_per_sync, + /* min */ 0L, /* max */ LONG_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + enable_thread_tracking, + *reinterpret_cast<my_bool *>(&rocksdb_db_options.enable_thread_tracking), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::enable_thread_tracking for RocksDB", nullptr, nullptr, + rocksdb_db_options.enable_thread_tracking); static MYSQL_SYSVAR_LONGLONG(block_cache_size, rocksdb_block_cache_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "block_cache size for RocksDB", - nullptr, nullptr, - /* default */ RDB_DEFAULT_BLOCK_CACHE_SIZE, - /* min */ RDB_MIN_BLOCK_CACHE_SIZE, - /* max */ LONGLONG_MAX, /* Block size */ RDB_MIN_BLOCK_CACHE_SIZE); - -static MYSQL_SYSVAR_BOOL(cache_index_and_filter_blocks, - *reinterpret_cast<my_bool*>( - &rocksdb_tbl_options.cache_index_and_filter_blocks), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "BlockBasedTableOptions::cache_index_and_filter_blocks for RocksDB", - nullptr, nullptr, true); + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "block_cache size for RocksDB", nullptr, nullptr, + /* default */ RDB_DEFAULT_BLOCK_CACHE_SIZE, + /* min */ RDB_MIN_BLOCK_CACHE_SIZE, + /* max */ LONGLONG_MAX, + /* Block size */ RDB_MIN_BLOCK_CACHE_SIZE); + +static MYSQL_SYSVAR_BOOL( + cache_index_and_filter_blocks, + *reinterpret_cast<my_bool *>( + &rocksdb_tbl_options.cache_index_and_filter_blocks), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::cache_index_and_filter_blocks for RocksDB", + nullptr, nullptr, true); // When pin_l0_filter_and_index_blocks_in_cache is true, RocksDB will use the // LRU 
cache, but will always keep the filter & idndex block's handle checked @@ -900,406 +875,402 @@ static MYSQL_SYSVAR_BOOL(cache_index_and_filter_blocks, // This fixes the mutex contention between :ShardedLRUCache::Lookup and // ShardedLRUCache::Release which reduced the QPS ratio (QPS using secondary // index / QPS using PK). -static MYSQL_SYSVAR_BOOL(pin_l0_filter_and_index_blocks_in_cache, - *reinterpret_cast<my_bool*>( - &rocksdb_tbl_options.pin_l0_filter_and_index_blocks_in_cache), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "pin_l0_filter_and_index_blocks_in_cache for RocksDB", - nullptr, nullptr, true); - -static MYSQL_SYSVAR_ENUM(index_type, - rocksdb_index_type, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "BlockBasedTableOptions::index_type for RocksDB", - nullptr, nullptr, - (uint64_t)rocksdb_tbl_options.index_type, &index_type_typelib); - -static MYSQL_SYSVAR_BOOL(hash_index_allow_collision, - *reinterpret_cast<my_bool*>(&rocksdb_tbl_options.hash_index_allow_collision), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "BlockBasedTableOptions::hash_index_allow_collision for RocksDB", - nullptr, nullptr, rocksdb_tbl_options.hash_index_allow_collision); - -static MYSQL_SYSVAR_BOOL(no_block_cache, - *reinterpret_cast<my_bool*>(&rocksdb_tbl_options.no_block_cache), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "BlockBasedTableOptions::no_block_cache for RocksDB", - nullptr, nullptr, rocksdb_tbl_options.no_block_cache); - -static MYSQL_SYSVAR_ULONG(block_size, - rocksdb_tbl_options.block_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "BlockBasedTableOptions::block_size for RocksDB", - nullptr, nullptr, rocksdb_tbl_options.block_size, - /* min */ 1L, /* max */ LONG_MAX, 0); - -static MYSQL_SYSVAR_INT(block_size_deviation, - rocksdb_tbl_options.block_size_deviation, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "BlockBasedTableOptions::block_size_deviation for RocksDB", - nullptr, nullptr, rocksdb_tbl_options.block_size_deviation, - /* min */ 0, /* max 
*/ INT_MAX, 0); - -static MYSQL_SYSVAR_INT(block_restart_interval, - rocksdb_tbl_options.block_restart_interval, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "BlockBasedTableOptions::block_restart_interval for RocksDB", - nullptr, nullptr, rocksdb_tbl_options.block_restart_interval, - /* min */ 1, /* max */ INT_MAX, 0); - -static MYSQL_SYSVAR_BOOL(whole_key_filtering, - *reinterpret_cast<my_bool*>(&rocksdb_tbl_options.whole_key_filtering), - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "BlockBasedTableOptions::whole_key_filtering for RocksDB", - nullptr, nullptr, rocksdb_tbl_options.whole_key_filtering); +static MYSQL_SYSVAR_BOOL( + pin_l0_filter_and_index_blocks_in_cache, + *reinterpret_cast<my_bool *>( + &rocksdb_tbl_options.pin_l0_filter_and_index_blocks_in_cache), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "pin_l0_filter_and_index_blocks_in_cache for RocksDB", nullptr, nullptr, + true); + +static MYSQL_SYSVAR_ENUM(index_type, rocksdb_index_type, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::index_type for RocksDB", + nullptr, nullptr, + (uint64_t)rocksdb_tbl_options.index_type, + &index_type_typelib); + +static MYSQL_SYSVAR_BOOL( + hash_index_allow_collision, + *reinterpret_cast<my_bool *>( + &rocksdb_tbl_options.hash_index_allow_collision), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::hash_index_allow_collision for RocksDB", nullptr, + nullptr, rocksdb_tbl_options.hash_index_allow_collision); + +static MYSQL_SYSVAR_BOOL( + no_block_cache, + *reinterpret_cast<my_bool *>(&rocksdb_tbl_options.no_block_cache), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::no_block_cache for RocksDB", nullptr, nullptr, + rocksdb_tbl_options.no_block_cache); + +static MYSQL_SYSVAR_ULONG(block_size, rocksdb_tbl_options.block_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::block_size for RocksDB", + nullptr, nullptr, rocksdb_tbl_options.block_size, + /* min */ 1L, /* max 
*/ LONG_MAX, 0); + +static MYSQL_SYSVAR_INT( + block_size_deviation, rocksdb_tbl_options.block_size_deviation, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::block_size_deviation for RocksDB", nullptr, + nullptr, rocksdb_tbl_options.block_size_deviation, + /* min */ 0, /* max */ INT_MAX, 0); + +static MYSQL_SYSVAR_INT( + block_restart_interval, rocksdb_tbl_options.block_restart_interval, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::block_restart_interval for RocksDB", nullptr, + nullptr, rocksdb_tbl_options.block_restart_interval, + /* min */ 1, /* max */ INT_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + whole_key_filtering, + *reinterpret_cast<my_bool *>(&rocksdb_tbl_options.whole_key_filtering), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::whole_key_filtering for RocksDB", nullptr, nullptr, + rocksdb_tbl_options.whole_key_filtering); static MYSQL_SYSVAR_STR(default_cf_options, rocksdb_default_cf_options, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "default cf options for RocksDB", - nullptr, nullptr, ""); + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "default cf options for RocksDB", nullptr, nullptr, ""); static MYSQL_SYSVAR_STR(override_cf_options, rocksdb_override_cf_options, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "option overrides per cf for RocksDB", - nullptr, nullptr, ""); - -static MYSQL_SYSVAR_BOOL(background_sync, - rocksdb_background_sync, - PLUGIN_VAR_RQCMDARG, - "turns on background syncs for RocksDB", - nullptr, nullptr, FALSE); - -static MYSQL_THDVAR_BOOL(write_sync, - PLUGIN_VAR_RQCMDARG, - "WriteOptions::sync for RocksDB", - nullptr, nullptr, rocksdb::WriteOptions().sync); - -static MYSQL_THDVAR_BOOL(write_disable_wal, - PLUGIN_VAR_RQCMDARG, - "WriteOptions::disableWAL for RocksDB", - nullptr, nullptr, rocksdb::WriteOptions().disableWAL); - -static MYSQL_THDVAR_BOOL(write_ignore_missing_column_families, - PLUGIN_VAR_RQCMDARG, - 
"WriteOptions::ignore_missing_column_families for RocksDB", - nullptr, nullptr, rocksdb::WriteOptions().ignore_missing_column_families); - -static MYSQL_THDVAR_BOOL(skip_fill_cache, - PLUGIN_VAR_RQCMDARG, - "Skip filling block cache on read requests", - nullptr, nullptr, FALSE); - -static MYSQL_THDVAR_BOOL(unsafe_for_binlog, - PLUGIN_VAR_RQCMDARG, - "Allowing statement based binary logging which may break consistency", - nullptr, nullptr, FALSE); - -static MYSQL_THDVAR_UINT(records_in_range, - PLUGIN_VAR_RQCMDARG, - "Used to override the result of records_in_range(). Set to a positive number to override", - nullptr, nullptr, 0, - /* min */ 0, /* max */ INT_MAX, 0); - -static MYSQL_THDVAR_UINT(force_index_records_in_range, - PLUGIN_VAR_RQCMDARG, - "Used to override the result of records_in_range() when FORCE INDEX is used.", - nullptr, nullptr, 0, - /* min */ 0, /* max */ INT_MAX, 0); - -static MYSQL_SYSVAR_UINT(debug_optimizer_n_rows, - rocksdb_debug_optimizer_n_rows, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR, - "Test only to override rocksdb estimates of table size in a memtable", - nullptr, nullptr, 0, /* min */ 0, /* max */ INT_MAX, 0); - -static MYSQL_SYSVAR_BOOL(debug_optimizer_no_zero_cardinality, - rocksdb_debug_optimizer_no_zero_cardinality, - PLUGIN_VAR_RQCMDARG, - "In case if cardinality is zero, overrides it with some value", - nullptr, nullptr, TRUE); + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "option overrides per cf for RocksDB", nullptr, nullptr, + ""); + +static MYSQL_SYSVAR_BOOL(background_sync, rocksdb_background_sync, + PLUGIN_VAR_RQCMDARG, + "turns on background syncs for RocksDB", nullptr, + nullptr, FALSE); + +static MYSQL_THDVAR_BOOL(write_sync, PLUGIN_VAR_RQCMDARG, + "WriteOptions::sync for RocksDB", nullptr, nullptr, + rocksdb::WriteOptions().sync); + +static MYSQL_THDVAR_BOOL(write_disable_wal, PLUGIN_VAR_RQCMDARG, + "WriteOptions::disableWAL for RocksDB", nullptr, + nullptr, rocksdb::WriteOptions().disableWAL); 
+ +static MYSQL_THDVAR_BOOL( + write_ignore_missing_column_families, PLUGIN_VAR_RQCMDARG, + "WriteOptions::ignore_missing_column_families for RocksDB", nullptr, + nullptr, rocksdb::WriteOptions().ignore_missing_column_families); + +static MYSQL_THDVAR_BOOL(skip_fill_cache, PLUGIN_VAR_RQCMDARG, + "Skip filling block cache on read requests", nullptr, + nullptr, FALSE); + +static MYSQL_THDVAR_BOOL( + unsafe_for_binlog, PLUGIN_VAR_RQCMDARG, + "Allowing statement based binary logging which may break consistency", + nullptr, nullptr, FALSE); + +static MYSQL_THDVAR_UINT(records_in_range, PLUGIN_VAR_RQCMDARG, + "Used to override the result of records_in_range(). " + "Set to a positive number to override", + nullptr, nullptr, 0, + /* min */ 0, /* max */ INT_MAX, 0); + +static MYSQL_THDVAR_UINT(force_index_records_in_range, PLUGIN_VAR_RQCMDARG, + "Used to override the result of records_in_range() " + "when FORCE INDEX is used.", + nullptr, nullptr, 0, + /* min */ 0, /* max */ INT_MAX, 0); + +static MYSQL_SYSVAR_UINT( + debug_optimizer_n_rows, rocksdb_debug_optimizer_n_rows, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR, + "Test only to override rocksdb estimates of table size in a memtable", + nullptr, nullptr, 0, /* min */ 0, /* max */ INT_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + debug_optimizer_no_zero_cardinality, + rocksdb_debug_optimizer_no_zero_cardinality, PLUGIN_VAR_RQCMDARG, + "In case if cardinality is zero, overrides it with some value", nullptr, + nullptr, TRUE); static MYSQL_SYSVAR_STR(compact_cf, rocksdb_compact_cf_name, - PLUGIN_VAR_RQCMDARG, - "Compact column family", - rocksdb_compact_column_family, rocksdb_compact_column_family_stub, ""); + PLUGIN_VAR_RQCMDARG, "Compact column family", + rocksdb_compact_column_family, + rocksdb_compact_column_family_stub, ""); static MYSQL_SYSVAR_STR(create_checkpoint, rocksdb_checkpoint_name, - PLUGIN_VAR_RQCMDARG, - "Checkpoint directory", - rocksdb_create_checkpoint, rocksdb_create_checkpoint_stub, 
""); + PLUGIN_VAR_RQCMDARG, "Checkpoint directory", + rocksdb_create_checkpoint, + rocksdb_create_checkpoint_stub, ""); static MYSQL_SYSVAR_BOOL(signal_drop_index_thread, - rocksdb_signal_drop_index_thread, - PLUGIN_VAR_RQCMDARG, - "Wake up drop index thread", - nullptr, rocksdb_drop_index_wakeup_thread, FALSE); - -static MYSQL_SYSVAR_BOOL(pause_background_work, - rocksdb_pause_background_work, - PLUGIN_VAR_RQCMDARG, - "Disable all rocksdb background operations", - nullptr, rocksdb_set_pause_background_work, FALSE); - -static MYSQL_SYSVAR_BOOL(disable_2pc, - rocksdb_disable_2pc, - PLUGIN_VAR_RQCMDARG, - "Disable two phase commit for MyRocks", - nullptr, nullptr, TRUE); - -static MYSQL_SYSVAR_BOOL(strict_collation_check, - rocksdb_strict_collation_check, - PLUGIN_VAR_RQCMDARG, - "Enforce case sensitive collation for MyRocks indexes", - nullptr, nullptr, TRUE); + rocksdb_signal_drop_index_thread, PLUGIN_VAR_RQCMDARG, + "Wake up drop index thread", nullptr, + rocksdb_drop_index_wakeup_thread, FALSE); + +static MYSQL_SYSVAR_BOOL(pause_background_work, rocksdb_pause_background_work, + PLUGIN_VAR_RQCMDARG, + "Disable all rocksdb background operations", nullptr, + rocksdb_set_pause_background_work, FALSE); + +static MYSQL_SYSVAR_BOOL(enable_2pc, rocksdb_enable_2pc, PLUGIN_VAR_RQCMDARG, + "Enable two phase commit for MyRocks", nullptr, + nullptr, TRUE); + +static MYSQL_SYSVAR_BOOL(strict_collation_check, rocksdb_strict_collation_check, + PLUGIN_VAR_RQCMDARG, + "Enforce case sensitive collation for MyRocks indexes", + nullptr, nullptr, TRUE); static MYSQL_SYSVAR_STR(strict_collation_exceptions, - rocksdb_strict_collation_exceptions, - PLUGIN_VAR_RQCMDARG|PLUGIN_VAR_MEMALLOC, - "List of tables (using regex) that are excluded " - "from the case sensitive collation enforcement", - nullptr, rocksdb_set_collation_exception_list, ""); - -static MYSQL_SYSVAR_BOOL(collect_sst_properties, - rocksdb_collect_sst_properties, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Enables 
collecting SST file properties on each flush", - nullptr, nullptr, rocksdb_collect_sst_properties); + rocksdb_strict_collation_exceptions, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, + "List of tables (using regex) that are excluded " + "from the case sensitive collation enforcement", + nullptr, rocksdb_set_collation_exception_list, ""); + +static MYSQL_SYSVAR_BOOL(collect_sst_properties, rocksdb_collect_sst_properties, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Enables collecting SST file properties on each flush", + nullptr, nullptr, rocksdb_collect_sst_properties); static MYSQL_SYSVAR_BOOL( - force_flush_memtable_now, - rocksdb_force_flush_memtable_now_var, - PLUGIN_VAR_RQCMDARG, - "Forces memstore flush which may block all write requests so be careful", - rocksdb_force_flush_memtable_now, - rocksdb_force_flush_memtable_now_stub, FALSE); + force_flush_memtable_now, rocksdb_force_flush_memtable_now_var, + PLUGIN_VAR_RQCMDARG, + "Forces memstore flush which may block all write requests so be careful", + rocksdb_force_flush_memtable_now, rocksdb_force_flush_memtable_now_stub, + FALSE); static MYSQL_THDVAR_BOOL( - flush_memtable_on_analyze, - PLUGIN_VAR_RQCMDARG, - "Forces memtable flush on ANALZYE table to get accurate cardinality", - nullptr, nullptr, true); - -static MYSQL_SYSVAR_UINT(seconds_between_stat_computes, - rocksdb_seconds_between_stat_computes, - PLUGIN_VAR_RQCMDARG, - "Sets a number of seconds to wait between optimizer stats recomputation. 
" - "Only changed indexes will be refreshed.", - nullptr, nullptr, rocksdb_seconds_between_stat_computes, - /* min */ 0L, /* max */ UINT_MAX, 0); + flush_memtable_on_analyze, PLUGIN_VAR_RQCMDARG, + "Forces memtable flush on ANALZYE table to get accurate cardinality", + nullptr, nullptr, true); -static MYSQL_SYSVAR_LONGLONG( - compaction_sequential_deletes, - rocksdb_compaction_sequential_deletes, - PLUGIN_VAR_RQCMDARG, - "RocksDB will trigger compaction for the file if it has more than this number sequential deletes per window", - nullptr, rocksdb_set_compaction_options, - DEFAULT_COMPACTION_SEQUENTIAL_DELETES, - /* min */ 0L, /* max */ MAX_COMPACTION_SEQUENTIAL_DELETES, 0); +static MYSQL_SYSVAR_UINT( + seconds_between_stat_computes, rocksdb_seconds_between_stat_computes, + PLUGIN_VAR_RQCMDARG, + "Sets a number of seconds to wait between optimizer stats recomputation. " + "Only changed indexes will be refreshed.", + nullptr, nullptr, rocksdb_seconds_between_stat_computes, + /* min */ 0L, /* max */ UINT_MAX, 0); + +static MYSQL_SYSVAR_LONGLONG(compaction_sequential_deletes, + rocksdb_compaction_sequential_deletes, + PLUGIN_VAR_RQCMDARG, + "RocksDB will trigger compaction for the file if " + "it has more than this number sequential deletes " + "per window", + nullptr, rocksdb_set_compaction_options, + DEFAULT_COMPACTION_SEQUENTIAL_DELETES, + /* min */ 0L, + /* max */ MAX_COMPACTION_SEQUENTIAL_DELETES, 0); static MYSQL_SYSVAR_LONGLONG( - compaction_sequential_deletes_window, - rocksdb_compaction_sequential_deletes_window, - PLUGIN_VAR_RQCMDARG, - "Size of the window for counting rocksdb_compaction_sequential_deletes", - nullptr, rocksdb_set_compaction_options, - DEFAULT_COMPACTION_SEQUENTIAL_DELETES_WINDOW, - /* min */ 0L, /* max */ MAX_COMPACTION_SEQUENTIAL_DELETES_WINDOW, 0); + compaction_sequential_deletes_window, + rocksdb_compaction_sequential_deletes_window, PLUGIN_VAR_RQCMDARG, + "Size of the window for counting rocksdb_compaction_sequential_deletes", + 
nullptr, rocksdb_set_compaction_options, + DEFAULT_COMPACTION_SEQUENTIAL_DELETES_WINDOW, + /* min */ 0L, /* max */ MAX_COMPACTION_SEQUENTIAL_DELETES_WINDOW, 0); static MYSQL_SYSVAR_LONGLONG( - compaction_sequential_deletes_file_size, - rocksdb_compaction_sequential_deletes_file_size, - PLUGIN_VAR_RQCMDARG, - "Minimum file size required for compaction_sequential_deletes", - nullptr, rocksdb_set_compaction_options, 0L, - /* min */ -1L, /* max */ LONGLONG_MAX, 0); - -static MYSQL_SYSVAR_BOOL(compaction_sequential_deletes_count_sd, - rocksdb_compaction_sequential_deletes_count_sd, - PLUGIN_VAR_RQCMDARG, - "Counting SingleDelete as rocksdb_compaction_sequential_deletes", - nullptr, nullptr, rocksdb_compaction_sequential_deletes_count_sd); - -static MYSQL_SYSVAR_BOOL(print_snapshot_conflict_queries, - rocksdb_print_snapshot_conflict_queries, - PLUGIN_VAR_RQCMDARG, - "Logging queries that got snapshot conflict errors into *.err log", - nullptr, nullptr, rocksdb_print_snapshot_conflict_queries); - -static MYSQL_THDVAR_INT(checksums_pct, - PLUGIN_VAR_RQCMDARG, - "How many percentages of rows to be checksummed", - nullptr, nullptr, RDB_MAX_CHECKSUMS_PCT, - /* min */ 0, /* max */ RDB_MAX_CHECKSUMS_PCT, 0); - -static MYSQL_THDVAR_BOOL(store_row_debug_checksums, - PLUGIN_VAR_RQCMDARG, - "Include checksums when writing index/table records", - nullptr, nullptr, false /* default value */); - -static MYSQL_THDVAR_BOOL(verify_row_debug_checksums, - PLUGIN_VAR_RQCMDARG, - "Verify checksums when reading index/table records", - nullptr, nullptr, false /* default value */); - -static MYSQL_SYSVAR_UINT(validate_tables, - rocksdb_validate_tables, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Verify all .frm files match all RocksDB tables (0 means no verification, " - "1 means verify and fail on error, and 2 means verify but continue", - nullptr, nullptr, 1 /* default value */, 0 /* min value */, - 2 /* max value */, 0); - -static MYSQL_SYSVAR_STR(datadir, - rocksdb_datadir, - 
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "RocksDB data directory", - nullptr, nullptr, "./.rocksdb"); + compaction_sequential_deletes_file_size, + rocksdb_compaction_sequential_deletes_file_size, PLUGIN_VAR_RQCMDARG, + "Minimum file size required for compaction_sequential_deletes", nullptr, + rocksdb_set_compaction_options, 0L, + /* min */ -1L, /* max */ LONGLONG_MAX, 0); -static MYSQL_SYSVAR_UINT( - table_stats_sampling_pct, - rocksdb_table_stats_sampling_pct, - PLUGIN_VAR_RQCMDARG, - "Percentage of entries to sample when collecting statistics about table " - "properties. Specify either 0 to sample everything or percentage [" - STRINGIFY_ARG(RDB_TBL_STATS_SAMPLE_PCT_MIN) ".." - STRINGIFY_ARG(RDB_TBL_STATS_SAMPLE_PCT_MAX) "]. " "By default " - STRINGIFY_ARG(RDB_DEFAULT_TBL_STATS_SAMPLE_PCT) "% of entries are " - "sampled.", - nullptr, rocksdb_set_table_stats_sampling_pct, /* default */ - RDB_DEFAULT_TBL_STATS_SAMPLE_PCT, /* everything */ 0, - /* max */ RDB_TBL_STATS_SAMPLE_PCT_MAX, 0); - -static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE= 100; - -static struct st_mysql_sys_var* rocksdb_system_variables[]= { - MYSQL_SYSVAR(lock_wait_timeout), - MYSQL_SYSVAR(deadlock_detect), - MYSQL_SYSVAR(max_row_locks), - MYSQL_SYSVAR(lock_scanned_rows), - MYSQL_SYSVAR(bulk_load), - MYSQL_SYSVAR(skip_unique_check_tables), - MYSQL_SYSVAR(trace_sst_api), - MYSQL_SYSVAR(skip_unique_check), - MYSQL_SYSVAR(commit_in_the_middle), - MYSQL_SYSVAR(read_free_rpl_tables), - MYSQL_SYSVAR(rpl_skip_tx_api), - MYSQL_SYSVAR(bulk_load_size), - MYSQL_SYSVAR(merge_buf_size), - MYSQL_SYSVAR(enable_bulk_load_api), - MYSQL_SYSVAR(tmpdir), - MYSQL_SYSVAR(merge_combine_read_size), - MYSQL_SYSVAR(skip_bloom_filter_on_read), - - MYSQL_SYSVAR(create_if_missing), - MYSQL_SYSVAR(create_missing_column_families), - MYSQL_SYSVAR(error_if_exists), - MYSQL_SYSVAR(paranoid_checks), - MYSQL_SYSVAR(rate_limiter_bytes_per_sec), - MYSQL_SYSVAR(info_log_level), - MYSQL_SYSVAR(max_open_files), - 
MYSQL_SYSVAR(max_total_wal_size), - MYSQL_SYSVAR(disabledatasync), - MYSQL_SYSVAR(use_fsync), - MYSQL_SYSVAR(wal_dir), - MYSQL_SYSVAR(delete_obsolete_files_period_micros), - MYSQL_SYSVAR(base_background_compactions), - MYSQL_SYSVAR(max_background_compactions), - MYSQL_SYSVAR(max_background_flushes), - MYSQL_SYSVAR(max_log_file_size), - MYSQL_SYSVAR(max_subcompactions), - MYSQL_SYSVAR(log_file_time_to_roll), - MYSQL_SYSVAR(keep_log_file_num), - MYSQL_SYSVAR(max_manifest_file_size), - MYSQL_SYSVAR(table_cache_numshardbits), - MYSQL_SYSVAR(wal_ttl_seconds), - MYSQL_SYSVAR(wal_size_limit_mb), - MYSQL_SYSVAR(manifest_preallocation_size), - MYSQL_SYSVAR(use_direct_reads), - MYSQL_SYSVAR(use_direct_writes), - MYSQL_SYSVAR(allow_mmap_reads), - MYSQL_SYSVAR(allow_mmap_writes), - MYSQL_SYSVAR(is_fd_close_on_exec), - MYSQL_SYSVAR(stats_dump_period_sec), - MYSQL_SYSVAR(advise_random_on_open), - MYSQL_SYSVAR(db_write_buffer_size), - MYSQL_SYSVAR(use_adaptive_mutex), - MYSQL_SYSVAR(bytes_per_sync), - MYSQL_SYSVAR(wal_bytes_per_sync), - MYSQL_SYSVAR(enable_thread_tracking), - MYSQL_SYSVAR(perf_context_level), - MYSQL_SYSVAR(wal_recovery_mode), - MYSQL_SYSVAR(access_hint_on_compaction_start), - MYSQL_SYSVAR(new_table_reader_for_compaction_inputs), - MYSQL_SYSVAR(compaction_readahead_size), - MYSQL_SYSVAR(allow_concurrent_memtable_write), - MYSQL_SYSVAR(enable_write_thread_adaptive_yield), - - MYSQL_SYSVAR(block_cache_size), - MYSQL_SYSVAR(cache_index_and_filter_blocks), - MYSQL_SYSVAR(pin_l0_filter_and_index_blocks_in_cache), - MYSQL_SYSVAR(index_type), - MYSQL_SYSVAR(hash_index_allow_collision), - MYSQL_SYSVAR(no_block_cache), - MYSQL_SYSVAR(block_size), - MYSQL_SYSVAR(block_size_deviation), - MYSQL_SYSVAR(block_restart_interval), - MYSQL_SYSVAR(whole_key_filtering), - - MYSQL_SYSVAR(default_cf_options), - MYSQL_SYSVAR(override_cf_options), - - MYSQL_SYSVAR(background_sync), - - MYSQL_SYSVAR(write_sync), - MYSQL_SYSVAR(write_disable_wal), - 
MYSQL_SYSVAR(write_ignore_missing_column_families), - - MYSQL_SYSVAR(skip_fill_cache), - MYSQL_SYSVAR(unsafe_for_binlog), - - MYSQL_SYSVAR(records_in_range), - MYSQL_SYSVAR(force_index_records_in_range), - MYSQL_SYSVAR(debug_optimizer_n_rows), - MYSQL_SYSVAR(debug_optimizer_no_zero_cardinality), - - MYSQL_SYSVAR(compact_cf), - MYSQL_SYSVAR(signal_drop_index_thread), - MYSQL_SYSVAR(pause_background_work), - MYSQL_SYSVAR(disable_2pc), - MYSQL_SYSVAR(strict_collation_check), - MYSQL_SYSVAR(strict_collation_exceptions), - MYSQL_SYSVAR(collect_sst_properties), - MYSQL_SYSVAR(force_flush_memtable_now), - MYSQL_SYSVAR(flush_memtable_on_analyze), - MYSQL_SYSVAR(seconds_between_stat_computes), - - MYSQL_SYSVAR(compaction_sequential_deletes), - MYSQL_SYSVAR(compaction_sequential_deletes_window), - MYSQL_SYSVAR(compaction_sequential_deletes_file_size), - MYSQL_SYSVAR(compaction_sequential_deletes_count_sd), - MYSQL_SYSVAR(print_snapshot_conflict_queries), - - MYSQL_SYSVAR(datadir), - MYSQL_SYSVAR(create_checkpoint), - - MYSQL_SYSVAR(checksums_pct), - MYSQL_SYSVAR(store_row_debug_checksums), - MYSQL_SYSVAR(verify_row_debug_checksums), - - MYSQL_SYSVAR(validate_tables), - MYSQL_SYSVAR(table_stats_sampling_pct), - nullptr -}; +static MYSQL_SYSVAR_BOOL( + compaction_sequential_deletes_count_sd, + rocksdb_compaction_sequential_deletes_count_sd, PLUGIN_VAR_RQCMDARG, + "Counting SingleDelete as rocksdb_compaction_sequential_deletes", nullptr, + nullptr, rocksdb_compaction_sequential_deletes_count_sd); +static MYSQL_SYSVAR_BOOL( + print_snapshot_conflict_queries, rocksdb_print_snapshot_conflict_queries, + PLUGIN_VAR_RQCMDARG, + "Logging queries that got snapshot conflict errors into *.err log", nullptr, + nullptr, rocksdb_print_snapshot_conflict_queries); -static rocksdb::WriteOptions rdb_get_rocksdb_write_options( - my_core::THD* const thd) -{ +static MYSQL_THDVAR_INT(checksums_pct, PLUGIN_VAR_RQCMDARG, + "How many percentages of rows to be checksummed", + nullptr, nullptr, 
RDB_MAX_CHECKSUMS_PCT, + /* min */ 0, /* max */ RDB_MAX_CHECKSUMS_PCT, 0); + +static MYSQL_THDVAR_BOOL(store_row_debug_checksums, PLUGIN_VAR_RQCMDARG, + "Include checksums when writing index/table records", + nullptr, nullptr, false /* default value */); + +static MYSQL_THDVAR_BOOL(verify_row_debug_checksums, PLUGIN_VAR_RQCMDARG, + "Verify checksums when reading index/table records", + nullptr, nullptr, false /* default value */); + +static MYSQL_SYSVAR_UINT( + validate_tables, rocksdb_validate_tables, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Verify all .frm files match all RocksDB tables (0 means no verification, " + "1 means verify and fail on error, and 2 means verify but continue", + nullptr, nullptr, 1 /* default value */, 0 /* min value */, + 2 /* max value */, 0); + +static MYSQL_SYSVAR_STR(datadir, rocksdb_datadir, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "RocksDB data directory", nullptr, nullptr, + "./.rocksdb"); + +static MYSQL_SYSVAR_UINT( + table_stats_sampling_pct, rocksdb_table_stats_sampling_pct, + PLUGIN_VAR_RQCMDARG, + "Percentage of entries to sample when collecting statistics about table " + "properties. Specify either 0 to sample everything or percentage " + "[" STRINGIFY_ARG(RDB_TBL_STATS_SAMPLE_PCT_MIN) ".." STRINGIFY_ARG( + RDB_TBL_STATS_SAMPLE_PCT_MAX) "]. 
" + "By default " STRINGIFY_ARG( + RDB_DEFAULT_TBL_STATS_SAMPLE_PCT) "% " + "of" + " e" + "nt" + "ri" + "es" + " a" + "re" + " " + "sa" + "mp" + "le" + "d" + ".", + nullptr, rocksdb_set_table_stats_sampling_pct, /* default */ + RDB_DEFAULT_TBL_STATS_SAMPLE_PCT, /* everything */ 0, + /* max */ RDB_TBL_STATS_SAMPLE_PCT_MAX, 0); + +static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100; + +static struct st_mysql_sys_var *rocksdb_system_variables[] = { + MYSQL_SYSVAR(lock_wait_timeout), + MYSQL_SYSVAR(deadlock_detect), + MYSQL_SYSVAR(max_row_locks), + MYSQL_SYSVAR(lock_scanned_rows), + MYSQL_SYSVAR(bulk_load), + MYSQL_SYSVAR(skip_unique_check_tables), + MYSQL_SYSVAR(trace_sst_api), + MYSQL_SYSVAR(commit_in_the_middle), + MYSQL_SYSVAR(read_free_rpl_tables), + MYSQL_SYSVAR(bulk_load_size), + MYSQL_SYSVAR(merge_buf_size), + MYSQL_SYSVAR(enable_bulk_load_api), + MYSQL_SYSVAR(tmpdir), + MYSQL_SYSVAR(merge_combine_read_size), + MYSQL_SYSVAR(skip_bloom_filter_on_read), + + MYSQL_SYSVAR(create_if_missing), + MYSQL_SYSVAR(create_missing_column_families), + MYSQL_SYSVAR(error_if_exists), + MYSQL_SYSVAR(paranoid_checks), + MYSQL_SYSVAR(rate_limiter_bytes_per_sec), + MYSQL_SYSVAR(info_log_level), + MYSQL_SYSVAR(max_open_files), + MYSQL_SYSVAR(max_total_wal_size), + MYSQL_SYSVAR(disabledatasync), + MYSQL_SYSVAR(use_fsync), + MYSQL_SYSVAR(wal_dir), + MYSQL_SYSVAR(persistent_cache_path), + MYSQL_SYSVAR(persistent_cache_size), + MYSQL_SYSVAR(delete_obsolete_files_period_micros), + MYSQL_SYSVAR(base_background_compactions), + MYSQL_SYSVAR(max_background_compactions), + MYSQL_SYSVAR(max_background_flushes), + MYSQL_SYSVAR(max_log_file_size), + MYSQL_SYSVAR(max_subcompactions), + MYSQL_SYSVAR(log_file_time_to_roll), + MYSQL_SYSVAR(keep_log_file_num), + MYSQL_SYSVAR(max_manifest_file_size), + MYSQL_SYSVAR(table_cache_numshardbits), + MYSQL_SYSVAR(wal_ttl_seconds), + MYSQL_SYSVAR(wal_size_limit_mb), + MYSQL_SYSVAR(manifest_preallocation_size), + MYSQL_SYSVAR(use_direct_reads), + 
MYSQL_SYSVAR(use_direct_writes), + MYSQL_SYSVAR(allow_mmap_reads), + MYSQL_SYSVAR(allow_mmap_writes), + MYSQL_SYSVAR(is_fd_close_on_exec), + MYSQL_SYSVAR(stats_dump_period_sec), + MYSQL_SYSVAR(advise_random_on_open), + MYSQL_SYSVAR(db_write_buffer_size), + MYSQL_SYSVAR(use_adaptive_mutex), + MYSQL_SYSVAR(bytes_per_sync), + MYSQL_SYSVAR(wal_bytes_per_sync), + MYSQL_SYSVAR(enable_thread_tracking), + MYSQL_SYSVAR(perf_context_level), + MYSQL_SYSVAR(wal_recovery_mode), + MYSQL_SYSVAR(access_hint_on_compaction_start), + MYSQL_SYSVAR(new_table_reader_for_compaction_inputs), + MYSQL_SYSVAR(compaction_readahead_size), + MYSQL_SYSVAR(allow_concurrent_memtable_write), + MYSQL_SYSVAR(enable_write_thread_adaptive_yield), + + MYSQL_SYSVAR(block_cache_size), + MYSQL_SYSVAR(cache_index_and_filter_blocks), + MYSQL_SYSVAR(pin_l0_filter_and_index_blocks_in_cache), + MYSQL_SYSVAR(index_type), + MYSQL_SYSVAR(hash_index_allow_collision), + MYSQL_SYSVAR(no_block_cache), + MYSQL_SYSVAR(block_size), + MYSQL_SYSVAR(block_size_deviation), + MYSQL_SYSVAR(block_restart_interval), + MYSQL_SYSVAR(whole_key_filtering), + + MYSQL_SYSVAR(default_cf_options), + MYSQL_SYSVAR(override_cf_options), + + MYSQL_SYSVAR(background_sync), + + MYSQL_SYSVAR(write_sync), + MYSQL_SYSVAR(write_disable_wal), + MYSQL_SYSVAR(write_ignore_missing_column_families), + + MYSQL_SYSVAR(skip_fill_cache), + MYSQL_SYSVAR(unsafe_for_binlog), + + MYSQL_SYSVAR(records_in_range), + MYSQL_SYSVAR(force_index_records_in_range), + MYSQL_SYSVAR(debug_optimizer_n_rows), + MYSQL_SYSVAR(debug_optimizer_no_zero_cardinality), + + MYSQL_SYSVAR(compact_cf), + MYSQL_SYSVAR(signal_drop_index_thread), + MYSQL_SYSVAR(pause_background_work), + MYSQL_SYSVAR(enable_2pc), + MYSQL_SYSVAR(strict_collation_check), + MYSQL_SYSVAR(strict_collation_exceptions), + MYSQL_SYSVAR(collect_sst_properties), + MYSQL_SYSVAR(force_flush_memtable_now), + MYSQL_SYSVAR(flush_memtable_on_analyze), + MYSQL_SYSVAR(seconds_between_stat_computes), + + 
MYSQL_SYSVAR(compaction_sequential_deletes), + MYSQL_SYSVAR(compaction_sequential_deletes_window), + MYSQL_SYSVAR(compaction_sequential_deletes_file_size), + MYSQL_SYSVAR(compaction_sequential_deletes_count_sd), + MYSQL_SYSVAR(print_snapshot_conflict_queries), + + MYSQL_SYSVAR(datadir), + MYSQL_SYSVAR(create_checkpoint), + + MYSQL_SYSVAR(checksums_pct), + MYSQL_SYSVAR(store_row_debug_checksums), + MYSQL_SYSVAR(verify_row_debug_checksums), + + MYSQL_SYSVAR(validate_tables), + MYSQL_SYSVAR(table_stats_sampling_pct), + nullptr}; + +static rocksdb::WriteOptions +rdb_get_rocksdb_write_options(my_core::THD *const thd) { rocksdb::WriteOptions opt; - opt.sync= THDVAR(thd, write_sync); - opt.disableWAL= THDVAR(thd, write_disable_wal); - opt.ignore_missing_column_families= + opt.sync = THDVAR(thd, write_sync); + opt.disableWAL = THDVAR(thd, write_disable_wal); + opt.ignore_missing_column_families = THDVAR(thd, write_ignore_missing_column_families); return opt; @@ -1312,104 +1283,93 @@ static rocksdb::WriteOptions rdb_get_rocksdb_write_options( Function we use in the creation of our hash to get key. */ -uchar* Rdb_open_tables_map::get_hash_key( - Rdb_table_handler* const table_handler, size_t* const length, - my_bool not_used __attribute__((__unused__))) -{ - *length= table_handler->m_table_name_length; - return reinterpret_cast<uchar*>(table_handler->m_table_name); +uchar * +Rdb_open_tables_map::get_hash_key(Rdb_table_handler *const table_handler, + size_t *const length, + my_bool not_used MY_ATTRIBUTE((__unused__))) { + *length = table_handler->m_table_name_length; + return reinterpret_cast<uchar *>(table_handler->m_table_name); } - /* The following is needed as an argument for mysql_stage_register, irrespectively of whether we're compiling with P_S or not. 
*/ -PSI_stage_info stage_waiting_on_row_lock= { 0, "Waiting for row lock", 0}; +PSI_stage_info stage_waiting_on_row_lock = {0, "Waiting for row lock", 0}; #ifdef HAVE_PSI_INTERFACE static PSI_thread_key rdb_background_psi_thread_key; static PSI_thread_key rdb_drop_idx_psi_thread_key; -static PSI_stage_info *all_rocksdb_stages[]= -{ - & stage_waiting_on_row_lock -}; - +static PSI_stage_info *all_rocksdb_stages[] = {&stage_waiting_on_row_lock}; static my_core::PSI_mutex_key rdb_psi_open_tbls_mutex_key, - rdb_signal_bg_psi_mutex_key, rdb_signal_drop_idx_psi_mutex_key, - rdb_collation_data_mutex_key, - rdb_mem_cmp_space_mutex_key, - key_mutex_tx_list, rdb_sysvars_psi_mutex_key; - -static PSI_mutex_info all_rocksdb_mutexes[]= -{ - { &rdb_psi_open_tbls_mutex_key, "open tables", PSI_FLAG_GLOBAL}, - { &rdb_signal_bg_psi_mutex_key, "stop background", PSI_FLAG_GLOBAL}, - { &rdb_signal_drop_idx_psi_mutex_key, "signal drop index", PSI_FLAG_GLOBAL}, - { &rdb_collation_data_mutex_key, "collation data init", PSI_FLAG_GLOBAL}, - { &rdb_mem_cmp_space_mutex_key, "collation space char data init", - PSI_FLAG_GLOBAL}, - { &key_mutex_tx_list, "tx_list", PSI_FLAG_GLOBAL}, - { &rdb_sysvars_psi_mutex_key, "setting sysvar", PSI_FLAG_GLOBAL}, + rdb_signal_bg_psi_mutex_key, rdb_signal_drop_idx_psi_mutex_key, + rdb_collation_data_mutex_key, rdb_mem_cmp_space_mutex_key, + key_mutex_tx_list, rdb_sysvars_psi_mutex_key; + +static PSI_mutex_info all_rocksdb_mutexes[] = { + {&rdb_psi_open_tbls_mutex_key, "open tables", PSI_FLAG_GLOBAL}, + {&rdb_signal_bg_psi_mutex_key, "stop background", PSI_FLAG_GLOBAL}, + {&rdb_signal_drop_idx_psi_mutex_key, "signal drop index", PSI_FLAG_GLOBAL}, + {&rdb_collation_data_mutex_key, "collation data init", PSI_FLAG_GLOBAL}, + {&rdb_mem_cmp_space_mutex_key, "collation space char data init", + PSI_FLAG_GLOBAL}, + {&key_mutex_tx_list, "tx_list", PSI_FLAG_GLOBAL}, + {&rdb_sysvars_psi_mutex_key, "setting sysvar", PSI_FLAG_GLOBAL}, }; static PSI_rwlock_key 
key_rwlock_collation_exception_list; static PSI_rwlock_key key_rwlock_read_free_rpl_tables; static PSI_rwlock_key key_rwlock_skip_unique_check_tables; -static PSI_rwlock_info all_rocksdb_rwlocks[]= -{ - { &key_rwlock_collation_exception_list, "collation_exception_list", - PSI_FLAG_GLOBAL}, - { &key_rwlock_read_free_rpl_tables, "read_free_rpl_tables", PSI_FLAG_GLOBAL}, - { &key_rwlock_skip_unique_check_tables, "skip_unique_check_tables", - PSI_FLAG_GLOBAL}, +static PSI_rwlock_info all_rocksdb_rwlocks[] = { + {&key_rwlock_collation_exception_list, "collation_exception_list", + PSI_FLAG_GLOBAL}, + {&key_rwlock_read_free_rpl_tables, "read_free_rpl_tables", PSI_FLAG_GLOBAL}, + {&key_rwlock_skip_unique_check_tables, "skip_unique_check_tables", + PSI_FLAG_GLOBAL}, }; PSI_cond_key rdb_signal_bg_psi_cond_key, rdb_signal_drop_idx_psi_cond_key; -static PSI_cond_info all_rocksdb_conds[]= -{ - { &rdb_signal_bg_psi_cond_key, "cond signal background", PSI_FLAG_GLOBAL}, - { &rdb_signal_drop_idx_psi_cond_key, "cond signal drop index", - PSI_FLAG_GLOBAL}, +static PSI_cond_info all_rocksdb_conds[] = { + {&rdb_signal_bg_psi_cond_key, "cond signal background", PSI_FLAG_GLOBAL}, + {&rdb_signal_drop_idx_psi_cond_key, "cond signal drop index", + PSI_FLAG_GLOBAL}, }; -static PSI_thread_info all_rocksdb_threads[]= -{ - { &rdb_background_psi_thread_key, "background", PSI_FLAG_GLOBAL}, - { &rdb_drop_idx_psi_thread_key, "drop index", PSI_FLAG_GLOBAL}, +static PSI_thread_info all_rocksdb_threads[] = { + {&rdb_background_psi_thread_key, "background", PSI_FLAG_GLOBAL}, + {&rdb_drop_idx_psi_thread_key, "drop index", PSI_FLAG_GLOBAL}, }; -static void init_rocksdb_psi_keys() -{ - const char* const category= "rocksdb"; +static void init_rocksdb_psi_keys() { + const char *const category = "rocksdb"; int count; if (PSI_server == nullptr) return; - count= array_elements(all_rocksdb_mutexes); + count = array_elements(all_rocksdb_mutexes); PSI_server->register_mutex(category, all_rocksdb_mutexes, count); 
- count= array_elements(all_rocksdb_rwlocks); + count = array_elements(all_rocksdb_rwlocks); PSI_server->register_rwlock(category, all_rocksdb_rwlocks, count); - count= array_elements(all_rocksdb_conds); - // TODO Disabling PFS for conditions due to the bug https://github.com/MySQLOnRocksDB/mysql-5.6/issues/92 + count = array_elements(all_rocksdb_conds); + // TODO Disabling PFS for conditions due to the bug + // https://github.com/MySQLOnRocksDB/mysql-5.6/issues/92 // PSI_server->register_cond(category, all_rocksdb_conds, count); - count= array_elements(all_rocksdb_stages); + count = array_elements(all_rocksdb_stages); mysql_stage_register(category, all_rocksdb_stages, count); - count= array_elements(all_rocksdb_threads); + count = array_elements(all_rocksdb_threads); mysql_thread_register(category, all_rocksdb_threads, count); } #endif - /* Drop index thread's control */ @@ -1417,21 +1377,18 @@ static void init_rocksdb_psi_keys() static Rdb_drop_index_thread rdb_drop_idx_thread; static void rocksdb_drop_index_wakeup_thread( - my_core::THD* const thd __attribute__((__unused__)), - struct st_mysql_sys_var* const var __attribute__((__unused__)), - void* const var_ptr __attribute__((__unused__)), - const void* const save) -{ - if (*static_cast<const bool*>(save)) { + my_core::THD *const thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { + if (*static_cast<const bool *>(save)) { rdb_drop_idx_thread.signal(); } } -static inline uint32_t rocksdb_perf_context_level(THD* const thd) -{ +static inline uint32_t rocksdb_perf_context_level(THD *const thd) { DBUG_ASSERT(thd != nullptr); - const int session_perf_context_level= THDVAR(thd, perf_context_level); + const int session_perf_context_level = THDVAR(thd, perf_context_level); if (session_perf_context_level > rocksdb::PerfLevel::kUninitialized) { return session_perf_context_level; } @@ -1441,7 +1398,7 @@ 
static inline uint32_t rocksdb_perf_context_level(THD* const thd) value. */ - const int global_perf_context_level= THDVAR(nullptr, perf_context_level); + const int global_perf_context_level = THDVAR(nullptr, perf_context_level); if (global_perf_context_level > rocksdb::PerfLevel::kUninitialized) { return global_perf_context_level; } @@ -1454,10 +1411,9 @@ static inline uint32_t rocksdb_perf_context_level(THD* const thd) Rdb_transaction::walk_tx_list() */ -interface Rdb_tx_list_walker -{ +interface Rdb_tx_list_walker { virtual ~Rdb_tx_list_walker() {} - virtual void process_tran(const Rdb_transaction* const) = 0; + virtual void process_tran(const Rdb_transaction *const) = 0; }; /* @@ -1465,74 +1421,68 @@ interface Rdb_tx_list_walker a snapshot gets created. */ -class Rdb_snapshot_notifier : public rocksdb::TransactionNotifier -{ - Rdb_transaction* m_owning_tx; +class Rdb_snapshot_notifier : public rocksdb::TransactionNotifier { + Rdb_transaction *m_owning_tx; void SnapshotCreated(const rocksdb::Snapshot *snapshot) override; - public: - Rdb_snapshot_notifier(const Rdb_snapshot_notifier&) = delete; - Rdb_snapshot_notifier& operator=(const Rdb_snapshot_notifier&) = delete; +public: + Rdb_snapshot_notifier(const Rdb_snapshot_notifier &) = delete; + Rdb_snapshot_notifier &operator=(const Rdb_snapshot_notifier &) = delete; - explicit Rdb_snapshot_notifier(Rdb_transaction* const owning_tx) - : m_owning_tx(owning_tx) {} + explicit Rdb_snapshot_notifier(Rdb_transaction *const owning_tx) + : m_owning_tx(owning_tx) {} // If the owning Rdb_transaction gets destructed we need to not reference // it anymore. - void detach() - { - m_owning_tx = nullptr; - } + void detach() { m_owning_tx = nullptr; } }; /* This is the base class for transactions when interacting with rocksdb. 
*/ -class Rdb_transaction -{ - protected: - ulonglong m_write_count= 0; - ulonglong m_lock_count= 0; +class Rdb_transaction { +protected: + ulonglong m_write_count = 0; + ulonglong m_lock_count = 0; - bool m_is_delayed_snapshot= false; - bool m_is_two_phase= false; + bool m_is_delayed_snapshot = false; + bool m_is_two_phase = false; - THD* m_thd= nullptr; + THD *m_thd = nullptr; rocksdb::ReadOptions m_read_opts; - static std::multiset<Rdb_transaction*> s_tx_list; + static std::multiset<Rdb_transaction *> s_tx_list; static mysql_mutex_t s_tx_list_mutex; - Rdb_io_perf* m_tbl_io_perf; + Rdb_io_perf *m_tbl_io_perf; - bool m_tx_read_only= false; + bool m_tx_read_only = false; int m_timeout_sec; /* Cached value of @@rocksdb_lock_wait_timeout */ /* Maximum number of locks the transaction can have */ ulonglong m_max_row_locks; - bool m_is_tx_failed= false; - bool m_rollback_only= false; + bool m_is_tx_failed = false; + bool m_rollback_only = false; std::shared_ptr<Rdb_snapshot_notifier> m_notifier; // This should be used only when updating binlog information. 
- virtual rocksdb::WriteBatchBase* get_write_batch()= 0; - virtual bool commit_no_binlog()= 0; - virtual rocksdb::Iterator *get_iterator( - const rocksdb::ReadOptions &options, - rocksdb::ColumnFamilyHandle* column_family)= 0; - + virtual rocksdb::WriteBatchBase *get_write_batch() = 0; + virtual bool commit_no_binlog() = 0; + virtual rocksdb::Iterator * + get_iterator(const rocksdb::ReadOptions &options, + rocksdb::ColumnFamilyHandle *column_family) = 0; public: - const char* m_mysql_log_file_name; + const char *m_mysql_log_file_name; my_off_t m_mysql_log_offset; - const char* m_mysql_gtid; - const char* m_mysql_max_gtid; + const char *m_mysql_gtid; + const char *m_mysql_max_gtid; String m_detailed_error; - int64_t m_snapshot_timestamp= 0; + int64_t m_snapshot_timestamp = 0; bool m_ddl_transaction; /* @@ -1541,19 +1491,16 @@ public: */ virtual bool is_writebatch_trx() const = 0; - static void init_mutex() - { + static void init_mutex() { mysql_mutex_init(key_mutex_tx_list, &s_tx_list_mutex, MY_MUTEX_INIT_FAST); } - static void term_mutex() - { + static void term_mutex() { DBUG_ASSERT(s_tx_list.size() == 0); mysql_mutex_destroy(&s_tx_list_mutex); } - static void walk_tx_list(Rdb_tx_list_walker* walker) - { + static void walk_tx_list(Rdb_tx_list_walker *walker) { DBUG_ASSERT(walker != nullptr); mysql_mutex_lock(&s_tx_list_mutex); @@ -1562,15 +1509,12 @@ public: mysql_mutex_unlock(&s_tx_list_mutex); } - int set_status_error(THD* const thd, const rocksdb::Status &s, - const Rdb_key_def& kd, - Rdb_tbl_def* const tbl_def) - { + int set_status_error(THD *const thd, const rocksdb::Status &s, + const Rdb_key_def &kd, Rdb_tbl_def *const tbl_def) { DBUG_ASSERT(!s.ok()); DBUG_ASSERT(tbl_def != nullptr); - if (s.IsTimedOut()) - { + if (s.IsTimedOut()) { /* SQL layer has weird expectations. 
If we return an error when doing a read in DELETE IGNORE, it will ignore the error ("because it's @@ -1580,51 +1524,44 @@ public: rollback before returning HA_ERR_LOCK_WAIT_TIMEOUT: */ my_core::thd_mark_transaction_to_rollback(thd, false /*just statement*/); - m_detailed_error.copy(timeout_message("index", - tbl_def->full_tablename().c_str(), - kd.get_name().c_str())); + m_detailed_error.copy(timeout_message( + "index", tbl_def->full_tablename().c_str(), kd.get_name().c_str())); return HA_ERR_LOCK_WAIT_TIMEOUT; } - if (s.IsDeadlock()) - { + if (s.IsDeadlock()) { my_core::thd_mark_transaction_to_rollback(thd, false /* just statement */); return HA_ERR_LOCK_DEADLOCK; - } - else if (s.IsBusy()) - { + } else if (s.IsBusy()) { rocksdb_snapshot_conflict_errors++; - if (rocksdb_print_snapshot_conflict_queries) - { + if (rocksdb_print_snapshot_conflict_queries) { char user_host_buff[MAX_USER_HOST_SIZE + 1]; make_user_name(thd, user_host_buff); // NO_LINT_DEBUG sql_print_warning("Got snapshot conflict errors: User: %s " - "Query: %s", user_host_buff, thd->query()); + "Query: %s", + user_host_buff, thd->query()); } return HA_ERR_LOCK_DEADLOCK; } - if (s.IsLockLimit()) - { + if (s.IsLockLimit()) { return HA_ERR_ROCKSDB_TOO_MANY_LOCKS; } - if (s.IsIOError() || s.IsCorruption()) - { + if (s.IsIOError() || s.IsCorruption()) { rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL); } my_error(ER_INTERNAL_ERROR, MYF(0), s.ToString().c_str()); return HA_ERR_INTERNAL_ERROR; } - THD* get_thd() const { return m_thd; } + THD *get_thd() const { return m_thd; } /* Used for tracking io_perf counters */ - void io_perf_start(Rdb_io_perf* const io_perf) - { + void io_perf_start(Rdb_io_perf *const io_perf) { /* Since perf_context is tracked per thread, it is difficult and expensive to maintain perf_context on a per table basis. Therefore, roll all @@ -1641,37 +1578,31 @@ public: gather stats during commit/rollback is needed. 
*/ if (m_tbl_io_perf == nullptr && - io_perf->start(rocksdb_perf_context_level(m_thd))) - { - m_tbl_io_perf= io_perf; + io_perf->start(rocksdb_perf_context_level(m_thd))) { + m_tbl_io_perf = io_perf; } } - void io_perf_end_and_record(void) - { - if (m_tbl_io_perf != nullptr) - { + void io_perf_end_and_record(void) { + if (m_tbl_io_perf != nullptr) { m_tbl_io_perf->end_and_record(rocksdb_perf_context_level(m_thd)); - m_tbl_io_perf= nullptr; + m_tbl_io_perf = nullptr; } } - void io_perf_end_and_record(Rdb_io_perf* const io_perf) - { - if (m_tbl_io_perf == io_perf) - { + void io_perf_end_and_record(Rdb_io_perf *const io_perf) { + if (m_tbl_io_perf == io_perf) { io_perf_end_and_record(); } } - void set_params(int timeout_sec_arg, int max_row_locks_arg) - { - m_timeout_sec= timeout_sec_arg; - m_max_row_locks= max_row_locks_arg; + void set_params(int timeout_sec_arg, int max_row_locks_arg) { + m_timeout_sec = timeout_sec_arg; + m_max_row_locks = max_row_locks_arg; set_lock_timeout(timeout_sec_arg); } - virtual void set_lock_timeout(int timeout_sec_arg)= 0; + virtual void set_lock_timeout(int timeout_sec_arg) = 0; ulonglong get_write_count() const { return m_write_count; } @@ -1679,64 +1610,53 @@ public: ulonglong get_lock_count() const { return m_lock_count; } - virtual void set_sync(bool sync)= 0; + virtual void set_sync(bool sync) = 0; - virtual void release_lock(rocksdb::ColumnFamilyHandle* const column_family, - const std::string& rowkey)= 0; + virtual void release_lock(rocksdb::ColumnFamilyHandle *const column_family, + const std::string &rowkey) = 0; - virtual bool prepare(const rocksdb::TransactionName& name)= 0; + virtual bool prepare(const rocksdb::TransactionName &name) = 0; - bool commit_or_rollback() - { + bool commit_or_rollback() { bool res; - if (m_is_tx_failed) - { + if (m_is_tx_failed) { rollback(); - res= false; - } - else - res= commit(); + res = false; + } else + res = commit(); return res; } - bool commit() - { - if (get_write_count() == 0) - { + 
bool commit() { + if (get_write_count() == 0) { rollback(); return false; - } - else if (m_rollback_only) - { - /* - Transactions marked as rollback_only are expected to be rolled back at - prepare(). But there are some exceptions like below that prepare() is - never called and commit() is called instead. - 1. Binlog is disabled - 2. No modification exists in binlog cache for the transaction (#195) - In both cases, rolling back transaction is safe. Nothing is written to - binlog. - */ + } else if (m_rollback_only) { + /* + Transactions marked as rollback_only are expected to be rolled back at + prepare(). But there are some exceptions like below that prepare() is + never called and commit() is called instead. + 1. Binlog is disabled + 2. No modification exists in binlog cache for the transaction (#195) + In both cases, rolling back transaction is safe. Nothing is written to + binlog. + */ my_printf_error(ER_UNKNOWN_ERROR, ERRSTR_ROLLBACK_ONLY, MYF(0)); rollback(); return true; - } - else - { + } else { my_core::thd_binlog_pos(m_thd, &m_mysql_log_file_name, &m_mysql_log_offset, &m_mysql_gtid, &m_mysql_max_gtid); - binlog_manager.update(m_mysql_log_file_name, - m_mysql_log_offset, + binlog_manager.update(m_mysql_log_file_name, m_mysql_log_offset, m_mysql_max_gtid, get_write_batch()); return commit_no_binlog(); } } - virtual void rollback()= 0; + virtual void rollback() = 0; - void snapshot_created(const rocksdb::Snapshot* const snapshot) - { + void snapshot_created(const rocksdb::Snapshot *const snapshot) { DBUG_ASSERT(snapshot != nullptr); m_read_opts.snapshot = snapshot; @@ -1744,31 +1664,25 @@ public: m_is_delayed_snapshot = false; } - virtual void acquire_snapshot(bool acquire_now)= 0; - virtual void release_snapshot()= 0; + virtual void acquire_snapshot(bool acquire_now) = 0; + virtual void release_snapshot() = 0; - bool has_snapshot() const - { - return m_read_opts.snapshot != nullptr; - } + bool has_snapshot() const { return m_read_opts.snapshot != nullptr; } 
- private: +private: // The tables we are currently loading. In a partitioned table this can // have more than one entry - std::vector<ha_rocksdb*> m_curr_bulk_load; + std::vector<ha_rocksdb *> m_curr_bulk_load; - public: - int finish_bulk_load() - { - int rc= 0; - - std::vector<ha_rocksdb*>::iterator it; - while ((it = m_curr_bulk_load.begin()) != m_curr_bulk_load.end()) - { - int rc2= (*it)->finalize_bulk_load(); - if (rc2 != 0 && rc == 0) - { - rc= rc2; +public: + int finish_bulk_load() { + int rc = 0; + + std::vector<ha_rocksdb *>::iterator it; + while ((it = m_curr_bulk_load.begin()) != m_curr_bulk_load.end()) { + int rc2 = (*it)->finalize_bulk_load(); + if (rc2 != 0 && rc == 0) { + rc = rc2; } } @@ -1777,8 +1691,7 @@ public: return rc; } - void start_bulk_load(ha_rocksdb* const bulk_load) - { + void start_bulk_load(ha_rocksdb *const bulk_load) { /* If we already have an open bulk load of a table and the name doesn't match the current one, close out the currently running one. This allows @@ -1788,22 +1701,18 @@ public: DBUG_ASSERT(bulk_load != nullptr); if (!m_curr_bulk_load.empty() && - !bulk_load->same_table(*m_curr_bulk_load[0])) - { - const auto res= finish_bulk_load(); + !bulk_load->same_table(*m_curr_bulk_load[0])) { + const auto res = finish_bulk_load(); SHIP_ASSERT(res == 0); } m_curr_bulk_load.push_back(bulk_load); } - void end_bulk_load(ha_rocksdb* const bulk_load) - { + void end_bulk_load(ha_rocksdb *const bulk_load) { for (auto it = m_curr_bulk_load.begin(); it != m_curr_bulk_load.end(); - it++) - { - if (*it == bulk_load) - { + it++) { + if (*it == bulk_load) { m_curr_bulk_load.erase(it); return; } @@ -1813,10 +1722,7 @@ public: SHIP_ASSERT(0); } - int num_ongoing_bulk_load() const - { - return m_curr_bulk_load.size(); - } + int num_ongoing_bulk_load() const { return m_curr_bulk_load.size(); } /* Flush the data accumulated so far. This assumes we're doing a bulk insert. 
@@ -1831,8 +1737,7 @@ public: Add test coverage for what happens when somebody attempts to do bulk inserts while inside a multi-statement transaction. */ - bool flush_batch() - { + bool flush_batch() { if (get_write_count() == 0) return false; @@ -1845,42 +1750,39 @@ public: return false; } - virtual rocksdb::Status put(rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key, - const rocksdb::Slice& value)= 0; - virtual rocksdb::Status delete_key( - rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key)= 0; - virtual rocksdb::Status single_delete( - rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key)= 0; + virtual rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, + const rocksdb::Slice &value) = 0; + virtual rocksdb::Status + delete_key(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key) = 0; + virtual rocksdb::Status + single_delete(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key) = 0; - virtual bool has_modifications() const= 0; + virtual bool has_modifications() const = 0; - virtual rocksdb::WriteBatchBase* get_indexed_write_batch()= 0; + virtual rocksdb::WriteBatchBase *get_indexed_write_batch() = 0; /* Return a WriteBatch that one can write to. The writes will skip any transaction locking. The writes will NOT be visible to the transaction. 
*/ - rocksdb::WriteBatchBase* get_blind_write_batch() - { + rocksdb::WriteBatchBase *get_blind_write_batch() { return get_indexed_write_batch()->GetWriteBatch(); } - virtual rocksdb::Status get(rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key, - std::string* value) const= 0; - virtual rocksdb::Status get_for_update( - rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key, std::string* const value, bool exclusive)= 0; + virtual rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, + std::string *value) const = 0; + virtual rocksdb::Status + get_for_update(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, std::string *const value, + bool exclusive) = 0; - rocksdb::Iterator *get_iterator( - rocksdb::ColumnFamilyHandle* const column_family, - bool skip_bloom_filter, - bool fill_cache, - bool read_current= false, - bool create_snapshot= true) - { + rocksdb::Iterator * + get_iterator(rocksdb::ColumnFamilyHandle *const column_family, + bool skip_bloom_filter, bool fill_cache, + bool read_current = false, bool create_snapshot = true) { // Make sure we are not doing both read_current (which implies we don't // want a snapshot) and create_snapshot which makes sure we create // a snapshot @@ -1890,56 +1792,48 @@ public: if (create_snapshot) acquire_snapshot(true); - rocksdb::ReadOptions options= m_read_opts; + rocksdb::ReadOptions options = m_read_opts; - if (skip_bloom_filter) - { - options.total_order_seek= true; - } - else - { + if (skip_bloom_filter) { + options.total_order_seek = true; + } else { // With this option, Iterator::Valid() returns false if key // is outside of the prefix bloom filter range set at Seek(). // Must not be set to true if not using bloom filter. 
- options.prefix_same_as_start= true; + options.prefix_same_as_start = true; } - options.fill_cache= fill_cache; - if (read_current) - { - options.snapshot= nullptr; + options.fill_cache = fill_cache; + if (read_current) { + options.snapshot = nullptr; } return get_iterator(options, column_family); } - virtual bool is_tx_started() const= 0; - virtual void start_tx()= 0; - virtual void start_stmt()= 0; - virtual void rollback_stmt()= 0; + virtual bool is_tx_started() const = 0; + virtual void start_tx() = 0; + virtual void start_stmt() = 0; + virtual void rollback_stmt() = 0; - void set_tx_failed(bool failed_arg) { m_is_tx_failed= failed_arg; } + void set_tx_failed(bool failed_arg) { m_is_tx_failed = failed_arg; } - bool can_prepare() const - { - if (m_rollback_only) - { + bool can_prepare() const { + if (m_rollback_only) { my_printf_error(ER_UNKNOWN_ERROR, ERRSTR_ROLLBACK_ONLY, MYF(0)); return false; } return true; } - int rollback_to_savepoint(void* const savepoint) - { - if (has_modifications()) - { + int rollback_to_savepoint(void *const savepoint) { + if (has_modifications()) { my_printf_error(ER_UNKNOWN_ERROR, "MyRocks currently does not support ROLLBACK TO " "SAVEPOINT if modifying rows.", MYF(0)); - m_rollback_only= true; - return 1; + m_rollback_only = true; + return HA_EXIT_FAILURE; } - return 0; + return HA_EXIT_SUCCESS; } /* @@ -1948,23 +1842,14 @@ public: snapshot has to be created via DB::GetSnapshot(), not via Transaction API. 
*/ - bool is_tx_read_only() const - { - return m_tx_read_only; - } + bool is_tx_read_only() const { return m_tx_read_only; } - bool is_two_phase() const - { - return m_is_two_phase; - } + bool is_two_phase() const { return m_is_two_phase; } - void set_tx_read_only(bool val) - { - m_tx_read_only= val; - } + void set_tx_read_only(bool val) { m_tx_read_only = val; } - explicit Rdb_transaction(THD* const thd): m_thd(thd), m_tbl_io_perf(nullptr) - { + explicit Rdb_transaction(THD *const thd) + : m_thd(thd), m_tbl_io_perf(nullptr) { mysql_mutex_lock(&s_tx_list_mutex); s_tx_list.insert(this); mysql_mutex_unlock(&s_tx_list_mutex); @@ -1986,92 +1871,79 @@ public: The changes are made to individual tables, which store them here and then this object commits them on commit. */ -class Rdb_transaction_impl : public Rdb_transaction -{ - rocksdb::Transaction *m_rocksdb_tx= nullptr; - rocksdb::Transaction *m_rocksdb_reuse_tx= nullptr; +class Rdb_transaction_impl : public Rdb_transaction { + rocksdb::Transaction *m_rocksdb_tx = nullptr; + rocksdb::Transaction *m_rocksdb_reuse_tx = nullptr; - public: - void set_lock_timeout(int timeout_sec_arg) override - { +public: + void set_lock_timeout(int timeout_sec_arg) override { if (m_rocksdb_tx) m_rocksdb_tx->SetLockTimeout(rdb_convert_sec_to_ms(m_timeout_sec)); } - void set_sync(bool sync) override - { - m_rocksdb_tx->GetWriteOptions()->sync= sync; + void set_sync(bool sync) override { + m_rocksdb_tx->GetWriteOptions()->sync = sync; } - void release_lock(rocksdb::ColumnFamilyHandle* const column_family, - const std::string &rowkey) override - { - if (!THDVAR(m_thd, lock_scanned_rows)) - { + void release_lock(rocksdb::ColumnFamilyHandle *const column_family, + const std::string &rowkey) override { + if (!THDVAR(m_thd, lock_scanned_rows)) { m_rocksdb_tx->UndoGetForUpdate(column_family, rocksdb::Slice(rowkey)); } } virtual bool is_writebatch_trx() const override { return false; } - private: - void release_tx(void) - { +private: + void 
release_tx(void) { // We are done with the current active transaction object. Preserve it // for later reuse. DBUG_ASSERT(m_rocksdb_reuse_tx == nullptr); - m_rocksdb_reuse_tx= m_rocksdb_tx; - m_rocksdb_tx= nullptr; + m_rocksdb_reuse_tx = m_rocksdb_tx; + m_rocksdb_tx = nullptr; } - bool prepare(const rocksdb::TransactionName& name) override - { + bool prepare(const rocksdb::TransactionName &name) override { rocksdb::Status s; - s= m_rocksdb_tx->SetName(name); - if (!s.ok()) - { + s = m_rocksdb_tx->SetName(name); + if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); return false; } - s= m_rocksdb_tx->Prepare(); - if (!s.ok()) - { + s = m_rocksdb_tx->Prepare(); + if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); return false; } return true; } - bool commit_no_binlog() override - { - bool res= false; + bool commit_no_binlog() override { + bool res = false; release_snapshot(); - const rocksdb::Status s= m_rocksdb_tx->Commit(); - if (!s.ok()) - { + const rocksdb::Status s = m_rocksdb_tx->Commit(); + if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); - res= true; + res = true; } /* Save the transaction object to be reused */ release_tx(); - m_write_count= 0; - m_lock_count= 0; + m_write_count = 0; + m_lock_count = 0; set_tx_read_only(false); - m_rollback_only= false; + m_rollback_only = false; return res; } - public: - void rollback() override - { - m_write_count= 0; - m_lock_count= 0; - m_ddl_transaction= false; - if (m_rocksdb_tx) - { +public: + void rollback() override { + m_write_count = 0; + m_lock_count = 0; + m_ddl_transaction = false; + if (m_rocksdb_tx) { release_snapshot(); /* This will also release all of the locks: */ m_rocksdb_tx->Rollback(); @@ -2080,41 +1952,33 @@ class Rdb_transaction_impl : public Rdb_transaction release_tx(); set_tx_read_only(false); - m_rollback_only= false; + m_rollback_only = false; } } - void acquire_snapshot(bool acquire_now) override - { + void acquire_snapshot(bool acquire_now) override { if 
(m_read_opts.snapshot == nullptr) { if (is_tx_read_only()) { snapshot_created(rdb->GetSnapshot()); - } - else if (acquire_now) { + } else if (acquire_now) { m_rocksdb_tx->SetSnapshot(); snapshot_created(m_rocksdb_tx->GetSnapshot()); - } - else if (!m_is_delayed_snapshot) { + } else if (!m_is_delayed_snapshot) { m_rocksdb_tx->SetSnapshotOnNextOperation(m_notifier); m_is_delayed_snapshot = true; } } } - void release_snapshot() override - { + void release_snapshot() override { bool need_clear = m_is_delayed_snapshot; - if (m_read_opts.snapshot != nullptr) - { + if (m_read_opts.snapshot != nullptr) { m_snapshot_timestamp = 0; - if (is_tx_read_only()) - { + if (is_tx_read_only()) { rdb->ReleaseSnapshot(m_read_opts.snapshot); need_clear = false; - } - else - { + } else { need_clear = true; } m_read_opts.snapshot = nullptr; @@ -2124,15 +1988,11 @@ class Rdb_transaction_impl : public Rdb_transaction m_rocksdb_tx->ClearSnapshot(); } - bool has_snapshot() - { - return m_read_opts.snapshot != nullptr; - } + bool has_snapshot() { return m_read_opts.snapshot != nullptr; } - rocksdb::Status put(rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key, - const rocksdb::Slice& value) override - { + rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, + const rocksdb::Slice &value) override { ++m_write_count; ++m_lock_count; if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) @@ -2140,9 +2000,8 @@ class Rdb_transaction_impl : public Rdb_transaction return m_rocksdb_tx->Put(column_family, key, value); } - rocksdb::Status delete_key(rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key) override - { + rocksdb::Status delete_key(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key) override { ++m_write_count; ++m_lock_count; if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) @@ -2150,10 +2009,9 @@ class Rdb_transaction_impl : public 
Rdb_transaction return m_rocksdb_tx->Delete(column_family, key); } - rocksdb::Status single_delete( - rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key) override - { + rocksdb::Status + single_delete(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key) override { ++m_write_count; ++m_lock_count; if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) @@ -2161,17 +2019,14 @@ class Rdb_transaction_impl : public Rdb_transaction return m_rocksdb_tx->SingleDelete(column_family, key); } - bool has_modifications() const override - { + bool has_modifications() const override { return m_rocksdb_tx->GetWriteBatch() && m_rocksdb_tx->GetWriteBatch()->GetWriteBatch() && m_rocksdb_tx->GetWriteBatch()->GetWriteBatch()->Count() > 0; } - rocksdb::WriteBatchBase* get_write_batch() override - { - if (is_two_phase()) - { + rocksdb::WriteBatchBase *get_write_batch() override { + if (is_two_phase()) { return m_rocksdb_tx->GetCommitTimeWriteBatch(); } return m_rocksdb_tx->GetWriteBatch()->GetWriteBatch(); @@ -2181,24 +2036,21 @@ class Rdb_transaction_impl : public Rdb_transaction Return a WriteBatch that one can write to. The writes will skip any transaction locking. The writes WILL be visible to the transaction. 
*/ - rocksdb::WriteBatchBase* get_indexed_write_batch() override - { + rocksdb::WriteBatchBase *get_indexed_write_batch() override { ++m_write_count; return m_rocksdb_tx->GetWriteBatch(); } - rocksdb::Status get(rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key, - std::string* value) const override - { + rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, + std::string *value) const override { return m_rocksdb_tx->Get(m_read_opts, column_family, key, value); } - rocksdb::Status get_for_update( - rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key, - std::string* const value, bool exclusive) override - { + rocksdb::Status + get_for_update(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, std::string *const value, + bool exclusive) override { if (++m_lock_count > m_max_row_locks) return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); @@ -2206,45 +2058,40 @@ class Rdb_transaction_impl : public Rdb_transaction exclusive); } - rocksdb::Iterator *get_iterator(const rocksdb::ReadOptions &options, - rocksdb::ColumnFamilyHandle* const column_family) - override - { + rocksdb::Iterator * + get_iterator(const rocksdb::ReadOptions &options, + rocksdb::ColumnFamilyHandle *const column_family) override { return m_rocksdb_tx->GetIterator(options, column_family); } - const rocksdb::Transaction* get_rdb_trx() const { return m_rocksdb_tx; } + const rocksdb::Transaction *get_rdb_trx() const { return m_rocksdb_tx; } - bool is_tx_started() const override - { - return (m_rocksdb_tx != nullptr); - } + bool is_tx_started() const override { return (m_rocksdb_tx != nullptr); } - void start_tx() override - { + void start_tx() override { rocksdb::TransactionOptions tx_opts; rocksdb::WriteOptions write_opts; - tx_opts.set_snapshot= false; - tx_opts.lock_timeout= rdb_convert_sec_to_ms(m_timeout_sec); - tx_opts.deadlock_detect= THDVAR(m_thd, deadlock_detect); + 
tx_opts.set_snapshot = false; + tx_opts.lock_timeout = rdb_convert_sec_to_ms(m_timeout_sec); + tx_opts.deadlock_detect = THDVAR(m_thd, deadlock_detect); - write_opts.sync= THDVAR(m_thd, write_sync); - write_opts.disableWAL= THDVAR(m_thd, write_disable_wal); - write_opts.ignore_missing_column_families= - THDVAR(m_thd, write_ignore_missing_column_families); - m_is_two_phase= !rocksdb_disable_2pc; + write_opts.sync = THDVAR(m_thd, write_sync); + write_opts.disableWAL = THDVAR(m_thd, write_disable_wal); + write_opts.ignore_missing_column_families = + THDVAR(m_thd, write_ignore_missing_column_families); + m_is_two_phase = rocksdb_enable_2pc; /* If m_rocksdb_reuse_tx is null this will create a new transaction object. Otherwise it will reuse the existing one. */ - m_rocksdb_tx= rdb->BeginTransaction(write_opts, tx_opts, - m_rocksdb_reuse_tx); - m_rocksdb_reuse_tx= nullptr; + m_rocksdb_tx = + rdb->BeginTransaction(write_opts, tx_opts, m_rocksdb_reuse_tx); + m_rocksdb_reuse_tx = nullptr; - m_read_opts= rocksdb::ReadOptions(); + m_read_opts = rocksdb::ReadOptions(); - m_ddl_transaction= false; + m_ddl_transaction = false; } /* @@ -2256,8 +2103,7 @@ class Rdb_transaction_impl : public Rdb_transaction For hooking to start of statement that is its own transaction, see ha_rocksdb::external_lock(). 
*/ - void start_stmt() override - { + void start_stmt() override { // Set the snapshot to delayed acquisition (SetSnapshotOnNextOperation) acquire_snapshot(false); m_rocksdb_tx->SetSavePoint(); @@ -2267,17 +2113,14 @@ class Rdb_transaction_impl : public Rdb_transaction This must be called when last statement is rolled back, but the transaction continues */ - void rollback_stmt() override - { + void rollback_stmt() override { /* TODO: here we must release the locks taken since the start_stmt() call */ - if (m_rocksdb_tx) - { - const rocksdb::Snapshot* const org_snapshot = m_rocksdb_tx->GetSnapshot(); + if (m_rocksdb_tx) { + const rocksdb::Snapshot *const org_snapshot = m_rocksdb_tx->GetSnapshot(); m_rocksdb_tx->RollbackToSavePoint(); - const rocksdb::Snapshot* const cur_snapshot = m_rocksdb_tx->GetSnapshot(); - if (org_snapshot != cur_snapshot) - { + const rocksdb::Snapshot *const cur_snapshot = m_rocksdb_tx->GetSnapshot(); + if (org_snapshot != cur_snapshot) { if (org_snapshot != nullptr) m_snapshot_timestamp = 0; @@ -2290,15 +2133,13 @@ class Rdb_transaction_impl : public Rdb_transaction } } - explicit Rdb_transaction_impl(THD* const thd) : - Rdb_transaction(thd), m_rocksdb_tx(nullptr) - { + explicit Rdb_transaction_impl(THD *const thd) + : Rdb_transaction(thd), m_rocksdb_tx(nullptr) { // Create a notifier that can be called when a snapshot gets generated. m_notifier = std::make_shared<Rdb_snapshot_notifier>(this); } - virtual ~Rdb_transaction_impl() - { + virtual ~Rdb_transaction_impl() { rollback(); // Theoretically the notifier could outlive the Rdb_transaction_impl @@ -2320,90 +2161,75 @@ class Rdb_transaction_impl : public Rdb_transaction to be non-conflicting. Any further usage of this class should completely be thought thoroughly. 
*/ -class Rdb_writebatch_impl : public Rdb_transaction -{ - rocksdb::WriteBatchWithIndex* m_batch; +class Rdb_writebatch_impl : public Rdb_transaction { + rocksdb::WriteBatchWithIndex *m_batch; rocksdb::WriteOptions write_opts; // Called after commit/rollback. - void reset() - { + void reset() { m_batch->Clear(); m_read_opts = rocksdb::ReadOptions(); - m_ddl_transaction= false; - } - private: - bool prepare(const rocksdb::TransactionName& name) override - { - return true; + m_ddl_transaction = false; } - bool commit_no_binlog() override - { - bool res= false; +private: + bool prepare(const rocksdb::TransactionName &name) override { return true; } + + bool commit_no_binlog() override { + bool res = false; release_snapshot(); - const rocksdb::Status s= rdb->GetBaseDB()->Write(write_opts, - m_batch->GetWriteBatch()); - if (!s.ok()) - { + const rocksdb::Status s = + rdb->GetBaseDB()->Write(write_opts, m_batch->GetWriteBatch()); + if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); - res= true; + res = true; } reset(); - m_write_count= 0; + m_write_count = 0; set_tx_read_only(false); - m_rollback_only= false; + m_rollback_only = false; return res; } - public: + +public: bool is_writebatch_trx() const override { return true; } - void set_lock_timeout(int timeout_sec_arg) override - { + void set_lock_timeout(int timeout_sec_arg) override { // Nothing to do here. } - void set_sync(bool sync) override - { - write_opts.sync= sync; - } + void set_sync(bool sync) override { write_opts.sync = sync; } - void release_lock(rocksdb::ColumnFamilyHandle* const column_family, - const std::string &rowkey) override - { + void release_lock(rocksdb::ColumnFamilyHandle *const column_family, + const std::string &rowkey) override { // Nothing to do here since we don't hold any row locks. 
} - void rollback() override - { - m_write_count= 0; - m_lock_count= 0; + void rollback() override { + m_write_count = 0; + m_lock_count = 0; release_snapshot(); reset(); set_tx_read_only(false); - m_rollback_only= false; + m_rollback_only = false; } - void acquire_snapshot(bool acquire_now) override - { + void acquire_snapshot(bool acquire_now) override { if (m_read_opts.snapshot == nullptr) snapshot_created(rdb->GetSnapshot()); } - void release_snapshot() override - { - if (m_read_opts.snapshot != nullptr) - { + void release_snapshot() override { + if (m_read_opts.snapshot != nullptr) { rdb->ReleaseSnapshot(m_read_opts.snapshot); m_read_opts.snapshot = nullptr; } } - rocksdb::Status put(rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key, - const rocksdb::Slice& value) override - { + rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, + const rocksdb::Slice &value) override { ++m_write_count; m_batch->Put(column_family, key, value); // Note Put/Delete in write batch doesn't return any error code. 
We simply @@ -2411,180 +2237,145 @@ class Rdb_writebatch_impl : public Rdb_transaction return rocksdb::Status::OK(); } - rocksdb::Status delete_key(rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key) override - { + rocksdb::Status delete_key(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key) override { ++m_write_count; m_batch->Delete(column_family, key); return rocksdb::Status::OK(); } - rocksdb::Status single_delete( - rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key) override - { + rocksdb::Status + single_delete(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key) override { ++m_write_count; m_batch->SingleDelete(column_family, key); return rocksdb::Status::OK(); } - bool has_modifications() const override - { + bool has_modifications() const override { return m_batch->GetWriteBatch()->Count() > 0; } - rocksdb::WriteBatchBase* get_write_batch() override - { - return m_batch; - } + rocksdb::WriteBatchBase *get_write_batch() override { return m_batch; } - rocksdb::WriteBatchBase* get_indexed_write_batch() override - { + rocksdb::WriteBatchBase *get_indexed_write_batch() override { ++m_write_count; return m_batch; } - rocksdb::Status get(rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key, - std::string* const value) const override - { - return m_batch->GetFromBatchAndDB( - rdb, m_read_opts, column_family, key, value); + rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, + std::string *const value) const override { + return m_batch->GetFromBatchAndDB(rdb, m_read_opts, column_family, key, + value); } - rocksdb::Status get_for_update( - rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key, - std::string* const value, bool exclusive) override - { + rocksdb::Status + get_for_update(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, 
std::string *const value, + bool exclusive) override { return get(column_family, key, value); } - rocksdb::Iterator *get_iterator(const rocksdb::ReadOptions &options, - rocksdb::ColumnFamilyHandle* const column_family) - override - { + rocksdb::Iterator * + get_iterator(const rocksdb::ReadOptions &options, + rocksdb::ColumnFamilyHandle *const column_family) override { const auto it = rdb->NewIterator(options); return m_batch->NewIteratorWithBase(it); } - bool is_tx_started() const override - { - return (m_batch != nullptr); - } + bool is_tx_started() const override { return (m_batch != nullptr); } - void start_tx() override - { + void start_tx() override { reset(); - write_opts.sync= THDVAR(m_thd, write_sync); - write_opts.disableWAL= THDVAR(m_thd, write_disable_wal); - write_opts.ignore_missing_column_families= - THDVAR(m_thd, write_ignore_missing_column_families); + write_opts.sync = THDVAR(m_thd, write_sync); + write_opts.disableWAL = THDVAR(m_thd, write_disable_wal); + write_opts.ignore_missing_column_families = + THDVAR(m_thd, write_ignore_missing_column_families); } - void start_stmt() override - { - m_batch->SetSavePoint(); - } + void start_stmt() override { m_batch->SetSavePoint(); } - void rollback_stmt() override - { + void rollback_stmt() override { if (m_batch) m_batch->RollbackToSavePoint(); } - explicit Rdb_writebatch_impl(THD* const thd) : - Rdb_transaction(thd), m_batch(nullptr) - { - m_batch = new rocksdb::WriteBatchWithIndex(rocksdb::BytewiseComparator(), - 0 , true); + explicit Rdb_writebatch_impl(THD *const thd) + : Rdb_transaction(thd), m_batch(nullptr) { + m_batch = new rocksdb::WriteBatchWithIndex(rocksdb::BytewiseComparator(), 0, + true); } - virtual ~Rdb_writebatch_impl() - { + virtual ~Rdb_writebatch_impl() { rollback(); delete m_batch; } }; void Rdb_snapshot_notifier::SnapshotCreated( - const rocksdb::Snapshot* const snapshot) -{ - if (m_owning_tx != nullptr) - { + const rocksdb::Snapshot *const snapshot) { + if (m_owning_tx != nullptr) { 
m_owning_tx->snapshot_created(snapshot); } } -std::multiset<Rdb_transaction*> Rdb_transaction::s_tx_list; +std::multiset<Rdb_transaction *> Rdb_transaction::s_tx_list; mysql_mutex_t Rdb_transaction::s_tx_list_mutex; -static Rdb_transaction* &get_tx_from_thd(THD* const thd) -{ - return *reinterpret_cast<Rdb_transaction**>( - my_core::thd_ha_data(thd, rocksdb_hton)); +static Rdb_transaction *&get_tx_from_thd(THD *const thd) { + return *reinterpret_cast<Rdb_transaction **>( + my_core::thd_ha_data(thd, rocksdb_hton)); } namespace { -class Rdb_perf_context_guard -{ +class Rdb_perf_context_guard { Rdb_io_perf m_io_perf; THD *m_thd; - public: - Rdb_perf_context_guard(const Rdb_perf_context_guard&) = delete; - Rdb_perf_context_guard& operator=(const Rdb_perf_context_guard&) = delete; +public: + Rdb_perf_context_guard(const Rdb_perf_context_guard &) = delete; + Rdb_perf_context_guard &operator=(const Rdb_perf_context_guard &) = delete; - explicit Rdb_perf_context_guard(THD* const thd) : m_thd(thd) - { - Rdb_transaction*& tx= get_tx_from_thd(m_thd); + explicit Rdb_perf_context_guard(THD *const thd) : m_thd(thd) { + Rdb_transaction *&tx = get_tx_from_thd(m_thd); /* if perf_context information is already being recorded, this becomes a no-op */ - if (tx != nullptr) - { + if (tx != nullptr) { tx->io_perf_start(&m_io_perf); } } - ~Rdb_perf_context_guard() - { - Rdb_transaction*& tx= get_tx_from_thd(m_thd); - if (tx != nullptr) - { + ~Rdb_perf_context_guard() { + Rdb_transaction *&tx = get_tx_from_thd(m_thd); + if (tx != nullptr) { tx->io_perf_end_and_record(); } } }; -} // anonymous namespace +} // anonymous namespace /* TODO: maybe, call this in external_lock() and store in ha_rocksdb.. */ -static Rdb_transaction *get_or_create_tx(THD* const thd) -{ - Rdb_transaction*& tx= get_tx_from_thd(thd); +static Rdb_transaction *get_or_create_tx(THD *const thd) { + Rdb_transaction *&tx = get_tx_from_thd(thd); // TODO: this is called too many times.. 
O(#rows) - if (tx == nullptr) - { - if (rpl_skip_tx_api_var && thd->rli_slave) - tx= new Rdb_writebatch_impl(thd); + if (tx == nullptr) { + if (rpl_skip_tx_api && thd->rli_slave) + tx = new Rdb_writebatch_impl(thd); else - tx= new Rdb_transaction_impl(thd); - tx->set_params(THDVAR(thd, lock_wait_timeout), - THDVAR(thd, max_row_locks)); + tx = new Rdb_transaction_impl(thd); + tx->set_params(THDVAR(thd, lock_wait_timeout), THDVAR(thd, max_row_locks)); tx->start_tx(); - } - else - { - tx->set_params(THDVAR(thd, lock_wait_timeout), - THDVAR(thd, max_row_locks)); - if (!tx->is_tx_started()) - { + } else { + tx->set_params(THDVAR(thd, lock_wait_timeout), THDVAR(thd, max_row_locks)); + if (!tx->is_tx_started()) { tx->start_tx(); } } @@ -2592,33 +2383,29 @@ static Rdb_transaction *get_or_create_tx(THD* const thd) return tx; } - -static int rocksdb_close_connection(handlerton* const hton, THD* const thd) -{ - Rdb_transaction*& tx= get_tx_from_thd(thd); - if (tx != nullptr) - { - int rc= tx->finish_bulk_load(); - if (rc != 0) - { +static int rocksdb_close_connection(handlerton *const hton, THD *const thd) { + Rdb_transaction *&tx = get_tx_from_thd(thd); + if (tx != nullptr) { + int rc = tx->finish_bulk_load(); + if (rc != 0) { // NO_LINT_DEBUG sql_print_error("RocksDB: Error %d finalizing last SST file while " - "disconnecting", rc); + "disconnecting", + rc); abort_with_stack_traces(); } delete tx; - tx= nullptr; + tx = nullptr; } - return 0; + return HA_EXIT_SUCCESS; } /* * Serializes an xid to a string so that it can * be used as a rocksdb transaction name */ -static std::string rdb_xid_to_string(const XID& src) -{ +static std::string rdb_xid_to_string(const XID &src) { DBUG_ASSERT(src.gtrid_length >= 0 && src.gtrid_length <= MAXGTRIDSIZE); DBUG_ASSERT(src.bqual_length >= 0 && src.bqual_length <= MAXBQUALSIZE); @@ -2630,10 +2417,10 @@ static std::string rdb_xid_to_string(const XID& src) * then reinterpret bit pattern as unsigned and store in network order */ uchar 
fidbuf[RDB_FORMATID_SZ]; - int64 signed_fid8= src.formatID; - const uint64 raw_fid8= *reinterpret_cast<uint64*>(&signed_fid8); + int64 signed_fid8 = src.formatID; + const uint64 raw_fid8 = *reinterpret_cast<uint64 *>(&signed_fid8); rdb_netbuf_store_uint64(fidbuf, raw_fid8); - buf.append(reinterpret_cast<const char*>(fidbuf), RDB_FORMATID_SZ); + buf.append(reinterpret_cast<const char *>(fidbuf), RDB_FORMATID_SZ); buf.push_back(src.gtrid_length); buf.push_back(src.bqual_length); @@ -2641,35 +2428,30 @@ static std::string rdb_xid_to_string(const XID& src) return buf; } - /** Called by hton->flush_logs after MySQL group commit prepares a set of transactions. */ -static bool rocksdb_flush_wal( - handlerton* const hton __attribute__((__unused__)), - ulonglong target_lsn __attribute__((__unused__))) -{ +static bool rocksdb_flush_wal(handlerton *const hton MY_ATTRIBUTE((__unused__)), + ulonglong target_lsn MY_ATTRIBUTE((__unused__))) { DBUG_ASSERT(rdb != nullptr); rocksdb_wal_group_syncs++; - const rocksdb::Status s= rdb->SyncWAL(); + const rocksdb::Status s = rdb->SyncWAL(); if (!s.ok()) { - return 1; + return HA_EXIT_FAILURE; } - return 0; + return HA_EXIT_SUCCESS; } /** For a slave, prepare() updates the slave_gtid_info table which tracks the replication progress. 
*/ -static int rocksdb_prepare(handlerton* const hton, THD* const thd, - bool prepare_tx, bool async) -{ - Rdb_transaction*& tx= get_tx_from_thd(thd); - if (!tx->can_prepare()) - { - return 1; +static int rocksdb_prepare(handlerton *const hton, THD *const thd, + bool prepare_tx, bool async) { + Rdb_transaction *&tx = get_tx_from_thd(thd); + if (!tx->can_prepare()) { + return HA_EXIT_FAILURE; } if (prepare_tx || (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { @@ -2678,7 +2460,7 @@ static int rocksdb_prepare(handlerton* const hton, THD* const thd, std::vector<st_slave_gtid_info> slave_gtid_info; my_core::thd_slave_gtid_info(thd, &slave_gtid_info); for (const auto &it : slave_gtid_info) { - rocksdb::WriteBatchBase* const write_batch = tx->get_blind_write_batch(); + rocksdb::WriteBatchBase *const write_batch = tx->get_blind_write_batch(); binlog_manager.update_slave_gtid_info(it.id, it.db, it.gtid, write_batch); } @@ -2687,12 +2469,12 @@ static int rocksdb_prepare(handlerton* const hton, THD* const thd, tx->set_sync(false); } XID xid; - thd_get_xid(thd, reinterpret_cast<MYSQL_XID*>(&xid)); + thd_get_xid(thd, reinterpret_cast<MYSQL_XID *>(&xid)); if (!tx->prepare(rdb_xid_to_string(xid))) { - return 1; + return HA_EXIT_FAILURE; } - if (thd->durability_property == HA_IGNORE_DURABILITY - && THDVAR(thd, write_sync)) { + if (thd->durability_property == HA_IGNORE_DURABILITY && + THDVAR(thd, write_sync)) { /** we set the log sequence as '1' just to trigger hton->flush_logs */ @@ -2703,92 +2485,86 @@ static int rocksdb_prepare(handlerton* const hton, THD* const thd, DEBUG_SYNC(thd, "rocksdb.prepared"); } - return 0; + return HA_EXIT_SUCCESS; } /** do nothing for prepare/commit by xid this is needed to avoid crashes in XA scenarios */ -static int rocksdb_commit_by_xid(handlerton* const hton, XID* const xid) -{ - const auto name= rdb_xid_to_string(*xid); - rocksdb::Transaction* const trx= rdb->GetTransactionByName(name); +static int 
rocksdb_commit_by_xid(handlerton *const hton, XID *const xid) { + const auto name = rdb_xid_to_string(*xid); + rocksdb::Transaction *const trx = rdb->GetTransactionByName(name); if (trx == nullptr) { - return 1; + return HA_EXIT_FAILURE; } - const rocksdb::Status s= trx->Commit(); + const rocksdb::Status s = trx->Commit(); if (!s.ok()) { - return 1; + return HA_EXIT_FAILURE; } delete trx; - return 0; + return HA_EXIT_SUCCESS; } -static int rocksdb_rollback_by_xid( - handlerton* const hton __attribute__((__unused__)), - XID* const xid) -{ - const auto name= rdb_xid_to_string(*xid); - rocksdb::Transaction* const trx= rdb->GetTransactionByName(name); +static int +rocksdb_rollback_by_xid(handlerton *const hton MY_ATTRIBUTE((__unused__)), + XID *const xid) { + const auto name = rdb_xid_to_string(*xid); + rocksdb::Transaction *const trx = rdb->GetTransactionByName(name); if (trx == nullptr) { - return 1; + return HA_EXIT_FAILURE; } - const rocksdb::Status s= trx->Rollback(); + const rocksdb::Status s = trx->Rollback(); if (!s.ok()) { - return 1; + return HA_EXIT_FAILURE; } delete trx; - return 0; + return HA_EXIT_SUCCESS; } /** Rebuilds an XID from a serialized version stored in a string. 
*/ -static void rdb_xid_from_string(const std::string& src, XID* const dst) -{ +static void rdb_xid_from_string(const std::string &src, XID *const dst) { DBUG_ASSERT(dst != nullptr); - uint offset= 0; - uint64 raw_fid8= - rdb_netbuf_to_uint64(reinterpret_cast<const uchar*>(src.data())); - const int64 signed_fid8= *reinterpret_cast<int64*>(&raw_fid8); - dst->formatID= signed_fid8; + uint offset = 0; + uint64 raw_fid8 = + rdb_netbuf_to_uint64(reinterpret_cast<const uchar *>(src.data())); + const int64 signed_fid8 = *reinterpret_cast<int64 *>(&raw_fid8); + dst->formatID = signed_fid8; offset += RDB_FORMATID_SZ; - dst->gtrid_length= src.at(offset); + dst->gtrid_length = src.at(offset); offset += RDB_GTRID_SZ; - dst->bqual_length= src.at(offset); + dst->bqual_length = src.at(offset); offset += RDB_BQUAL_SZ; DBUG_ASSERT(dst->gtrid_length >= 0 && dst->gtrid_length <= MAXGTRIDSIZE); DBUG_ASSERT(dst->bqual_length >= 0 && dst->bqual_length <= MAXBQUALSIZE); - src.copy(dst->data, (dst->gtrid_length)+(dst->bqual_length), RDB_XIDHDR_LEN); + src.copy(dst->data, (dst->gtrid_length) + (dst->bqual_length), + RDB_XIDHDR_LEN); } /** Reading last committed binary log info from RocksDB system row. The info is needed for crash safe slave/master to work. 
*/ -static int rocksdb_recover(handlerton* const hton, XID* const xid_list, - uint len, char* const binlog_file, - my_off_t* const binlog_pos, - Gtid* const binlog_max_gtid) -{ - if (binlog_file && binlog_pos) - { - char file_buf[FN_REFLEN+1]= {0}; +static int rocksdb_recover(handlerton *const hton, XID *const xid_list, + uint len, char *const binlog_file, + my_off_t *const binlog_pos, + Gtid *const binlog_max_gtid) { + if (binlog_file && binlog_pos) { + char file_buf[FN_REFLEN + 1] = {0}; my_off_t pos; - char gtid_buf[FN_REFLEN+1]= {0}; - if (binlog_manager.read(file_buf, &pos, gtid_buf)) - { - if (is_binlog_advanced(binlog_file, *binlog_pos, file_buf, pos)) - { + char gtid_buf[FN_REFLEN + 1] = {0}; + if (binlog_manager.read(file_buf, &pos, gtid_buf)) { + if (is_binlog_advanced(binlog_file, *binlog_pos, file_buf, pos)) { memcpy(binlog_file, file_buf, FN_REFLEN + 1); - *binlog_pos= pos; + *binlog_pos = pos; fprintf(stderr, "RocksDB: Last binlog file position %llu," - " file name %s\n", pos, file_buf); - if (*gtid_buf) - { + " file name %s\n", + pos, file_buf); + if (*gtid_buf) { global_sid_lock->rdlock(); binlog_max_gtid->parse(global_sid_map, gtid_buf); global_sid_lock->unlock(); @@ -2798,44 +2574,41 @@ static int rocksdb_recover(handlerton* const hton, XID* const xid_list, } } - if (len == 0 || xid_list == nullptr) - { - return 0; + if (len == 0 || xid_list == nullptr) { + return HA_EXIT_SUCCESS; } - std::vector<rocksdb::Transaction*> trans_list; + std::vector<rocksdb::Transaction *> trans_list; rdb->GetAllPreparedTransactions(&trans_list); - uint count= 0; - for (auto& trans : trans_list) - { - if (count >= len) - { + uint count = 0; + for (auto &trans : trans_list) { + if (count >= len) { break; } - auto name= trans->GetName(); + auto name = trans->GetName(); rdb_xid_from_string(name, &xid_list[count]); count++; } return count; } -static int rocksdb_commit(handlerton* const hton, THD* const thd, - bool commit_tx, bool) -{ - DBUG_ENTER("rocksdb_commit"); +static 
int rocksdb_commit(handlerton *const hton, THD *const thd, + bool commit_tx, bool) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(hton != nullptr); + DBUG_ASSERT(thd != nullptr); /* this will trigger saving of perf_context information */ Rdb_perf_context_guard guard(thd); /* note: h->external_lock(F_UNLCK) is called after this function is called) */ - Rdb_transaction*& tx= get_tx_from_thd(thd); + Rdb_transaction *&tx = get_tx_from_thd(thd); - if (tx != nullptr) - { - if (commit_tx || - (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) - { + if (tx != nullptr) { + if (commit_tx || (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | + OPTION_BEGIN))) { /* We get here - For a COMMIT statement that finishes a multi-statement transaction @@ -2843,9 +2616,7 @@ static int rocksdb_commit(handlerton* const hton, THD* const thd, */ if (tx->commit()) DBUG_RETURN(HA_ERR_INTERNAL_ERROR); - } - else - { + } else { /* We get here when committing a statement within a transaction. @@ -2855,27 +2626,23 @@ static int rocksdb_commit(handlerton* const hton, THD* const thd, tx->set_tx_failed(false); } - if (my_core::thd_tx_isolation(thd) <= ISO_READ_COMMITTED) - { + if (my_core::thd_tx_isolation(thd) <= ISO_READ_COMMITTED) { // For READ_COMMITTED, we release any existing snapshot so that we will // see any changes that occurred since the last statement. tx->release_snapshot(); } } - DBUG_RETURN(0); -} + DBUG_RETURN(HA_EXIT_SUCCESS); +} -static int rocksdb_rollback(handlerton* const hton, THD* const thd, - bool rollback_tx) -{ +static int rocksdb_rollback(handlerton *const hton, THD *const thd, + bool rollback_tx) { Rdb_perf_context_guard guard(thd); - Rdb_transaction*& tx= get_tx_from_thd(thd); + Rdb_transaction *&tx = get_tx_from_thd(thd); - if (tx != nullptr) - { - if (rollback_tx) - { + if (tx != nullptr) { + if (rollback_tx) { /* We get here, when - ROLLBACK statement is issued. 
@@ -2883,9 +2650,7 @@ static int rocksdb_rollback(handlerton* const hton, THD* const thd, Discard the changes made by the transaction */ tx->rollback(); - } - else - { + } else { /* We get here when - a statement with AUTOCOMMIT=1 is being rolled back (because of some @@ -2897,34 +2662,27 @@ static int rocksdb_rollback(handlerton* const hton, THD* const thd, tx->set_tx_failed(true); } - if (my_core::thd_tx_isolation(thd) <= ISO_READ_COMMITTED) - { + if (my_core::thd_tx_isolation(thd) <= ISO_READ_COMMITTED) { // For READ_COMMITTED, we release any existing snapshot so that we will // see any changes that occurred since the last statement. tx->release_snapshot(); } } - return 0; + return HA_EXIT_SUCCESS; } -static bool print_stats(THD* const thd, - std::string const& type, - std::string const& name, - std::string const& status, - stat_print_fn *stat_print) -{ +static bool print_stats(THD *const thd, std::string const &type, + std::string const &name, std::string const &status, + stat_print_fn *stat_print) { return stat_print(thd, type.c_str(), type.size(), name.c_str(), name.size(), status.c_str(), status.size()); } -static std::string format_string( - const char* const format, - ...) -{ +static std::string format_string(const char *const format, ...) 
{ std::string res; - va_list args; - va_list args_copy; - char static_buff[256]; + va_list args; + va_list args_copy; + char static_buff[256]; DBUG_ASSERT(format != nullptr); @@ -2935,32 +2693,26 @@ static std::string format_string( int len = vsnprintf(nullptr, 0, format, args); va_end(args); - if (len < 0) - { + if (len < 0) { res = std::string("<format error>"); - } - else if (len == 0) - { + } else if (len == 0) { // Shortcut for an empty string res = std::string(""); - } - else - { + } else { // For short enough output use a static buffer - char* buff= static_buff; - std::unique_ptr<char[]> dynamic_buff= nullptr; + char *buff = static_buff; + std::unique_ptr<char[]> dynamic_buff = nullptr; - len++; // Add one for null terminator + len++; // Add one for null terminator // for longer output use an allocated buffer - if (static_cast<uint>(len) > sizeof(static_buff)) - { + if (static_cast<uint>(len) > sizeof(static_buff)) { dynamic_buff.reset(new char[len]); - buff= dynamic_buff.get(); + buff = dynamic_buff.get(); } // Now re-do the vsnprintf with the buffer which is now large enough - (void) vsnprintf(buff, len, format, args_copy); + (void)vsnprintf(buff, len, format, args_copy); // Convert to a std::string. 
Note we could have created a std::string // large enough and then converted the buffer to a 'char*' and created @@ -2975,13 +2727,11 @@ static std::string format_string( return res; } -class Rdb_snapshot_status : public Rdb_tx_list_walker -{ - private: +class Rdb_snapshot_status : public Rdb_tx_list_walker { +private: std::string m_data; - static std::string current_timestamp(void) - { + static std::string current_timestamp(void) { static const char *const format = "%d-%02d-%02d %02d:%02d:%02d"; time_t currtime; struct tm currtm; @@ -2995,53 +2745,46 @@ class Rdb_snapshot_status : public Rdb_tx_list_walker currtm.tm_sec); } - static std::string get_header(void) - { - return - "\n============================================================\n" + - current_timestamp() + - " ROCKSDB TRANSACTION MONITOR OUTPUT\n" - "============================================================\n" - "---------\n" - "SNAPSHOTS\n" - "---------\n" - "LIST OF SNAPSHOTS FOR EACH SESSION:\n"; - } - - static std::string get_footer(void) - { - return - "-----------------------------------------\n" - "END OF ROCKSDB TRANSACTION MONITOR OUTPUT\n" - "=========================================\n"; + static std::string get_header(void) { + return "\n============================================================\n" + + current_timestamp() + + " ROCKSDB TRANSACTION MONITOR OUTPUT\n" + "============================================================\n" + "---------\n" + "SNAPSHOTS\n" + "---------\n" + "LIST OF SNAPSHOTS FOR EACH SESSION:\n"; + } + + static std::string get_footer(void) { + return "-----------------------------------------\n" + "END OF ROCKSDB TRANSACTION MONITOR OUTPUT\n" + "=========================================\n"; } - public: +public: Rdb_snapshot_status() : m_data(get_header()) {} std::string getResult() { return m_data + get_footer(); } /* Implement Rdb_transaction interface */ /* Create one row in the snapshot status table */ - void process_tran(const Rdb_transaction* const tx) 
override - { + void process_tran(const Rdb_transaction *const tx) override { DBUG_ASSERT(tx != nullptr); /* Calculate the duration the snapshot has existed */ int64_t snapshot_timestamp = tx->m_snapshot_timestamp; - if (snapshot_timestamp != 0) - { + if (snapshot_timestamp != 0) { int64_t curr_time; rdb->GetEnv()->GetCurrentTime(&curr_time); - THD* thd = tx->get_thd(); - char buffer[1024]; + THD *thd = tx->get_thd(); + char buffer[1024]; thd_security_context(thd, buffer, sizeof buffer, 0); m_data += format_string("---SNAPSHOT, ACTIVE %lld sec\n" "%s\n" "lock count %llu, write count %llu\n", - curr_time - snapshot_timestamp, - buffer, + curr_time - snapshot_timestamp, buffer, tx->get_lock_count(), tx->get_write_count()); } } @@ -3052,52 +2795,47 @@ class Rdb_snapshot_status : public Rdb_tx_list_walker * walks through all non-replication transactions and copies * out relevant information for information_schema.rocksdb_trx */ -class Rdb_trx_info_aggregator : public Rdb_tx_list_walker -{ - private: +class Rdb_trx_info_aggregator : public Rdb_tx_list_walker { +private: std::vector<Rdb_trx_info> *m_trx_info; - public: - explicit Rdb_trx_info_aggregator(std::vector<Rdb_trx_info>* const trx_info) : - m_trx_info(trx_info) {} +public: + explicit Rdb_trx_info_aggregator(std::vector<Rdb_trx_info> *const trx_info) + : m_trx_info(trx_info) {} - void process_tran(const Rdb_transaction* const tx) override - { + void process_tran(const Rdb_transaction *const tx) override { static const std::map<int, std::string> state_map = { - {rocksdb::Transaction::STARTED, "STARTED"}, - {rocksdb::Transaction::AWAITING_PREPARE, "AWAITING_PREPARE"}, - {rocksdb::Transaction::PREPARED, "PREPARED"}, - {rocksdb::Transaction::AWAITING_COMMIT, "AWAITING_COMMIT"}, - {rocksdb::Transaction::COMMITED, "COMMITED"}, - {rocksdb::Transaction::AWAITING_ROLLBACK, "AWAITING_ROLLBACK"}, - {rocksdb::Transaction::ROLLEDBACK, "ROLLEDBACK"}, + {rocksdb::Transaction::STARTED, "STARTED"}, + 
{rocksdb::Transaction::AWAITING_PREPARE, "AWAITING_PREPARE"}, + {rocksdb::Transaction::PREPARED, "PREPARED"}, + {rocksdb::Transaction::AWAITING_COMMIT, "AWAITING_COMMIT"}, + {rocksdb::Transaction::COMMITED, "COMMITED"}, + {rocksdb::Transaction::AWAITING_ROLLBACK, "AWAITING_ROLLBACK"}, + {rocksdb::Transaction::ROLLEDBACK, "ROLLEDBACK"}, }; DBUG_ASSERT(tx != nullptr); - THD* const thd = tx->get_thd(); + THD *const thd = tx->get_thd(); ulong thread_id = thd_thread_id(thd); if (tx->is_writebatch_trx()) { - const auto wb_impl = static_cast<const Rdb_writebatch_impl*>(tx); + const auto wb_impl = static_cast<const Rdb_writebatch_impl *>(tx); DBUG_ASSERT(wb_impl); - m_trx_info->push_back({"", /* name */ - 0, /* trx_id */ - wb_impl->get_write_count(), - 0, /* lock_count */ - 0, /* timeout_sec */ - "", /* state */ - "", /* waiting_key */ - 0, /* waiting_cf_id */ - 1, /*is_replication */ - 1, /* skip_trx_api */ - wb_impl->is_tx_read_only(), - 0, /* deadlock detection */ - wb_impl->num_ongoing_bulk_load(), - thread_id, - "" /* query string */ }); + m_trx_info->push_back( + {"", /* name */ + 0, /* trx_id */ + wb_impl->get_write_count(), 0, /* lock_count */ + 0, /* timeout_sec */ + "", /* state */ + "", /* waiting_key */ + 0, /* waiting_cf_id */ + 1, /*is_replication */ + 1, /* skip_trx_api */ + wb_impl->is_tx_read_only(), 0, /* deadlock detection */ + wb_impl->num_ongoing_bulk_load(), thread_id, "" /* query string */}); } else { - const auto tx_impl= static_cast<const Rdb_transaction_impl*>(tx); + const auto tx_impl = static_cast<const Rdb_transaction_impl *>(tx); DBUG_ASSERT(tx_impl); const rocksdb::Transaction *rdb_trx = tx_impl->get_rdb_trx(); @@ -3106,9 +2844,9 @@ class Rdb_trx_info_aggregator : public Rdb_tx_list_walker } std::string query_str; - LEX_STRING* const lex_str = thd_query_string(thd); + LEX_STRING *const lex_str = thd_query_string(thd); if (lex_str != nullptr && lex_str->str != nullptr) { - query_str = std::string(lex_str->str); + query_str = 
std::string(lex_str->str); } const auto state_it = state_map.find(rdb_trx->GetState()); @@ -3118,22 +2856,14 @@ class Rdb_trx_info_aggregator : public Rdb_tx_list_walker std::string waiting_key; rdb_trx->GetWaitingTxns(&waiting_cf_id, &waiting_key), - m_trx_info->push_back({rdb_trx->GetName(), - rdb_trx->GetID(), - tx_impl->get_write_count(), - tx_impl->get_lock_count(), - tx_impl->get_timeout_sec(), - state_it->second, - waiting_key, - waiting_cf_id, - is_replication, - 0, /* skip_trx_api */ - tx_impl->is_tx_read_only(), - rdb_trx->IsDeadlockDetect(), - tx_impl->num_ongoing_bulk_load(), - thread_id, - query_str}); - } + m_trx_info->push_back( + {rdb_trx->GetName(), rdb_trx->GetID(), tx_impl->get_write_count(), + tx_impl->get_lock_count(), tx_impl->get_timeout_sec(), + state_it->second, waiting_key, waiting_cf_id, is_replication, + 0, /* skip_trx_api */ + tx_impl->is_tx_read_only(), rdb_trx->IsDeadlockDetect(), + tx_impl->num_ongoing_bulk_load(), thread_id, query_str}); + } } }; @@ -3149,17 +2879,15 @@ std::vector<Rdb_trx_info> rdb_get_all_trx_info() { } /* Generate the snapshot status table */ -static bool rocksdb_show_snapshot_status(handlerton* const hton, - THD* const thd, - stat_print_fn* const stat_print) -{ +static bool rocksdb_show_snapshot_status(handlerton *const hton, THD *const thd, + stat_print_fn *const stat_print) { Rdb_snapshot_status showStatus; Rdb_transaction::walk_tx_list(&showStatus); /* Send the result data back to MySQL */ return print_stats(thd, "SNAPSHOTS", "rocksdb", showStatus.getResult(), - stat_print); + stat_print); } /* @@ -3169,14 +2897,11 @@ static bool rocksdb_show_snapshot_status(handlerton* const hton, what column families are there) */ -static bool rocksdb_show_status(handlerton* const hton, - THD* const thd, - stat_print_fn* const stat_print, - enum ha_stat_type stat_type) -{ - bool res= false; - if (stat_type == HA_ENGINE_STATUS) - { +static bool rocksdb_show_status(handlerton *const hton, THD *const thd, + stat_print_fn 
*const stat_print, + enum ha_stat_type stat_type) { + bool res = false; + if (stat_type == HA_ENGINE_STATUS) { std::string str; /* Per DB stats */ @@ -3185,16 +2910,15 @@ static bool rocksdb_show_status(handlerton* const hton, } /* Per column family stats */ - for (const auto &cf_name : cf_manager.get_cf_names()) - { - rocksdb::ColumnFamilyHandle* cfh; + for (const auto &cf_name : cf_manager.get_cf_names()) { + rocksdb::ColumnFamilyHandle *cfh; bool is_automatic; /* Only the cf name is important. Whether it was generated automatically does not matter, so is_automatic is ignored. */ - cfh= cf_manager.get_cf(cf_name.c_str(), "", nullptr, &is_automatic); + cfh = cf_manager.get_cf(cf_name.c_str(), "", nullptr, &is_automatic); if (cfh == nullptr) continue; @@ -3205,35 +2929,28 @@ static bool rocksdb_show_status(handlerton* const hton, } /* Memory Statistics */ - std::vector<rocksdb::DB*> dbs; - std::unordered_set<const rocksdb::Cache*> cache_set; + std::vector<rocksdb::DB *> dbs; + std::unordered_set<const rocksdb::Cache *> cache_set; size_t internal_cache_count = 0; size_t kDefaultInternalCacheSize = 8 * 1024 * 1024; char buf[100]; dbs.push_back(rdb); cache_set.insert(rocksdb_tbl_options.block_cache.get()); - for (const auto& cf_handle : cf_manager.get_all_cf()) - { + for (const auto &cf_handle : cf_manager.get_all_cf()) { rocksdb::ColumnFamilyDescriptor cf_desc; cf_handle->GetDescriptor(&cf_desc); - auto* const table_factory = cf_desc.options.table_factory.get(); - if (table_factory != nullptr) - { + auto *const table_factory = cf_desc.options.table_factory.get(); + if (table_factory != nullptr) { std::string tf_name = table_factory->Name(); - if (tf_name.find("BlockBasedTable") != std::string::npos) - { - const rocksdb::BlockBasedTableOptions* const bbt_opt = - reinterpret_cast<rocksdb::BlockBasedTableOptions*>( - table_factory->GetOptions()); - if (bbt_opt != nullptr) - { - if (bbt_opt->block_cache.get() != nullptr) - { + if (tf_name.find("BlockBasedTable") != 
std::string::npos) { + const rocksdb::BlockBasedTableOptions *const bbt_opt = + reinterpret_cast<rocksdb::BlockBasedTableOptions *>( + table_factory->GetOptions()); + if (bbt_opt != nullptr) { + if (bbt_opt->block_cache.get() != nullptr) { cache_set.insert(bbt_opt->block_cache.get()); - } - else - { + } else { internal_cache_count++; } cache_set.insert(bbt_opt->block_cache_compressed.get()); @@ -3244,8 +2961,8 @@ static bool rocksdb_show_status(handlerton* const hton, std::map<rocksdb::MemoryUtil::UsageType, uint64_t> temp_usage_by_type; str.clear(); - rocksdb::MemoryUtil::GetApproximateMemoryUsageByType( - dbs, cache_set, &temp_usage_by_type); + rocksdb::MemoryUtil::GetApproximateMemoryUsageByType(dbs, cache_set, + &temp_usage_by_type); snprintf(buf, sizeof(buf), "\nMemTable Total: %lu", temp_usage_by_type[rocksdb::MemoryUtil::kMemTableTotal]); str.append(buf); @@ -3262,9 +2979,7 @@ static bool rocksdb_show_status(handlerton* const hton, internal_cache_count * kDefaultInternalCacheSize); str.append(buf); res |= print_stats(thd, "Memory_Stats", "rocksdb", str, stat_print); - } - else if (stat_type == HA_ENGINE_TRX) - { + } else if (stat_type == HA_ENGINE_TRX) { /* Handle the SHOW ENGINE ROCKSDB TRANSACTION STATUS command */ res |= rocksdb_show_snapshot_status(hton, thd, stat_print); } @@ -3272,14 +2987,12 @@ static bool rocksdb_show_status(handlerton* const hton, return res; } -static inline void rocksdb_register_tx(handlerton* const hton, THD* const thd, - Rdb_transaction* const tx) -{ +static inline void rocksdb_register_tx(handlerton *const hton, THD *const thd, + Rdb_transaction *const tx) { DBUG_ASSERT(tx != nullptr); trans_register_ha(thd, FALSE, rocksdb_hton); - if (my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) - { + if (my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { tx->start_stmt(); trans_register_ha(thd, TRUE, rocksdb_hton); } @@ -3307,37 +3020,36 @@ static inline void rocksdb_register_tx(handlerton* const 
hton, THD* const thd, InnoDB and RocksDB transactions. */ static int rocksdb_start_tx_and_assign_read_view( - handlerton* const hton, /*!< in: RocksDB handlerton */ - THD* const thd, /*!< in: MySQL thread handle of the - user for whom the transaction should - be committed */ - char* const binlog_file, /* out: binlog file for last commit */ - ulonglong* const binlog_pos, /* out: binlog pos for last commit */ - char** gtid_executed, /* out: Gtids logged until last commit */ - int* const gtid_executed_length)/*out: Length of gtid_executed string */ + handlerton *const hton, /*!< in: RocksDB handlerton */ + THD *const thd, /*!< in: MySQL thread handle of the + user for whom the transaction should + be committed */ + char *const binlog_file, /* out: binlog file for last commit */ + ulonglong *const binlog_pos, /* out: binlog pos for last commit */ + char **gtid_executed, /* out: Gtids logged until last commit */ + int *const gtid_executed_length) /*out: Length of gtid_executed string */ { Rdb_perf_context_guard guard(thd); ulong const tx_isolation = my_core::thd_tx_isolation(thd); - if (tx_isolation != ISO_REPEATABLE_READ) - { + if (tx_isolation != ISO_REPEATABLE_READ) { my_printf_error(ER_UNKNOWN_ERROR, "Only REPEATABLE READ isolation level is supported " "for START TRANSACTION WITH CONSISTENT SNAPSHOT " - "in RocksDB Storage Engine.", MYF(0)); - return 1; + "in RocksDB Storage Engine.", + MYF(0)); + return HA_EXIT_FAILURE; } - if (binlog_file) - { + if (binlog_file) { if (binlog_pos && mysql_bin_log_is_open()) mysql_bin_log_lock_commits(); else - return 1; + return HA_EXIT_FAILURE; } - Rdb_transaction* const tx= get_or_create_tx(thd); + Rdb_transaction *const tx = get_or_create_tx(thd); DBUG_ASSERT(!tx->has_snapshot()); tx->set_tx_read_only(true); rocksdb_register_tx(hton, thd, tx); @@ -3347,7 +3059,7 @@ static int rocksdb_start_tx_and_assign_read_view( mysql_bin_log_unlock_commits(binlog_file, binlog_pos, gtid_executed, gtid_executed_length); - return 0; + return 
HA_EXIT_SUCCESS; } /* Dummy SAVEPOINT support. This is needed for long running transactions @@ -3355,23 +3067,20 @@ static int rocksdb_start_tx_and_assign_read_view( * Current SAVEPOINT does not correctly handle ROLLBACK and does not return * errors. This needs to be addressed in future versions (Issue#96). */ -static int rocksdb_savepoint(handlerton* const hton, THD* const thd, - void* const savepoint) -{ - return 0; +static int rocksdb_savepoint(handlerton *const hton, THD *const thd, + void *const savepoint) { + return HA_EXIT_SUCCESS; } -static int rocksdb_rollback_to_savepoint(handlerton* const hton, THD* const thd, - void* const savepoint) -{ - Rdb_transaction*& tx= get_tx_from_thd(thd); +static int rocksdb_rollback_to_savepoint(handlerton *const hton, THD *const thd, + void *const savepoint) { + Rdb_transaction *&tx = get_tx_from_thd(thd); return tx->rollback_to_savepoint(savepoint); } -static bool rocksdb_rollback_to_savepoint_can_release_mdl( - handlerton* const hton, - THD* const thd) -{ +static bool +rocksdb_rollback_to_savepoint_can_release_mdl(handlerton *const hton, + THD *const thd) { return true; } @@ -3379,14 +3088,12 @@ static bool rocksdb_rollback_to_savepoint_can_release_mdl( This is called for INFORMATION_SCHEMA */ static void rocksdb_update_table_stats( - /* per-table stats callback */ - void (*cb)(const char* db, const char* tbl, bool is_partition, - my_io_perf_t* r, my_io_perf_t* w, my_io_perf_t* r_blob, - my_io_perf_t* r_primary, my_io_perf_t* r_secondary, - page_stats_t *page_stats, comp_stats_t *comp_stats, - int n_lock_wait, int n_lock_wait_timeout, - const char* engine)) -{ + /* per-table stats callback */ + void (*cb)(const char *db, const char *tbl, bool is_partition, + my_io_perf_t *r, my_io_perf_t *w, my_io_perf_t *r_blob, + my_io_perf_t *r_primary, my_io_perf_t *r_secondary, + page_stats_t *page_stats, comp_stats_t *comp_stats, + int n_lock_wait, int n_lock_wait_timeout, const char *engine)) { my_io_perf_t io_perf_read; 
my_io_perf_t io_perf; page_stats_t page_stats; @@ -3401,10 +3108,9 @@ static void rocksdb_update_table_stats( memset(&page_stats, 0, sizeof(page_stats)); memset(&comp_stats, 0, sizeof(comp_stats)); - tablenames= rdb_open_tables.get_table_names(); + tablenames = rdb_open_tables.get_table_names(); - for (const auto& it : tablenames) - { + for (const auto &it : tablenames) { Rdb_table_handler *table_handler; std::string str, dbname, tablename, partname; char dbname_sys[NAME_LEN + 1]; @@ -3420,37 +3126,35 @@ static void rocksdb_update_table_stats( return; } - if (rdb_split_normalized_tablename(str, &dbname, &tablename, &partname)) - { + if (rdb_split_normalized_tablename(str, &dbname, &tablename, &partname)) { continue; } - is_partition= (partname.size() != 0); + is_partition = (partname.size() != 0); - table_handler= rdb_open_tables.get_table_handler(it.c_str()); - if (table_handler == nullptr) - { + table_handler = rdb_open_tables.get_table_handler(it.c_str()); + if (table_handler == nullptr) { continue; } - io_perf_read.bytes= table_handler->m_io_perf_read.bytes.load(); - io_perf_read.requests= table_handler->m_io_perf_read.requests.load(); + io_perf_read.bytes = table_handler->m_io_perf_read.bytes.load(); + io_perf_read.requests = table_handler->m_io_perf_read.requests.load(); /* Convert from rocksdb timer to mysql timer. RocksDB values are in nanoseconds, but table statistics expect the value to be in my_timer format. 
*/ - io_perf_read.svc_time= my_core::microseconds_to_my_timer( - table_handler->m_io_perf_read.svc_time.load() / 1000); - io_perf_read.svc_time_max= my_core::microseconds_to_my_timer( - table_handler->m_io_perf_read.svc_time_max.load() / 1000); - io_perf_read.wait_time= my_core::microseconds_to_my_timer( - table_handler->m_io_perf_read.wait_time.load() / 1000); - io_perf_read.wait_time_max= my_core::microseconds_to_my_timer( - table_handler->m_io_perf_read.wait_time_max.load() / 1000); - io_perf_read.slow_ios= table_handler->m_io_perf_read.slow_ios.load(); - rdb_open_tables.release_table_handler(table_handler); + io_perf_read.svc_time = my_core::microseconds_to_my_timer( + table_handler->m_io_perf_read.svc_time.load() / 1000); + io_perf_read.svc_time_max = my_core::microseconds_to_my_timer( + table_handler->m_io_perf_read.svc_time_max.load() / 1000); + io_perf_read.wait_time = my_core::microseconds_to_my_timer( + table_handler->m_io_perf_read.wait_time.load() / 1000); + io_perf_read.wait_time_max = my_core::microseconds_to_my_timer( + table_handler->m_io_perf_read.wait_time_max.load() / 1000); + io_perf_read.slow_ios = table_handler->m_io_perf_read.slow_ios.load(); + rdb_open_tables.release_table_handler(table_handler); /* Table stats expects our database and table name to be in system encoding, @@ -3466,19 +3170,15 @@ static void rocksdb_update_table_stats( } } - static rocksdb::Status check_rocksdb_options_compatibility( - const char* const dbpath, - const rocksdb::Options& main_opts, - const std::vector<rocksdb::ColumnFamilyDescriptor>& cf_descr) -{ + const char *const dbpath, const rocksdb::Options &main_opts, + const std::vector<rocksdb::ColumnFamilyDescriptor> &cf_descr) { DBUG_ASSERT(rocksdb_datadir != nullptr); rocksdb::DBOptions loaded_db_opt; std::vector<rocksdb::ColumnFamilyDescriptor> loaded_cf_descs; - rocksdb::Status status = LoadLatestOptions(dbpath, - rocksdb::Env::Default(), &loaded_db_opt, - &loaded_cf_descs); + rocksdb::Status status = 
LoadLatestOptions(dbpath, rocksdb::Env::Default(), + &loaded_db_opt, &loaded_cf_descs); // If we're starting from scratch and there are no options saved yet then this // is a valid case. Therefore we can't compare the current set of options to @@ -3492,32 +3192,32 @@ static rocksdb::Status check_rocksdb_options_compatibility( } if (loaded_cf_descs.size() != cf_descr.size()) { - return rocksdb::Status::NotSupported("Mismatched size of column family " \ - "descriptors."); + return rocksdb::Status::NotSupported("Mismatched size of column family " + "descriptors."); } // Please see RocksDB documentation for more context about why we need to set // user-defined functions and pointer-typed options manually. for (size_t i = 0; i < loaded_cf_descs.size(); i++) { loaded_cf_descs[i].options.compaction_filter = - cf_descr[i].options.compaction_filter; - loaded_cf_descs[i].options.compaction_filter_factory = - cf_descr[i].options.compaction_filter_factory; + cf_descr[i].options.compaction_filter; + loaded_cf_descs[i].options.compaction_filter_factory = + cf_descr[i].options.compaction_filter_factory; loaded_cf_descs[i].options.comparator = cf_descr[i].options.comparator; loaded_cf_descs[i].options.memtable_factory = - cf_descr[i].options.memtable_factory; + cf_descr[i].options.memtable_factory; loaded_cf_descs[i].options.merge_operator = - cf_descr[i].options.merge_operator; + cf_descr[i].options.merge_operator; loaded_cf_descs[i].options.prefix_extractor = - cf_descr[i].options.prefix_extractor; + cf_descr[i].options.prefix_extractor; loaded_cf_descs[i].options.table_factory = - cf_descr[i].options.table_factory; + cf_descr[i].options.table_factory; } // This is the essence of the function - determine if it's safe to open the // database or not. 
- status = CheckOptionsCompatibility(dbpath, rocksdb::Env::Default(), - main_opts, loaded_cf_descs); + status = CheckOptionsCompatibility(dbpath, rocksdb::Env::Default(), main_opts, + loaded_cf_descs); return status; } @@ -3526,9 +3226,8 @@ static rocksdb::Status check_rocksdb_options_compatibility( Storage Engine initialization function, invoked when plugin is loaded. */ -static int rocksdb_init_func(void* const p) -{ - DBUG_ENTER("rocksdb_init_func"); +static int rocksdb_init_func(void *const p) { + DBUG_ENTER_FUNC(); // Validate the assumption about the size of ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN. static_assert(sizeof(longlong) == 8, "Assuming that longlong is 8 bytes."); @@ -3537,12 +3236,11 @@ static int rocksdb_init_func(void* const p) init_rocksdb_psi_keys(); #endif - rocksdb_hton= (handlerton *)p; + rocksdb_hton = (handlerton *)p; mysql_mutex_init(rdb_psi_open_tbls_mutex_key, &rdb_open_tables.m_mutex, MY_MUTEX_INIT_FAST); #ifdef HAVE_PSI_INTERFACE - rdb_bg_thread.init(rdb_signal_bg_psi_mutex_key, - rdb_signal_bg_psi_cond_key); + rdb_bg_thread.init(rdb_signal_bg_psi_mutex_key, rdb_signal_bg_psi_cond_key); rdb_drop_idx_thread.init(rdb_signal_drop_idx_psi_mutex_key, rdb_signal_drop_idx_psi_cond_key); #else @@ -3555,8 +3253,8 @@ static int rocksdb_init_func(void* const p) MY_MUTEX_INIT_FAST); #if defined(HAVE_PSI_INTERFACE) - rdb_collation_exceptions = new Regex_list_handler( - key_rwlock_collation_exception_list); + rdb_collation_exceptions = + new Regex_list_handler(key_rwlock_collation_exception_list); #else rdb_collation_exceptions = new Regex_list_handler(); #endif @@ -3566,151 +3264,153 @@ static int rocksdb_init_func(void* const p) rdb_open_tables.init_hash(); Rdb_transaction::init_mutex(); - rocksdb_hton->state= SHOW_OPTION_YES; - rocksdb_hton->create= rocksdb_create_handler; - rocksdb_hton->close_connection= rocksdb_close_connection; - rocksdb_hton->prepare= rocksdb_prepare; - rocksdb_hton->commit_by_xid= rocksdb_commit_by_xid; - 
rocksdb_hton->rollback_by_xid= rocksdb_rollback_by_xid; - rocksdb_hton->recover= rocksdb_recover; - rocksdb_hton->commit= rocksdb_commit; - rocksdb_hton->rollback= rocksdb_rollback; - rocksdb_hton->db_type= DB_TYPE_ROCKSDB; - rocksdb_hton->show_status= rocksdb_show_status; - rocksdb_hton->start_consistent_snapshot= - rocksdb_start_tx_and_assign_read_view; - rocksdb_hton->savepoint_set= rocksdb_savepoint; - rocksdb_hton->savepoint_rollback= rocksdb_rollback_to_savepoint; - rocksdb_hton->savepoint_rollback_can_release_mdl= - rocksdb_rollback_to_savepoint_can_release_mdl; + rocksdb_hton->state = SHOW_OPTION_YES; + rocksdb_hton->create = rocksdb_create_handler; + rocksdb_hton->close_connection = rocksdb_close_connection; + rocksdb_hton->prepare = rocksdb_prepare; + rocksdb_hton->commit_by_xid = rocksdb_commit_by_xid; + rocksdb_hton->rollback_by_xid = rocksdb_rollback_by_xid; + rocksdb_hton->recover = rocksdb_recover; + rocksdb_hton->commit = rocksdb_commit; + rocksdb_hton->rollback = rocksdb_rollback; + rocksdb_hton->db_type = DB_TYPE_ROCKSDB; + rocksdb_hton->show_status = rocksdb_show_status; + rocksdb_hton->start_consistent_snapshot = + rocksdb_start_tx_and_assign_read_view; + rocksdb_hton->savepoint_set = rocksdb_savepoint; + rocksdb_hton->savepoint_rollback = rocksdb_rollback_to_savepoint; + rocksdb_hton->savepoint_rollback_can_release_mdl = + rocksdb_rollback_to_savepoint_can_release_mdl; rocksdb_hton->update_table_stats = rocksdb_update_table_stats; - rocksdb_hton->flush_logs= rocksdb_flush_wal; + rocksdb_hton->flush_logs = rocksdb_flush_wal; - rocksdb_hton->flags= HTON_TEMPORARY_NOT_SUPPORTED | - HTON_SUPPORTS_EXTENDED_KEYS | - HTON_CAN_RECREATE; + rocksdb_hton->flags = HTON_TEMPORARY_NOT_SUPPORTED | + HTON_SUPPORTS_EXTENDED_KEYS | HTON_CAN_RECREATE; DBUG_ASSERT(!mysqld_embedded); - rocksdb_stats= rocksdb::CreateDBStatistics(); - rocksdb_db_options.statistics= rocksdb_stats; + rocksdb_stats = rocksdb::CreateDBStatistics(); + rocksdb_db_options.statistics = 
rocksdb_stats; if (rocksdb_rate_limiter_bytes_per_sec != 0) { - rocksdb_rate_limiter.reset(rocksdb::NewGenericRateLimiter( - rocksdb_rate_limiter_bytes_per_sec)); - rocksdb_db_options.rate_limiter= rocksdb_rate_limiter; + rocksdb_rate_limiter.reset( + rocksdb::NewGenericRateLimiter(rocksdb_rate_limiter_bytes_per_sec)); + rocksdb_db_options.rate_limiter = rocksdb_rate_limiter; } - std::shared_ptr<Rdb_logger> myrocks_logger= std::make_shared<Rdb_logger>(); - rocksdb::Status s= rocksdb::CreateLoggerFromOptions( + std::shared_ptr<Rdb_logger> myrocks_logger = std::make_shared<Rdb_logger>(); + rocksdb::Status s = rocksdb::CreateLoggerFromOptions( rocksdb_datadir, rocksdb_db_options, &rocksdb_db_options.info_log); if (s.ok()) { myrocks_logger->SetRocksDBLogger(rocksdb_db_options.info_log); } - rocksdb_db_options.info_log= myrocks_logger; + rocksdb_db_options.info_log = myrocks_logger; myrocks_logger->SetInfoLogLevel( - static_cast<rocksdb::InfoLogLevel>(rocksdb_info_log_level)); - rocksdb_db_options.wal_dir= rocksdb_wal_dir; + static_cast<rocksdb::InfoLogLevel>(rocksdb_info_log_level)); + rocksdb_db_options.wal_dir = rocksdb_wal_dir; - rocksdb_db_options.wal_recovery_mode= - static_cast<rocksdb::WALRecoveryMode>(rocksdb_wal_recovery_mode); + rocksdb_db_options.wal_recovery_mode = + static_cast<rocksdb::WALRecoveryMode>(rocksdb_wal_recovery_mode); - rocksdb_db_options.access_hint_on_compaction_start= - static_cast<rocksdb::Options::AccessHint> - (rocksdb_access_hint_on_compaction_start); + rocksdb_db_options.access_hint_on_compaction_start = + static_cast<rocksdb::Options::AccessHint>( + rocksdb_access_hint_on_compaction_start); if (rocksdb_db_options.allow_mmap_reads && - rocksdb_db_options.use_direct_reads) - { + rocksdb_db_options.use_direct_reads) { // allow_mmap_reads implies !use_direct_reads and RocksDB will not open if // mmap_reads and direct_reads are both on. 
(NO_LINT_DEBUG) sql_print_error("RocksDB: Can't enable both use_direct_reads " "and allow_mmap_reads\n"); rdb_open_tables.free_hash(); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } if (rocksdb_db_options.allow_mmap_writes && - rocksdb_db_options.use_direct_writes) - { + rocksdb_db_options.use_direct_writes) { // See above comment for allow_mmap_reads. (NO_LINT_DEBUG) sql_print_error("RocksDB: Can't enable both use_direct_writes " "and allow_mmap_writes\n"); rdb_open_tables.free_hash(); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } std::vector<std::string> cf_names; rocksdb::Status status; - status= rocksdb::DB::ListColumnFamilies(rocksdb_db_options, rocksdb_datadir, - &cf_names); - if (!status.ok()) - { + status = rocksdb::DB::ListColumnFamilies(rocksdb_db_options, rocksdb_datadir, + &cf_names); + if (!status.ok()) { /* When we start on an empty datadir, ListColumnFamilies returns IOError, and RocksDB doesn't provide any way to check what kind of error it was. Checking system errno happens to work right now. 
*/ - if (status.IsIOError() && errno == ENOENT) - { + if (status.IsIOError() && errno == ENOENT) { sql_print_information("RocksDB: Got ENOENT when listing column families"); - sql_print_information("RocksDB: assuming that we're creating a new database"); - } - else - { - std::string err_text= status.ToString(); - sql_print_error("RocksDB: Error listing column families: %s", err_text.c_str()); + sql_print_information( + "RocksDB: assuming that we're creating a new database"); + } else { + std::string err_text = status.ToString(); + sql_print_error("RocksDB: Error listing column families: %s", + err_text.c_str()); rdb_open_tables.free_hash(); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } - } - else - sql_print_information("RocksDB: %ld column families found", cf_names.size()); + } else + sql_print_information("RocksDB: %ld column families found", + cf_names.size()); std::vector<rocksdb::ColumnFamilyDescriptor> cf_descr; - std::vector<rocksdb::ColumnFamilyHandle*> cf_handles; + std::vector<rocksdb::ColumnFamilyHandle *> cf_handles; - rocksdb_tbl_options.index_type= - (rocksdb::BlockBasedTableOptions::IndexType)rocksdb_index_type; + rocksdb_tbl_options.index_type = + (rocksdb::BlockBasedTableOptions::IndexType)rocksdb_index_type; if (!rocksdb_tbl_options.no_block_cache) { - rocksdb_tbl_options.block_cache= + rocksdb_tbl_options.block_cache = rocksdb::NewLRUCache(rocksdb_block_cache_size); } // Using newer BlockBasedTable format version for better compression // and better memory allocation. 
- // See: https://github.com/facebook/rocksdb/commit/9ab5adfc59a621d12357580c94451d9f7320c2dd - rocksdb_tbl_options.format_version= 2; + // See: + // https://github.com/facebook/rocksdb/commit/9ab5adfc59a621d12357580c94451d9f7320c2dd + rocksdb_tbl_options.format_version = 2; if (rocksdb_collect_sst_properties) { - properties_collector_factory = std::make_shared - <Rdb_tbl_prop_coll_factory>( - &ddl_manager - ); + properties_collector_factory = + std::make_shared<Rdb_tbl_prop_coll_factory>(&ddl_manager); rocksdb_set_compaction_options(nullptr, nullptr, nullptr, nullptr); mysql_mutex_lock(&rdb_sysvars_mutex); - DBUG_ASSERT(rocksdb_table_stats_sampling_pct - <= RDB_TBL_STATS_SAMPLE_PCT_MAX); + DBUG_ASSERT(rocksdb_table_stats_sampling_pct <= + RDB_TBL_STATS_SAMPLE_PCT_MAX); properties_collector_factory->SetTableStatsSamplingPct( - rocksdb_table_stats_sampling_pct); + rocksdb_table_stats_sampling_pct); mysql_mutex_unlock(&rdb_sysvars_mutex); } + if (rocksdb_persistent_cache_size > 0) { + std::shared_ptr<rocksdb::PersistentCache> pcache; + rocksdb::NewPersistentCache( + rocksdb::Env::Default(), std::string(rocksdb_persistent_cache_path), + rocksdb_persistent_cache_size, myrocks_logger, true, &pcache); + rocksdb_tbl_options.persistent_cache = pcache; + } else if (strlen(rocksdb_persistent_cache_path)) { + sql_print_error("RocksDB: Must specify rocksdb_persistent_cache_size"); + DBUG_RETURN(1); + } + if (!rocksdb_cf_options_map.init( - rocksdb_tbl_options, - properties_collector_factory, - rocksdb_default_cf_options, - rocksdb_override_cf_options)) - { + rocksdb_tbl_options, properties_collector_factory, + rocksdb_default_cf_options, rocksdb_override_cf_options)) { // NO_LINT_DEBUG sql_print_error("RocksDB: Failed to initialize CF options map."); rdb_open_tables.free_hash(); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } /* @@ -3722,8 +3422,7 @@ static int rocksdb_init_func(void* const p) std::vector<int> compaction_enabled_cf_indices; sql_print_information("RocksDB: 
Column Families at start:"); - for (size_t i = 0; i < cf_names.size(); ++i) - { + for (size_t i = 0; i < cf_names.size(); ++i) { rocksdb::ColumnFamilyOptions opts; rocksdb_cf_options_map.get_cf_options(cf_names[i], &opts); @@ -3736,8 +3435,7 @@ static int rocksdb_init_func(void* const p) Temporarily disable compactions to prevent a race condition where compaction starts before compaction filter is ready. */ - if (!opts.disable_auto_compactions) - { + if (!opts.disable_auto_compactions) { compaction_enabled_cf_indices.push_back(i); opts.disable_auto_compactions = true; } @@ -3752,57 +3450,53 @@ static int rocksdb_init_func(void* const p) main_opts.env->SetBackgroundThreads(main_opts.max_background_compactions, rocksdb::Env::Priority::LOW); rocksdb::TransactionDBOptions tx_db_options; - tx_db_options.transaction_lock_timeout= 2; // 2 seconds - tx_db_options.custom_mutex_factory= std::make_shared<Rdb_mutex_factory>(); + tx_db_options.transaction_lock_timeout = 2; // 2 seconds + tx_db_options.custom_mutex_factory = std::make_shared<Rdb_mutex_factory>(); - status= check_rocksdb_options_compatibility(rocksdb_datadir, main_opts, - cf_descr); + status = + check_rocksdb_options_compatibility(rocksdb_datadir, main_opts, cf_descr); // We won't start if we'll determine that there's a chance of data corruption // because of incompatible options. if (!status.ok()) { // NO_LINT_DEBUG - sql_print_error("RocksDB: compatibility check against existing database " \ - "options failed. %s", status.ToString().c_str()); + sql_print_error("RocksDB: compatibility check against existing database " + "options failed. 
%s", + status.ToString().c_str()); rdb_open_tables.free_hash(); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } - status= rocksdb::TransactionDB::Open(main_opts, tx_db_options, - rocksdb_datadir, cf_descr, - &cf_handles, &rdb); + status = rocksdb::TransactionDB::Open( + main_opts, tx_db_options, rocksdb_datadir, cf_descr, &cf_handles, &rdb); - if (!status.ok()) - { - std::string err_text= status.ToString(); + if (!status.ok()) { + std::string err_text = status.ToString(); sql_print_error("RocksDB: Error opening instance: %s", err_text.c_str()); rdb_open_tables.free_hash(); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } cf_manager.init(&rocksdb_cf_options_map, &cf_handles); - if (dict_manager.init(rdb->GetBaseDB(), &cf_manager)) - { + if (dict_manager.init(rdb->GetBaseDB(), &cf_manager)) { // NO_LINT_DEBUG sql_print_error("RocksDB: Failed to initialize data dictionary."); rdb_open_tables.free_hash(); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } - if (binlog_manager.init(&dict_manager)) - { + if (binlog_manager.init(&dict_manager)) { // NO_LINT_DEBUG sql_print_error("RocksDB: Failed to initialize binlog manager."); rdb_open_tables.free_hash(); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } - if (ddl_manager.init(&dict_manager, &cf_manager, rocksdb_validate_tables)) - { + if (ddl_manager.init(&dict_manager, &cf_manager, rocksdb_validate_tables)) { // NO_LINT_DEBUG sql_print_error("RocksDB: Failed to initialize DDL manager."); rdb_open_tables.free_hash(); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } Rdb_sst_info::init(rdb); @@ -3811,46 +3505,46 @@ static int rocksdb_init_func(void* const p) Enable auto compaction, things needed for compaction filter are finished initializing */ - std::vector<rocksdb::ColumnFamilyHandle*> compaction_enabled_cf_handles; + std::vector<rocksdb::ColumnFamilyHandle *> compaction_enabled_cf_handles; compaction_enabled_cf_handles.reserve(compaction_enabled_cf_indices.size()); - for (const auto &index : 
compaction_enabled_cf_indices) - { + for (const auto &index : compaction_enabled_cf_indices) { compaction_enabled_cf_handles.push_back(cf_handles[index]); } - status= rdb->EnableAutoCompaction(compaction_enabled_cf_handles); + status = rdb->EnableAutoCompaction(compaction_enabled_cf_handles); - if (!status.ok()) - { - const std::string err_text= status.ToString(); + if (!status.ok()) { + const std::string err_text = status.ToString(); // NO_LINT_DEBUG sql_print_error("RocksDB: Error enabling compaction: %s", err_text.c_str()); rdb_open_tables.free_hash(); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } - auto err= rdb_bg_thread.create_thread( + auto err = rdb_bg_thread.create_thread(BG_THREAD_NAME #ifdef HAVE_PSI_INTERFACE - rdb_background_psi_thread_key + , + rdb_background_psi_thread_key #endif - ); + ); if (err != 0) { sql_print_error("RocksDB: Couldn't start the background thread: (errno=%d)", err); rdb_open_tables.free_hash(); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } - err= rdb_drop_idx_thread.create_thread( + err = rdb_drop_idx_thread.create_thread(INDEX_THREAD_NAME #ifdef HAVE_PSI_INTERFACE - rdb_drop_idx_psi_thread_key + , + rdb_drop_idx_psi_thread_key #endif - ); + ); if (err != 0) { sql_print_error("RocksDB: Couldn't start the drop index thread: (errno=%d)", err); rdb_open_tables.free_hash(); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } rdb_set_collation_exception_list(rocksdb_strict_collation_exceptions); @@ -3861,28 +3555,28 @@ static int rocksdb_init_func(void* const p) // NO_LINT_DEBUG sql_print_information("RocksDB: global statistics using %s indexer", - STRINGIFY_ARG(RDB_INDEXER)); + STRINGIFY_ARG(RDB_INDEXER)); #if defined(HAVE_SCHED_GETCPU) - if (sched_getcpu() == -1) - { + if (sched_getcpu() == -1) { // NO_LINT_DEBUG - sql_print_information("RocksDB: sched_getcpu() failed - " + sql_print_information( + "RocksDB: sched_getcpu() failed - " "global statistics will use thread_id_indexer_t instead"); } #endif 
sql_print_information("RocksDB instance opened"); - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } /* Storage Engine deinitialization function, invoked when plugin is unloaded. */ -static int rocksdb_done_func(void* const p) -{ - int error= 0; - DBUG_ENTER("rocksdb_done_func"); +static int rocksdb_done_func(void *const p) { + DBUG_ENTER_FUNC(); + + int error = 0; // signal the drop index thread to stop rdb_drop_idx_thread.signal(true); @@ -3901,7 +3595,7 @@ static int rocksdb_done_func(void* const p) rdb_bg_thread.signal(true); // Wait for the background thread to finish. - auto err= rdb_bg_thread.join(); + auto err = rdb_bg_thread.join(); if (err != 0) { // We'll log the message and continue because we're shutting down and // continuation is the optimal strategy. @@ -3911,18 +3605,16 @@ static int rocksdb_done_func(void* const p) } // Wait for the drop index thread to finish. - err= rdb_drop_idx_thread.join(); + err = rdb_drop_idx_thread.join(); if (err != 0) { // NO_LINT_DEBUG - sql_print_error("RocksDB: Couldn't stop the index thread: (errno=%d)", - err); + sql_print_error("RocksDB: Couldn't stop the index thread: (errno=%d)", err); } - if (rdb_open_tables.m_hash.records) - { + if (rdb_open_tables.m_hash.records) { // Looks like we are getting unloaded and yet we have some open tables // left behind. - error= 1; + error = 1; } rdb_open_tables.free_hash(); @@ -3935,10 +3627,9 @@ static int rocksdb_done_func(void* const p) Rdb_transaction::term_mutex(); - for (auto& it : rdb_collation_data) - { + for (auto &it : rdb_collation_data) { delete it; - it= nullptr; + it = nullptr; } ddl_manager.cleanup(); @@ -3947,14 +3638,13 @@ static int rocksdb_done_func(void* const p) cf_manager.cleanup(); delete rdb; - rdb= nullptr; + rdb = nullptr; - // Disown the cache data since we're shutting down. - // This results in memory leaks but it improved the shutdown time. - // Don't disown when running under valgrind +// Disown the cache data since we're shutting down. 
+// This results in memory leaks but it improved the shutdown time. +// Don't disown when running under valgrind #ifndef HAVE_purify - if (rocksdb_tbl_options.block_cache) - { + if (rocksdb_tbl_options.block_cache) { rocksdb_tbl_options.block_cache->DisownData(); } #endif /* HAVE_purify */ @@ -3962,7 +3652,6 @@ static int rocksdb_done_func(void* const p) DBUG_RETURN(error); } - /** @brief Example of simple lock controls. The "table_handler" it creates is a @@ -3971,42 +3660,35 @@ static int rocksdb_done_func(void* const p) they are needed to function. */ -Rdb_table_handler* Rdb_open_tables_map::get_table_handler( - const char* const table_name) -{ +Rdb_table_handler * +Rdb_open_tables_map::get_table_handler(const char *const table_name) { Rdb_table_handler *table_handler; uint length; char *tmp_name; DBUG_ASSERT(table_name != nullptr); - length=(uint) strlen(table_name); + length = (uint)strlen(table_name); // First, look up the table in the hash map. mysql_mutex_lock(&m_mutex); - if (!(table_handler= reinterpret_cast<Rdb_table_handler*>( - my_hash_search(&m_hash, reinterpret_cast<const uchar*>(table_name), - length)))) - { + if (!(table_handler = reinterpret_cast<Rdb_table_handler *>(my_hash_search( + &m_hash, reinterpret_cast<const uchar *>(table_name), length)))) { // Since we did not find it in the hash map, attempt to create and add it // to the hash map. - if (!(table_handler= reinterpret_cast<Rdb_table_handler*>( - my_multi_malloc(MYF(MY_WME | MY_ZEROFILL), - &table_handler, sizeof(*table_handler), - &tmp_name, length+1, - NullS)))) - { + if (!(table_handler = reinterpret_cast<Rdb_table_handler *>(my_multi_malloc( + MYF(MY_WME | MY_ZEROFILL), &table_handler, sizeof(*table_handler), + &tmp_name, length + 1, NullS)))) { // Allocating a new Rdb_table_handler and a new table name failed. 
mysql_mutex_unlock(&m_mutex); return nullptr; } - table_handler->m_ref_count= 0; - table_handler->m_table_name_length= length; - table_handler->m_table_name= tmp_name; + table_handler->m_ref_count = 0; + table_handler->m_table_name_length = length; + table_handler->m_table_name = tmp_name; strmov(table_handler->m_table_name, table_name); - if (my_hash_insert(&m_hash, reinterpret_cast<uchar*>(table_handler))) - { + if (my_hash_insert(&m_hash, reinterpret_cast<uchar *>(table_handler))) { // Inserting into the hash map failed. mysql_mutex_unlock(&m_mutex); my_free(table_handler); @@ -4024,25 +3706,19 @@ Rdb_table_handler* Rdb_open_tables_map::get_table_handler( return table_handler; } - -std::vector<std::string> rdb_get_open_table_names(void) -{ +std::vector<std::string> rdb_get_open_table_names(void) { return rdb_open_tables.get_table_names(); } - -std::vector<std::string> Rdb_open_tables_map::get_table_names(void) const -{ +std::vector<std::string> Rdb_open_tables_map::get_table_names(void) const { ulong i; const Rdb_table_handler *table_handler; std::vector<std::string> names; mysql_mutex_lock(&m_mutex); - for (i = 0; - (table_handler= reinterpret_cast<const Rdb_table_handler*>( - my_hash_const_element(&m_hash, i))); - i++) - { + for (i = 0; (table_handler = reinterpret_cast<const Rdb_table_handler *>( + my_hash_const_element(&m_hash, i))); + i++) { DBUG_ASSERT(table_handler != nullptr); names.push_back(table_handler->m_table_name); } @@ -4052,12 +3728,10 @@ std::vector<std::string> Rdb_open_tables_map::get_table_names(void) const return names; } - -void ha_rocksdb::load_auto_incr_value() -{ - const int save_active_index= active_index; - active_index= table->s->next_number_index; - const uint8 save_table_status= table->status; +void ha_rocksdb::load_auto_incr_value() { + const int save_active_index = active_index; + active_index = table->s->next_number_index; + const uint8 save_table_status = table->status; /* load_auto_incr_value() may be called by statements that 
@@ -4065,26 +3739,25 @@ void ha_rocksdb::load_auto_incr_value() index_last() creates a snapshot. When a snapshot is created here, it has to be released as well. (GitHub issue#189) */ - Rdb_transaction* const tx= get_or_create_tx(table->in_use); - const bool is_new_snapshot= !tx->has_snapshot(); + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + const bool is_new_snapshot = !tx->has_snapshot(); // Do a lookup. We only need index column, so it should be index-only. // (another reason to make it index-only is that table->read_set is // not set appropriately and non-index-only lookup will not read the value) - const bool save_keyread_only= m_keyread_only; - m_keyread_only= true; + const bool save_keyread_only = m_keyread_only; + m_keyread_only = true; if (!index_last(table->record[0])) update_auto_incr_val(); - m_keyread_only= save_keyread_only; - if (is_new_snapshot) - { + m_keyread_only = save_keyread_only; + if (is_new_snapshot) { tx->release_snapshot(); } - table->status= save_table_status; - active_index= save_active_index; + table->status = save_table_status; + active_index = save_active_index; /* Do what ha_rocksdb::index_end() does. @@ -4100,47 +3773,41 @@ void ha_rocksdb::load_auto_incr_value() end of ha_rocksdb::create. 
Also see opened issue here: https://github.com/facebook/mysql-5.6/issues/153 */ -void ha_rocksdb::update_auto_incr_val() -{ +void ha_rocksdb::update_auto_incr_val() { Field *field; longlong new_val; - field= table->key_info[table->s->next_number_index].key_part[0].field; + field = table->key_info[table->s->next_number_index].key_part[0].field; - my_bitmap_map* const old_map= - dbug_tmp_use_all_columns(table, table->read_set); - new_val= field->val_int() + 1; + my_bitmap_map *const old_map = + dbug_tmp_use_all_columns(table, table->read_set); + new_val = field->val_int() + 1; dbug_tmp_restore_column_map(table->read_set, old_map); - longlong auto_incr_val= m_tbl_def->m_auto_incr_val; + longlong auto_incr_val = m_tbl_def->m_auto_incr_val; while (auto_incr_val < new_val && - !m_tbl_def->m_auto_incr_val.compare_exchange_weak( - auto_incr_val, new_val)) - { + !m_tbl_def->m_auto_incr_val.compare_exchange_weak(auto_incr_val, + new_val)) { // Do nothing - just loop until auto_incr_val is >= new_val or // we successfully set it } } -int ha_rocksdb::load_hidden_pk_value() -{ - const int save_active_index= active_index; - active_index= m_tbl_def->m_key_count - 1; - const uint8 save_table_status= table->status; +int ha_rocksdb::load_hidden_pk_value() { + const int save_active_index = active_index; + active_index = m_tbl_def->m_key_count - 1; + const uint8 save_table_status = table->status; - Rdb_transaction* const tx= get_or_create_tx(table->in_use); - const bool is_new_snapshot= !tx->has_snapshot(); + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + const bool is_new_snapshot = !tx->has_snapshot(); // Do a lookup. 
- if (!index_last(table->record[0])) - { + if (!index_last(table->record[0])) { /* Decode PK field from the key */ - longlong hidden_pk_id= 0; - if (read_hidden_pk_id_from_rowkey(&hidden_pk_id)) - { - if (is_new_snapshot) - { + longlong hidden_pk_id = 0; + if (read_hidden_pk_id_from_rowkey(&hidden_pk_id)) { + if (is_new_snapshot) { tx->release_snapshot(); } return HA_ERR_INTERNAL_ERROR; @@ -4148,35 +3815,33 @@ int ha_rocksdb::load_hidden_pk_value() hidden_pk_id++; longlong old = m_tbl_def->m_hidden_pk_val; - while (old < hidden_pk_id && - !m_tbl_def->m_hidden_pk_val.compare_exchange_weak(old, hidden_pk_id)) - {} + while ( + old < hidden_pk_id && + !m_tbl_def->m_hidden_pk_val.compare_exchange_weak(old, hidden_pk_id)) { + } } - if (is_new_snapshot) - { + if (is_new_snapshot) { tx->release_snapshot(); } - table->status= save_table_status; - active_index= save_active_index; + table->status = save_table_status; + active_index = save_active_index; release_scan_iterator(); - return 0; + return HA_EXIT_SUCCESS; } /* Get PK value from m_tbl_def->m_hidden_pk_info. 
*/ -longlong ha_rocksdb::update_hidden_pk_val() -{ +longlong ha_rocksdb::update_hidden_pk_val() { DBUG_ASSERT(has_hidden_pk(table)); - const longlong new_val= m_tbl_def->m_hidden_pk_val++; + const longlong new_val = m_tbl_def->m_hidden_pk_val++; return new_val; } /* Get the id of the hidden pk id from m_last_rowkey */ -int ha_rocksdb::read_hidden_pk_id_from_rowkey(longlong* const hidden_pk_id) -{ +int ha_rocksdb::read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id) { DBUG_ASSERT(hidden_pk_id != nullptr); DBUG_ASSERT(table != nullptr); DBUG_ASSERT(has_hidden_pk(table)); @@ -4186,20 +3851,18 @@ int ha_rocksdb::read_hidden_pk_id_from_rowkey(longlong* const hidden_pk_id) // Get hidden primary key from old key slice Rdb_string_reader reader(&rowkey_slice); if ((!reader.read(Rdb_key_def::INDEX_NUMBER_SIZE))) - return 1; + return HA_EXIT_FAILURE; - const int length= Field_longlong::PACK_LENGTH; - const uchar* from= reinterpret_cast<const uchar*>(reader.read(length)); - if (from == nullptr) - { - return 1; /* Mem-comparable image doesn't have enough bytes */ + const int length = Field_longlong::PACK_LENGTH; + const uchar *from = reinterpret_cast<const uchar *>(reader.read(length)); + if (from == nullptr) { + return HA_EXIT_FAILURE; /* Mem-comparable image doesn't have enough bytes */ } - *hidden_pk_id= rdb_netbuf_read_uint64(&from); - return 0; + *hidden_pk_id = rdb_netbuf_read_uint64(&from); + return HA_EXIT_SUCCESS; } - /** @brief Free lock controls. We call this whenever we close a table. If the table had @@ -4208,18 +3871,16 @@ int ha_rocksdb::read_hidden_pk_id_from_rowkey(longlong* const hidden_pk_id) */ void Rdb_open_tables_map::release_table_handler( - Rdb_table_handler* const table_handler) -{ + Rdb_table_handler *const table_handler) { mysql_mutex_lock(&m_mutex); DBUG_ASSERT(table_handler != nullptr); DBUG_ASSERT(table_handler->m_ref_count > 0); - if (!--table_handler->m_ref_count) - { + if (!--table_handler->m_ref_count) { // Last rereference was released. 
Tear down the hash entry. - const auto ret __attribute__((__unused__)) = - my_hash_delete(&m_hash, reinterpret_cast<uchar*>(table_handler)); - DBUG_ASSERT(!ret); // the hash entry must actually be found and deleted + const auto ret MY_ATTRIBUTE((__unused__)) = + my_hash_delete(&m_hash, reinterpret_cast<uchar *>(table_handler)); + DBUG_ASSERT(!ret); // the hash entry must actually be found and deleted my_core::thr_lock_delete(&table_handler->m_thr_lock); my_free(table_handler); } @@ -4227,67 +3888,57 @@ void Rdb_open_tables_map::release_table_handler( mysql_mutex_unlock(&m_mutex); } - -static handler* rocksdb_create_handler(my_core::handlerton* const hton, - my_core::TABLE_SHARE* const table_arg, - my_core::MEM_ROOT* const mem_root) -{ +static handler *rocksdb_create_handler(my_core::handlerton *const hton, + my_core::TABLE_SHARE *const table_arg, + my_core::MEM_ROOT *const mem_root) { return new (mem_root) ha_rocksdb(hton, table_arg); } - -ha_rocksdb::ha_rocksdb(my_core::handlerton* const hton, - my_core::TABLE_SHARE* const table_arg) - : handler(hton, table_arg), m_table_handler(nullptr), m_scan_it(nullptr), - m_scan_it_skips_bloom(false), m_scan_it_snapshot(nullptr), - m_tbl_def(nullptr), m_pk_descr(nullptr), m_key_descr_arr(nullptr), - m_pk_can_be_decoded(false), - m_maybe_unpack_info(false), - m_pk_tuple(nullptr), m_pk_packed_tuple(nullptr), - m_sk_packed_tuple(nullptr), - m_sk_match_prefix(nullptr), m_sk_match_prefix_buf(nullptr), - m_sk_packed_tuple_old(nullptr), - m_pack_buffer(nullptr), m_lock_rows(RDB_LOCK_NONE), - m_keyread_only(FALSE), m_bulk_load_tx(nullptr), m_encoder_arr(nullptr), - m_row_checksums_checked(0), - m_in_rpl_delete_rows(false), m_in_rpl_update_rows(false), - m_force_skip_unique_check(false) -{ +ha_rocksdb::ha_rocksdb(my_core::handlerton *const hton, + my_core::TABLE_SHARE *const table_arg) + : handler(hton, table_arg), m_table_handler(nullptr), m_scan_it(nullptr), + m_scan_it_skips_bloom(false), m_scan_it_snapshot(nullptr), + 
m_tbl_def(nullptr), m_pk_descr(nullptr), m_key_descr_arr(nullptr), + m_pk_can_be_decoded(false), m_maybe_unpack_info(false), + m_pk_tuple(nullptr), m_pk_packed_tuple(nullptr), + m_sk_packed_tuple(nullptr), m_end_key_packed_tuple(nullptr), + m_sk_match_prefix(nullptr), m_sk_match_prefix_buf(nullptr), + m_sk_packed_tuple_old(nullptr), m_dup_sk_packed_tuple(nullptr), + m_dup_sk_packed_tuple_old(nullptr), m_pack_buffer(nullptr), + m_lock_rows(RDB_LOCK_NONE), m_keyread_only(FALSE), + m_bulk_load_tx(nullptr), m_encoder_arr(nullptr), + m_row_checksums_checked(0), m_in_rpl_delete_rows(false), + m_in_rpl_update_rows(false), m_force_skip_unique_check(false) { // TODO(alexyang): create a valid PSI_mutex_key for this mutex mysql_mutex_init(0, &m_bulk_load_mutex, MY_MUTEX_INIT_FAST); } +static const char *ha_rocksdb_exts[] = {NullS}; -static const char *ha_rocksdb_exts[] = { - NullS -}; - +const char **ha_rocksdb::bas_ext() const { + DBUG_ENTER_FUNC(); -const char **ha_rocksdb::bas_ext() const -{ - return ha_rocksdb_exts; + DBUG_RETURN(ha_rocksdb_exts); } -bool ha_rocksdb::same_table(const ha_rocksdb& other) const -{ +bool ha_rocksdb::same_table(const ha_rocksdb &other) const { return m_tbl_def->base_tablename() == other.m_tbl_def->base_tablename(); } -bool ha_rocksdb::init_with_fields() -{ - const uint pk= table_share->primary_key; - if (pk != MAX_KEY) - { - const uint key_parts= table_share->key_info[pk].user_defined_key_parts; - check_keyread_allowed(pk /*PK*/, key_parts-1, true); - } - else - m_pk_can_be_decoded= false; +bool ha_rocksdb::init_with_fields() { + DBUG_ENTER_FUNC(); - cached_table_flags= table_flags(); - return false; /* Ok */ -} + const uint pk = table_share->primary_key; + if (pk != MAX_KEY) { + const uint key_parts = table_share->key_info[pk].user_defined_key_parts; + check_keyread_allowed(pk /*PK*/, key_parts - 1, true); + } else + m_pk_can_be_decoded = false; + + cached_table_flags = table_flags(); + DBUG_RETURN(false); /* Ok */ +} /** Convert record from 
table->record[0] form into a form that can be written @@ -4299,10 +3950,8 @@ bool ha_rocksdb::init_with_fields() */ void ha_rocksdb::convert_record_to_storage_format( - const rocksdb::Slice& pk_packed_slice, - Rdb_string_writer* const pk_unpack_info, - rocksdb::Slice* const packed_rec) -{ + const rocksdb::Slice &pk_packed_slice, + Rdb_string_writer *const pk_unpack_info, rocksdb::Slice *const packed_rec) { DBUG_ASSERT_IMP(m_maybe_unpack_info, pk_unpack_info); m_storage_record.length(0); @@ -4312,93 +3961,78 @@ void ha_rocksdb::convert_record_to_storage_format( // If a primary key may have non-empty unpack_info for certain values, // (m_maybe_unpack_info=TRUE), we write the unpack_info block. The block // itself was prepared in Rdb_key_def::pack_record. - if (m_maybe_unpack_info) - { - m_storage_record.append(reinterpret_cast<char*>(pk_unpack_info->ptr()), + if (m_maybe_unpack_info) { + m_storage_record.append(reinterpret_cast<char *>(pk_unpack_info->ptr()), pk_unpack_info->get_current_pos()); } - for (uint i=0; i < table->s->fields; i++) - { + for (uint i = 0; i < table->s->fields; i++) { /* Don't pack decodable PK key parts */ - if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) - { + if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) { continue; } - Field* const field= table->field[i]; - if (m_encoder_arr[i].maybe_null()) - { - char* const data= (char*)m_storage_record.ptr(); - if (field->is_null()) - { - data[m_encoder_arr[i].m_null_offset]|= m_encoder_arr[i].m_null_mask; + Field *const field = table->field[i]; + if (m_encoder_arr[i].maybe_null()) { + char *const data = (char *)m_storage_record.ptr(); + if (field->is_null()) { + data[m_encoder_arr[i].m_null_offset] |= m_encoder_arr[i].m_null_mask; /* Don't write anything for NULL values */ continue; } } - if (m_encoder_arr[i].m_field_type == MYSQL_TYPE_BLOB) - { - my_core::Field_blob *blob= (my_core::Field_blob*)field; + if (m_encoder_arr[i].m_field_type == MYSQL_TYPE_BLOB) { 
+ my_core::Field_blob *blob = (my_core::Field_blob *)field; /* Get the number of bytes needed to store length*/ - const uint length_bytes= blob->pack_length() - portable_sizeof_char_ptr; + const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr; /* Store the length of the value */ - m_storage_record.append(reinterpret_cast<char*>(blob->ptr), length_bytes); + m_storage_record.append(reinterpret_cast<char *>(blob->ptr), + length_bytes); /* Store the blob value itself */ char *data_ptr; - memcpy(&data_ptr, blob->ptr + length_bytes, sizeof(uchar**)); + memcpy(&data_ptr, blob->ptr + length_bytes, sizeof(uchar **)); m_storage_record.append(data_ptr, blob->get_length()); - } - else if (m_encoder_arr[i].m_field_type == MYSQL_TYPE_VARCHAR) - { - Field_varstring* const field_var= (Field_varstring*)field; + } else if (m_encoder_arr[i].m_field_type == MYSQL_TYPE_VARCHAR) { + Field_varstring *const field_var = (Field_varstring *)field; uint data_len; /* field_var->length_bytes is 1 or 2 */ - if (field_var->length_bytes == 1) - { - data_len= field_var->ptr[0]; - } - else - { - DBUG_ASSERT(field_var->length_bytes==2); - data_len= uint2korr(field_var->ptr); + if (field_var->length_bytes == 1) { + data_len = field_var->ptr[0]; + } else { + DBUG_ASSERT(field_var->length_bytes == 2); + data_len = uint2korr(field_var->ptr); } - m_storage_record.append(reinterpret_cast<char*>(field_var->ptr), + m_storage_record.append(reinterpret_cast<char *>(field_var->ptr), field_var->length_bytes + data_len); - } - else - { + } else { /* Copy the field data */ - const uint len= field->pack_length_in_rec(); - m_storage_record.append(reinterpret_cast<char*>(field->ptr), len); + const uint len = field->pack_length_in_rec(); + m_storage_record.append(reinterpret_cast<char *>(field->ptr), len); } } - if (should_store_row_debug_checksums()) - { - const uint32_t key_crc32= my_core::crc32(0, - rdb_slice_to_uchar_ptr(&pk_packed_slice), - pk_packed_slice.size()); - const uint32_t val_crc32= 
my_core::crc32( - 0, rdb_mysql_str_to_uchar_str(&m_storage_record), - m_storage_record.length()); + if (should_store_row_debug_checksums()) { + const uint32_t key_crc32 = my_core::crc32( + 0, rdb_slice_to_uchar_ptr(&pk_packed_slice), pk_packed_slice.size()); + const uint32_t val_crc32 = + my_core::crc32(0, rdb_mysql_str_to_uchar_str(&m_storage_record), + m_storage_record.length()); uchar key_crc_buf[RDB_CHECKSUM_SIZE]; uchar val_crc_buf[RDB_CHECKSUM_SIZE]; rdb_netbuf_store_uint32(key_crc_buf, key_crc32); rdb_netbuf_store_uint32(val_crc_buf, val_crc32); - m_storage_record.append((const char*)&RDB_CHECKSUM_DATA_TAG, 1); - m_storage_record.append((const char*)key_crc_buf, RDB_CHECKSUM_SIZE); - m_storage_record.append((const char*)val_crc_buf, RDB_CHECKSUM_SIZE); + m_storage_record.append((const char *)&RDB_CHECKSUM_DATA_TAG, 1); + m_storage_record.append((const char *)key_crc_buf, RDB_CHECKSUM_SIZE); + m_storage_record.append((const char *)val_crc_buf, RDB_CHECKSUM_SIZE); } - *packed_rec= rocksdb::Slice(m_storage_record.ptr(), - m_storage_record.length()); + *packed_rec = + rocksdb::Slice(m_storage_record.ptr(), m_storage_record.length()); } - /* @brief Setup which fields will be unpacked when reading rows @@ -4406,7 +4040,8 @@ void ha_rocksdb::convert_record_to_storage_format( @detail Two special cases when we still unpack all fields: - When this table is being updated (m_lock_rows==RDB_LOCK_WRITE). - - When @@rocksdb_verify_row_debug_checksums is ON (In this mode, we need to read all + - When @@rocksdb_verify_row_debug_checksums is ON (In this mode, we need to + read all fields to find whether there is a row checksum at the end. We could skip the fields instead of decoding them, but currently we do decoding.) 
@@ -4414,40 +4049,31 @@ void ha_rocksdb::convert_record_to_storage_format( ha_rocksdb::setup_field_converters() ha_rocksdb::convert_record_from_storage_format() */ -void ha_rocksdb::setup_read_decoders() -{ +void ha_rocksdb::setup_read_decoders() { m_decoders_vect.clear(); - int last_useful= 0; - int skip_size= 0; + int last_useful = 0; + int skip_size = 0; - for (uint i= 0; i < table->s->fields; i++) - { + for (uint i = 0; i < table->s->fields; i++) { // We only need the decoder if the whole record is stored. - if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) - { + if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) { continue; } if (m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums || - bitmap_is_set(table->read_set, table->field[i]->field_index)) - { + bitmap_is_set(table->read_set, table->field[i]->field_index)) { // We will need to decode this field m_decoders_vect.push_back({&m_encoder_arr[i], true, skip_size}); - last_useful= m_decoders_vect.size(); - skip_size= 0; - } - else - { + last_useful = m_decoders_vect.size(); + skip_size = 0; + } else { if (m_encoder_arr[i].uses_variable_len_encoding() || - m_encoder_arr[i].maybe_null()) - { + m_encoder_arr[i].maybe_null()) { // For variable-length field, we need to read the data and skip it m_decoders_vect.push_back({&m_encoder_arr[i], false, skip_size}); - skip_size= 0; - } - else - { + skip_size = 0; + } else { // Fixed-width field can be skipped without looking at it. // Add appropriate skip_size to the next field. 
skip_size += m_encoder_arr[i].m_pack_length_in_rec; @@ -4461,20 +4087,14 @@ void ha_rocksdb::setup_read_decoders() m_decoders_vect.end()); } - #ifndef NDEBUG -void dbug_append_garbage_at_end(std::string &on_disk_rec) -{ +void dbug_append_garbage_at_end(std::string &on_disk_rec) { on_disk_rec.append("abc"); } -void dbug_truncate_record(std::string &on_disk_rec) -{ - on_disk_rec.resize(0); -} +void dbug_truncate_record(std::string &on_disk_rec) { on_disk_rec.resize(0); } -void dbug_modify_rec_varchar12(std::string &on_disk_rec) -{ +void dbug_modify_rec_varchar12(std::string &on_disk_rec) { std::string res; // The record is NULL-byte followed by VARCHAR(10). // Put the NULL-byte @@ -4486,9 +4106,7 @@ void dbug_modify_rec_varchar12(std::string &on_disk_rec) on_disk_rec.assign(res); } - -void dbug_modify_key_varchar8(String &on_disk_rec) -{ +void dbug_modify_key_varchar8(String &on_disk_rec) { std::string res; // The key starts with index number res.append(on_disk_rec.ptr(), Rdb_key_def::INDEX_NUMBER_SIZE); @@ -4499,13 +4117,14 @@ void dbug_modify_key_varchar8(String &on_disk_rec) on_disk_rec.append(res.data(), res.size()); } +void dbug_create_err_inplace_alter() { + my_printf_error(ER_UNKNOWN_ERROR, + "Intentional failure in inplace alter occurred.", MYF(0)); +} #endif - int ha_rocksdb::convert_record_from_storage_format( - const rocksdb::Slice* const key, - uchar* const buf) -{ + const rocksdb::Slice *const key, uchar *const buf) { DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read1", dbug_append_garbage_at_end(m_retrieved_record);); DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read2", @@ -4518,7 +4137,6 @@ int ha_rocksdb::convert_record_from_storage_format( return convert_record_from_storage_format(key, &retrieved_rec_slice, buf); } - /* @brief Unpack the record in this->m_retrieved_record and this->m_last_rowkey from @@ -4544,15 +4162,13 @@ int ha_rocksdb::convert_record_from_storage_format( */ int ha_rocksdb::convert_record_from_storage_format( - const rocksdb::Slice* 
const key, - const rocksdb::Slice* const value, - uchar* const buf) -{ + const rocksdb::Slice *const key, const rocksdb::Slice *const value, + uchar *const buf) { DBUG_ASSERT(key != nullptr); DBUG_ASSERT(buf != nullptr); Rdb_string_reader reader(value); - const my_ptrdiff_t ptr_diff= buf - table->record[0]; + const my_ptrdiff_t ptr_diff = buf - table->record[0]; /* Decode PK fields from the key @@ -4562,152 +4178,127 @@ int ha_rocksdb::convert_record_from_storage_format( const rocksdb::Slice rowkey_slice(m_last_rowkey.ptr(), m_last_rowkey.length()); - const char *unpack_info= nullptr; - uint16 unpack_info_len= 0; + const char *unpack_info = nullptr; + uint16 unpack_info_len = 0; rocksdb::Slice unpack_slice; /* Other fields are decoded from the value */ - const char *null_bytes= nullptr; - if (m_null_bytes_in_rec && !(null_bytes= reader.read(m_null_bytes_in_rec))) - { + const char *null_bytes = nullptr; + if (m_null_bytes_in_rec && !(null_bytes = reader.read(m_null_bytes_in_rec))) { return HA_ERR_INTERNAL_ERROR; } - if (m_maybe_unpack_info) - { - unpack_info= reader.read(RDB_UNPACK_HEADER_SIZE); + if (m_maybe_unpack_info) { + unpack_info = reader.read(RDB_UNPACK_HEADER_SIZE); - if (!unpack_info || unpack_info[0] != RDB_UNPACK_DATA_TAG) - { + if (!unpack_info || unpack_info[0] != RDB_UNPACK_DATA_TAG) { return HA_ERR_INTERNAL_ERROR; } - unpack_info_len= rdb_netbuf_to_uint16( - reinterpret_cast<const uchar *>(unpack_info + 1)); - unpack_slice= rocksdb::Slice(unpack_info, unpack_info_len); + unpack_info_len = + rdb_netbuf_to_uint16(reinterpret_cast<const uchar *>(unpack_info + 1)); + unpack_slice = rocksdb::Slice(unpack_info, unpack_info_len); reader.read(unpack_info_len - RDB_UNPACK_HEADER_SIZE); } if (m_pk_descr->unpack_record(table, buf, &rowkey_slice, unpack_info ? 
&unpack_slice : nullptr, - false /* verify_checksum */)) - { + false /* verify_checksum */)) { return HA_ERR_INTERNAL_ERROR; } - for (auto it= m_decoders_vect.begin(); it != m_decoders_vect.end(); it++) - { - const Rdb_field_encoder* const field_dec= it->m_field_enc; - const bool decode= it->m_decode; - const bool isNull = field_dec->maybe_null() && - ((null_bytes[field_dec->m_null_offset] & field_dec->m_null_mask) != 0); + for (auto it = m_decoders_vect.begin(); it != m_decoders_vect.end(); it++) { + const Rdb_field_encoder *const field_dec = it->m_field_enc; + const bool decode = it->m_decode; + const bool isNull = + field_dec->maybe_null() && + ((null_bytes[field_dec->m_null_offset] & field_dec->m_null_mask) != 0); - Field* const field= table->field[field_dec->m_field_index]; + Field *const field = table->field[field_dec->m_field_index]; /* Skip the bytes we need to skip */ if (it->m_skip && !reader.read(it->m_skip)) return HA_ERR_INTERNAL_ERROR; - if (isNull) - { - if (decode) - { + if (isNull) { + if (decode) { /* This sets the NULL-bit of this record */ field->set_null(ptr_diff); /* Besides that, set the field value to default value. CHECKSUM TABLE depends on this. 
*/ - uint field_offset= field->ptr - table->record[0]; - memcpy(buf + field_offset, - table->s->default_values + field_offset, + uint field_offset = field->ptr - table->record[0]; + memcpy(buf + field_offset, table->s->default_values + field_offset, field->pack_length()); } continue; - } - else - { + } else { if (decode) field->set_notnull(ptr_diff); } - if (field_dec->m_field_type == MYSQL_TYPE_BLOB) - { - my_core::Field_blob* const blob= (my_core::Field_blob*)field; + if (field_dec->m_field_type == MYSQL_TYPE_BLOB) { + my_core::Field_blob *const blob = (my_core::Field_blob *)field; /* Get the number of bytes needed to store length*/ - const uint length_bytes= blob->pack_length() - portable_sizeof_char_ptr; + const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr; blob->move_field_offset(ptr_diff); const char *data_len_str; - if (!(data_len_str= reader.read(length_bytes))) - { + if (!(data_len_str = reader.read(length_bytes))) { blob->move_field_offset(-ptr_diff); return HA_ERR_INTERNAL_ERROR; } memcpy(blob->ptr, data_len_str, length_bytes); - const uint32 data_len= blob->get_length((uchar*)data_len_str, - length_bytes, - table->s->db_low_byte_first); + const uint32 data_len = blob->get_length( + (uchar *)data_len_str, length_bytes, table->s->db_low_byte_first); const char *blob_ptr; - if (!(blob_ptr= reader.read(data_len))) - { + if (!(blob_ptr = reader.read(data_len))) { blob->move_field_offset(-ptr_diff); return HA_ERR_INTERNAL_ERROR; } - if (decode) - { + if (decode) { // set 8-byte pointer to 0, like innodb does (relevant for 32-bit // platforms) memset(blob->ptr + length_bytes, 0, 8); - memcpy(blob->ptr + length_bytes, &blob_ptr, sizeof(uchar**)); + memcpy(blob->ptr + length_bytes, &blob_ptr, sizeof(uchar **)); blob->move_field_offset(-ptr_diff); } - } - else if (field_dec->m_field_type == MYSQL_TYPE_VARCHAR) - { - Field_varstring* const field_var= (Field_varstring*)field; + } else if (field_dec->m_field_type == MYSQL_TYPE_VARCHAR) { + 
Field_varstring *const field_var = (Field_varstring *)field; const char *data_len_str; - if (!(data_len_str= reader.read(field_var->length_bytes))) + if (!(data_len_str = reader.read(field_var->length_bytes))) return HA_ERR_INTERNAL_ERROR; uint data_len; /* field_var->length_bytes is 1 or 2 */ - if (field_var->length_bytes == 1) - { - data_len= (uchar)data_len_str[0]; - } - else - { + if (field_var->length_bytes == 1) { + data_len = (uchar)data_len_str[0]; + } else { DBUG_ASSERT(field_var->length_bytes == 2); - data_len= uint2korr(data_len_str); + data_len = uint2korr(data_len_str); } - if (data_len > field->field_length) - { + if (data_len > field->field_length) { /* The data on disk is longer than table DDL allows? */ return HA_ERR_INTERNAL_ERROR; } if (!reader.read(data_len)) return HA_ERR_INTERNAL_ERROR; - if (decode) - { + if (decode) { memcpy(field_var->ptr + ptr_diff, data_len_str, field_var->length_bytes + data_len); } - } - else - { + } else { const char *data_bytes; - const uint len= field_dec->m_pack_length_in_rec; - if (len > 0) - { - if ((data_bytes= reader.read(len)) == nullptr) - { + const uint len = field_dec->m_pack_length_in_rec; + if (len > 0) { + if ((data_bytes = reader.read(len)) == nullptr) { return HA_ERR_INTERNAL_ERROR; } if (decode) @@ -4716,35 +4307,31 @@ int ha_rocksdb::convert_record_from_storage_format( } } - if (m_verify_row_debug_checksums) - { + if (m_verify_row_debug_checksums) { if (reader.remaining_bytes() == RDB_CHECKSUM_CHUNK_SIZE && - reader.read(1)[0] == RDB_CHECKSUM_DATA_TAG) - { - uint32_t stored_key_chksum= - rdb_netbuf_to_uint32((const uchar*)reader.read(RDB_CHECKSUM_SIZE)); - uint32_t stored_val_chksum= - rdb_netbuf_to_uint32((const uchar*)reader.read(RDB_CHECKSUM_SIZE)); - - const uint32_t computed_key_chksum= + reader.read(1)[0] == RDB_CHECKSUM_DATA_TAG) { + uint32_t stored_key_chksum = + rdb_netbuf_to_uint32((const uchar *)reader.read(RDB_CHECKSUM_SIZE)); + uint32_t stored_val_chksum = + rdb_netbuf_to_uint32((const 
uchar *)reader.read(RDB_CHECKSUM_SIZE)); + + const uint32_t computed_key_chksum = my_core::crc32(0, rdb_slice_to_uchar_ptr(key), key->size()); - const uint32_t computed_val_chksum= + const uint32_t computed_val_chksum = my_core::crc32(0, rdb_slice_to_uchar_ptr(value), value->size() - RDB_CHECKSUM_CHUNK_SIZE); DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum1", stored_key_chksum++;); - if (stored_key_chksum != computed_key_chksum) - { + if (stored_key_chksum != computed_key_chksum) { m_pk_descr->report_checksum_mismatch(true, key->data(), key->size()); return HA_ERR_INTERNAL_ERROR; } DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum2", stored_val_chksum++;); - if (stored_val_chksum != computed_val_chksum) - { + if (stored_val_chksum != computed_val_chksum) { m_pk_descr->report_checksum_mismatch(false, value->data(), value->size()); return HA_ERR_INTERNAL_ERROR; @@ -4756,52 +4343,45 @@ int ha_rocksdb::convert_record_from_storage_format( return HA_ERR_INTERNAL_ERROR; } - return 0; + return HA_EXIT_SUCCESS; } - -void ha_rocksdb::get_storage_type(Rdb_field_encoder* const encoder, - const uint &kp) -{ +void ha_rocksdb::get_storage_type(Rdb_field_encoder *const encoder, + const uint &kp) { // STORE_SOME uses unpack_info. - if (m_pk_descr->has_unpack_info(kp)) - { + if (m_pk_descr->has_unpack_info(kp)) { DBUG_ASSERT(m_pk_descr->can_unpack(kp)); - encoder->m_storage_type= Rdb_field_encoder::STORE_SOME; - m_maybe_unpack_info= true; - } - else if (m_pk_descr->can_unpack(kp)) - { - encoder->m_storage_type= Rdb_field_encoder::STORE_NONE; + encoder->m_storage_type = Rdb_field_encoder::STORE_SOME; + m_maybe_unpack_info = true; + } else if (m_pk_descr->can_unpack(kp)) { + encoder->m_storage_type = Rdb_field_encoder::STORE_NONE; } } /* - Setup data needed to convert table->record[] to and from record storage format. + Setup data needed to convert table->record[] to and from record storage + format. 
@seealso ha_rocksdb::convert_record_to_storage_format, ha_rocksdb::convert_record_from_storage_format */ -void ha_rocksdb::setup_field_converters() -{ +void ha_rocksdb::setup_field_converters() { uint i; - uint null_bytes= 0; - uchar cur_null_mask= 0x1; + uint null_bytes = 0; + uchar cur_null_mask = 0x1; DBUG_ASSERT(m_encoder_arr == nullptr); - m_encoder_arr= static_cast<Rdb_field_encoder*>( + m_encoder_arr = static_cast<Rdb_field_encoder *>( my_malloc(table->s->fields * sizeof(Rdb_field_encoder), MYF(0))); - if (m_encoder_arr == nullptr) - { + if (m_encoder_arr == nullptr) { return; } - for (i= 0; i < table->s->fields; i++) - { - Field* const field= table->field[i]; - m_encoder_arr[i].m_storage_type= Rdb_field_encoder::STORE_ALL; + for (i = 0; i < table->s->fields; i++) { + Field *const field = table->field[i]; + m_encoder_arr[i].m_storage_type = Rdb_field_encoder::STORE_ALL; /* Check if this field is @@ -4814,39 +4394,31 @@ void ha_rocksdb::setup_field_converters() part of the hidden pk. 
*/ if (!has_hidden_pk(table) && - field->part_of_key.is_set(table->s->primary_key)) - { - KEY* const pk_info= &table->key_info[table->s->primary_key]; - for (uint kp= 0; kp < pk_info->user_defined_key_parts; kp++) - { + field->part_of_key.is_set(table->s->primary_key)) { + KEY *const pk_info = &table->key_info[table->s->primary_key]; + for (uint kp = 0; kp < pk_info->user_defined_key_parts; kp++) { /* key_part->fieldnr is counted from 1 */ - if (field->field_index + 1 == pk_info->key_part[kp].fieldnr) - { + if (field->field_index + 1 == pk_info->key_part[kp].fieldnr) { get_storage_type(&m_encoder_arr[i], kp); break; } } } - m_encoder_arr[i].m_field_type= field->real_type(); - m_encoder_arr[i].m_field_index= i; - m_encoder_arr[i].m_pack_length_in_rec= field->pack_length_in_rec(); + m_encoder_arr[i].m_field_type = field->real_type(); + m_encoder_arr[i].m_field_index = i; + m_encoder_arr[i].m_pack_length_in_rec = field->pack_length_in_rec(); - if (field->real_maybe_null()) - { - m_encoder_arr[i].m_null_mask= cur_null_mask; - m_encoder_arr[i].m_null_offset= null_bytes; - if (cur_null_mask == 0x80) - { - cur_null_mask= 0x1; + if (field->real_maybe_null()) { + m_encoder_arr[i].m_null_mask = cur_null_mask; + m_encoder_arr[i].m_null_offset = null_bytes; + if (cur_null_mask == 0x80) { + cur_null_mask = 0x1; null_bytes++; - } - else - cur_null_mask= cur_null_mask << 1; - } - else - { - m_encoder_arr[i].m_null_mask= 0; + } else + cur_null_mask = cur_null_mask << 1; + } else { + m_encoder_arr[i].m_null_mask = 0; } } @@ -4854,107 +4426,128 @@ void ha_rocksdb::setup_field_converters() if (cur_null_mask != 0x1) null_bytes++; - m_null_bytes_in_rec= null_bytes; + m_null_bytes_in_rec = null_bytes; } -int ha_rocksdb::alloc_key_buffers(const TABLE* const table_arg, - const Rdb_tbl_def* const tbl_def_arg) -{ - DBUG_ENTER("ha_rocksdb::alloc_key_buffers"); +int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg, + bool alloc_alter_buffers) 
{ + DBUG_ENTER_FUNC(); + DBUG_ASSERT(m_pk_tuple == nullptr); DBUG_ASSERT(tbl_def_arg != nullptr); - std::shared_ptr<Rdb_key_def>* const kd_arr= tbl_def_arg->m_key_descr_arr; + std::shared_ptr<Rdb_key_def> *const kd_arr = tbl_def_arg->m_key_descr_arr; - uint key_len= 0; - m_pk_descr= kd_arr[pk_index(table_arg, tbl_def_arg)]; - if (has_hidden_pk(table_arg)) - { - m_pk_key_parts= 1; - } - else - { - m_pk_key_parts= + uint key_len = 0; + uint max_packed_sk_len = 0; + uint pack_key_len = 0; + + m_pk_descr = kd_arr[pk_index(table_arg, tbl_def_arg)]; + if (has_hidden_pk(table_arg)) { + m_pk_key_parts = 1; + } else { + m_pk_key_parts = table->key_info[table->s->primary_key].user_defined_key_parts; - key_len= table->key_info[table->s->primary_key].key_length; + key_len = table->key_info[table->s->primary_key].key_length; } // move this into get_table_handler() ?? m_pk_descr->setup(table_arg, tbl_def_arg); - m_pk_tuple= reinterpret_cast<uchar*>(my_malloc(key_len, MYF(0))); - if (m_pk_tuple == nullptr) - { - DBUG_RETURN(HA_ERR_INTERNAL_ERROR); + m_pk_tuple = reinterpret_cast<uchar *>(my_malloc(key_len, MYF(0))); + if (m_pk_tuple == nullptr) { + goto error; } - const uint pack_key_len= m_pk_descr->max_storage_fmt_length(); - m_pk_packed_tuple= reinterpret_cast<uchar*>(my_malloc(pack_key_len, MYF(0))); - if (m_pk_packed_tuple == nullptr) - { - DBUG_RETURN(HA_ERR_INTERNAL_ERROR); + pack_key_len = m_pk_descr->max_storage_fmt_length(); + m_pk_packed_tuple = + reinterpret_cast<uchar *>(my_malloc(pack_key_len, MYF(0))); + if (m_pk_packed_tuple == nullptr) { + goto error; } /* Sometimes, we may use m_sk_packed_tuple for storing packed PK */ - uint max_packed_sk_len= pack_key_len; - for (uint i= 0; i < table_arg->s->keys; i++) - { + max_packed_sk_len = pack_key_len; + for (uint i = 0; i < table_arg->s->keys; i++) { if (i == table_arg->s->primary_key) /* Primary key was processed above */ continue; // TODO: move this into get_table_handler() ?? 
kd_arr[i]->setup(table_arg, tbl_def_arg); - const uint packed_len= kd_arr[i]->max_storage_fmt_length(); - if (packed_len > max_packed_sk_len) - { - max_packed_sk_len= packed_len; + const uint packed_len = kd_arr[i]->max_storage_fmt_length(); + if (packed_len > max_packed_sk_len) { + max_packed_sk_len = packed_len; } } - if (!(m_sk_packed_tuple= - reinterpret_cast<uchar*>(my_malloc(max_packed_sk_len, MYF(0)))) || - !(m_sk_match_prefix_buf= - reinterpret_cast<uchar*>(my_malloc(max_packed_sk_len, MYF(0)))) || - !(m_sk_packed_tuple_old= - reinterpret_cast<uchar*>(my_malloc(max_packed_sk_len, MYF(0)))) || - !(m_end_key_packed_tuple= - reinterpret_cast<uchar*>(my_malloc(max_packed_sk_len, MYF(0)))) || - !((m_pack_buffer= - reinterpret_cast<uchar*>(my_malloc(max_packed_sk_len, MYF(0)))))) - { - DBUG_RETURN(HA_ERR_INTERNAL_ERROR); + if (!(m_sk_packed_tuple = + reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)))) || + !(m_sk_match_prefix_buf = + reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)))) || + !(m_sk_packed_tuple_old = + reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)))) || + !(m_end_key_packed_tuple = + reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)))) || + !((m_pack_buffer = reinterpret_cast<uchar *>( + my_malloc(max_packed_sk_len, MYF(0)))))) { + goto error; } - DBUG_RETURN(0); + /* + If inplace alter is happening, allocate special buffers for unique + secondary index duplicate checking. + */ + if (alloc_alter_buffers && + (!(m_dup_sk_packed_tuple = + reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)))) || + !(m_dup_sk_packed_tuple_old = reinterpret_cast<uchar *>( + my_malloc(max_packed_sk_len, MYF(0)))))) { + goto error; + } + + DBUG_RETURN(HA_EXIT_SUCCESS); + +error: + // If we're here then this means that at some point above an allocation may + // have failed. To avoid any resource leaks and maintain a clear contract + // we'll clean up before returning the error code. 
+ free_key_buffers(); + + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } -void ha_rocksdb::free_key_buffers() -{ +void ha_rocksdb::free_key_buffers() { my_free(m_pk_tuple); - m_pk_tuple= nullptr; + m_pk_tuple = nullptr; my_free(m_pk_packed_tuple); - m_pk_packed_tuple= nullptr; + m_pk_packed_tuple = nullptr; my_free(m_sk_packed_tuple); - m_sk_packed_tuple= nullptr; + m_sk_packed_tuple = nullptr; my_free(m_sk_match_prefix_buf); - m_sk_match_prefix_buf= nullptr; + m_sk_match_prefix_buf = nullptr; my_free(m_sk_packed_tuple_old); - m_sk_packed_tuple_old= nullptr; + m_sk_packed_tuple_old = nullptr; my_free(m_end_key_packed_tuple); - m_end_key_packed_tuple= nullptr; + m_end_key_packed_tuple = nullptr; my_free(m_pack_buffer); - m_pack_buffer= nullptr; + m_pack_buffer = nullptr; + + my_free(m_dup_sk_packed_tuple); + m_dup_sk_packed_tuple = nullptr; + + my_free(m_dup_sk_packed_tuple_old); + m_dup_sk_packed_tuple_old = nullptr; } -void ha_rocksdb::set_use_read_free_rpl(const char* const whitelist) -{ +void ha_rocksdb::set_use_read_free_rpl(const char *const whitelist) { DBUG_ASSERT(whitelist != nullptr); #if defined(HAVE_PSI_INTERFACE) @@ -4963,16 +4556,14 @@ void ha_rocksdb::set_use_read_free_rpl(const char* const whitelist) Regex_list_handler regex_handler; #endif - if (!regex_handler.set_patterns(whitelist)) - { + if (!regex_handler.set_patterns(whitelist)) { warn_about_bad_patterns(®ex_handler, "read_free_rpl_tables"); } m_use_read_free_rpl = regex_handler.matches(m_tbl_def->base_tablename()); } -void ha_rocksdb::set_skip_unique_check_tables(const char* const whitelist) -{ +void ha_rocksdb::set_skip_unique_check_tables(const char *const whitelist) { DBUG_ASSERT(whitelist != nullptr); #if defined(HAVE_PSI_INTERFACE) @@ -4981,25 +4572,23 @@ void ha_rocksdb::set_skip_unique_check_tables(const char* const whitelist) Regex_list_handler regex_handler; #endif - if (!regex_handler.set_patterns(whitelist)) - { + if (!regex_handler.set_patterns(whitelist)) { 
warn_about_bad_patterns(®ex_handler, "skip_unique_check_tables"); } - m_skip_unique_check= regex_handler.matches(m_tbl_def->base_tablename()); + m_skip_unique_check = regex_handler.matches(m_tbl_def->base_tablename()); } -int ha_rocksdb::open(const char* const name, int mode, uint test_if_locked) -{ - DBUG_ENTER("ha_rocksdb::open"); +int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) { + DBUG_ENTER_FUNC(); - if (m_table_handler != nullptr) - { - rdb_open_tables.release_table_handler(m_table_handler); + if (close()) { + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } - m_table_handler= rdb_open_tables.get_table_handler(name); - if (m_table_handler == nullptr) - { + + m_table_handler = rdb_open_tables.get_table_handler(name); + + if (m_table_handler == nullptr) { DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } @@ -5008,38 +4597,33 @@ int ha_rocksdb::open(const char* const name, int mode, uint test_if_locked) m_io_perf.init(&m_table_handler->m_table_perf_context, &m_table_handler->m_io_perf_read, &stats); - /* - note: m_pk_descr may be non-NULL here, when using partitions. 
It seems, - it's ok to discard it - */ std::string fullname; - if (rdb_normalize_tablename(name, &fullname)) - { - return HA_ERR_INTERNAL_ERROR; + if (rdb_normalize_tablename(name, &fullname)) { + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } - m_tbl_def= ddl_manager.find(fullname); - if (m_tbl_def == nullptr) - { + m_tbl_def = ddl_manager.find(fullname); + if (m_tbl_def == nullptr) { my_error(ER_INTERNAL_ERROR, MYF(0), - "Attempt to open a table that is not present in RocksDB-SE data " - "dictionary"); + "Attempt to open a table that is not present in RocksDB-SE data " + "dictionary"); DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } - m_lock_rows= RDB_LOCK_NONE; + m_lock_rows = RDB_LOCK_NONE; - m_key_descr_arr= m_tbl_def->m_key_descr_arr; + m_key_descr_arr = m_tbl_def->m_key_descr_arr; /* Full table scan actually uses primary key (UPDATE needs to know this, otherwise it will go into infinite loop on queries like "UPDATE tbl SET pk=pk+100") */ - key_used_on_scan= table->s->primary_key; + key_used_on_scan = table->s->primary_key; - int err; - if ((err = alloc_key_buffers(table, m_tbl_def))) - { + // close() above has already called free_key_buffers(). No need to do it here. 
+ int err = alloc_key_buffers(table, m_tbl_def); + + if (err) { DBUG_RETURN(err); } @@ -5067,14 +4651,16 @@ int ha_rocksdb::open(const char* const name, int mode, uint test_if_locked) Initialize the necessary variables for them: */ - m_verify_row_debug_checksums= false; + m_verify_row_debug_checksums = false; /* TODO: move the following to where TABLE_SHARE is opened: */ if (table->found_next_number_field) load_auto_incr_value(); - if (has_hidden_pk(table) && load_hidden_pk_value()) + if (has_hidden_pk(table) && load_hidden_pk_value()) { + free_key_buffers(); DBUG_RETURN(HA_ERR_INTERNAL_ERROR); + } /* Index block size in MyRocks: used by MySQL in query optimization */ stats.block_size = rocksdb_tbl_options.block_size; @@ -5085,78 +4671,78 @@ int ha_rocksdb::open(const char* const name, int mode, uint test_if_locked) /* Determine at open whether we should skip unique checks for this table */ set_skip_unique_check_tables(THDVAR(ha_thd(), skip_unique_check_tables)); - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } +int ha_rocksdb::close(void) { + DBUG_ENTER_FUNC(); -int ha_rocksdb::close(void) -{ - DBUG_ENTER("ha_rocksdb::close"); - - m_pk_descr= nullptr; - m_key_descr_arr= nullptr; + m_pk_descr = nullptr; + m_key_descr_arr = nullptr; free_key_buffers(); my_free(m_encoder_arr); - m_encoder_arr= nullptr; + m_encoder_arr = nullptr; - rdb_open_tables.release_table_handler(m_table_handler); - m_table_handler= nullptr; + if (m_table_handler != nullptr) { + rdb_open_tables.release_table_handler(m_table_handler); + m_table_handler = nullptr; + } - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } -bool ha_rocksdb::get_error_message(const int error, String* const buf) -{ +bool ha_rocksdb::get_error_message(const int error, String *const buf) { + DBUG_ENTER_FUNC(); + static_assert(HA_ERR_ROCKSDB_LAST > HA_ERR_FIRST, - "HA_ERR_ROCKSDB_LAST > HA_ERR_FIRST"); + "HA_ERR_ROCKSDB_LAST > HA_ERR_FIRST"); static_assert(HA_ERR_ROCKSDB_LAST > HA_ERR_LAST, - "HA_ERR_ROCKSDB_LAST > 
HA_ERR_LAST"); + "HA_ERR_ROCKSDB_LAST > HA_ERR_LAST"); DBUG_ASSERT(error > 0); DBUG_ASSERT(error <= HA_ERR_ROCKSDB_LAST); DBUG_ASSERT(buf != nullptr); - Rdb_transaction* const tx = get_tx_from_thd(ha_thd()); + Rdb_transaction *const tx = get_tx_from_thd(ha_thd()); bool temp_error = false; switch (error) { - case HA_ERR_ROCKSDB_PK_REQUIRED: - buf->append("Table must have a PRIMARY KEY."); - break; - case HA_ERR_ROCKSDB_UNIQUE_NOT_SUPPORTED: - buf->append("Unique indexes are not supported."); - break; - case HA_ERR_ROCKSDB_TOO_MANY_LOCKS: - buf->append("Number of locks held reached @@rocksdb_max_row_locks."); - break; - case HA_ERR_LOCK_WAIT_TIMEOUT: - DBUG_ASSERT(tx != nullptr); - buf->append(tx->m_detailed_error); - temp_error = true; - break; - default: - // We can be called with the values which are < HA_ERR_FIRST because most - // MySQL internal functions will just return 1 in case of an error. - break; + case HA_ERR_ROCKSDB_PK_REQUIRED: + buf->append("Table must have a PRIMARY KEY."); + break; + case HA_ERR_ROCKSDB_UNIQUE_NOT_SUPPORTED: + buf->append("Unique indexes are not supported."); + break; + case HA_ERR_ROCKSDB_TOO_MANY_LOCKS: + buf->append("Number of locks held reached @@rocksdb_max_row_locks."); + break; + case HA_ERR_LOCK_WAIT_TIMEOUT: + DBUG_ASSERT(tx != nullptr); + buf->append(tx->m_detailed_error); + temp_error = true; + break; + default: + // We can be called with the values which are < HA_ERR_FIRST because most + // MySQL internal functions will just return HA_EXIT_FAILURE in case of + // an error. 
+ break; } - return temp_error; + DBUG_RETURN(temp_error); } /* MyRocks supports only the following collations for indexed columns */ -static const std::set<const my_core::CHARSET_INFO *> RDB_INDEX_COLLATIONS= - {&my_charset_bin, &my_charset_utf8_bin, &my_charset_latin1_bin}; +static const std::set<const my_core::CHARSET_INFO *> RDB_INDEX_COLLATIONS = { + &my_charset_bin, &my_charset_utf8_bin, &my_charset_latin1_bin}; - -static bool rdb_is_index_collation_supported(const my_core::Field* const field) -{ - const my_core::enum_field_types type= field->real_type(); +static bool +rdb_is_index_collation_supported(const my_core::Field *const field) { + const my_core::enum_field_types type = field->real_type(); /* Handle [VAR](CHAR|BINARY) or TEXT|BLOB */ if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING || - type == MYSQL_TYPE_BLOB) - { + type == MYSQL_TYPE_BLOB) { return RDB_INDEX_COLLATIONS.find(field->charset()) != RDB_INDEX_COLLATIONS.end(); } @@ -5179,17 +4765,16 @@ static bool rdb_is_index_collation_supported(const my_core::Field* const field) 0 - Ok other - error, either given table ddl is not supported by rocksdb or OOM. */ -int ha_rocksdb::create_key_defs(const TABLE* const table_arg, - Rdb_tbl_def* const tbl_def_arg, - const TABLE* const old_table_arg /* = nullptr */, - const Rdb_tbl_def* const old_tbl_def_arg - /* = nullptr */) const -{ +int ha_rocksdb::create_key_defs( + const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg, + const TABLE *const old_table_arg /* = nullptr */, + const Rdb_tbl_def *const old_tbl_def_arg + /* = nullptr */) const { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(table_arg != nullptr); DBUG_ASSERT(table_arg->s != nullptr); - DBUG_ENTER("ha_rocksdb::create_key_defs"); - uint i; /* @@ -5204,13 +4789,11 @@ int ha_rocksdb::create_key_defs(const TABLE* const table_arg, allocated to each key definition. See below for more details. 
http://github.com/MySQLOnRocksDB/mysql-5.6/issues/86#issuecomment-138515501 */ - if (create_cfs(table_arg, tbl_def_arg, &cfs)) - { - DBUG_RETURN(1); + if (create_cfs(table_arg, tbl_def_arg, &cfs)) { + DBUG_RETURN(HA_EXIT_FAILURE); }; - if (!old_tbl_def_arg) - { + if (!old_tbl_def_arg) { /* old_tbl_def doesn't exist. this means we are in the process of creating a new table. @@ -5218,30 +4801,25 @@ int ha_rocksdb::create_key_defs(const TABLE* const table_arg, Get the index numbers (this will update the next_index_number) and create Rdb_key_def structures. */ - for (i= 0; i < tbl_def_arg->m_key_count; i++) - { - if (create_key_def(table_arg, i, tbl_def_arg, - &m_key_descr_arr[i], cfs[i])) - { - DBUG_RETURN(1); + for (i = 0; i < tbl_def_arg->m_key_count; i++) { + if (create_key_def(table_arg, i, tbl_def_arg, &m_key_descr_arr[i], + cfs[i])) { + DBUG_RETURN(HA_EXIT_FAILURE); } } - } - else - { + } else { /* old_tbl_def exists. This means we are creating a new tbl_def as part of in-place alter table. Copy over existing keys from the old_tbl_def and generate the necessary new key definitions if any. 
*/ if (create_inplace_key_defs(table_arg, tbl_def_arg, old_table_arg, - old_tbl_def_arg, cfs)) - { - DBUG_RETURN(1); + old_tbl_def_arg, cfs)) { + DBUG_RETURN(HA_EXIT_FAILURE); } } - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } /* @@ -5260,15 +4838,14 @@ int ha_rocksdb::create_key_defs(const TABLE* const table_arg, 0 - Ok other - error */ -int ha_rocksdb::create_cfs(const TABLE* const table_arg, - Rdb_tbl_def* const tbl_def_arg, - std::array<struct key_def_cf_info, MAX_INDEXES + 1>* const cfs) const -{ +int ha_rocksdb::create_cfs( + const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg, + std::array<struct key_def_cf_info, MAX_INDEXES + 1> *const cfs) const { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(table_arg != nullptr); DBUG_ASSERT(table_arg->s != nullptr); - DBUG_ENTER("ha_rocksdb::create_cfs"); - char tablename_sys[NAME_LEN + 1]; my_core::filename_to_tablename(tbl_def_arg->base_tablename().c_str(), @@ -5278,36 +4855,31 @@ int ha_rocksdb::create_cfs(const TABLE* const table_arg, The first loop checks the index parameters and creates column families if necessary. 
*/ - for (uint i= 0; i < tbl_def_arg->m_key_count; i++) - { - rocksdb::ColumnFamilyHandle* cf_handle; + for (uint i = 0; i < tbl_def_arg->m_key_count; i++) { + rocksdb::ColumnFamilyHandle *cf_handle; if (rocksdb_strict_collation_check && !is_hidden_pk(i, table_arg, tbl_def_arg) && - tbl_def_arg->base_tablename().find(tmp_file_prefix) != 0) - { - for (uint part= 0; part < table_arg->key_info[i].actual_key_parts; part++) - { + tbl_def_arg->base_tablename().find(tmp_file_prefix) != 0) { + for (uint part = 0; part < table_arg->key_info[i].actual_key_parts; + part++) { if (!rdb_is_index_collation_supported( - table_arg->key_info[i].key_part[part].field) && - !rdb_collation_exceptions->matches(tablename_sys)) - { + table_arg->key_info[i].key_part[part].field) && + !rdb_collation_exceptions->matches(tablename_sys)) { std::string collation_err; - for (const auto &coll : RDB_INDEX_COLLATIONS) - { - if (collation_err != "") - { + for (const auto &coll : RDB_INDEX_COLLATIONS) { + if (collation_err != "") { collation_err += ", "; } collation_err += coll->name; } - my_printf_error(ER_UNKNOWN_ERROR, - "Unsupported collation on string indexed " - "column %s.%s Use binary collation (%s).", MYF(0), - tbl_def_arg->full_tablename().c_str(), - table_arg->key_info[i].key_part[part].field->field_name, - collation_err.c_str()); - DBUG_RETURN(1); + my_printf_error( + ER_UNKNOWN_ERROR, "Unsupported collation on string indexed " + "column %s.%s Use binary collation (%s).", + MYF(0), tbl_def_arg->full_tablename().c_str(), + table_arg->key_info[i].key_part[part].field->field_name, + collation_err.c_str()); + DBUG_RETURN(HA_EXIT_FAILURE); } } } @@ -5316,36 +4888,34 @@ int ha_rocksdb::create_cfs(const TABLE* const table_arg, index comment has Column Family name. If there was no comment, we get NULL, and it means use the default column family. 
*/ - const char* const comment = get_key_comment(i, table_arg, tbl_def_arg); - const char* const key_name = get_key_name(i, table_arg, tbl_def_arg); + const char *const comment = get_key_comment(i, table_arg, tbl_def_arg); + const char *const key_name = get_key_name(i, table_arg, tbl_def_arg); - if (looks_like_per_index_cf_typo(comment)) - { + if (looks_like_per_index_cf_typo(comment)) { my_error(ER_NOT_SUPPORTED_YET, MYF(0), "column family name looks like a typo of $per_index_cf"); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } /* Prevent create from using the system column family */ - if (comment && strcmp(DEFAULT_SYSTEM_CF_NAME, comment) == 0) - { + if (comment && strcmp(DEFAULT_SYSTEM_CF_NAME, comment) == 0) { my_error(ER_WRONG_ARGUMENTS, MYF(0), "column family not valid for storing index data"); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } bool is_auto_cf_flag; - cf_handle = cf_manager.get_or_create_cf(rdb, comment, - tbl_def_arg->full_tablename(), - key_name, &is_auto_cf_flag); + cf_handle = + cf_manager.get_or_create_cf(rdb, comment, tbl_def_arg->full_tablename(), + key_name, &is_auto_cf_flag); if (!cf_handle) - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); - auto& cf = (*cfs)[i]; + auto &cf = (*cfs)[i]; cf.cf_handle = cf_handle; cf.is_reverse_cf = Rdb_cf_manager::is_cf_name_reverse(comment); cf.is_auto_cf = is_auto_cf_flag; } - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } /* @@ -5362,51 +4932,46 @@ int ha_rocksdb::create_cfs(const TABLE* const table_arg, 0 - Ok other - error, either given table ddl is not supported by rocksdb or OOM. 
*/ -int ha_rocksdb::create_inplace_key_defs(const TABLE* const table_arg, - Rdb_tbl_def* const tbl_def_arg, - const TABLE* const old_table_arg, - const Rdb_tbl_def* const old_tbl_def_arg, - const std::array<key_def_cf_info, MAX_INDEXES + 1>& cfs) const -{ +int ha_rocksdb::create_inplace_key_defs( + const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg, + const TABLE *const old_table_arg, const Rdb_tbl_def *const old_tbl_def_arg, + const std::array<key_def_cf_info, MAX_INDEXES + 1> &cfs) const { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(table_arg != nullptr); DBUG_ASSERT(tbl_def_arg != nullptr); DBUG_ASSERT(old_tbl_def_arg != nullptr); - DBUG_ENTER("create_key_def"); - - std::shared_ptr<Rdb_key_def>* const old_key_descr= + std::shared_ptr<Rdb_key_def> *const old_key_descr = old_tbl_def_arg->m_key_descr_arr; - std::shared_ptr<Rdb_key_def>* const new_key_descr= + std::shared_ptr<Rdb_key_def> *const new_key_descr = tbl_def_arg->m_key_descr_arr; const std::unordered_map<std::string, uint> old_key_pos = - get_old_key_positions(table_arg, tbl_def_arg, old_table_arg, - old_tbl_def_arg); + get_old_key_positions(table_arg, tbl_def_arg, old_table_arg, + old_tbl_def_arg); uint i; - for (i= 0; i < tbl_def_arg->m_key_count; i++) - { + for (i = 0; i < tbl_def_arg->m_key_count; i++) { const auto &it = old_key_pos.find(get_key_name(i, table_arg, tbl_def_arg)); - if (it != old_key_pos.end()) - { + if (it != old_key_pos.end()) { /* Found matching index in old table definition, so copy it over to the new one created. 
*/ - const Rdb_key_def& okd= *old_key_descr[it->second]; + const Rdb_key_def &okd = *old_key_descr[it->second]; - uint16 index_dict_version= 0; - uchar index_type= 0; - uint16 kv_version= 0; - const GL_INDEX_ID gl_index_id= okd.get_gl_index_id(); + uint16 index_dict_version = 0; + uchar index_type = 0; + uint16 kv_version = 0; + const GL_INDEX_ID gl_index_id = okd.get_gl_index_id(); if (!dict_manager.get_index_info(gl_index_id, &index_dict_version, - &index_type, &kv_version)) - { + &index_type, &kv_version)) { // NO_LINT_DEBUG sql_print_error("RocksDB: Could not get index information " "for Index Number (%u,%u), table %s", gl_index_id.cf_id, gl_index_id.index_id, old_tbl_def_arg->full_tablename().c_str()); - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } /* @@ -5414,59 +4979,45 @@ int ha_rocksdb::create_inplace_key_defs(const TABLE* const table_arg, keynr within the pack_info for each field and the keyno of the keydef itself. */ - new_key_descr[i]= std::make_shared<Rdb_key_def>( - okd.get_index_number(), - i, - okd.get_cf(), - index_dict_version, - index_type, - kv_version, - okd.m_is_reverse_cf, - okd.m_is_auto_cf, - okd.m_name.c_str(), - dict_manager.get_stats(gl_index_id)); - } - else if (create_key_def(table_arg, i, tbl_def_arg, - &new_key_descr[i], cfs[i])) - { - DBUG_RETURN(1); + new_key_descr[i] = std::make_shared<Rdb_key_def>( + okd.get_index_number(), i, okd.get_cf(), index_dict_version, + index_type, kv_version, okd.m_is_reverse_cf, okd.m_is_auto_cf, + okd.m_name.c_str(), dict_manager.get_stats(gl_index_id)); + } else if (create_key_def(table_arg, i, tbl_def_arg, &new_key_descr[i], + cfs[i])) { + DBUG_RETURN(HA_EXIT_FAILURE); } DBUG_ASSERT(new_key_descr[i] != nullptr); new_key_descr[i]->setup(table_arg, tbl_def_arg); } - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } std::unordered_map<std::string, uint> ha_rocksdb::get_old_key_positions( - const TABLE* const table_arg, - const Rdb_tbl_def* const tbl_def_arg, - const TABLE* const old_table_arg, - 
const Rdb_tbl_def* const old_tbl_def_arg) const -{ + const TABLE *const table_arg, const Rdb_tbl_def *const tbl_def_arg, + const TABLE *const old_table_arg, + const Rdb_tbl_def *const old_tbl_def_arg) const { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(table_arg != nullptr); DBUG_ASSERT(old_table_arg != nullptr); DBUG_ASSERT(tbl_def_arg != nullptr); DBUG_ASSERT(old_tbl_def_arg != nullptr); - DBUG_ENTER("get_old_key_positions"); - - std::shared_ptr<Rdb_key_def>* const old_key_descr= + std::shared_ptr<Rdb_key_def> *const old_key_descr = old_tbl_def_arg->m_key_descr_arr; std::unordered_map<std::string, uint> old_key_pos; std::unordered_map<std::string, uint> new_key_pos; uint i; - for (i= 0; i < tbl_def_arg->m_key_count; i++) - { + for (i = 0; i < tbl_def_arg->m_key_count; i++) { new_key_pos[get_key_name(i, table_arg, tbl_def_arg)] = i; } - for (i= 0; i < old_tbl_def_arg->m_key_count; i++) - { - if (is_hidden_pk(i, old_table_arg, old_tbl_def_arg)) - { + for (i = 0; i < old_tbl_def_arg->m_key_count; i++) { + if (is_hidden_pk(i, old_table_arg, old_tbl_def_arg)) { old_key_pos[old_key_descr[i]->m_name] = i; continue; } @@ -5480,17 +5031,15 @@ std::unordered_map<std::string, uint> ha_rocksdb::get_old_key_positions( CREATE TABLE t1 (a INT, b INT, KEY ka(a)) ENGINE=RocksDB; ALTER TABLE t1 DROP INDEX ka, ADD INDEX ka(b), ALGORITHM=INPLACE; */ - const KEY* const old_key = &old_table_arg->key_info[i]; + const KEY *const old_key = &old_table_arg->key_info[i]; const auto &it = new_key_pos.find(old_key->name); - if (it == new_key_pos.end()) - { + if (it == new_key_pos.end()) { continue; } - KEY* const new_key = &table_arg->key_info[it->second]; + KEY *const new_key = &table_arg->key_info[it->second]; - if (!compare_key_parts(old_key, new_key)) - { + if (!compare_key_parts(old_key, new_key)) { old_key_pos[old_key->name] = i; } } @@ -5499,31 +5048,27 @@ std::unordered_map<std::string, uint> ha_rocksdb::get_old_key_positions( } /* Check two keys to ensure that key parts within keys match */ 
-int ha_rocksdb::compare_key_parts(const KEY* const old_key, - const KEY* const new_key) const -{ +int ha_rocksdb::compare_key_parts(const KEY *const old_key, + const KEY *const new_key) const { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(old_key != nullptr); DBUG_ASSERT(new_key != nullptr); - DBUG_ENTER("compare_key_parts"); - /* Skip if key parts do not match, as it is a different key */ - if (new_key->user_defined_key_parts != old_key->user_defined_key_parts) - { - DBUG_RETURN(1); + if (new_key->user_defined_key_parts != old_key->user_defined_key_parts) { + DBUG_RETURN(HA_EXIT_FAILURE); } /* Check to see that key parts themselves match */ - for (uint i= 0; i < old_key->user_defined_key_parts; i++) - { + for (uint i = 0; i < old_key->user_defined_key_parts; i++) { if (strcmp(old_key->key_part[i].field->field_name, - new_key->key_part[i].field->field_name) != 0) - { - DBUG_RETURN(1); + new_key->key_part[i].field->field_name) != 0) { + DBUG_RETURN(HA_EXIT_FAILURE); } } - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } /* @@ -5543,86 +5088,75 @@ int ha_rocksdb::compare_key_parts(const KEY* const old_key, 0 - Ok other - error, either given table ddl is not supported by rocksdb or OOM. 
*/ -int ha_rocksdb::create_key_def(const TABLE* const table_arg, const uint &i, - const Rdb_tbl_def* const tbl_def_arg, - std::shared_ptr<Rdb_key_def>* const new_key_def, - const struct key_def_cf_info& cf_info) const -{ - DBUG_ENTER("create_key_def"); +int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i, + const Rdb_tbl_def *const tbl_def_arg, + std::shared_ptr<Rdb_key_def> *const new_key_def, + const struct key_def_cf_info &cf_info) const { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(new_key_def != nullptr); DBUG_ASSERT(*new_key_def == nullptr); - const uint index_id= ddl_manager.get_and_update_next_number(&dict_manager); - const uint16_t index_dict_version= Rdb_key_def::INDEX_INFO_VERSION_LATEST; + const uint index_id = ddl_manager.get_and_update_next_number(&dict_manager); + const uint16_t index_dict_version = Rdb_key_def::INDEX_INFO_VERSION_LATEST; uchar index_type; uint16_t kv_version; - if (is_hidden_pk(i, table_arg, tbl_def_arg)) - { - index_type= Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY; - kv_version= Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST; - } - else if (i == table_arg->s->primary_key) - { - index_type= Rdb_key_def::INDEX_TYPE_PRIMARY; - uint16 pk_latest_version= Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST; - kv_version= pk_latest_version; - } - else - { - index_type= Rdb_key_def::INDEX_TYPE_SECONDARY; - uint16 sk_latest_version= Rdb_key_def::SECONDARY_FORMAT_VERSION_LATEST; - kv_version= sk_latest_version; + if (is_hidden_pk(i, table_arg, tbl_def_arg)) { + index_type = Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY; + kv_version = Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST; + } else if (i == table_arg->s->primary_key) { + index_type = Rdb_key_def::INDEX_TYPE_PRIMARY; + uint16 pk_latest_version = Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST; + kv_version = pk_latest_version; + } else { + index_type = Rdb_key_def::INDEX_TYPE_SECONDARY; + uint16 sk_latest_version = Rdb_key_def::SECONDARY_FORMAT_VERSION_LATEST; + kv_version = sk_latest_version; } - 
const char* const key_name = get_key_name(i, table_arg, m_tbl_def); - *new_key_def= std::make_shared<Rdb_key_def>( + const char *const key_name = get_key_name(i, table_arg, m_tbl_def); + *new_key_def = std::make_shared<Rdb_key_def>( index_id, i, cf_info.cf_handle, index_dict_version, index_type, kv_version, cf_info.is_reverse_cf, cf_info.is_auto_cf, key_name); - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } -int rdb_normalize_tablename(const std::string& tablename, - std::string* const strbuf) -{ +int rdb_normalize_tablename(const std::string &tablename, + std::string *const strbuf) { DBUG_ASSERT(strbuf != nullptr); - if (tablename.size() < 2 || tablename[0] != '.' || tablename[1] != '/') - { - DBUG_ASSERT(0); // We were not passed table name? + if (tablename.size() < 2 || tablename[0] != '.' || tablename[1] != '/') { + DBUG_ASSERT(0); // We were not passed table name? return HA_ERR_INTERNAL_ERROR; } size_t pos = tablename.find_first_of('/', 2); - if (pos == std::string::npos) - { - DBUG_ASSERT(0); // We were not passed table name? + if (pos == std::string::npos) { + DBUG_ASSERT(0); // We were not passed table name? return HA_ERR_INTERNAL_ERROR; } *strbuf = tablename.substr(2, pos - 2) + "." 
+ tablename.substr(pos + 1); - return 0; + return HA_EXIT_SUCCESS; } /* Check to see if the user's original statement includes foreign key references */ -bool ha_rocksdb::contains_foreign_key(THD* const thd) -{ +bool ha_rocksdb::contains_foreign_key(THD *const thd) { bool success; - const char* str = thd_query_string(thd)->str; + const char *str = thd_query_string(thd)->str; DBUG_ASSERT(str != nullptr); - while (*str != '\0') - { + while (*str != '\0') { // Scan from our current pos looking for 'FOREIGN' str = rdb_find_in_string(str, "FOREIGN", &success); - if (!success) - { + if (!success) { return false; } @@ -5630,22 +5164,19 @@ bool ha_rocksdb::contains_foreign_key(THD* const thd) str = rdb_check_next_token(&my_charset_bin, str, "FOREIGN", &success); DBUG_ASSERT(success); - if (!my_isspace(&my_charset_bin, *str)) - { + if (!my_isspace(&my_charset_bin, *str)) { return false; } // See if the next token is 'KEY' str = rdb_check_next_token(&my_charset_bin, str, "KEY", &success); - if (!success) - { + if (!success) { continue; } // See if the next token is '(' str = rdb_check_next_token(&my_charset_bin, str, "(", &success); - if (!success) - { + if (!success) { // There is an optional index id after 'FOREIGN KEY', skip it str = rdb_skip_id(&my_charset_bin, str); @@ -5670,13 +5201,12 @@ bool ha_rocksdb::contains_foreign_key(THD* const thd) @param dbbuf returns database name/table_schema @param tablebuf returns tablename @param partitionbuf returns partition suffix if there is one - @return 0 on success, non-zero on failure to split + @return HA_EXIT_SUCCESS on success, non-zero on failure to split */ -int rdb_split_normalized_tablename(const std::string& fullname, - std::string* const db, - std::string* const table, - std::string* const partition) -{ +int rdb_split_normalized_tablename(const std::string &fullname, + std::string *const db, + std::string *const table, + std::string *const partition) { DBUG_ASSERT(!fullname.empty()); #define RDB_PARTITION_STR "#P#" @@ 
-5685,47 +5215,39 @@ int rdb_split_normalized_tablename(const std::string& fullname, size_t dotpos = fullname.find('.'); /* Invalid table name? */ - if (dotpos == std::string::npos) - { + if (dotpos == std::string::npos) { return HA_ERR_INTERNAL_ERROR; } // Table must have a database name associated with it. DBUG_ASSERT(dotpos > 0); - if (db != nullptr) - { + if (db != nullptr) { *db = fullname.substr(0, dotpos); } dotpos++; - const size_t partpos = fullname.find(RDB_PARTITION_STR, dotpos, - strlen(RDB_PARTITION_STR)); + const size_t partpos = + fullname.find(RDB_PARTITION_STR, dotpos, strlen(RDB_PARTITION_STR)); - if (partpos != std::string::npos) - { + if (partpos != std::string::npos) { DBUG_ASSERT(partpos >= dotpos); - if (table != nullptr) - { + if (table != nullptr) { *table = fullname.substr(dotpos, partpos - dotpos); } - if (partition != nullptr) - { + if (partition != nullptr) { *partition = fullname.substr(partpos + strlen(RDB_PARTITION_STR)); } - } - else if (table != nullptr) - { + } else if (table != nullptr) { *table = fullname.substr(dotpos); } - return 0; + return HA_EXIT_SUCCESS; } - /** @brief create() is called to create a database. 
The variable name will have the name @@ -5745,14 +5267,14 @@ int rdb_split_normalized_tablename(const std::string& fullname, ha_create_table() in handle.cc */ -int ha_rocksdb::create(const char* const name, TABLE* const table_arg, - HA_CREATE_INFO* const create_info) -{ +int ha_rocksdb::create(const char *const name, TABLE *const table_arg, + HA_CREATE_INFO *const create_info) { + DBUG_ENTER_FUNC(); + int res; std::string str; - DBUG_ENTER("ha_rocksdb::create"); - THD* const thd = my_core::thd_get_current_thd(); + THD *const thd = my_core::thd_get_current_thd(); if (get_table_if_exists(name)) { if (thd->lex->sql_command == SQLCOM_TRUNCATE) { @@ -5763,11 +5285,12 @@ int ha_rocksdb::create(const char* const name, TABLE* const table_arg, if (rdb_normalize_tablename(name, &str)) { DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } - my_printf_error(ER_UNKNOWN_ERROR, - "Table '%s' does not exist, but metadata information exists inside " - "MyRocks. This is a sign of data inconsistency. Please check if " - "'%s.frm' exists, and try to restore it if it does not exist.", - MYF(0), str.c_str(), name); + my_printf_error( + ER_UNKNOWN_ERROR, + "Table '%s' does not exist, but metadata information exists inside " + "MyRocks. This is a sign of data inconsistency. Please check if " + "'%s.frm' exists, and try to restore it if it does not exist.", + MYF(0), str.c_str(), name); DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } } @@ -5777,15 +5300,14 @@ int ha_rocksdb::create(const char* const name, TABLE* const table_arg, passes strings like "./test/t14#P#p0" for individual partitions, while table_arg->s->table_name has none of that. 
*/ - if (rdb_normalize_tablename(name, &str)) - { + if (rdb_normalize_tablename(name, &str)) { DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } - if (contains_foreign_key(thd)) - { + if (contains_foreign_key(thd)) { my_printf_error(ER_NOT_SUPPORTED_YET, "MyRocks does not currently support " - "foreign key constraints", MYF(0)); + "foreign key constraints", + MYF(0)); DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } @@ -5793,65 +5315,60 @@ int ha_rocksdb::create(const char* const name, TABLE* const table_arg, TODO(alexyang): Temporarily disable unique indexes support when there is no Primary Key */ - for (uint keyno= 0; keyno < table_arg->s->keys; keyno++) - { + for (uint keyno = 0; keyno < table_arg->s->keys; keyno++) { if ((table_arg->key_info[keyno].flags & HA_NOSAME) && - has_hidden_pk(table_arg)) - { - my_printf_error(ER_UNKNOWN_ERROR, - "Unique index support is disabled when the table has no primary key. ", - MYF(0)); + has_hidden_pk(table_arg)) { + my_printf_error(ER_UNKNOWN_ERROR, "Unique index support is disabled when " + "the table has no primary key. 
", + MYF(0)); DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } } - bool write_err= false; - const std::unique_ptr<rocksdb::WriteBatch> wb= dict_manager.begin(); - rocksdb::WriteBatch* const batch= wb.get(); + bool write_err = false; + const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin(); + rocksdb::WriteBatch *const batch = wb.get(); /* Create table/key descriptions and put them into the data dictionary */ - m_tbl_def= new Rdb_tbl_def(str); + m_tbl_def = new Rdb_tbl_def(str); - uint n_keys= table_arg->s->keys; + uint n_keys = table_arg->s->keys; /* If no primary key found, create a hidden PK and place it inside table definition */ - if (has_hidden_pk(table_arg)) - { + if (has_hidden_pk(table_arg)) { n_keys += 1; } - m_key_descr_arr= new std::shared_ptr<Rdb_key_def>[n_keys]; - m_tbl_def->m_key_count= n_keys; - m_tbl_def->m_key_descr_arr= m_key_descr_arr; + m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[n_keys]; + m_tbl_def->m_key_count = n_keys; + m_tbl_def->m_key_descr_arr = m_key_descr_arr; - if ((res= create_key_defs(table_arg, m_tbl_def))) - { + if ((res = create_key_defs(table_arg, m_tbl_def))) { goto error; } - m_pk_descr= m_key_descr_arr[pk_index(table_arg, m_tbl_def)]; + m_pk_descr = m_key_descr_arr[pk_index(table_arg, m_tbl_def)]; dict_manager.lock(); - write_err= ddl_manager.put_and_write(m_tbl_def, batch) - || dict_manager.commit(batch); + write_err = + ddl_manager.put_and_write(m_tbl_def, batch) || dict_manager.commit(batch); dict_manager.unlock(); - if (write_err) - { + if (write_err) { goto error; } if (create_info->auto_increment_value) - m_tbl_def->m_auto_incr_val= create_info->auto_increment_value; + m_tbl_def->m_auto_incr_val = create_info->auto_increment_value; /* We only support auto_increment at start of the PRIMARY KEY. 
*/ - //Field *field; - //if ((field= table_arg->next_number_field)) + // Field *field; + // if ((field= table_arg->next_number_field)) /* TODO mdcallag: disable this for now to let UNIQUE indexes kind of work if ((field= table_arg->found_next_number_field)) { @@ -5861,18 +5378,17 @@ int ha_rocksdb::create(const char* const name, TABLE* const table_arg, DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } */ - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); error: /* Delete what we have allocated so far */ delete m_tbl_def; - m_tbl_def= nullptr; - m_key_descr_arr= nullptr; + m_tbl_def = nullptr; + m_key_descr_arr = nullptr; DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } - /** @note This function is used only when the table has not yet been opened, and @@ -5881,48 +5397,43 @@ error: See comment in ha_rocksdb::index_flags() for details. */ -bool ha_rocksdb::check_keyread_allowed(uint inx, uint part, bool all_parts) const -{ - bool res= true; - KEY* const key_info= &table_share->key_info[inx]; +bool ha_rocksdb::check_keyread_allowed(uint inx, uint part, + bool all_parts) const { + bool res = true; + KEY *const key_info = &table_share->key_info[inx]; Rdb_field_packing dummy1; - res= dummy1.setup(nullptr, key_info->key_part[part].field, inx, part, - key_info->key_part[part].length); + res = dummy1.setup(nullptr, key_info->key_part[part].field, inx, part, + key_info->key_part[part].length); - if (res && all_parts) - { - for (uint i= 0; i < part; i++) - { + if (res && all_parts) { + for (uint i = 0; i < part; i++) { Field *field; - if ((field= key_info->key_part[i].field)) - { + if ((field = key_info->key_part[i].field)) { Rdb_field_packing dummy; - if (!dummy.setup(nullptr, field, inx, i, key_info->key_part[i].length)) - { + if (!dummy.setup(nullptr, field, inx, i, + key_info->key_part[i].length)) { /* Cannot do index-only reads for this column */ - res= false; + res = false; break; } } } } - const uint pk= table_share->primary_key; + const uint pk = table_share->primary_key; if (inx == pk && 
all_parts && - part + 1 == table_share->key_info[pk].user_defined_key_parts) - { - m_pk_can_be_decoded= res; + part + 1 == table_share->key_info[pk].user_defined_key_parts) { + m_pk_can_be_decoded = res; } return res; } -int ha_rocksdb::read_key_exact(const Rdb_key_def& kd, - rocksdb::Iterator* const iter, +int ha_rocksdb::read_key_exact(const Rdb_key_def &kd, + rocksdb::Iterator *const iter, const bool &full_key_match, - const rocksdb::Slice& key_slice) const -{ + const rocksdb::Slice &key_slice) const { DBUG_ASSERT(iter != nullptr); /* @@ -5930,10 +5441,8 @@ int ha_rocksdb::read_key_exact(const Rdb_key_def& kd, index_tuple= lookup_tuple. lookup_tuple may be a prefix of the index. */ - if (kd.m_is_reverse_cf) - { - if (!full_key_match) - { + if (kd.m_is_reverse_cf) { + if (!full_key_match) { if (!iter->Valid()) iter->SeekToLast(); else @@ -5941,49 +5450,42 @@ int ha_rocksdb::read_key_exact(const Rdb_key_def& kd, } } - if (!iter->Valid() || !kd.value_matches_prefix(iter->key(), key_slice)) - { + if (!iter->Valid() || !kd.value_matches_prefix(iter->key(), key_slice)) { /* Got a record that is not equal to the lookup value, or even a record from another table.index. */ return HA_ERR_KEY_NOT_FOUND; } - return 0; + return HA_EXIT_SUCCESS; } -int ha_rocksdb::read_before_key(const Rdb_key_def& kd, +int ha_rocksdb::read_before_key(const Rdb_key_def &kd, const bool &full_key_match, - const rocksdb::Slice& key_slice) -{ + const rocksdb::Slice &key_slice) { /* We are looking for record with the biggest t.key such that t.key < lookup_tuple. */ - if (kd.m_is_reverse_cf) - { + if (kd.m_is_reverse_cf) { if (m_scan_it->Valid() && full_key_match && - kd.value_matches_prefix(m_scan_it->key(), key_slice)) - { + kd.value_matches_prefix(m_scan_it->key(), key_slice)) { /* We are using full key and we've hit an exact match */ m_scan_it->Next(); } - } - else - { + } else { if (m_scan_it->Valid()) m_scan_it->Prev(); else m_scan_it->SeekToLast(); } - return m_scan_it->Valid() ? 
0 : HA_ERR_KEY_NOT_FOUND; + return m_scan_it->Valid() ? HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND; } -int ha_rocksdb::read_after_key(const Rdb_key_def& kd, +int ha_rocksdb::read_after_key(const Rdb_key_def &kd, const bool &full_key_match, - const rocksdb::Slice& key_slice) -{ + const rocksdb::Slice &key_slice) { /* We are looking for the first record such that @@ -5992,73 +5494,64 @@ int ha_rocksdb::read_after_key(const Rdb_key_def& kd, with HA_READ_AFTER_KEY, $GT = '>', with HA_READ_KEY_OR_NEXT, $GT = '>=' */ - if (kd.m_is_reverse_cf) - { - if (!m_scan_it->Valid()) - { + if (kd.m_is_reverse_cf) { + if (!m_scan_it->Valid()) { m_scan_it->SeekToLast(); - } - else - { + } else { /* We should step back - when not using full extended key - when using full extended key and when we've got an exact match */ if (!full_key_match || - !kd.value_matches_prefix(m_scan_it->key(), key_slice)) - { + !kd.value_matches_prefix(m_scan_it->key(), key_slice)) { m_scan_it->Prev(); } } } - return m_scan_it->Valid() ? 0 : HA_ERR_KEY_NOT_FOUND; + return m_scan_it->Valid() ? 
HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND; } -int ha_rocksdb::position_to_correct_key( - const Rdb_key_def& kd, - const enum ha_rkey_function &find_flag, - const bool &full_key_match, - const uchar* const key, - const key_part_map &keypart_map, - const rocksdb::Slice& key_slice, - bool* const move_forward) -{ - int rc= 0; +int ha_rocksdb::position_to_correct_key(const Rdb_key_def &kd, + const enum ha_rkey_function &find_flag, + const bool &full_key_match, + const uchar *const key, + const key_part_map &keypart_map, + const rocksdb::Slice &key_slice, + bool *const move_forward) { + int rc = 0; - *move_forward= true; + *move_forward = true; switch (find_flag) { case HA_READ_KEY_EXACT: - rc= read_key_exact(kd, m_scan_it, full_key_match, key_slice); + rc = read_key_exact(kd, m_scan_it, full_key_match, key_slice); break; case HA_READ_BEFORE_KEY: - *move_forward= false; - rc= read_before_key(kd, full_key_match, key_slice); - if (rc == 0 && !kd.covers_key(m_scan_it->key())) - { + *move_forward = false; + rc = read_before_key(kd, full_key_match, key_slice); + if (rc == 0 && !kd.covers_key(m_scan_it->key())) { /* The record we've got is not from this index */ - rc= HA_ERR_KEY_NOT_FOUND; + rc = HA_ERR_KEY_NOT_FOUND; } break; case HA_READ_AFTER_KEY: case HA_READ_KEY_OR_NEXT: - rc= read_after_key(kd, full_key_match, key_slice); - if (rc == 0 && !kd.covers_key(m_scan_it->key())) - { + rc = read_after_key(kd, full_key_match, key_slice); + if (rc == 0 && !kd.covers_key(m_scan_it->key())) { /* The record we've got is not from this index */ - rc= HA_ERR_KEY_NOT_FOUND; + rc = HA_ERR_KEY_NOT_FOUND; } break; case HA_READ_KEY_OR_PREV: case HA_READ_PREFIX: /* This flag is not used by the SQL layer, so we don't support it yet. */ - rc= HA_ERR_UNSUPPORTED; + rc = HA_ERR_UNSUPPORTED; break; case HA_READ_PREFIX_LAST: case HA_READ_PREFIX_LAST_OR_PREV: - *move_forward= false; + *move_forward = false; /* Find the last record with the specified index prefix lookup. 
- HA_READ_PREFIX_LAST requires that the record has the @@ -6068,27 +5561,21 @@ int ha_rocksdb::position_to_correct_key( records with prefix=lookup, we should return the last record before that. */ - rc= read_before_key(kd, full_key_match, key_slice); - if (rc == 0) - { - const rocksdb::Slice& rkey= m_scan_it->key(); - if (!kd.covers_key(rkey)) - { + rc = read_before_key(kd, full_key_match, key_slice); + if (rc == 0) { + const rocksdb::Slice &rkey = m_scan_it->key(); + if (!kd.covers_key(rkey)) { /* The record we've got is not from this index */ - rc= HA_ERR_KEY_NOT_FOUND; - } - else if (find_flag == HA_READ_PREFIX_LAST) - { - uint size = kd.pack_index_tuple(table, m_pack_buffer, - m_sk_packed_tuple, key, - keypart_map); - rocksdb::Slice lookup_tuple( - reinterpret_cast<char*>(m_sk_packed_tuple), size); + rc = HA_ERR_KEY_NOT_FOUND; + } else if (find_flag == HA_READ_PREFIX_LAST) { + uint size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, + key, keypart_map); + rocksdb::Slice lookup_tuple(reinterpret_cast<char *>(m_sk_packed_tuple), + size); // We need to compare the key we've got with the original search prefix. - if (!kd.value_matches_prefix(rkey, lookup_tuple)) - { - rc= HA_ERR_KEY_NOT_FOUND; + if (!kd.value_matches_prefix(rkey, lookup_tuple)) { + rc = HA_ERR_KEY_NOT_FOUND; } } } @@ -6101,18 +5588,16 @@ int ha_rocksdb::position_to_correct_key( return rc; } -int ha_rocksdb::calc_eq_cond_len(const Rdb_key_def& kd, +int ha_rocksdb::calc_eq_cond_len(const Rdb_key_def &kd, const enum ha_rkey_function &find_flag, - const rocksdb::Slice& slice, + const rocksdb::Slice &slice, const int &bytes_changed_by_succ, - const key_range* const end_key, - uint* const end_key_packed_size) -{ + const key_range *const end_key, + uint *const end_key_packed_size) { if (find_flag == HA_READ_KEY_EXACT) return slice.size(); - if (find_flag == HA_READ_PREFIX_LAST) - { + if (find_flag == HA_READ_PREFIX_LAST) { /* We have made the kd.successor(m_sk_packed_tuple) call above. 
@@ -6121,12 +5606,10 @@ int ha_rocksdb::calc_eq_cond_len(const Rdb_key_def& kd, return slice.size() - bytes_changed_by_succ; } - if (end_key) - { - *end_key_packed_size= kd.pack_index_tuple(table, m_pack_buffer, - m_end_key_packed_tuple, - end_key->key, - end_key->keypart_map); + if (end_key) { + *end_key_packed_size = + kd.pack_index_tuple(table, m_pack_buffer, m_end_key_packed_tuple, + end_key->key, end_key->keypart_map); /* Calculating length of the equal conditions here. 4 byte index id is @@ -6137,7 +5620,7 @@ int ha_rocksdb::calc_eq_cond_len(const Rdb_key_def& kd, Example2: id1 VARCHAR(30), id2 INT, PRIMARY KEY (id1, id2) WHERE id1 = 'AAA' and id2 < 3; => eq_cond_len=13 (varchar used 9 bytes) */ - rocksdb::Slice end_slice(reinterpret_cast<char*>(m_end_key_packed_tuple), + rocksdb::Slice end_slice(reinterpret_cast<char *>(m_end_key_packed_tuple), *end_key_packed_size); return slice.difference_offset(end_slice); } @@ -6151,82 +5634,67 @@ int ha_rocksdb::calc_eq_cond_len(const Rdb_key_def& kd, return Rdb_key_def::INDEX_NUMBER_SIZE; } -int ha_rocksdb::read_row_from_primary_key(uchar* const buf) -{ +int ha_rocksdb::read_row_from_primary_key(uchar *const buf) { DBUG_ASSERT(buf != nullptr); int rc; - const rocksdb::Slice& rkey= m_scan_it->key(); - const uint pk_size= rkey.size(); - const char *pk_data= rkey.data(); + const rocksdb::Slice &rkey = m_scan_it->key(); + const uint pk_size = rkey.size(); + const char *pk_data = rkey.data(); memcpy(m_pk_packed_tuple, pk_data, pk_size); m_last_rowkey.copy(pk_data, pk_size, &my_charset_bin); - if (m_lock_rows != RDB_LOCK_NONE) - { + if (m_lock_rows != RDB_LOCK_NONE) { /* We need to put a lock and re-read */ - rc= get_row_by_rowid(buf, m_pk_packed_tuple, pk_size); - } - else - { + rc = get_row_by_rowid(buf, m_pk_packed_tuple, pk_size); + } else { /* Unpack from the row we've read */ - const rocksdb::Slice& value = m_scan_it->value(); - rc= convert_record_from_storage_format(&rkey, &value, buf); + const rocksdb::Slice &value = 
m_scan_it->value(); + rc = convert_record_from_storage_format(&rkey, &value, buf); } return rc; } -int ha_rocksdb::read_row_from_secondary_key(uchar* const buf, - const Rdb_key_def& kd, bool move_forward) -{ +int ha_rocksdb::read_row_from_secondary_key(uchar *const buf, + const Rdb_key_def &kd, + bool move_forward) { DBUG_ASSERT(buf != nullptr); - int rc= 0; + int rc = 0; uint pk_size; - if (m_keyread_only && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) - { + if (m_keyread_only && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) { /* Get the key columns and primary key value */ - const rocksdb::Slice& rkey= m_scan_it->key(); - pk_size= kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, - m_pk_packed_tuple); - const rocksdb::Slice& value= m_scan_it->value(); - if (pk_size == RDB_INVALID_KEY_LEN) - { - rc= HA_ERR_INTERNAL_ERROR; + const rocksdb::Slice &rkey = m_scan_it->key(); + pk_size = + kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple); + const rocksdb::Slice &value = m_scan_it->value(); + if (pk_size == RDB_INVALID_KEY_LEN || + kd.unpack_record(table, buf, &rkey, &value, + m_verify_row_debug_checksums)) { + rc = HA_ERR_INTERNAL_ERROR; } - else - { - rc= try_keyonly_read_from_sk(buf, kd, rkey, value, pk_size); - } - } - else - { + } else { if (kd.m_is_reverse_cf) - move_forward= !move_forward; + move_forward = !move_forward; - rc= find_icp_matching_index_rec(move_forward, buf); - if (!rc) - { - const rocksdb::Slice& rkey= m_scan_it->key(); - pk_size= kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, + rc = find_icp_matching_index_rec(move_forward, buf); + if (!rc) { + const rocksdb::Slice &rkey = m_scan_it->key(); + pk_size = kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple); - if (pk_size == RDB_INVALID_KEY_LEN) - { - rc= HA_ERR_INTERNAL_ERROR; - } - else - { - rc= get_row_by_rowid(buf, m_pk_packed_tuple, pk_size); + if (pk_size == RDB_INVALID_KEY_LEN) { + rc = HA_ERR_INTERNAL_ERROR; + } else { + rc = 
get_row_by_rowid(buf, m_pk_packed_tuple, pk_size); } } } - if (!rc) - { - m_last_rowkey.copy((const char*)m_pk_packed_tuple, pk_size, + if (!rc) { + m_last_rowkey.copy((const char *)m_pk_packed_tuple, pk_size, &my_charset_bin); } @@ -6235,42 +5703,6 @@ int ha_rocksdb::read_row_from_secondary_key(uchar* const buf, /** @note - Some data types require unpack_info in order to do index-only reads. - - However, support for this may have been added after the database was built. - This means that we need to support the case where unpack_info is not - available. In this case, we should just do a primary key lookup. -*/ -int ha_rocksdb::try_keyonly_read_from_sk(uchar* buf, - const Rdb_key_def& kd, - const rocksdb::Slice& key, - const rocksdb::Slice& value, - const uint rowid_size) -{ - const int res= - kd.unpack_record(table, buf, &key, &value, m_verify_row_debug_checksums); - uint size= rowid_size; - if (res == UNPACK_INFO_MISSING) - { - // If we can't unpack, then do a PK lookup. - if (rowid_size == RDB_INVALID_KEY_LEN) - { - size= kd.get_primary_key_tuple( - table, *m_pk_descr, &key, m_pk_packed_tuple); - } - return get_row_by_rowid(buf, m_pk_packed_tuple, size); - } - else if (res == UNPACK_FAILURE) - { - return HA_ERR_INTERNAL_ERROR; - } - DBUG_ASSERT(res == UNPACK_SUCCESS); - return res; -} - - -/** - @note The problem with this function is that SQL layer calls it, when - the table has not been yet opened (no ::open() call done) - this->table_share already exists, but it is in the process of being @@ -6290,27 +5722,23 @@ int ha_rocksdb::try_keyonly_read_from_sk(uchar* buf, yet). 
*/ -ulong ha_rocksdb::index_flags(uint inx, uint part, bool all_parts) const -{ - ulong base_flags= HA_READ_NEXT | // doesn't seem to be used - HA_READ_ORDER | - HA_READ_RANGE | - HA_READ_PREV; +ulong ha_rocksdb::index_flags(uint inx, uint part, bool all_parts) const { + DBUG_ENTER_FUNC(); + + ulong base_flags = HA_READ_NEXT | // doesn't seem to be used + HA_READ_ORDER | HA_READ_RANGE | HA_READ_PREV; if (check_keyread_allowed(inx, part, all_parts)) base_flags |= HA_KEYREAD_ONLY; - if (inx == table_share->primary_key) - { + if (inx == table_share->primary_key) { /* Index-only reads on primary key are the same as table scan for us. Still, we need to explicitly "allow" them, otherwise SQL layer will miss some plans. */ base_flags |= HA_KEYREAD_ONLY; - } - else - { + } else { /* We can Index Condition Pushdown any key except the primary. With primary key, we get (pk, record) pair immediately, there is no place to put the @@ -6319,10 +5747,9 @@ ulong ha_rocksdb::index_flags(uint inx, uint part, bool all_parts) const base_flags |= HA_DO_INDEX_COND_PUSHDOWN; } - return base_flags; + DBUG_RETURN(base_flags); } - /** @brief Read next index tuple through the secondary index. @@ -6331,49 +5758,45 @@ ulong ha_rocksdb::index_flags(uint inx, uint part, bool all_parts) const m_scan_it points at the index key-value pair that we should read the (pk,row) pair for. 
*/ -int ha_rocksdb::secondary_index_read(const int keyno, uchar* const buf) -{ +int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) { DBUG_ASSERT(buf != nullptr); DBUG_ASSERT(table != nullptr); stats.rows_requested++; /* Use STATUS_NOT_FOUND when record not found or some error occurred */ - table->status= STATUS_NOT_FOUND; + table->status = STATUS_NOT_FOUND; - if (m_scan_it->Valid()) - { - rocksdb::Slice key= m_scan_it->key(); + if (m_scan_it->Valid()) { + rocksdb::Slice key = m_scan_it->key(); /* Check if we've ran out of records of this index */ - if (m_key_descr_arr[keyno]->covers_key(key)) - { - int rc; + if (m_key_descr_arr[keyno]->covers_key(key)) { + int rc = 0; - //TODO: We could here check if we have ran out of range we're scanning - const uint size= m_key_descr_arr[keyno]->get_primary_key_tuple( + // TODO: We could here check if we have ran out of range we're scanning + const uint size = m_key_descr_arr[keyno]->get_primary_key_tuple( table, *m_pk_descr, &key, m_pk_packed_tuple); - if (size == RDB_INVALID_KEY_LEN) - { + if (size == RDB_INVALID_KEY_LEN) { return HA_ERR_INTERNAL_ERROR; } - m_last_rowkey.copy((const char*)m_pk_packed_tuple, size, &my_charset_bin); + m_last_rowkey.copy((const char *)m_pk_packed_tuple, size, + &my_charset_bin); if (m_keyread_only && m_lock_rows == RDB_LOCK_NONE && - !has_hidden_pk(table)) - { - rc= try_keyonly_read_from_sk(buf, *m_key_descr_arr[keyno], - key, m_scan_it->value(), size); - } - else - { - rc= get_row_by_rowid(buf, m_pk_packed_tuple, size); + !has_hidden_pk(table)) { + rocksdb::Slice value = m_scan_it->value(); + if (m_key_descr_arr[keyno]->unpack_record( + table, buf, &key, &value, m_verify_row_debug_checksums)) { + return HA_ERR_INTERNAL_ERROR; + } + } else { + rc = get_row_by_rowid(buf, m_pk_packed_tuple, size); } - if (!rc) - { - table->status= 0; + if (!rc) { + table->status = 0; stats.rows_read++; stats.rows_index_next++; update_row_stats(ROWS_READ); @@ -6384,56 +5807,46 @@ int 
ha_rocksdb::secondary_index_read(const int keyno, uchar* const buf) return HA_ERR_END_OF_FILE; } - /* ha_rocksdb::read_range_first overrides handler::read_range_first. The only difference from handler::read_range_first is that ha_rocksdb::read_range_first passes end_key to ha_rocksdb::index_read_map_impl function. */ -int ha_rocksdb::read_range_first(const key_range* const start_key, - const key_range* const end_key, - bool eq_range_arg, bool sorted) -{ +int ha_rocksdb::read_range_first(const key_range *const start_key, + const key_range *const end_key, + bool eq_range_arg, bool sorted) { + DBUG_ENTER_FUNC(); + int result; - DBUG_ENTER("ha_rocksdb::read_range_first"); - eq_range= eq_range_arg; + eq_range = eq_range_arg; set_end_range(end_key, RANGE_SCAN_ASC); - range_key_part= table->key_info[active_index].key_part; + range_key_part = table->key_info[active_index].key_part; - if (!start_key) // Read first record - result= ha_index_first(table->record[0]); - else - { - if (is_using_prohibited_gap_locks(table, - is_using_full_unique_key( - active_index, - start_key->keypart_map, - start_key->flag))) - { + if (!start_key) // Read first record + result = ha_index_first(table->record[0]); + else { + if (is_using_prohibited_gap_locks( + table, + is_using_full_unique_key(active_index, start_key->keypart_map, + start_key->flag))) { DBUG_RETURN(HA_ERR_LOCK_DEADLOCK); } - MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0, - { result= index_read_map_impl(table->record[0], - start_key->key, - start_key->keypart_map, - start_key->flag, - end_key); }) + MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0, { + result = + index_read_map_impl(table->record[0], start_key->key, + start_key->keypart_map, start_key->flag, end_key); + }) } if (result) - DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND) - ? HA_ERR_END_OF_FILE - : result); + DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND) ? 
HA_ERR_END_OF_FILE : result); - if (compare_key(end_range) <= 0) - { - DBUG_RETURN(0); - } - else - { + if (compare_key(end_range) <= 0) { + DBUG_RETURN(HA_EXIT_SUCCESS); + } else { /* The last read row does not fall in the range. So request storage engine to release row lock if possible. @@ -6443,12 +5856,12 @@ int ha_rocksdb::read_range_first(const key_range* const start_key, } } - -int ha_rocksdb::index_read_map(uchar* const buf, const uchar* const key, +int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key, key_part_map keypart_map, - enum ha_rkey_function find_flag) -{ - return index_read_map_impl(buf, key, keypart_map, find_flag, nullptr); + enum ha_rkey_function find_flag) { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(index_read_map_impl(buf, key, keypart_map, find_flag, nullptr)); } /* @@ -6457,45 +5870,43 @@ int ha_rocksdb::index_read_map(uchar* const buf, const uchar* const key, This function takes end_key as an argument, and it is set on range scan. MyRocks needs to decide whether prefix bloom filter can be used or not. - To decide to use prefix bloom filter or not, calculating equal condition length + To decide to use prefix bloom filter or not, calculating equal condition + length is needed. On equal lookups (find_flag == HA_READ_KEY_EXACT), equal condition length is the same as rocksdb::Slice.size() of the start key. On range scan, equal condition length is MIN(start_key, end_key) of the rocksdb::Slice expression. 
*/ -int ha_rocksdb::index_read_map_impl(uchar* const buf, const uchar* const key, +int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, key_part_map keypart_map, enum ha_rkey_function find_flag, - const key_range* end_key) -{ - int rc= 0; - DBUG_ENTER("ha_rocksdb::index_read_map"); + const key_range *end_key) { + DBUG_ENTER_FUNC(); + + int rc = 0; ha_statistic_increment(&SSV::ha_read_key_count); - const Rdb_key_def& kd= *m_key_descr_arr[active_index]; - const uint actual_key_parts= kd.get_key_parts(); - bool using_full_key= is_using_full_key(keypart_map, actual_key_parts); + const Rdb_key_def &kd = *m_key_descr_arr[active_index]; + const uint actual_key_parts = kd.get_key_parts(); + bool using_full_key = is_using_full_key(keypart_map, actual_key_parts); if (!end_key) - end_key= end_range; + end_key = end_range; /* By default, we don't need the retrieved records to match the prefix */ - m_sk_match_prefix= nullptr; + m_sk_match_prefix = nullptr; stats.rows_requested++; - if (active_index == table->s->primary_key && - find_flag == HA_READ_KEY_EXACT && using_full_key) - { + if (active_index == table->s->primary_key && find_flag == HA_READ_KEY_EXACT && + using_full_key) { /* Equality lookup over primary key, using full tuple. This is a special case, use DB::Get. 
*/ - const uint size= kd.pack_index_tuple( - table, m_pack_buffer, m_pk_packed_tuple, - key, keypart_map); - rc= get_row_by_rowid(buf, m_pk_packed_tuple, size); - if (!rc) - { + const uint size = kd.pack_index_tuple(table, m_pack_buffer, + m_pk_packed_tuple, key, keypart_map); + rc = get_row_by_rowid(buf, m_pk_packed_tuple, size); + if (!rc) { stats.rows_read++; stats.rows_index_first++; update_row_stats(ROWS_READ); @@ -6509,25 +5920,22 @@ int ha_rocksdb::index_read_map_impl(uchar* const buf, const uchar* const key, uint packed_size; if (active_index != table->s->primary_key && table->key_info[active_index].flags & HA_NOSAME && - find_flag == HA_READ_KEY_EXACT && using_full_key) - { - key_part_map tmp_map= (key_part_map(1) << - table->key_info[active_index].user_defined_key_parts) - 1; - packed_size= kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, + find_flag == HA_READ_KEY_EXACT && using_full_key) { + key_part_map tmp_map = (key_part_map(1) << table->key_info[active_index] + .user_defined_key_parts) - + 1; + packed_size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, key, tmp_map); if (table->key_info[active_index].user_defined_key_parts != kd.get_key_parts()) - using_full_key= false; - } - else - { - packed_size= kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, + using_full_key = false; + } else { + packed_size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, key, keypart_map); } if ((pushed_idx_cond && pushed_idx_cond_keyno == active_index) && - (find_flag == HA_READ_KEY_EXACT || find_flag == HA_READ_PREFIX_LAST)) - { + (find_flag == HA_READ_KEY_EXACT || find_flag == HA_READ_PREFIX_LAST)) { /* We are doing a point index lookup, and ICP is enabled. It is possible that this call will be followed by ha_rocksdb->index_next_same() call. @@ -6540,39 +5948,36 @@ int ha_rocksdb::index_read_map_impl(uchar* const buf, const uchar* const key, that don't match the lookup prefix are not returned. 
row matches the lookup prefix. */ - m_sk_match_prefix= m_sk_match_prefix_buf; - m_sk_match_length= packed_size; + m_sk_match_prefix = m_sk_match_prefix_buf; + m_sk_match_length = packed_size; memcpy(m_sk_match_prefix, m_sk_packed_tuple, packed_size); } - int bytes_changed_by_succ= 0; + int bytes_changed_by_succ = 0; if (find_flag == HA_READ_PREFIX_LAST_OR_PREV || - find_flag == HA_READ_PREFIX_LAST || - find_flag == HA_READ_AFTER_KEY) - { + find_flag == HA_READ_PREFIX_LAST || find_flag == HA_READ_AFTER_KEY) { /* See below */ - bytes_changed_by_succ= kd.successor(m_sk_packed_tuple, packed_size); + bytes_changed_by_succ = kd.successor(m_sk_packed_tuple, packed_size); } - rocksdb::Slice slice( - reinterpret_cast<const char*>(m_sk_packed_tuple), packed_size); + rocksdb::Slice slice(reinterpret_cast<const char *>(m_sk_packed_tuple), + packed_size); - uint end_key_packed_size= 0; - const uint eq_cond_len= calc_eq_cond_len(kd, find_flag, slice, - bytes_changed_by_succ, end_key, - &end_key_packed_size); + uint end_key_packed_size = 0; + const uint eq_cond_len = + calc_eq_cond_len(kd, find_flag, slice, bytes_changed_by_succ, end_key, + &end_key_packed_size); - bool use_all_keys= false; + bool use_all_keys = false; if (find_flag == HA_READ_KEY_EXACT && my_count_bits(keypart_map) == kd.get_key_parts()) - use_all_keys= true; + use_all_keys = true; - Rdb_transaction* const tx= get_or_create_tx(table->in_use); - const bool is_new_snapshot= !tx->has_snapshot(); + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + const bool is_new_snapshot = !tx->has_snapshot(); // Loop as long as we get a deadlock error AND we end up creating the // snapshot here (i.e. it did not exist prior to this) - for (;;) - { + for (;;) { /* This will open the iterator and position it at a record that's equal or greater than the lookup tuple. 
@@ -6588,14 +5993,13 @@ int ha_rocksdb::index_read_map_impl(uchar* const buf, const uchar* const key, rc = position_to_correct_key(kd, find_flag, using_full_key, key, keypart_map, slice, &move_forward); - if (rc) - { + if (rc) { /* This status is returned on any error */ - table->status= STATUS_NOT_FOUND; + table->status = STATUS_NOT_FOUND; DBUG_RETURN(rc); } - m_skip_scan_it_next_call= false; + m_skip_scan_it_next_call = false; /* Now get the data for the row into 'buf'. If we were using a primary key @@ -6603,9 +6007,9 @@ int ha_rocksdb::index_read_map_impl(uchar* const buf, const uchar* const key, lookup the primary key. */ if (active_index == table->s->primary_key) - rc= read_row_from_primary_key(buf); + rc = read_row_from_primary_key(buf); else - rc= read_row_from_secondary_key(buf, kd, move_forward); + rc = read_row_from_secondary_key(buf, kd, move_forward); if (rc != HA_ERR_LOCK_DEADLOCK || !is_new_snapshot) break; /* Exit the loop */ @@ -6615,22 +6019,19 @@ int ha_rocksdb::index_read_map_impl(uchar* const buf, const uchar* const key, release_scan_iterator(); } - if (rc) - { + if (rc) { /* the only possible error condition is record-not-found */ - table->status= STATUS_NOT_FOUND; - } - else - { - table->status= 0; + table->status = STATUS_NOT_FOUND; + } else { + table->status = 0; stats.rows_read++; stats.rows_index_first++; update_row_stats(ROWS_READ); } + DBUG_RETURN(rc); } - /* @brief Scan the secondary index until we find an index record that satisfies ICP @@ -6652,64 +6053,54 @@ int ha_rocksdb::index_read_map_impl(uchar* const buf, const uchar* const key, */ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward, - uchar* const buf) -{ + uchar *const buf) { DBUG_ASSERT(buf != nullptr); - if (pushed_idx_cond && pushed_idx_cond_keyno == active_index) - { - const Rdb_key_def& kd= *m_key_descr_arr[active_index]; + if (pushed_idx_cond && pushed_idx_cond_keyno == active_index) { + const Rdb_key_def &kd = *m_key_descr_arr[active_index]; - while 
(1) - { - if (!m_scan_it->Valid()) - { - table->status= STATUS_NOT_FOUND; + while (1) { + if (!m_scan_it->Valid()) { + table->status = STATUS_NOT_FOUND; return HA_ERR_END_OF_FILE; } - const rocksdb::Slice rkey= m_scan_it->key(); + const rocksdb::Slice rkey = m_scan_it->key(); - if (!kd.covers_key(rkey)) - { - table->status= STATUS_NOT_FOUND; + if (!kd.covers_key(rkey)) { + table->status = STATUS_NOT_FOUND; return HA_ERR_END_OF_FILE; } - if (m_sk_match_prefix) - { - const rocksdb::Slice prefix((const char*)m_sk_match_prefix, - m_sk_match_length); - if (!kd.value_matches_prefix(rkey, prefix)) - { - table->status= STATUS_NOT_FOUND; + if (m_sk_match_prefix) { + const rocksdb::Slice prefix((const char *)m_sk_match_prefix, + m_sk_match_length); + if (!kd.value_matches_prefix(rkey, prefix)) { + table->status = STATUS_NOT_FOUND; return HA_ERR_END_OF_FILE; } } - const rocksdb::Slice value= m_scan_it->value(); - if (try_keyonly_read_from_sk(buf, kd, rkey, value, RDB_INVALID_KEY_LEN)) - { + const rocksdb::Slice value = m_scan_it->value(); + if (kd.unpack_record(table, buf, &rkey, &value, + m_verify_row_debug_checksums)) { return HA_ERR_INTERNAL_ERROR; } - const enum icp_result icp_status= check_index_cond(); - if (icp_status == ICP_NO_MATCH) - { + const enum icp_result icp_status = check_index_cond(); + if (icp_status == ICP_NO_MATCH) { if (move_forward) m_scan_it->Next(); else m_scan_it->Prev(); continue; /* Get the next (or prev) index tuple */ - } - else if (icp_status == ICP_OUT_OF_RANGE) - { + } else if (icp_status == ICP_OUT_OF_RANGE) { /* We have walked out of range we are scanning */ - table->status= STATUS_NOT_FOUND; + table->status = STATUS_NOT_FOUND; return HA_ERR_END_OF_FILE; - } - else /* icp_status == ICP_MATCH */ + } else /* icp_status == ICP_MATCH */ { - /* Index Condition is satisfied. We have rc==0, proceed to fetch the row. */ + /* Index Condition is satisfied. We have rc==0, proceed to fetch the + * row. 
*/ break; } /* @@ -6722,84 +6113,81 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward, */ } } - return 0; + return HA_EXIT_SUCCESS; } +int ha_rocksdb::index_read_last_map(uchar *const buf, const uchar *const key, + key_part_map keypart_map) { + DBUG_ENTER_FUNC(); -int ha_rocksdb::index_read_last_map(uchar* const buf, const uchar* const key, - key_part_map keypart_map) -{ - return index_read_map(buf, key, keypart_map, HA_READ_PREFIX_LAST); + DBUG_RETURN(index_read_map(buf, key, keypart_map, HA_READ_PREFIX_LAST)); } +int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) { + DBUG_ENTER_FUNC(); -int ha_rocksdb::check(THD* const thd, HA_CHECK_OPT* const check_opt) -{ - const uint pk= pk_index(table, m_tbl_def); + DBUG_ASSERT(thd != nullptr); + DBUG_ASSERT(check_opt != nullptr); + + const uint pk = pk_index(table, m_tbl_def); String rowkey_copy; String sec_key_copy; - const char* const table_name = table->s->table_name.str; + const char *const table_name = table->s->table_name.str; // Only when debugging: don't use snapshot when reading // Rdb_transaction *tx= get_or_create_tx(table->in_use); // tx->snapshot= nullptr; - const bool save_verify_row_debug_checksums= m_verify_row_debug_checksums; - m_verify_row_debug_checksums= true; + const bool save_verify_row_debug_checksums = m_verify_row_debug_checksums; + m_verify_row_debug_checksums = true; /* For each secondary index, check that we can get a PK value from it */ // NO_LINT_DEBUG sql_print_information("CHECKTABLE %s: Checking table %s", table_name, table_name); ha_rows UNINIT_VAR(row_checksums_at_start); // set/used iff first_index==true - ha_rows row_checksums= ha_rows(-1); - bool first_index= true; + ha_rows row_checksums = ha_rows(-1); + bool first_index = true; - for (uint keyno= 0; keyno < table->s->keys; keyno++) - { - if (keyno != pk) - { + for (uint keyno = 0; keyno < table->s->keys; keyno++) { + if (keyno != pk) { extra(HA_EXTRA_KEYREAD); ha_index_init(keyno, true); - 
ha_rows rows= 0; - ha_rows checksums= 0; + ha_rows rows = 0; + ha_rows checksums = 0; if (first_index) - row_checksums_at_start= m_row_checksums_checked; + row_checksums_at_start = m_row_checksums_checked; int res; // NO_LINT_DEBUG sql_print_information("CHECKTABLE %s: Checking index %s", table_name, table->key_info[keyno].name); - while (1) - { + while (1) { if (!rows) - res= index_first(table->record[0]); + res = index_first(table->record[0]); else - res= index_next(table->record[0]); + res = index_next(table->record[0]); if (res == HA_ERR_END_OF_FILE) break; - if (res) - { + if (res) { // error // NO_LINT_DEBUG sql_print_error("CHECKTABLE %s: .. row %lld: index scan error %d", table_name, rows, res); goto error; } - rocksdb::Slice key= m_scan_it->key(); + rocksdb::Slice key = m_scan_it->key(); sec_key_copy.copy(key.data(), key.size(), &my_charset_bin); rowkey_copy.copy(m_last_rowkey.ptr(), m_last_rowkey.length(), &my_charset_bin); - if (m_key_descr_arr[keyno]-> - unpack_info_has_checksum(m_scan_it->value())) - { + if (m_key_descr_arr[keyno]->unpack_info_has_checksum( + m_scan_it->value())) { checksums++; } - if ((res= get_row_by_rowid(table->record[0], rowkey_copy.ptr(), - rowkey_copy.length()))) - { + if ((res = get_row_by_rowid(table->record[0], rowkey_copy.ptr(), + rowkey_copy.length()))) { // NO_LINT_DEBUG sql_print_error("CHECKTABLE %s: .. 
row %lld: " "failed to fetch row by rowid", @@ -6807,18 +6195,17 @@ int ha_rocksdb::check(THD* const thd, HA_CHECK_OPT* const check_opt) goto error; } - longlong hidden_pk_id= 0; + longlong hidden_pk_id = 0; if (has_hidden_pk(table) && read_hidden_pk_id_from_rowkey(&hidden_pk_id)) goto error; /* Check if we get the same PK value */ - uint packed_size= m_pk_descr->pack_record( + uint packed_size = m_pk_descr->pack_record( table, m_pack_buffer, table->record[0], m_pk_packed_tuple, nullptr, false, hidden_pk_id); if (packed_size != rowkey_copy.length() || - memcmp(m_pk_packed_tuple, rowkey_copy.ptr(), packed_size)) - { + memcmp(m_pk_packed_tuple, rowkey_copy.ptr(), packed_size)) { // NO_LINT_DEBUG sql_print_error("CHECKTABLE %s: .. row %lld: PK value mismatch", table_name, rows); @@ -6826,12 +6213,11 @@ int ha_rocksdb::check(THD* const thd, HA_CHECK_OPT* const check_opt) } /* Check if we get the same secondary key value */ - packed_size= m_key_descr_arr[keyno]->pack_record( + packed_size = m_key_descr_arr[keyno]->pack_record( table, m_pack_buffer, table->record[0], m_sk_packed_tuple, &m_sk_tails, false, hidden_pk_id); if (packed_size != sec_key_copy.length() || - memcmp(m_sk_packed_tuple, sec_key_copy.ptr(), packed_size)) - { + memcmp(m_sk_packed_tuple, sec_key_copy.ptr(), packed_size)) { // NO_LINT_DEBUG sql_print_error("CHECKTABLE %s: .. 
row %lld: " "secondary index value mismatch", @@ -6841,72 +6227,64 @@ int ha_rocksdb::check(THD* const thd, HA_CHECK_OPT* const check_opt) rows++; continue; -print_and_error: - { - std::string buf; - buf = rdb_hexdump(rowkey_copy.ptr(), rowkey_copy.length(), - RDB_MAX_HEXDUMP_LEN); - // NO_LINT_DEBUG - sql_print_error("CHECKTABLE %s: rowkey: %s", table_name, - buf.c_str()); + print_and_error : { + std::string buf; + buf = rdb_hexdump(rowkey_copy.ptr(), rowkey_copy.length(), + RDB_MAX_HEXDUMP_LEN); + // NO_LINT_DEBUG + sql_print_error("CHECKTABLE %s: rowkey: %s", table_name, buf.c_str()); - buf= rdb_hexdump(m_retrieved_record.data(), m_retrieved_record.size(), - RDB_MAX_HEXDUMP_LEN); - // NO_LINT_DEBUG - sql_print_error("CHECKTABLE %s: record: %s", table_name, - buf.c_str()); + buf = rdb_hexdump(m_retrieved_record.data(), m_retrieved_record.size(), + RDB_MAX_HEXDUMP_LEN); + // NO_LINT_DEBUG + sql_print_error("CHECKTABLE %s: record: %s", table_name, buf.c_str()); - buf = rdb_hexdump(sec_key_copy.ptr(), sec_key_copy.length(), - RDB_MAX_HEXDUMP_LEN); - // NO_LINT_DEBUG - sql_print_error("CHECKTABLE %s: index: %s", table_name, - buf.c_str()); + buf = rdb_hexdump(sec_key_copy.ptr(), sec_key_copy.length(), + RDB_MAX_HEXDUMP_LEN); + // NO_LINT_DEBUG + sql_print_error("CHECKTABLE %s: index: %s", table_name, buf.c_str()); - goto error; - } + goto error; + } } // NO_LINT_DEBUG sql_print_information("CHECKTABLE %s: ... 
%lld index entries checked " - "(%lld had checksums)", table_name, rows, - checksums); + "(%lld had checksums)", + table_name, rows, checksums); - if (first_index) - { - row_checksums= m_row_checksums_checked - row_checksums_at_start; - first_index= false; + if (first_index) { + row_checksums = m_row_checksums_checked - row_checksums_at_start; + first_index = false; } ha_index_end(); } } - if (row_checksums != ha_rows(-1)) - { + if (row_checksums != ha_rows(-1)) { // NO_LINT_DEBUG sql_print_information("CHECKTABLE %s: %lld table records had checksums", table_name, row_checksums); } extra(HA_EXTRA_NO_KEYREAD); - m_verify_row_debug_checksums= save_verify_row_debug_checksums; + m_verify_row_debug_checksums = save_verify_row_debug_checksums; /* TODO: we should check also for PK records that are missing in the secondary indexes. For that, need to walk through the PK and check that every PK record has a proper counterpart in each secondary index. */ - return HA_ADMIN_OK; + DBUG_RETURN(HA_ADMIN_OK); error: - m_verify_row_debug_checksums= save_verify_row_debug_checksums; + m_verify_row_debug_checksums = save_verify_row_debug_checksums; ha_index_or_rnd_end(); extra(HA_EXTRA_NO_KEYREAD); - return HA_ADMIN_CORRUPT; -} + DBUG_RETURN(HA_ADMIN_CORRUPT); +} -static void dbug_dump_str(FILE* const out, const char* const str, int len) -{ +static void dbug_dump_str(FILE *const out, const char *const str, int len) { fprintf(out, "\""); - for (int i= 0; i < len; i++) - { + for (int i = 0; i < len; i++) { if (str[i] > 32) fprintf(out, "%c", str[i]); else @@ -6915,23 +6293,21 @@ static void dbug_dump_str(FILE* const out, const char* const str, int len) fprintf(out, "\""); } - /* Debugging help: dump the whole database into a human-readable file. 
Usage: dbug_dump_database(rdb); */ -void dbug_dump_database(rocksdb::DB* const db) -{ - FILE* const out= fopen("/tmp/rocksdb.dump", "wt"); +void dbug_dump_database(rocksdb::DB *const db) { + FILE *const out = fopen("/tmp/rocksdb.dump", "wt"); if (!out) return; - rocksdb::Iterator* it = db->NewIterator(rocksdb::ReadOptions()); + rocksdb::Iterator *it = db->NewIterator(rocksdb::ReadOptions()); for (it->SeekToFirst(); it->Valid(); it->Next()) { - rocksdb::Slice key= it->key(); - rocksdb::Slice val= it->value(); + rocksdb::Slice key = it->key(); + rocksdb::Slice val = it->value(); dbug_dump_str(out, key.data(), key.size()); fprintf(out, " -> "); dbug_dump_str(out, val.data(), val.size()); @@ -6943,25 +6319,21 @@ void dbug_dump_database(rocksdb::DB* const db) } rocksdb::Status ha_rocksdb::get_for_update( - Rdb_transaction* const tx, - rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key, - std::string* const value) const -{ + Rdb_transaction *const tx, rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, std::string *const value) const { DBUG_ASSERT(m_lock_rows != RDB_LOCK_NONE); const bool exclusive = m_lock_rows != RDB_LOCK_READ; - rocksdb::Status s= tx->get_for_update(column_family, key, value, exclusive); + rocksdb::Status s = tx->get_for_update(column_family, key, value, exclusive); // If we have a lock conflict and we are running in READ COMMITTTED mode // release and reacquire the snapshot and then retry the get_for_update(). if (s.IsBusy() && !s.IsDeadlock() && - my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED) - { + my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED) { tx->release_snapshot(); tx->acquire_snapshot(false); - s= tx->get_for_update(column_family, key, value, exclusive); + s = tx->get_for_update(column_family, key, value, exclusive); } return s; @@ -6975,160 +6347,146 @@ rocksdb::Status ha_rocksdb::get_for_update( other - HA_ERR error code. 
*/ -int ha_rocksdb::get_row_by_rowid(uchar* const buf, const char* const rowid, - const uint rowid_size) -{ +int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, + const uint rowid_size) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(buf != nullptr); DBUG_ASSERT(rowid != nullptr); DBUG_ASSERT(table != nullptr); int rc; - DBUG_ENTER("ha_rocksdb::get_row_by_rowid"); rocksdb::Slice key_slice(rowid, rowid_size); - Rdb_transaction* const tx= get_or_create_tx(table->in_use); + Rdb_transaction *const tx = get_or_create_tx(table->in_use); DBUG_ASSERT(tx != nullptr); DEBUG_SYNC(ha_thd(), "rocksdb.get_row_by_rowid"); - DBUG_EXECUTE_IF("dbug.rocksdb.get_row_by_rowid", - { - THD *thd= ha_thd(); - const char act[]= - "now signal Reached " - "wait_for signal.rocksdb.get_row_by_rowid_let_running"; - DBUG_ASSERT(opt_debug_sync_timeout > 0); - DBUG_ASSERT(!debug_sync_set_action(thd, - STRING_WITH_LEN(act))); - };); + DBUG_EXECUTE_IF("dbug.rocksdb.get_row_by_rowid", { + THD *thd = ha_thd(); + const char act[] = "now signal Reached " + "wait_for signal.rocksdb.get_row_by_rowid_let_running"; + DBUG_ASSERT(opt_debug_sync_timeout > 0); + DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act))); + };); bool found; rocksdb::Status s; - if (m_lock_rows == RDB_LOCK_NONE) - { + if (m_lock_rows == RDB_LOCK_NONE) { tx->acquire_snapshot(true); - s= tx->get(m_pk_descr->get_cf(), key_slice, &m_retrieved_record); - } - else - { - s= get_for_update(tx, m_pk_descr->get_cf(), key_slice, &m_retrieved_record); + s = tx->get(m_pk_descr->get_cf(), key_slice, &m_retrieved_record); + } else { + s = get_for_update(tx, m_pk_descr->get_cf(), key_slice, + &m_retrieved_record); } - if (!s.IsNotFound() && !s.ok()) - { + if (!s.IsNotFound() && !s.ok()) { DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def)); } - found= !s.IsNotFound(); + found = !s.IsNotFound(); - table->status= STATUS_NOT_FOUND; - if (found) - { - m_last_rowkey.copy((const char*)rowid, rowid_size, 
&my_charset_bin); - rc= convert_record_from_storage_format(&key_slice, buf); + table->status = STATUS_NOT_FOUND; + if (found) { + m_last_rowkey.copy((const char *)rowid, rowid_size, &my_charset_bin); + rc = convert_record_from_storage_format(&key_slice, buf); if (!rc) - table->status= 0; - } - else - { + table->status = 0; + } else { /* Note: we don't need to unlock the row. It is intentional that we keep locks on rows that don't exist. */ - rc= HA_ERR_KEY_NOT_FOUND; + rc = HA_ERR_KEY_NOT_FOUND; } DBUG_RETURN(rc); } +int ha_rocksdb::index_next(uchar *const buf) { + DBUG_ENTER_FUNC(); -int ha_rocksdb::index_next(uchar* const buf) -{ - bool moves_forward= true; + bool moves_forward = true; ha_statistic_increment(&SSV::ha_read_next_count); - if (m_key_descr_arr[active_index]->m_is_reverse_cf) - { - moves_forward= false; + if (m_key_descr_arr[active_index]->m_is_reverse_cf) { + moves_forward = false; } - int rc= index_next_with_direction(buf, moves_forward); + int rc = index_next_with_direction(buf, moves_forward); if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; - return rc; + + DBUG_RETURN(rc); } +int ha_rocksdb::index_prev(uchar *const buf) { + DBUG_ENTER_FUNC(); -int ha_rocksdb::index_prev(uchar* const buf) -{ - bool moves_forward= false; + bool moves_forward = false; ha_statistic_increment(&SSV::ha_read_prev_count); - if (m_key_descr_arr[active_index]->m_is_reverse_cf) - { - moves_forward= true; + if (m_key_descr_arr[active_index]->m_is_reverse_cf) { + moves_forward = true; } - int rc= index_next_with_direction(buf, moves_forward); + int rc = index_next_with_direction(buf, moves_forward); if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; - return rc; + DBUG_RETURN(rc); } +int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) { + DBUG_ENTER_FUNC(); -int ha_rocksdb::index_next_with_direction(uchar* const buf, bool move_forward) -{ int rc; - DBUG_ENTER("ha_rocksdb::index_next"); - if (active_index == pk_index(table, m_tbl_def)) 
- { - rc= rnd_next_with_direction(buf, move_forward); - } - else - { - if (m_skip_scan_it_next_call) - { - m_skip_scan_it_next_call= false; - } - else - { + if (active_index == pk_index(table, m_tbl_def)) { + rc = rnd_next_with_direction(buf, move_forward); + } else { + if (m_skip_scan_it_next_call) { + m_skip_scan_it_next_call = false; + } else { if (move_forward) - m_scan_it->Next(); /* this call cannot fail */ + m_scan_it->Next(); /* this call cannot fail */ else m_scan_it->Prev(); } - rc= find_icp_matching_index_rec(move_forward, buf); + rc = find_icp_matching_index_rec(move_forward, buf); if (!rc) - rc= secondary_index_read(active_index, buf); + rc = secondary_index_read(active_index, buf); } DBUG_RETURN(rc); } +int ha_rocksdb::index_first(uchar *const buf) { + DBUG_ENTER_FUNC(); -int ha_rocksdb::index_first(uchar* const buf) -{ - m_sk_match_prefix= nullptr; + m_sk_match_prefix = nullptr; ha_statistic_increment(&SSV::ha_read_first_count); - int rc= m_key_descr_arr[active_index]->m_is_reverse_cf ? - index_last_intern(buf) : index_first_intern(buf); + int rc = m_key_descr_arr[active_index]->m_is_reverse_cf + ? index_last_intern(buf) + : index_first_intern(buf); if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; - return rc; + + DBUG_RETURN(rc); } +int ha_rocksdb::index_last(uchar *const buf) { + DBUG_ENTER_FUNC(); -int ha_rocksdb::index_last(uchar* const buf) -{ - m_sk_match_prefix= nullptr; + m_sk_match_prefix = nullptr; ha_statistic_increment(&SSV::ha_read_last_count); - int rc= m_key_descr_arr[active_index]->m_is_reverse_cf ? - index_first_intern(buf) : index_last_intern(buf); + int rc = m_key_descr_arr[active_index]->m_is_reverse_cf + ? index_first_intern(buf) + : index_last_intern(buf); if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; - return rc; -} + DBUG_RETURN(rc); +} /* Start scanning from the "first" value. 
@@ -7161,61 +6519,53 @@ int ha_rocksdb::index_last(uchar* const buf) */ -int ha_rocksdb::index_first_intern(uchar* const buf) -{ +int ha_rocksdb::index_first_intern(uchar *const buf) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(buf != nullptr); uchar *key; uint key_size; int rc; - DBUG_ENTER("ha_rocksdb::index_first"); - if (is_pk(active_index, table, m_tbl_def)) - { - key= m_pk_packed_tuple; - } - else - { - key= m_sk_packed_tuple; + if (is_pk(active_index, table, m_tbl_def)) { + key = m_pk_packed_tuple; + } else { + key = m_sk_packed_tuple; } DBUG_ASSERT(key != nullptr); - const Rdb_key_def& kd= *m_key_descr_arr[active_index]; - if (kd.m_is_reverse_cf) - { + const Rdb_key_def &kd = *m_key_descr_arr[active_index]; + if (kd.m_is_reverse_cf) { kd.get_supremum_key(key, &key_size); - } - else - { + } else { kd.get_infimum_key(key, &key_size); } - rocksdb::Slice index_key((const char*)key, key_size); + rocksdb::Slice index_key((const char *)key, key_size); - Rdb_transaction* const tx= get_or_create_tx(table->in_use); + Rdb_transaction *const tx = get_or_create_tx(table->in_use); DBUG_ASSERT(tx != nullptr); - const bool is_new_snapshot= !tx->has_snapshot(); + const bool is_new_snapshot = !tx->has_snapshot(); // Loop as long as we get a deadlock error AND we end up creating the // snapshot here (i.e. 
it did not exist prior to this) - for (;;) - { + for (;;) { setup_scan_iterator(kd, &index_key, false, !kd.m_is_reverse_cf, Rdb_key_def::INDEX_NUMBER_SIZE); - m_skip_scan_it_next_call= true; + m_skip_scan_it_next_call = true; - rc= index_next_with_direction(buf, true); + rc = index_next_with_direction(buf, true); if (rc != HA_ERR_LOCK_DEADLOCK || !is_new_snapshot) - break; // exit the loop + break; // exit the loop // release the snapshot and iterator so they will be regenerated tx->release_snapshot(); release_scan_iterator(); } - if (!rc) - { + if (!rc) { /* index_next is always incremented on success, so decrement if it is index_first instead @@ -7223,10 +6573,10 @@ int ha_rocksdb::index_first_intern(uchar* const buf) stats.rows_index_first++; stats.rows_index_next--; } + DBUG_RETURN(rc); } - /** @details Start scanning from the "last" value @@ -7264,90 +6614,76 @@ int ha_rocksdb::index_first_intern(uchar* const buf) So, need to: it->Seek(n) || it->SeekToLast(); it->Prev(); */ -int ha_rocksdb::index_last_intern(uchar* const buf) -{ +int ha_rocksdb::index_last_intern(uchar *const buf) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(buf != nullptr); uchar *key; uint key_size; int rc; - DBUG_ENTER("ha_rocksdb::index_last"); - if (is_pk(active_index, table, m_tbl_def)) - { - key= m_pk_packed_tuple; - } - else - { - key= m_sk_packed_tuple; + if (is_pk(active_index, table, m_tbl_def)) { + key = m_pk_packed_tuple; + } else { + key = m_sk_packed_tuple; } DBUG_ASSERT(key != nullptr); - const Rdb_key_def& kd= *m_key_descr_arr[active_index]; - if (kd.m_is_reverse_cf) - { + const Rdb_key_def &kd = *m_key_descr_arr[active_index]; + if (kd.m_is_reverse_cf) { kd.get_infimum_key(key, &key_size); - } - else - { + } else { kd.get_supremum_key(key, &key_size); } - rocksdb::Slice index_key((const char*)key, key_size); + rocksdb::Slice index_key((const char *)key, key_size); - Rdb_transaction* const tx= get_or_create_tx(table->in_use); + Rdb_transaction *const tx = 
get_or_create_tx(table->in_use); DBUG_ASSERT(tx != nullptr); - bool is_new_snapshot= !tx->has_snapshot(); + bool is_new_snapshot = !tx->has_snapshot(); // Loop as long as we get a deadlock error AND we end up creating the // snapshot here (i.e. it did not exist prior to this) - for (;;) - { + for (;;) { setup_scan_iterator(kd, &index_key, false, kd.m_is_reverse_cf, Rdb_key_def::INDEX_NUMBER_SIZE); - if (!m_scan_it->Valid()) - { + if (!m_scan_it->Valid()) { /* RocksDB keyspace has no records that follow our index. It could be, our index is the last in the keyspace. Navigate to last record in the keyspace. */ m_scan_it->SeekToLast(); - } - else - { + } else { /* Iterator is pointing at some record right after the end of the index. Move one record back. */ m_scan_it->Prev(); } - m_skip_scan_it_next_call= false; + m_skip_scan_it_next_call = false; - if (is_pk(active_index, table, m_tbl_def)) - { - m_skip_scan_it_next_call= true; - rc= rnd_next_with_direction(buf, false); - } - else - { - rc= find_icp_matching_index_rec(false /*move_forward*/, buf); + if (is_pk(active_index, table, m_tbl_def)) { + m_skip_scan_it_next_call = true; + rc = rnd_next_with_direction(buf, false); + } else { + rc = find_icp_matching_index_rec(false /*move_forward*/, buf); if (!rc) - rc= secondary_index_read(active_index, buf); + rc = secondary_index_read(active_index, buf); } if (rc != HA_ERR_LOCK_DEADLOCK || !is_new_snapshot) - break; /* exit the loop */ + break; /* exit the loop */ // release the snapshot and iterator so they will be regenerated tx->release_snapshot(); release_scan_iterator(); } - if (!rc) - { + if (!rc) { /* index_next is always incremented on success, so decrement if it is index_first instead @@ -7355,18 +6691,20 @@ int ha_rocksdb::index_last_intern(uchar* const buf) stats.rows_index_first++; stats.rows_index_next--; } + DBUG_RETURN(rc); } +void ha_rocksdb::unlock_row() { + DBUG_ENTER_FUNC(); -void ha_rocksdb::unlock_row() -{ - if (m_lock_rows != RDB_LOCK_NONE) - { - 
Rdb_transaction* const tx= get_or_create_tx(table->in_use); - tx->release_lock(m_pk_descr->get_cf(), std::string(m_last_rowkey.ptr(), - m_last_rowkey.length())); + if (m_lock_rows != RDB_LOCK_NONE) { + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + tx->release_lock(m_pk_descr->get_cf(), + std::string(m_last_rowkey.ptr(), m_last_rowkey.length())); } + + DBUG_VOID_RETURN; } /* @@ -7375,35 +6713,35 @@ void ha_rocksdb::unlock_row() - If the index is PRIMARY KEY, and if all of the columns of the table are covered by the PRIMARY KEY, SingleDelete can be used. */ -bool ha_rocksdb::can_use_single_delete(const uint &index) const -{ +bool ha_rocksdb::can_use_single_delete(const uint &index) const { return (index != pk_index(table, m_tbl_def) || (!has_hidden_pk(table) && - table->key_info[index].actual_key_parts == table->s->fields)); + table->key_info[index].actual_key_parts == table->s->fields)); } -bool ha_rocksdb::skip_unique_check() const -{ +bool ha_rocksdb::skip_unique_check() const { /* We want to skip unique checks if: 1) bulk_load is on 2) this table is in the whitelist of tables to skip and the replication lag has reached a large enough value (see unique_check_lag_threshold and unique_check_lage_reset_threshold) - 3) the user set rocksdb_skip_unique_check + 3) the user set unique_checks option to 0 */ return THDVAR(table->in_use, bulk_load) || (m_force_skip_unique_check && m_skip_unique_check) || - THDVAR(table->in_use, skip_unique_check); + my_core::thd_test_options(table->in_use, OPTION_RELAXED_UNIQUE_CHECKS); } -void ha_rocksdb::set_force_skip_unique_check(bool skip) -{ - m_force_skip_unique_check= skip; +void ha_rocksdb::set_force_skip_unique_check(bool skip) { + DBUG_ENTER_FUNC(); + + m_force_skip_unique_check = skip; + + DBUG_VOID_RETURN; } -bool ha_rocksdb::commit_in_the_middle() -{ +bool ha_rocksdb::commit_in_the_middle() { return THDVAR(table->in_use, bulk_load) || THDVAR(table->in_use, commit_in_the_middle); } @@ -7413,8 +6751,7 @@ bool 
ha_rocksdb::commit_in_the_middle() @retval true if bulk commit failed @retval false if bulk commit was skipped or succeeded */ -bool ha_rocksdb::do_bulk_commit(Rdb_transaction* const tx) -{ +bool ha_rocksdb::do_bulk_commit(Rdb_transaction *const tx) { DBUG_ASSERT(tx != nullptr); return commit_in_the_middle() && tx->get_write_count() >= THDVAR(table->in_use, bulk_load_size) && @@ -7427,8 +6764,7 @@ bool ha_rocksdb::do_bulk_commit(Rdb_transaction* const tx) does not contain a primary key. (In which case we generate a hidden 'auto-incremented' pk.) */ -bool ha_rocksdb::has_hidden_pk(const TABLE* const table) const -{ +bool ha_rocksdb::has_hidden_pk(const TABLE *const table) const { DBUG_ASSERT(table != nullptr); return Rdb_key_def::table_has_hidden_pk(table); } @@ -7437,9 +6773,8 @@ bool ha_rocksdb::has_hidden_pk(const TABLE* const table) const Returns true if given index number is a hidden_pk. - This is used when a table is created with no primary key. */ -bool ha_rocksdb::is_hidden_pk(const uint index, const TABLE* const table_arg, - const Rdb_tbl_def* const tbl_def_arg) -{ +bool ha_rocksdb::is_hidden_pk(const uint index, const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) { DBUG_ASSERT(table_arg != nullptr); DBUG_ASSERT(table_arg->s != nullptr); DBUG_ASSERT(tbl_def_arg != nullptr); @@ -7449,22 +6784,19 @@ bool ha_rocksdb::is_hidden_pk(const uint index, const TABLE* const table_arg, } /* Returns index of primary key */ -uint ha_rocksdb::pk_index(const TABLE* const table_arg, - const Rdb_tbl_def* const tbl_def_arg) -{ +uint ha_rocksdb::pk_index(const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) { DBUG_ASSERT(table_arg != nullptr); DBUG_ASSERT(table_arg->s != nullptr); DBUG_ASSERT(tbl_def_arg != nullptr); - return table_arg->s->primary_key == MAX_INDEXES ? - tbl_def_arg->m_key_count - 1 : - table_arg->s->primary_key; + return table_arg->s->primary_key == MAX_INDEXES ? 
tbl_def_arg->m_key_count - 1 + : table_arg->s->primary_key; } /* Returns true if given index number is a primary key */ -bool ha_rocksdb::is_pk(const uint index, const TABLE* const table_arg, - const Rdb_tbl_def* const tbl_def_arg) -{ +bool ha_rocksdb::is_pk(const uint index, const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) { DBUG_ASSERT(table_arg != nullptr); DBUG_ASSERT(table_arg->s != nullptr); DBUG_ASSERT(tbl_def_arg != nullptr); @@ -7473,64 +6805,59 @@ bool ha_rocksdb::is_pk(const uint index, const TABLE* const table_arg, is_hidden_pk(index, table_arg, tbl_def_arg); } -const char* ha_rocksdb::get_key_name(const uint index, - const TABLE* const table_arg, - const Rdb_tbl_def* const tbl_def_arg) -{ +const char *ha_rocksdb::get_key_name(const uint index, + const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) { DBUG_ASSERT(table_arg != nullptr); - if (is_hidden_pk(index, table_arg, tbl_def_arg)) - { + if (is_hidden_pk(index, table_arg, tbl_def_arg)) { return HIDDEN_PK_NAME; } return table_arg->key_info[index].name; } -const char* ha_rocksdb::get_key_comment(const uint index, - const TABLE* const table_arg, - const Rdb_tbl_def* const tbl_def_arg) -{ +const char *ha_rocksdb::get_key_comment(const uint index, + const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) { DBUG_ASSERT(table_arg != nullptr); - if (is_hidden_pk(index, table_arg, tbl_def_arg)) - { + if (is_hidden_pk(index, table_arg, tbl_def_arg)) { return nullptr; } return table_arg->key_info[index].comment.str; } -int ha_rocksdb::write_row(uchar* const buf) -{ - DBUG_ENTER("ha_rocksdb::write_row"); +int ha_rocksdb::write_row(uchar *const buf) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(buf != nullptr); DBUG_ASSERT(buf == table->record[0]); DBUG_ASSERT(m_lock_rows == RDB_LOCK_WRITE); + ha_statistic_increment(&SSV::ha_write_count); /* Note: "buf == table->record[0]" is copied from innodb. I am not aware of any use cases where this condition is not true. 
*/ - if (table->next_number_field && buf == table->record[0]) - { + if (table->next_number_field && buf == table->record[0]) { int err; - if ((err= update_auto_increment())) - { + if ((err = update_auto_increment())) { DBUG_RETURN(err); } } - const int rv= update_write_row(nullptr, buf, skip_unique_check()); + const int rv = update_write_row(nullptr, buf, skip_unique_check()); - if (rv == 0) - { + if (rv == 0) { stats.rows_inserted++; update_row_stats(ROWS_INSERTED); } + DBUG_RETURN(rv); } - /** Constructing m_last_rowkey (MyRocks key expression) from before_update|delete image (MySQL row expression). @@ -7540,87 +6867,76 @@ int ha_rocksdb::write_row(uchar* const buf) called without setting m_last_rowkey. This function sets m_last_rowkey for Read Free Replication. */ -void ha_rocksdb::set_last_rowkey(const uchar* const old_data) -{ - if (old_data && use_read_free_rpl()) - { - const int old_pk_size= m_pk_descr->pack_record( +void ha_rocksdb::set_last_rowkey(const uchar *const old_data) { + if (old_data && use_read_free_rpl()) { + const int old_pk_size = m_pk_descr->pack_record( table, m_pack_buffer, old_data, m_pk_packed_tuple, nullptr, false); - m_last_rowkey.copy((const char*)m_pk_packed_tuple, old_pk_size, + m_last_rowkey.copy((const char *)m_pk_packed_tuple, old_pk_size, &my_charset_bin); } } -int ha_rocksdb::get_pk_for_update(struct update_row_info* const row_info) -{ +int ha_rocksdb::get_pk_for_update(struct update_row_info *const row_info) { int size; /* Get new row key for any insert, and any update where the pk is not hidden. Row key for updates with hidden pk is handled below. 
*/ - if (!has_hidden_pk(table)) - { - row_info->hidden_pk_id= 0; - - row_info->new_pk_unpack_info= &m_pk_unpack_info; - - size= m_pk_descr->pack_record(table, m_pack_buffer, row_info->new_data, - m_pk_packed_tuple, - row_info->new_pk_unpack_info, - false); - } - else if (row_info->old_data == nullptr) - { - row_info->hidden_pk_id= update_hidden_pk_val(); - size= m_pk_descr->pack_hidden_pk(row_info->hidden_pk_id, m_pk_packed_tuple); - } - else - { + if (!has_hidden_pk(table)) { + row_info->hidden_pk_id = 0; + + row_info->new_pk_unpack_info = &m_pk_unpack_info; + + size = m_pk_descr->pack_record(table, m_pack_buffer, row_info->new_data, + m_pk_packed_tuple, + row_info->new_pk_unpack_info, false); + } else if (row_info->old_data == nullptr) { + row_info->hidden_pk_id = update_hidden_pk_val(); + size = + m_pk_descr->pack_hidden_pk(row_info->hidden_pk_id, m_pk_packed_tuple); + } else { /* If hidden primary key, rowkey for new record will always be the same as before */ - size= row_info->old_pk_slice.size(); + size = row_info->old_pk_slice.size(); memcpy(m_pk_packed_tuple, row_info->old_pk_slice.data(), size); - if (read_hidden_pk_id_from_rowkey(&row_info->hidden_pk_id)) - { + if (read_hidden_pk_id_from_rowkey(&row_info->hidden_pk_id)) { return HA_ERR_INTERNAL_ERROR; } } - row_info->new_pk_slice= rocksdb::Slice((const char*)m_pk_packed_tuple, size); + row_info->new_pk_slice = + rocksdb::Slice((const char *)m_pk_packed_tuple, size); - return 0; + return HA_EXIT_SUCCESS; } int ha_rocksdb::check_and_lock_unique_pk(const uint &key_id, - const struct update_row_info& row_info, - bool* const found, - bool* const pk_changed) -{ + const struct update_row_info &row_info, + bool *const found, + bool *const pk_changed) { DBUG_ASSERT(found != nullptr); DBUG_ASSERT(pk_changed != nullptr); - *pk_changed= false; + *pk_changed = false; /* For UPDATEs, if the key has changed, we need to obtain a lock. INSERTs always require locking. 
*/ - if (row_info.old_pk_slice.size() > 0) - { + if (row_info.old_pk_slice.size() > 0) { /* If the keys are the same, then no lock is needed */ if (!Rdb_pk_comparator::bytewise_compare(row_info.new_pk_slice, - row_info.old_pk_slice)) - { - *found= false; - return 0; + row_info.old_pk_slice)) { + *found = false; + return HA_EXIT_SUCCESS; } - *pk_changed= true; + *pk_changed = true; } /* @@ -7642,49 +6958,46 @@ int ha_rocksdb::check_and_lock_unique_pk(const uint &key_id, 2) T1 Get(empty) -> T1 Put(insert, not committed yet) -> T2 Get(empty) -> T2 Put(insert, blocked) -> T1 commit -> T2 commit(overwrite) */ - const rocksdb::Status s= get_for_update(row_info.tx, m_pk_descr->get_cf(), - row_info.new_pk_slice, &m_retrieved_record); - if (!s.ok() && !s.IsNotFound()) - { + const rocksdb::Status s = + get_for_update(row_info.tx, m_pk_descr->get_cf(), row_info.new_pk_slice, + &m_retrieved_record); + if (!s.ok() && !s.IsNotFound()) { return row_info.tx->set_status_error(table->in_use, s, *m_key_descr_arr[key_id], m_tbl_def); } - *found= !s.IsNotFound(); - return 0; + *found = !s.IsNotFound(); + return HA_EXIT_SUCCESS; } int ha_rocksdb::check_and_lock_sk(const uint &key_id, - const struct update_row_info& row_info, - bool* const found) const -{ + const struct update_row_info &row_info, + bool *const found) const { DBUG_ASSERT(found != nullptr); - *found= false; + *found = false; /* Can skip checking this key if none of the key fields have changed. 
*/ - if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id)) - { - return 0; + if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id)) { + return HA_EXIT_SUCCESS; } - KEY* key_info= nullptr; - uint n_null_fields= 0; - uint user_defined_key_parts= 1; + KEY *key_info = nullptr; + uint n_null_fields = 0; + uint user_defined_key_parts = 1; - key_info= &table->key_info[key_id]; - user_defined_key_parts= key_info->user_defined_key_parts; + key_info = &table->key_info[key_id]; + user_defined_key_parts = key_info->user_defined_key_parts; /* If there are no uniqueness requirements, there's no need to obtain a lock for this key. */ - if (!(key_info->flags & HA_NOSAME)) - { - return 0; + if (!(key_info->flags & HA_NOSAME)) { + return HA_EXIT_SUCCESS; } - const Rdb_key_def& kd= *m_key_descr_arr[key_id]; + const Rdb_key_def &kd = *m_key_descr_arr[key_id]; /* Calculate the new key for obtaining the lock @@ -7692,33 +7005,30 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id, For unique secondary indexes, the key used for locking does not include the extended fields. */ - int size= kd.pack_record(table, m_pack_buffer, row_info.new_data, - m_sk_packed_tuple, nullptr, false, 0, - user_defined_key_parts, &n_null_fields); - if (n_null_fields > 0) - { + int size = + kd.pack_record(table, m_pack_buffer, row_info.new_data, m_sk_packed_tuple, + nullptr, false, 0, user_defined_key_parts, &n_null_fields); + if (n_null_fields > 0) { /* If any fields are marked as NULL this will never match another row as to NULL never matches anything else including another NULL. */ - return 0; + return HA_EXIT_SUCCESS; } - const rocksdb::Slice new_slice= rocksdb::Slice((const char*)m_sk_packed_tuple, - size); + const rocksdb::Slice new_slice = + rocksdb::Slice((const char *)m_sk_packed_tuple, size); /* For UPDATEs, if the key has changed, we need to obtain a lock. INSERTs always require locking. 
*/ - if (row_info.old_data != nullptr) - { - size= kd.pack_record(table, m_pack_buffer, row_info.old_data, + if (row_info.old_data != nullptr) { + size = kd.pack_record(table, m_pack_buffer, row_info.old_data, m_sk_packed_tuple_old, nullptr, false, - row_info.hidden_pk_id, - user_defined_key_parts); - const rocksdb::Slice old_slice= rocksdb::Slice( - (const char*)m_sk_packed_tuple_old, size); + row_info.hidden_pk_id, user_defined_key_parts); + const rocksdb::Slice old_slice = + rocksdb::Slice((const char *)m_sk_packed_tuple_old, size); /* For updates, if the keys are the same, then no lock is needed @@ -7727,9 +7037,8 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id, this key is unique since NULL is not equal to each other, so no lock is needed. */ - if (!Rdb_pk_comparator::bytewise_compare(new_slice, old_slice)) - { - return 0; + if (!Rdb_pk_comparator::bytewise_compare(new_slice, old_slice)) { + return HA_EXIT_SUCCESS; } } @@ -7741,7 +7050,7 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id, the latest committed data. */ - const bool all_parts_used= (user_defined_key_parts == kd.get_key_parts()); + const bool all_parts_used = (user_defined_key_parts == kd.get_key_parts()); /* This iterator seems expensive since we need to allocate and free @@ -7754,27 +7063,23 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id, The bloom filter may need to be disabled for this lookup. 
*/ - const bool total_order_seek= - !can_use_bloom_filter(ha_thd(), kd, new_slice, all_parts_used, - is_ascending(*m_key_descr_arr[key_id], - HA_READ_KEY_EXACT)); - const bool fill_cache= !THDVAR(ha_thd(), skip_fill_cache); + const bool total_order_seek = !can_use_bloom_filter( + ha_thd(), kd, new_slice, all_parts_used, + is_ascending(*m_key_descr_arr[key_id], HA_READ_KEY_EXACT)); + const bool fill_cache = !THDVAR(ha_thd(), skip_fill_cache); /* psergey-todo: we just need to take lock, lookups not needed: */ std::string dummy_value; - const rocksdb::Status s= get_for_update(row_info.tx, kd.get_cf(), new_slice, - &dummy_value); - if (!s.ok() && !s.IsNotFound()) - { + const rocksdb::Status s = + get_for_update(row_info.tx, kd.get_cf(), new_slice, &dummy_value); + if (!s.ok() && !s.IsNotFound()) { return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def); } - rocksdb::Iterator* const iter= row_info.tx->get_iterator(kd.get_cf(), - total_order_seek, - fill_cache, - true /* read current data */, + rocksdb::Iterator *const iter = row_info.tx->get_iterator( + kd.get_cf(), total_order_seek, fill_cache, true /* read current data */, false /* acquire snapshot */); /* Need to scan the transaction to see if there is a duplicate key. @@ -7782,92 +7087,104 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id, in the transaction. */ iter->Seek(new_slice); - *found= !read_key_exact(kd, iter, all_parts_used, new_slice); + *found = !read_key_exact(kd, iter, all_parts_used, new_slice); delete iter; - return 0; + return HA_EXIT_SUCCESS; } int ha_rocksdb::check_uniqueness_and_lock( - const struct update_row_info& row_info, - bool* const pk_changed) -{ + const struct update_row_info &row_info, bool *const pk_changed) { /* Go through each index and determine if the index has uniqueness requirements. If it does, then try to obtain a row lock on the new values. Once all locks have been obtained, then perform the changes needed to update/insert the row. 
*/ - for (uint key_id= 0; key_id < m_tbl_def->m_key_count; key_id++) - { + for (uint key_id = 0; key_id < m_tbl_def->m_key_count; key_id++) { bool found; - int rc; + int rc; - if (is_pk(key_id, table, m_tbl_def)) - { - rc= check_and_lock_unique_pk(key_id, row_info, &found, pk_changed); - } - else - { - rc= check_and_lock_sk(key_id, row_info, &found); + if (is_pk(key_id, table, m_tbl_def)) { + rc = check_and_lock_unique_pk(key_id, row_info, &found, pk_changed); + } else { + rc = check_and_lock_sk(key_id, row_info, &found); } - if (rc != 0) - { + if (rc != 0) { return rc; } - if (found) - { + if (found) { /* There is a row with this key already, so error out. */ - errkey= key_id; - m_dupp_errkey= errkey; + errkey = key_id; + m_dupp_errkey = errkey; return HA_ERR_FOUND_DUPP_KEY; } } + return HA_EXIT_SUCCESS; +} + +int ha_rocksdb::check_duplicate_sk(const TABLE *table_arg, + const Rdb_key_def &index, + const rocksdb::Slice *key, + struct unique_sk_buf_info *sk_info) { + uint n_null_fields = 0; + const rocksdb::Comparator *index_comp = index.get_cf()->GetComparator(); + + /* Get proper SK buffer. 
*/ + uchar *sk_buf = sk_info->swap_and_get_sk_buf(); + + /* Get memcmp form of sk without extended pk tail */ + uint sk_memcmp_size = + index.get_memcmp_sk_parts(table_arg, *key, sk_buf, &n_null_fields); + + sk_info->sk_memcmp_key = + rocksdb::Slice(reinterpret_cast<char *>(sk_buf), sk_memcmp_size); + + if (sk_info->sk_memcmp_key_old.size() > 0 && n_null_fields == 0 && + index_comp->Compare(sk_info->sk_memcmp_key, sk_info->sk_memcmp_key_old) == + 0) { + return 1; + } + + sk_info->sk_memcmp_key_old = sk_info->sk_memcmp_key; return 0; } -int ha_rocksdb::bulk_load_key(Rdb_transaction* const tx, - const Rdb_key_def& kd, - const rocksdb::Slice& key, - const rocksdb::Slice& value) -{ - rocksdb::ColumnFamilyHandle* const cf= kd.get_cf(); +int ha_rocksdb::bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd, + const rocksdb::Slice &key, + const rocksdb::Slice &value) { + rocksdb::ColumnFamilyHandle *const cf = kd.get_cf(); DBUG_ASSERT(cf != nullptr); - if (m_sst_info == nullptr) - { - m_sst_info= - std::make_shared<Rdb_sst_info>(rdb, m_table_handler->m_table_name, - kd.get_name(), cf, rocksdb_db_options, - THDVAR(ha_thd(), trace_sst_api)); + if (m_sst_info == nullptr) { + m_sst_info = std::make_shared<Rdb_sst_info>( + rdb, m_table_handler->m_table_name, kd.get_name(), cf, + rocksdb_db_options, THDVAR(ha_thd(), trace_sst_api)); tx->start_bulk_load(this); - m_bulk_load_tx= tx; + m_bulk_load_tx = tx; } DBUG_ASSERT(m_sst_info != nullptr); - int rc= m_sst_info->put(key, value); - if (rc != 0) - { + int rc = m_sst_info->put(key, value); + if (rc != 0) { my_printf_error(ER_UNKNOWN_ERROR, "Failed to add a key to sst file writer(%s)", MYF(0), m_sst_info->error_message().c_str()); - rc= HA_ERR_INTERNAL_ERROR; + rc = HA_ERR_INTERNAL_ERROR; } return rc; } -int ha_rocksdb::finalize_bulk_load() -{ - int rc= 0; +int ha_rocksdb::finalize_bulk_load() { + int rc = 0; /* Skip if there are no possible ongoing bulk loads */ - if (m_sst_info == nullptr && m_bulk_load_tx == nullptr) - 
{ + if (m_sst_info == nullptr && m_bulk_load_tx == nullptr) { return rc; } @@ -7878,99 +7195,86 @@ int ha_rocksdb::finalize_bulk_load() flushed and cleared by another thread by the time the mutex has been acquired. */ - if (m_sst_info != nullptr) - { - rc= m_sst_info->commit(); - if (rc != 0) - { + if (m_sst_info != nullptr) { + rc = m_sst_info->commit(); + if (rc != 0) { + /* + Log the error immediately here in case the server crashes before + mysql prints via my_printf_error. + */ + sql_print_error("Failed to commit bulk loaded sst file to the " + "data store (%s)", m_sst_info->error_message().c_str()); + my_printf_error(ER_UNKNOWN_ERROR, "Failed to commit bulk loaded sst file to the " "data store (%s)", MYF(0), m_sst_info->error_message().c_str()); - rc= HA_ERR_INTERNAL_ERROR; + rc = HA_ERR_INTERNAL_ERROR; } - m_sst_info= nullptr; + m_sst_info = nullptr; m_bulk_load_tx->end_bulk_load(this); - m_bulk_load_tx= nullptr; + m_bulk_load_tx = nullptr; } mysql_mutex_unlock(&m_bulk_load_mutex); return rc; } -int ha_rocksdb::update_pk(const Rdb_key_def& kd, - const struct update_row_info& row_info, - const bool &pk_changed) -{ - const uint key_id= kd.get_keyno(); - const bool hidden_pk= is_hidden_pk(key_id, table, m_tbl_def); - if (!hidden_pk && pk_changed) - { +int ha_rocksdb::update_pk(const Rdb_key_def &kd, + const struct update_row_info &row_info, + const bool &pk_changed) { + const uint key_id = kd.get_keyno(); + const bool hidden_pk = is_hidden_pk(key_id, table, m_tbl_def); + if (!hidden_pk && pk_changed) { /* The old key needs to be deleted. 
*/ - const rocksdb::Status s= delete_or_singledelete( - key_id, row_info.tx, kd.get_cf(), - row_info.old_pk_slice); - if (!s.ok()) - { + const rocksdb::Status s = delete_or_singledelete( + key_id, row_info.tx, kd.get_cf(), row_info.old_pk_slice); + if (!s.ok()) { return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def); } } - if (table->next_number_field) - { + if (table->next_number_field) { update_auto_incr_val(); } rocksdb::Slice value_slice; convert_record_to_storage_format(row_info.new_pk_slice, - row_info.new_pk_unpack_info, - &value_slice); + row_info.new_pk_unpack_info, &value_slice); - int rc= 0; - const auto cf= m_pk_descr->get_cf(); + int rc = 0; + const auto cf = m_pk_descr->get_cf(); if (rocksdb_enable_bulk_load_api && THDVAR(table->in_use, bulk_load) && - !hidden_pk) - { + !hidden_pk) { /* Write the primary key directly to an SST file using an SstFileWriter */ - rc= bulk_load_key(row_info.tx, kd, row_info.new_pk_slice, - value_slice); - } - else if (row_info.skip_unique_check) - { + rc = bulk_load_key(row_info.tx, kd, row_info.new_pk_slice, value_slice); + } else if (row_info.skip_unique_check) { /* It is responsibility of the user to make sure that the data being inserted doesn't violate any unique keys. */ row_info.tx->get_blind_write_batch()->Put(cf, row_info.new_pk_slice, value_slice); - } - else if (row_info.tx->m_ddl_transaction) - { + } else if (row_info.tx->m_ddl_transaction) { /* DDL statement must check for unique key conflicts. 
For example: ALTER TABLE tbl DROP PRIMARY KEY, ADD PRIMARY KEY(non_unique_column) */ row_info.tx->get_indexed_write_batch()->Put(cf, row_info.new_pk_slice, value_slice); - } - else - { - const auto s= row_info.tx->put(cf, row_info.new_pk_slice, value_slice); - if (!s.ok()) - { - if (s.IsBusy()) - { - errkey= table->s->primary_key; - m_dupp_errkey= errkey; + } else { + const auto s = row_info.tx->put(cf, row_info.new_pk_slice, value_slice); + if (!s.ok()) { + if (s.IsBusy()) { + errkey = table->s->primary_key; + m_dupp_errkey = errkey; rc = HA_ERR_FOUND_DUPP_KEY; - } - else - { + } else { rc = row_info.tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def); } @@ -7980,10 +7284,8 @@ int ha_rocksdb::update_pk(const Rdb_key_def& kd, return rc; } -int ha_rocksdb::update_sk(const TABLE* const table_arg, - const Rdb_key_def& kd, - const struct update_row_info& row_info) -{ +int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd, + const struct update_row_info &row_info) { int new_packed_size; int old_packed_size; @@ -7995,26 +7297,21 @@ int ha_rocksdb::update_sk(const TABLE* const table_arg, /* Can skip updating this key if none of the key fields have changed. 
*/ - if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id)) - { - return 0; + if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id)) { + return HA_EXIT_SUCCESS; } - const bool store_row_debug_checksums= should_store_row_debug_checksums(); + const bool store_row_debug_checksums = should_store_row_debug_checksums(); - new_packed_size= kd.pack_record(table_arg, m_pack_buffer, row_info.new_data, - m_sk_packed_tuple, &m_sk_tails, - store_row_debug_checksums, - row_info.hidden_pk_id); + new_packed_size = kd.pack_record( + table_arg, m_pack_buffer, row_info.new_data, m_sk_packed_tuple, + &m_sk_tails, store_row_debug_checksums, row_info.hidden_pk_id); - if (row_info.old_data != nullptr) - { + if (row_info.old_data != nullptr) { // The old value - old_packed_size= kd.pack_record(table_arg, m_pack_buffer, - row_info.old_data, - m_sk_packed_tuple_old, &m_sk_tails_old, - store_row_debug_checksums, - row_info.hidden_pk_id); + old_packed_size = kd.pack_record( + table_arg, m_pack_buffer, row_info.old_data, m_sk_packed_tuple_old, + &m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id); /* Check if we are going to write the same value. 
This can happen when @@ -8032,12 +7329,11 @@ int ha_rocksdb::update_sk(const TABLE* const table_arg, */ if (old_packed_size == new_packed_size && m_sk_tails_old.get_current_pos() == m_sk_tails.get_current_pos() && - memcmp(m_sk_packed_tuple_old, m_sk_packed_tuple, - old_packed_size) == 0 && + memcmp(m_sk_packed_tuple_old, m_sk_packed_tuple, old_packed_size) == + 0 && memcmp(m_sk_tails_old.ptr(), m_sk_tails.ptr(), - m_sk_tails.get_current_pos()) == 0) - { - return 0; + m_sk_tails.get_current_pos()) == 0) { + return HA_EXIT_SUCCESS; } /* @@ -8046,9 +7342,8 @@ int ha_rocksdb::update_sk(const TABLE* const table_arg, (also note that DDL statements do not delete rows, so this is not a DDL statement) */ - old_key_slice= rocksdb::Slice( - reinterpret_cast<const char*>(m_sk_packed_tuple_old), - old_packed_size); + old_key_slice = rocksdb::Slice( + reinterpret_cast<const char *>(m_sk_packed_tuple_old), old_packed_size); row_info.tx->get_indexed_write_batch()->SingleDelete(kd.get_cf(), old_key_slice); @@ -8065,74 +7360,63 @@ int ha_rocksdb::update_sk(const TABLE* const table_arg, rocksdb::WriteBatchBase *write_batch; if ((row_info.tx->m_ddl_transaction && !(table_arg->key_info[key_id].flags & HA_NOSAME)) || - row_info.skip_unique_check) - { - write_batch= row_info.tx->get_blind_write_batch(); - } - else - { - write_batch= row_info.tx->get_indexed_write_batch(); + row_info.skip_unique_check) { + write_batch = row_info.tx->get_blind_write_batch(); + } else { + write_batch = row_info.tx->get_indexed_write_batch(); } - new_key_slice= rocksdb::Slice( - reinterpret_cast<const char*>(m_sk_packed_tuple), new_packed_size); - new_value_slice= rocksdb::Slice( - reinterpret_cast<const char*>(m_sk_tails.ptr()), - m_sk_tails.get_current_pos()); + new_key_slice = rocksdb::Slice( + reinterpret_cast<const char *>(m_sk_packed_tuple), new_packed_size); + new_value_slice = + rocksdb::Slice(reinterpret_cast<const char *>(m_sk_tails.ptr()), + m_sk_tails.get_current_pos()); 
write_batch->Put(kd.get_cf(), new_key_slice, new_value_slice); - return 0; + return HA_EXIT_SUCCESS; } -int ha_rocksdb::update_indexes(const struct update_row_info& row_info, - const bool &pk_changed) -{ +int ha_rocksdb::update_indexes(const struct update_row_info &row_info, + const bool &pk_changed) { int rc; - for (uint key_id= 0; key_id < m_tbl_def->m_key_count; key_id++) - { - const Rdb_key_def& kd= *m_key_descr_arr[key_id]; - if (is_pk(key_id, table, m_tbl_def)) - { - rc= update_pk(kd, row_info, pk_changed); - } - else - { - rc= update_sk(table, kd, row_info); + for (uint key_id = 0; key_id < m_tbl_def->m_key_count; key_id++) { + const Rdb_key_def &kd = *m_key_descr_arr[key_id]; + if (is_pk(key_id, table, m_tbl_def)) { + rc = update_pk(kd, row_info, pk_changed); + } else { + rc = update_sk(table, kd, row_info); } - if (rc != 0) - { + if (rc != 0) { return rc; } } - return 0; + return HA_EXIT_SUCCESS; } -int ha_rocksdb::update_write_row(const uchar* const old_data, - const uchar* const new_data, - const bool skip_unique_check) -{ - bool pk_changed= false; - struct update_row_info row_info; +int ha_rocksdb::update_write_row(const uchar *const old_data, + const uchar *const new_data, + const bool skip_unique_check) { + DBUG_ENTER_FUNC(); - row_info.old_data= old_data; - row_info.new_data= new_data; - row_info.skip_unique_check= skip_unique_check; - row_info.new_pk_unpack_info= nullptr; + bool pk_changed = false; + struct update_row_info row_info; - DBUG_ENTER("ha_rocksdb::write_row_common"); + row_info.old_data = old_data; + row_info.new_data = new_data; + row_info.skip_unique_check = skip_unique_check; + row_info.new_pk_unpack_info = nullptr; set_last_rowkey(old_data); - row_info.tx= get_or_create_tx(table->in_use); + row_info.tx = get_or_create_tx(table->in_use); - if (old_data != nullptr) - { - row_info.old_pk_slice= rocksdb::Slice(m_last_rowkey.ptr(), - m_last_rowkey.length()); + if (old_data != nullptr) { + row_info.old_pk_slice = + 
rocksdb::Slice(m_last_rowkey.ptr(), m_last_rowkey.length()); /* Determine which indexes need updating. */ calc_updated_indexes(); @@ -8141,21 +7425,18 @@ int ha_rocksdb::update_write_row(const uchar* const old_data, /* Get the new row key into row_info.new_pk_slice */ - int rc= get_pk_for_update(&row_info); - if (rc != 0) - { + int rc = get_pk_for_update(&row_info); + if (rc != 0) { DBUG_RETURN(rc); } - if (!skip_unique_check) - { + if (!skip_unique_check) { /* Check to see if we are going to have failures because of unique keys. Also lock the appropriate key values. */ - rc= check_uniqueness_and_lock(row_info, &pk_changed); - if (rc != 0) - { + rc = check_uniqueness_and_lock(row_info, &pk_changed); + if (rc != 0) { DBUG_RETURN(rc); } } @@ -8168,59 +7449,57 @@ int ha_rocksdb::update_write_row(const uchar* const old_data, here because updates to the transaction will be made and those updates cannot be easily removed without rolling back the entire transaction. */ - rc= update_indexes(row_info, pk_changed); - if (rc != 0) - { + rc = update_indexes(row_info, pk_changed); + if (rc != 0) { DBUG_RETURN(rc); } - if (do_bulk_commit(row_info.tx)) - { + if (do_bulk_commit(row_info.tx)) { DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } - /* Open a cursor and position it at the passed record */ -void ha_rocksdb::setup_scan_iterator( - const Rdb_key_def& kd, - rocksdb::Slice* const slice, - const bool use_all_keys, - const bool is_ascending, - const uint eq_cond_len) -{ +void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd, + rocksdb::Slice *const slice, + const bool use_all_keys, + const bool is_ascending, + const uint eq_cond_len) { DBUG_ASSERT(slice != nullptr); DBUG_ASSERT(slice->size() >= eq_cond_len); - Rdb_transaction* const tx= get_or_create_tx(table->in_use); + Rdb_transaction *const tx = get_or_create_tx(table->in_use); - bool skip_bloom= true; + bool skip_bloom = true; const rocksdb::Slice eq_cond(slice->data(), 
eq_cond_len); - if (can_use_bloom_filter(ha_thd(), kd, eq_cond, use_all_keys, is_ascending)) - { - skip_bloom= false; + if (can_use_bloom_filter(ha_thd(), kd, eq_cond, use_all_keys, is_ascending)) { + skip_bloom = false; } /* In some cases, setup_scan_iterator() is called multiple times from the same query but bloom filter can not always be used. - Suppose the following query example. id2 is VARCHAR(30) and PRIMARY KEY (id1, id2). - select count(*) from t2 WHERE id1=100 and id2 IN ('00000000000000000000', '100'); + Suppose the following query example. id2 is VARCHAR(30) and PRIMARY KEY + (id1, id2). + select count(*) from t2 WHERE id1=100 and id2 IN ('00000000000000000000', + '100'); In this case, setup_scan_iterator() is called twice, the first time is for - (id1, id2)=(100, '00000000000000000000') and the second time is for (100, '100'). - If prefix bloom filter length is 24 bytes, prefix bloom filter can be used for the + (id1, id2)=(100, '00000000000000000000') and the second time is for (100, + '100'). + If prefix bloom filter length is 24 bytes, prefix bloom filter can be used + for the first condition but not for the second condition. - If bloom filter condition is changed, currently it is necessary to destroy and + If bloom filter condition is changed, currently it is necessary to destroy + and re-create Iterator. */ - if (m_scan_it_skips_bloom != skip_bloom) - { + if (m_scan_it_skips_bloom != skip_bloom) { release_scan_iterator(); } @@ -8228,46 +7507,40 @@ void ha_rocksdb::setup_scan_iterator( SQL layer can call rnd_init() multiple times in a row. In that case, re-use the iterator, but re-position it at the table start. 
*/ - if (!m_scan_it) - { - const bool fill_cache= !THDVAR(ha_thd(), skip_fill_cache); - if (commit_in_the_middle()) - { + if (!m_scan_it) { + const bool fill_cache = !THDVAR(ha_thd(), skip_fill_cache); + if (commit_in_the_middle()) { DBUG_ASSERT(m_scan_it_snapshot == nullptr); - m_scan_it_snapshot= rdb->GetSnapshot(); + m_scan_it_snapshot = rdb->GetSnapshot(); auto read_opts = rocksdb::ReadOptions(); read_opts.total_order_seek = true; // TODO: set based on WHERE conditions - read_opts.snapshot= m_scan_it_snapshot; - m_scan_it= rdb->NewIterator(read_opts, kd.get_cf()); - } - else - { - m_scan_it= tx->get_iterator(kd.get_cf(), skip_bloom, fill_cache); + read_opts.snapshot = m_scan_it_snapshot; + m_scan_it = rdb->NewIterator(read_opts, kd.get_cf()); + } else { + m_scan_it = tx->get_iterator(kd.get_cf(), skip_bloom, fill_cache); } - m_scan_it_skips_bloom= skip_bloom; + m_scan_it_skips_bloom = skip_bloom; } /* - Seek() will "Position at the first key in the source that at or past target". + Seek() will "Position at the first key in the source that at or past + target". The operation cannot fail. 
*/ m_scan_it->Seek(*slice); } -void ha_rocksdb::release_scan_iterator() -{ +void ha_rocksdb::release_scan_iterator() { delete m_scan_it; - m_scan_it= nullptr; + m_scan_it = nullptr; - if (m_scan_it_snapshot) - { + if (m_scan_it_snapshot) { rdb->ReleaseSnapshot(m_scan_it_snapshot); - m_scan_it_snapshot= nullptr; + m_scan_it_snapshot = nullptr; } } -void ha_rocksdb::setup_iterator_for_rnd_scan() -{ +void ha_rocksdb::setup_iterator_for_rnd_scan() { uint key_size; if (m_pk_descr->m_is_reverse_cf) @@ -8275,27 +7548,22 @@ void ha_rocksdb::setup_iterator_for_rnd_scan() else m_pk_descr->get_infimum_key(m_pk_packed_tuple, &key_size); - rocksdb::Slice table_key((const char*)m_pk_packed_tuple, key_size); + rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size); setup_scan_iterator(*m_pk_descr, &table_key); - m_skip_scan_it_next_call= true; + m_skip_scan_it_next_call = true; } +int ha_rocksdb::rnd_init(bool scan) { + DBUG_ENTER_FUNC(); -int ha_rocksdb::rnd_init(bool scan) -{ - DBUG_ENTER("ha_rocksdb::rnd_init"); - - Rdb_transaction* const tx= get_or_create_tx(table->in_use); + Rdb_transaction *const tx = get_or_create_tx(table->in_use); setup_read_decoders(); - if (scan) - { - m_rnd_scan_is_new_snapshot= !tx->has_snapshot(); + if (scan) { + m_rnd_scan_is_new_snapshot = !tx->has_snapshot(); setup_iterator_for_rnd_scan(); - } - else - { + } else { /* We don't need any preparations for rnd_pos() calls. */ } @@ -8304,48 +7572,46 @@ int ha_rocksdb::rnd_init(bool scan) // the snapshot immediately. 
tx->acquire_snapshot(m_lock_rows == RDB_LOCK_NONE); - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } +int ha_rocksdb::rnd_next(uchar *const buf) { + DBUG_ENTER_FUNC(); -int ha_rocksdb::rnd_next(uchar* const buf) -{ int rc; ha_statistic_increment(&SSV::ha_read_rnd_next_count); - for (;;) - { - rc= rnd_next_with_direction(buf, true); + for (;;) { + rc = rnd_next_with_direction(buf, true); if (rc != HA_ERR_LOCK_DEADLOCK || !m_rnd_scan_is_new_snapshot) - break; /* exit the loop */ + break; /* exit the loop */ // release the snapshot and iterator and then regenerate them - Rdb_transaction *tx= get_or_create_tx(table->in_use); + Rdb_transaction *tx = get_or_create_tx(table->in_use); tx->release_snapshot(); release_scan_iterator(); setup_iterator_for_rnd_scan(); } - m_rnd_scan_is_new_snapshot= false; + m_rnd_scan_is_new_snapshot = false; if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; - return rc; -} + DBUG_RETURN(rc); +} /* See also secondary_index_read(). */ -int ha_rocksdb::rnd_next_with_direction(uchar* const buf, bool move_forward) -{ +int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { + DBUG_ENTER_FUNC(); + int rc; - DBUG_ENTER("ha_rocksdb::rnd_next"); - table->status= STATUS_NOT_FOUND; + table->status = STATUS_NOT_FOUND; stats.rows_requested++; - if (!m_scan_it || !m_scan_it->Valid()) - { + if (!m_scan_it || !m_scan_it->Valid()) { /* We can get here when SQL layer has called @@ -8357,49 +7623,41 @@ int ha_rocksdb::rnd_next_with_direction(uchar* const buf, bool move_forward) DBUG_RETURN(HA_ERR_END_OF_FILE); } - for ( ; ; ) - { - if (m_skip_scan_it_next_call) - { - m_skip_scan_it_next_call= false; - } - else - { + for (;;) { + if (m_skip_scan_it_next_call) { + m_skip_scan_it_next_call = false; + } else { if (move_forward) - m_scan_it->Next(); /* this call cannot fail */ + m_scan_it->Next(); /* this call cannot fail */ else - m_scan_it->Prev(); /* this call cannot fail */ + m_scan_it->Prev(); /* this call cannot fail */ } - if 
(!m_scan_it->Valid()) - { - rc= HA_ERR_END_OF_FILE; + if (!m_scan_it->Valid()) { + rc = HA_ERR_END_OF_FILE; break; } /* check if we're out of this table */ - const rocksdb::Slice key= m_scan_it->key(); - if (!m_pk_descr->covers_key(key)) - { - rc= HA_ERR_END_OF_FILE; + const rocksdb::Slice key = m_scan_it->key(); + if (!m_pk_descr->covers_key(key)) { + rc = HA_ERR_END_OF_FILE; break; } - if (m_lock_rows != RDB_LOCK_NONE) - { + if (m_lock_rows != RDB_LOCK_NONE) { /* Lock the row we've just read. Now we call get_for_update which will 1) Take a lock and 2) Will fail if the row was deleted since the snapshot was taken. */ - Rdb_transaction* const tx= get_or_create_tx(table->in_use); + Rdb_transaction *const tx = get_or_create_tx(table->in_use); DEBUG_SYNC(ha_thd(), "rocksdb_concurrent_delete"); - const rocksdb::Status s= get_for_update(tx, m_pk_descr->get_cf(), key, - &m_retrieved_record); + const rocksdb::Status s = + get_for_update(tx, m_pk_descr->get_cf(), key, &m_retrieved_record); if (s.IsNotFound() && - my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED) - { + my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED) { // This occurs if we accessed a row, tried to lock it, failed, // released and reacquired the snapshot (because of READ COMMITTED // mode) and the row was deleted by someone else in the meantime. @@ -8407,27 +7665,24 @@ int ha_rocksdb::rnd_next_with_direction(uchar* const buf, bool move_forward) continue; } - if (!s.ok()) - { - DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, - m_tbl_def)); + if (!s.ok()) { + DBUG_RETURN( + tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def)); } // If we called get_for_update() use the value from that call not from // the iterator as it may be stale since we don't have a snapshot // when m_lock_rows is not RDB_LOCK_NONE. 
m_last_rowkey.copy(key.data(), key.size(), &my_charset_bin); - rc= convert_record_from_storage_format(&key, buf); - } - else - { + rc = convert_record_from_storage_format(&key, buf); + } else { // Use the value from the iterator - rocksdb::Slice value= m_scan_it->value(); + rocksdb::Slice value = m_scan_it->value(); m_last_rowkey.copy(key.data(), key.size(), &my_charset_bin); - rc= convert_record_from_storage_format(&key, &value, buf); + rc = convert_record_from_storage_format(&key, &value, buf); } - table->status= 0; + table->status = 0; break; } @@ -8440,19 +7695,18 @@ int ha_rocksdb::rnd_next_with_direction(uchar* const buf, bool move_forward) DBUG_RETURN(rc); } - -int ha_rocksdb::rnd_end() -{ - DBUG_ENTER("ha_rocksdb::rnd_end"); +int ha_rocksdb::rnd_end() { + DBUG_ENTER_FUNC(); release_scan_iterator(); - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } -int ha_rocksdb::index_init(uint idx, bool sorted) -{ - Rdb_transaction* const tx= get_or_create_tx(table->in_use); +int ha_rocksdb::index_init(uint idx, bool sorted) { + DBUG_ENTER_FUNC(); + + Rdb_transaction *const tx = get_or_create_tx(table->in_use); DBUG_ASSERT(tx != nullptr); setup_read_decoders(); @@ -8462,39 +7716,34 @@ int ha_rocksdb::index_init(uint idx, bool sorted) // Otherwise acquire the snapshot immediately. tx->acquire_snapshot(m_lock_rows == RDB_LOCK_NONE); - active_index= idx; + active_index = idx; - return 0; + DBUG_RETURN(HA_EXIT_SUCCESS); } -int ha_rocksdb::index_end() -{ +int ha_rocksdb::index_end() { + DBUG_ENTER_FUNC(); + release_scan_iterator(); - active_index= MAX_KEY; - in_range_check_pushed_down= FALSE; - return 0; + active_index = MAX_KEY; + in_range_check_pushed_down = FALSE; + + DBUG_RETURN(HA_EXIT_SUCCESS); } +int ha_rocksdb::truncate() { + DBUG_ENTER_FUNC(); -int ha_rocksdb::delete_all_rows(ha_rows* const nrows) -{ - DBUG_ENTER("ha_rocksdb::delete_all_rows"); - /* - We will have to delete records one-by-one anyway. Let the SQL layer do - it. 
- */ - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} + DBUG_ASSERT(m_tbl_def != nullptr); + + m_tbl_def->m_auto_incr_val = 1; + m_tbl_def->m_hidden_pk_val = 1; -int ha_rocksdb::truncate() -{ - m_tbl_def->m_auto_incr_val= 1; - m_tbl_def->m_hidden_pk_val= 1; remove_rows(m_tbl_def); - return 0; -} + DBUG_RETURN(HA_EXIT_SUCCESS); +} /* Delete the row we've last read. The row is also passed as parameter. @@ -8504,108 +7753,95 @@ int ha_rocksdb::truncate() The row is either table->record[0] or table->record[1]. (Check out InnoDB: row_update_for_mysql() has "UT_NOT_USED(mysql_rec)" */ -int ha_rocksdb::delete_row(const uchar* const buf) -{ - DBUG_ENTER("ha_rocksdb::delete_row"); +int ha_rocksdb::delete_row(const uchar *const buf) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(buf != nullptr); ha_statistic_increment(&SSV::ha_delete_count); set_last_rowkey(buf); rocksdb::Slice key_slice(m_last_rowkey.ptr(), m_last_rowkey.length()); - Rdb_transaction* const tx= get_or_create_tx(table->in_use); + Rdb_transaction *const tx = get_or_create_tx(table->in_use); - const uint index= pk_index(table, m_tbl_def); - rocksdb::Status s= delete_or_singledelete(index, tx, - m_pk_descr->get_cf(), - key_slice); - if (!s.ok()) - { + const uint index = pk_index(table, m_tbl_def); + rocksdb::Status s = + delete_or_singledelete(index, tx, m_pk_descr->get_cf(), key_slice); + if (!s.ok()) { DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def)); } - longlong hidden_pk_id= 0; - if (m_tbl_def->m_key_count > 1 && - has_hidden_pk(table) && read_hidden_pk_id_from_rowkey(&hidden_pk_id)) + longlong hidden_pk_id = 0; + if (m_tbl_def->m_key_count > 1 && has_hidden_pk(table) && + read_hidden_pk_id_from_rowkey(&hidden_pk_id)) DBUG_RETURN(HA_ERR_INTERNAL_ERROR); // Delete the record for every secondary index - for (uint i= 0; i < m_tbl_def->m_key_count; i++) - { - if (!is_pk(i, table, m_tbl_def)) - { + for (uint i = 0; i < m_tbl_def->m_key_count; i++) { + if (!is_pk(i, table, m_tbl_def)) { int packed_size; 
- const Rdb_key_def& kd= *m_key_descr_arr[i]; - packed_size= kd.pack_record(table, m_pack_buffer, buf, m_sk_packed_tuple, + const Rdb_key_def &kd = *m_key_descr_arr[i]; + packed_size = kd.pack_record(table, m_pack_buffer, buf, m_sk_packed_tuple, nullptr, false, hidden_pk_id); rocksdb::Slice secondary_key_slice( - reinterpret_cast<const char*>(m_sk_packed_tuple), packed_size); + reinterpret_cast<const char *>(m_sk_packed_tuple), packed_size); /* Deleting on secondary key doesn't need any locks: */ tx->get_indexed_write_batch()->SingleDelete(kd.get_cf(), secondary_key_slice); } } - if (do_bulk_commit(tx)) - { + if (do_bulk_commit(tx)) { DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } stats.rows_deleted++; update_row_stats(ROWS_DELETED); - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } - -rocksdb::Status -ha_rocksdb::delete_or_singledelete(uint index, - Rdb_transaction* const tx, - rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key) -{ +rocksdb::Status ha_rocksdb::delete_or_singledelete( + uint index, Rdb_transaction *const tx, + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key) { if (can_use_single_delete(index)) return tx->single_delete(column_family, key); return tx->delete_key(column_family, key); } +void ha_rocksdb::update_stats(void) { + DBUG_ENTER_FUNC(); -void ha_rocksdb::update_stats(void) -{ - DBUG_ENTER("ha_rocksdb::update_stats"); + stats.records = 0; + stats.index_file_length = 0ul; + stats.data_file_length = 0ul; + stats.mean_rec_length = 0; - stats.records= 0; - stats.index_file_length= 0ul; - stats.data_file_length= 0ul; - stats.mean_rec_length= 0; - for (uint i= 0; i < m_tbl_def->m_key_count; i++) - { - if (is_pk(i, table, m_tbl_def)) - { - stats.data_file_length= m_pk_descr->m_stats.m_actual_disk_size; + for (uint i = 0; i < m_tbl_def->m_key_count; i++) { + if (is_pk(i, table, m_tbl_def)) { + stats.data_file_length = m_pk_descr->m_stats.m_actual_disk_size; stats.records = m_pk_descr->m_stats.m_rows; - 
} - else - { - stats.index_file_length+= m_key_descr_arr[i]->m_stats.m_actual_disk_size; + } else { + stats.index_file_length += m_key_descr_arr[i]->m_stats.m_actual_disk_size; } } + DBUG_VOID_RETURN; } -int ha_rocksdb::info(uint flag) -{ - DBUG_ENTER("ha_rocksdb::info"); +int ha_rocksdb::info(uint flag) { + DBUG_ENTER_FUNC(); if (!table) - return 1; + return HA_EXIT_FAILURE; - if (flag & HA_STATUS_VARIABLE) - { + if (flag & HA_STATUS_VARIABLE) { /* Test only to simulate corrupted stats */ DBUG_EXECUTE_IF("myrocks_simulate_negative_stats", - m_pk_descr->m_stats.m_actual_disk_size = - -m_pk_descr->m_stats.m_actual_disk_size;); + m_pk_descr->m_stats.m_actual_disk_size = + -m_pk_descr->m_stats.m_actual_disk_size;); update_stats(); @@ -8615,27 +7851,27 @@ int ha_rocksdb::info(uint flag) */ if (static_cast<longlong>(stats.data_file_length) < 0 || static_cast<longlong>(stats.index_file_length) < 0 || - static_cast<longlong>(stats.records) < 0) - { + static_cast<longlong>(stats.records) < 0) { if (analyze(nullptr, nullptr)) { - DBUG_RETURN(1); + DBUG_RETURN(HA_EXIT_FAILURE); } update_stats(); } - if (stats.records == 0) - { + if (stats.records == 0) { // most likely, the table is in memtable // try to deduce from GetApproximateSizes - uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE*2]; - auto r= get_range(pk_index(table, m_tbl_def), buf); - uint64_t sz= 0; - rdb->GetApproximateSizes( - m_pk_descr->get_cf(), - &r, 1, - &sz, true); - stats.records = sz/ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE; + uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; + auto r = get_range(pk_index(table, m_tbl_def), buf); + uint64_t sz = 0; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + rdb->GetApproximateSizes(m_pk_descr->get_cf(), &r, 1, &sz, true); +#pragma GCC diagnostic pop + + stats.records = sz / ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE; stats.data_file_length = sz; if (rocksdb_debug_optimizer_n_rows > 0) @@ -8645,66 +7881,59 @@ int ha_rocksdb::info(uint flag) if 
(stats.records != 0) stats.mean_rec_length = stats.data_file_length / stats.records; } - if (flag & HA_STATUS_CONST) - { - ref_length= m_pk_descr->max_storage_fmt_length(); + if (flag & HA_STATUS_CONST) { + ref_length = m_pk_descr->max_storage_fmt_length(); // TODO: Needs to reimplement after having real index statistics - for (uint i= 0; i < m_tbl_def->m_key_count; i++) - { - if (is_hidden_pk(i, table, m_tbl_def)) - { + for (uint i = 0; i < m_tbl_def->m_key_count; i++) { + if (is_hidden_pk(i, table, m_tbl_def)) { continue; } - KEY* const k= &table->key_info[i]; - for (uint j = 0; j < k->actual_key_parts; j++) - { - const Rdb_index_stats& k_stats= m_key_descr_arr[i]->m_stats; + KEY *const k = &table->key_info[i]; + for (uint j = 0; j < k->actual_key_parts; j++) { + const Rdb_index_stats &k_stats = m_key_descr_arr[i]->m_stats; uint x = k_stats.m_distinct_keys_per_prefix.size() > j && - k_stats.m_distinct_keys_per_prefix[j] > 0 ? - k_stats.m_rows / k_stats.m_distinct_keys_per_prefix[j] : - 0; + k_stats.m_distinct_keys_per_prefix[j] > 0 + ? k_stats.m_rows / k_stats.m_distinct_keys_per_prefix[j] + : 0; if (x > stats.records) x = stats.records; if ((x == 0 && rocksdb_debug_optimizer_no_zero_cardinality) || - rocksdb_debug_optimizer_n_rows > 0) - { - // Fake cardinality implementation. For example, (idx1, idx2, idx3) index + rocksdb_debug_optimizer_n_rows > 0) { + // Fake cardinality implementation. For example, (idx1, idx2, idx3) + // index // will have rec_per_key for (idx1)=4, (idx1,2)=2, and (idx1,2,3)=1. // rec_per_key for the whole index is 1, and multiplied by 2^n if // n suffix columns of the index are not used. - x = 1 << (k->actual_key_parts-j-1); + x = 1 << (k->actual_key_parts - j - 1); } - k->rec_per_key[j]= x; + k->rec_per_key[j] = x; } } } - if (flag & HA_STATUS_ERRKEY) - { + if (flag & HA_STATUS_ERRKEY) { /* Currently we support only primary keys so we know which key had a uniqueness violation. 
*/ - errkey= m_dupp_errkey; - dup_ref= m_pk_tuple; // TODO(?): this should store packed PK. + errkey = m_dupp_errkey; + dup_ref = m_pk_tuple; // TODO(?): this should store packed PK. } - if (flag & HA_STATUS_AUTO) - { - stats.auto_increment_value= m_tbl_def->m_auto_incr_val; + if (flag & HA_STATUS_AUTO) { + stats.auto_increment_value = m_tbl_def->m_auto_incr_val; } - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } -void ha_rocksdb::position(const uchar* const record) -{ - DBUG_ENTER("ha_rocksdb::position"); +void ha_rocksdb::position(const uchar *const record) { + DBUG_ENTER_FUNC(); - longlong hidden_pk_id= 0; + longlong hidden_pk_id = 0; if (has_hidden_pk(table) && read_hidden_pk_id_from_rowkey(&hidden_pk_id)) - DBUG_ASSERT(false); // should never reach here + DBUG_ASSERT(false); // should never reach here /* Get packed primary key value from the record. @@ -8719,8 +7948,8 @@ void ha_rocksdb::position(const uchar* const record) read set (this may potentially disable index-only access when PK column cannot be restored from its mem-comparable form in the secondary indexes). 
*/ - const uint packed_size= m_pk_descr->pack_record(table, m_pack_buffer, record, - ref, nullptr, false, hidden_pk_id); + const uint packed_size = m_pk_descr->pack_record( + table, m_pack_buffer, record, ref, nullptr, false, hidden_pk_id); /* It could be that mem-comparable form of PK occupies less than ref_length @@ -8732,26 +7961,23 @@ void ha_rocksdb::position(const uchar* const record) DBUG_VOID_RETURN; } +int ha_rocksdb::rnd_pos(uchar *const buf, uchar *const pos) { + DBUG_ENTER_FUNC(); -int ha_rocksdb::rnd_pos(uchar* const buf, uchar* const pos) -{ int rc; size_t len; - DBUG_ENTER("ha_rocksdb::rnd_pos"); ha_statistic_increment(&SSV::ha_read_rnd_count); stats.rows_requested++; - len= m_pk_descr->key_length(table, rocksdb::Slice((const char*)pos, - ref_length)); - if (len == size_t(-1)) - { + len = m_pk_descr->key_length(table, + rocksdb::Slice((const char *)pos, ref_length)); + if (len == size_t(-1)) { DBUG_RETURN(HA_ERR_INTERNAL_ERROR); /* Data corruption? */ } - rc= get_row_by_rowid(buf, pos, len); + rc = get_row_by_rowid(buf, pos, len); - if (!rc) - { + if (!rc) { stats.rows_read++; update_row_stats(ROWS_READ); } @@ -8759,37 +7985,32 @@ int ha_rocksdb::rnd_pos(uchar* const buf, uchar* const pos) DBUG_RETURN(rc); } - /* @brief Calculate (if needed) the bitmap of indexes that are modified by the current query @detail - The calculation done by checking index definitions against the table->write_set + The calculation done by checking index definitions against the + table->write_set column bitmap. 
*/ -void ha_rocksdb::calc_updated_indexes() -{ - if (!m_update_scope_is_valid) - { - m_update_scope_is_valid= true; +void ha_rocksdb::calc_updated_indexes() { + if (!m_update_scope_is_valid) { + m_update_scope_is_valid = true; m_update_scope.clear_all(); - for (uint keynr= 0; keynr < table->s->keys; keynr++) - { - const Rdb_key_def& kd= *m_key_descr_arr[keynr]; + for (uint keynr = 0; keynr < table->s->keys; keynr++) { + const Rdb_key_def &kd = *m_key_descr_arr[keynr]; /* Walk over all key parts, including the "extended key" suffix */ - const uint key_parts= kd.get_key_parts(); - for (uint kp= 0; kp < key_parts; kp++) - { + const uint key_parts = kd.get_key_parts(); + for (uint kp = 0; kp < key_parts; kp++) { if (has_hidden_pk(table) && kp + 1 == key_parts) break; - Field* const field= kd.get_table_field_for_part_no(table, kp); - if (bitmap_is_set(table->write_set, field->field_index)) - { + Field *const field = kd.get_table_field_for_part_no(table, kp); + if (bitmap_is_set(table->write_set, field->field_index)) { m_update_scope.set_bit(keynr); break; } @@ -8798,10 +8019,11 @@ void ha_rocksdb::calc_updated_indexes() } } +int ha_rocksdb::update_row(const uchar *const old_data, uchar *const new_data) { + DBUG_ENTER_FUNC(); -int ha_rocksdb::update_row(const uchar* const old_data, uchar* const new_data) -{ - DBUG_ENTER("ha_rocksdb::update_row"); + DBUG_ASSERT(old_data != nullptr); + DBUG_ASSERT(new_data != nullptr); DBUG_ASSERT(m_lock_rows == RDB_LOCK_WRITE); /* old_data points to record we're updating. 
It is the same as the record @@ -8811,40 +8033,34 @@ int ha_rocksdb::update_row(const uchar* const old_data, uchar* const new_data) DBUG_ASSERT(new_data == table->record[0]); ha_statistic_increment(&SSV::ha_update_count); - const int rv= update_write_row(old_data, new_data, false); + const int rv = update_write_row(old_data, new_data, false); - if (rv == 0) - { + if (rv == 0) { stats.rows_updated++; update_row_stats(ROWS_UPDATED); } + DBUG_RETURN(rv); } - /* The following function was copied from ha_blackhole::store_lock: */ -THR_LOCK_DATA **ha_rocksdb::store_lock(THD* const thd, - THR_LOCK_DATA **to, - enum thr_lock_type lock_type) -{ - DBUG_ENTER("ha_rocksdb::store_lock"); +THR_LOCK_DATA **ha_rocksdb::store_lock(THD *const thd, THR_LOCK_DATA **to, + enum thr_lock_type lock_type) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(thd != nullptr); + DBUG_ASSERT(to != nullptr); bool in_lock_tables = my_core::thd_in_lock_tables(thd); /* First, make a decision about MyRocks's internal locking */ - if (lock_type >= TL_WRITE_ALLOW_WRITE) - { + if (lock_type >= TL_WRITE_ALLOW_WRITE) { m_lock_rows = RDB_LOCK_WRITE; - } - else if (lock_type == TL_READ_WITH_SHARED_LOCKS) - { + } else if (lock_type == TL_READ_WITH_SHARED_LOCKS) { m_lock_rows = RDB_LOCK_READ; - } - else - { + } else { m_lock_rows = RDB_LOCK_NONE; - if (THDVAR(thd, lock_scanned_rows)) - { + if (THDVAR(thd, lock_scanned_rows)) { /* The following logic was copied directly from ha_innobase::store_lock_with_x_type() in @@ -8854,8 +8070,7 @@ THR_LOCK_DATA **ha_rocksdb::store_lock(THD* const thd, const uint sql_command = my_core::thd_sql_command(thd); if ((lock_type == TL_READ && in_lock_tables) || (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables) || - can_hold_read_locks_on_select(thd, lock_type)) - { + can_hold_read_locks_on_select(thd, lock_type)) { ulong tx_isolation = my_core::thd_tx_isolation(thd); if (sql_command != SQLCOM_CHECKSUM && ((my_core::thd_test_options(thd, OPTION_BIN_LOG) && @@ -8864,10 +8079,8 @@ 
THR_LOCK_DATA **ha_rocksdb::store_lock(THD* const thd, (lock_type != TL_READ && lock_type != TL_READ_NO_INSERT) || (sql_command != SQLCOM_INSERT_SELECT && sql_command != SQLCOM_REPLACE_SELECT && - sql_command != SQLCOM_UPDATE && - sql_command != SQLCOM_DELETE && - sql_command != SQLCOM_CREATE_TABLE))) - { + sql_command != SQLCOM_UPDATE && sql_command != SQLCOM_DELETE && + sql_command != SQLCOM_CREATE_TABLE))) { m_lock_rows = RDB_LOCK_READ; } } @@ -8875,8 +8088,7 @@ THR_LOCK_DATA **ha_rocksdb::store_lock(THD* const thd, } /* Then, tell the SQL layer what kind of locking it should use: */ - if (lock_type != TL_IGNORE && m_db_lock.type == TL_UNLOCK) - { + if (lock_type != TL_IGNORE && m_db_lock.type == TL_UNLOCK) { /* Here is where we get into the guts of a row level lock. If TL_UNLOCK is set @@ -8885,8 +8097,7 @@ THR_LOCK_DATA **ha_rocksdb::store_lock(THD* const thd, */ if ((lock_type >= TL_WRITE_CONCURRENT_INSERT && lock_type <= TL_WRITE) && - !in_lock_tables && !my_core::thd_tablespace_op(thd)) - { + !in_lock_tables && !my_core::thd_tablespace_op(thd)) { lock_type = TL_WRITE_ALLOW_WRITE; } @@ -8898,65 +8109,61 @@ THR_LOCK_DATA **ha_rocksdb::store_lock(THD* const thd, concurrent inserts to t2. 
*/ - if (lock_type == TL_READ_NO_INSERT && !in_lock_tables) - { + if (lock_type == TL_READ_NO_INSERT && !in_lock_tables) { lock_type = TL_READ; } - m_db_lock.type= lock_type; + m_db_lock.type = lock_type; } - *to++= &m_db_lock; + + *to++ = &m_db_lock; + DBUG_RETURN(to); } -void ha_rocksdb::read_thd_vars(THD* const thd) -{ - m_store_row_debug_checksums= THDVAR(thd, store_row_debug_checksums); - m_verify_row_debug_checksums= THDVAR(thd, verify_row_debug_checksums); - m_checksums_pct= THDVAR(thd, checksums_pct); +void ha_rocksdb::read_thd_vars(THD *const thd) { + m_store_row_debug_checksums = THDVAR(thd, store_row_debug_checksums); + m_verify_row_debug_checksums = THDVAR(thd, verify_row_debug_checksums); + m_checksums_pct = THDVAR(thd, checksums_pct); } -const char* ha_rocksdb::thd_rocksdb_tmpdir() -{ - const char* tmp_dir = THDVAR(ha_thd(), tmpdir); +const char *ha_rocksdb::thd_rocksdb_tmpdir() { + const char *tmp_dir = THDVAR(ha_thd(), tmpdir); /* We want to treat an empty string as nullptr, in these cases DDL operations will use the default --tmpdir passed to mysql instead. 
*/ if (tmp_dir != nullptr && *tmp_dir == '\0') { - tmp_dir= nullptr; + tmp_dir = nullptr; } - return(tmp_dir); + return (tmp_dir); } -int ha_rocksdb::external_lock(THD* const thd, int lock_type) -{ - int res= 0; - DBUG_ENTER("ha_rocksdb::external_lock"); +int ha_rocksdb::external_lock(THD *const thd, int lock_type) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(thd != nullptr); - int binlog_format= my_core::thd_binlog_format(thd); - bool unsafe_for_binlog= THDVAR(ha_thd(), unsafe_for_binlog); - if (lock_type == F_WRLCK && !thd->rli_slave && - !unsafe_for_binlog && + int res = 0; + int binlog_format = my_core::thd_binlog_format(thd); + bool unsafe_for_binlog = THDVAR(ha_thd(), unsafe_for_binlog); + if (lock_type == F_WRLCK && !thd->rli_slave && !unsafe_for_binlog && binlog_format != BINLOG_FORMAT_ROW && binlog_format != BINLOG_FORMAT_UNSPEC && - my_core::thd_binlog_filter_ok(thd)) - { - my_printf_error(ER_UNKNOWN_ERROR, - "Can't execute updates on master with binlog_format != ROW.", - MYF(0)); + my_core::thd_binlog_filter_ok(thd)) { + my_printf_error( + ER_UNKNOWN_ERROR, + "Can't execute updates on master with binlog_format != ROW.", MYF(0)); DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } - if (lock_type == F_UNLCK) - { - Rdb_transaction* const tx= get_or_create_tx(thd); + if (lock_type == F_UNLCK) { + Rdb_transaction *const tx = get_or_create_tx(thd); tx->io_perf_end_and_record(&m_io_perf); - if (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) - { + if (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { /* Do like InnoDB: when we get here, it's time to commit a single-statement transaction. @@ -8965,21 +8172,17 @@ int ha_rocksdb::external_lock(THD* const thd, int lock_type) for each of them, but that's ok because non-first tx->commit() calls will be no-ops. 
*/ - res= tx->commit_or_rollback(); + res = tx->commit_or_rollback(); } - } - else - { + } else { if (my_core::thd_tx_isolation(thd) < ISO_READ_COMMITTED || - my_core::thd_tx_isolation(thd) > ISO_REPEATABLE_READ) - { - my_printf_error(ER_UNKNOWN_ERROR, - "MyRocks supports only READ COMMITTED and " - "REPEATABLE READ isolation levels. " - "Please change from current isolation " - "level %s", - MYF(0), - tx_isolation_names[my_core::thd_tx_isolation(thd)]); + my_core::thd_tx_isolation(thd) > ISO_REPEATABLE_READ) { + my_printf_error( + ER_UNKNOWN_ERROR, "MyRocks supports only READ COMMITTED and " + "REPEATABLE READ isolation levels. " + "Please change from current isolation " + "level %s", + MYF(0), tx_isolation_names[my_core::thd_tx_isolation(thd)]); DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } /* @@ -8987,19 +8190,17 @@ int ha_rocksdb::external_lock(THD* const thd, int lock_type) is, handler->start_stmt() is not called for INSERTs. So, we put this code here. */ - Rdb_transaction* const tx= get_or_create_tx(thd); + Rdb_transaction *const tx = get_or_create_tx(thd); read_thd_vars(thd); - m_update_scope_is_valid= false; + m_update_scope_is_valid = false; - if (skip_unique_check()) - { + if (skip_unique_check()) { if ((thd->lex->sql_command == SQLCOM_INSERT || - thd->lex->sql_command == SQLCOM_LOAD || - thd->lex->sql_command == SQLCOM_REPLACE) && + thd->lex->sql_command == SQLCOM_LOAD || + thd->lex->sql_command == SQLCOM_REPLACE) && (thd->lex->duplicates == DUP_REPLACE || - thd->lex->duplicates == DUP_UPDATE)) - { + thd->lex->duplicates == DUP_UPDATE)) { my_printf_error(ER_UNKNOWN_ERROR, "When unique checking is disabled in MyRocks, INSERT," "UPDATE,LOAD statements with clauses that update or " @@ -9011,10 +8212,8 @@ int ha_rocksdb::external_lock(THD* const thd, int lock_type) } } - if (lock_type == F_WRLCK) - { - if (tx->is_tx_read_only()) - { + if (lock_type == F_WRLCK) { + if (tx->is_tx_read_only()) { my_printf_error(ER_UNKNOWN_ERROR, "Can't execute updates when you started 
a transaction " "with START TRANSACTION WITH CONSISTENT [ROCKSDB] " @@ -9030,13 +8229,12 @@ int ha_rocksdb::external_lock(THD* const thd, int lock_type) Note: sometimes, external_lock() can be called without a prior ::store_lock call. That's why we need to set lock_* members here, too. */ - m_lock_rows= RDB_LOCK_WRITE; + m_lock_rows = RDB_LOCK_WRITE; if (thd->lex->sql_command == SQLCOM_CREATE_INDEX || - thd->lex->sql_command == SQLCOM_DROP_INDEX || - thd->lex->sql_command == SQLCOM_ALTER_TABLE) - { - tx->m_ddl_transaction= true; + thd->lex->sql_command == SQLCOM_DROP_INDEX || + thd->lex->sql_command == SQLCOM_ALTER_TABLE) { + tx->m_ddl_transaction = true; } } rocksdb_register_tx(rocksdb_hton, thd, tx); @@ -9046,7 +8244,6 @@ int ha_rocksdb::external_lock(THD* const thd, int lock_type) DBUG_RETURN(res); } - /** @note A quote from ha_innobase::start_stmt(): @@ -9057,63 +8254,58 @@ int ha_rocksdb::external_lock(THD* const thd, int lock_type) </quote> */ -int ha_rocksdb::start_stmt(THD* const thd, thr_lock_type lock_type) -{ - Rdb_transaction* const tx= get_or_create_tx(thd); +int ha_rocksdb::start_stmt(THD *const thd, thr_lock_type lock_type) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(thd != nullptr); + + Rdb_transaction *const tx = get_or_create_tx(thd); read_thd_vars(thd); rocksdb_register_tx(ht, thd, tx); tx->io_perf_start(&m_io_perf); - return 0; + + DBUG_RETURN(HA_EXIT_SUCCESS); } -rocksdb::Range get_range( - uint32_t i, uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE*2], int offset1, - int offset2) -{ - uchar* buf_begin= buf; - uchar* buf_end= buf + Rdb_key_def::INDEX_NUMBER_SIZE; +rocksdb::Range get_range(uint32_t i, + uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2], + int offset1, int offset2) { + uchar *buf_begin = buf; + uchar *buf_end = buf + Rdb_key_def::INDEX_NUMBER_SIZE; rdb_netbuf_store_index(buf_begin, i + offset1); rdb_netbuf_store_index(buf_end, i + offset2); return rocksdb::Range( - rocksdb::Slice((const char*) buf_begin, Rdb_key_def::INDEX_NUMBER_SIZE), - 
rocksdb::Slice((const char*) buf_end, Rdb_key_def::INDEX_NUMBER_SIZE)); + rocksdb::Slice((const char *)buf_begin, Rdb_key_def::INDEX_NUMBER_SIZE), + rocksdb::Slice((const char *)buf_end, Rdb_key_def::INDEX_NUMBER_SIZE)); } -static rocksdb::Range get_range( - const Rdb_key_def& kd, - uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE*2], - int offset1, int offset2) -{ +static rocksdb::Range get_range(const Rdb_key_def &kd, + uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2], + int offset1, int offset2) { return get_range(kd.get_index_number(), buf, offset1, offset2); } -rocksdb::Range get_range(const Rdb_key_def& kd, - uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE*2]) -{ - if (kd.m_is_reverse_cf) - { +rocksdb::Range get_range(const Rdb_key_def &kd, + uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]) { + if (kd.m_is_reverse_cf) { return myrocks::get_range(kd, buf, 1, 0); - } - else - { + } else { return myrocks::get_range(kd, buf, 0, 1); } } -rocksdb::Range ha_rocksdb::get_range( - const int &i, uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE*2]) const -{ +rocksdb::Range +ha_rocksdb::get_range(const int &i, + uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]) const { return myrocks::get_range(*m_key_descr_arr[i], buf); } - /* Drop index thread's main logic */ -void Rdb_drop_index_thread::run() -{ +void Rdb_drop_index_thread::run() { mysql_mutex_lock(&m_signal_mutex); for (;;) { @@ -9129,11 +8321,11 @@ void Rdb_drop_index_thread::run() timespec ts; clock_gettime(CLOCK_REALTIME, &ts); ts.tv_sec += dict_manager.is_drop_index_empty() - ? 24*60*60 // no filtering - : 60; // filtering + ? 
24 * 60 * 60 // no filtering + : 60; // filtering - const auto ret __attribute__((__unused__)) = mysql_cond_timedwait( - &m_signal_cond, &m_signal_mutex, &ts); + const auto ret MY_ATTRIBUTE((__unused__)) = + mysql_cond_timedwait(&m_signal_cond, &m_signal_mutex, &ts); if (m_stop) { break; } @@ -9141,7 +8333,7 @@ void Rdb_drop_index_thread::run() DBUG_ASSERT(ret == 0 || ret == ETIMEDOUT); mysql_mutex_unlock(&m_signal_mutex); - std::vector<GL_INDEX_ID> indices; + std::unordered_set<GL_INDEX_ID> indices; dict_manager.get_ongoing_drop_indexes(&indices); if (!indices.empty()) { std::unordered_set<GL_INDEX_ID> finished; @@ -9149,78 +8341,65 @@ void Rdb_drop_index_thread::run() read_opts.total_order_seek = true; // disable bloom filter for (const auto d : indices) { - uint32 cf_flags= 0; - if (!dict_manager.get_cf_flags(d.cf_id, &cf_flags)) - { + uint32 cf_flags = 0; + if (!dict_manager.get_cf_flags(d.cf_id, &cf_flags)) { sql_print_error("RocksDB: Failed to get column family flags " "from cf id %u. 
MyRocks data dictionary may " - "get corrupted.", d.cf_id); + "get corrupted.", + d.cf_id); abort_with_stack_traces(); } - rocksdb::ColumnFamilyHandle* cfh= cf_manager.get_cf(d.cf_id); + rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(d.cf_id); DBUG_ASSERT(cfh); - const bool is_reverse_cf= cf_flags & Rdb_key_def::REVERSE_CF_FLAG; + const bool is_reverse_cf = cf_flags & Rdb_key_def::REVERSE_CF_FLAG; - bool index_removed= false; - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE]= {0}; + bool index_removed = false; + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE] = {0}; rdb_netbuf_store_uint32(key_buf, d.index_id); - const rocksdb::Slice - key = rocksdb::Slice((char*)key_buf, sizeof(key_buf)); - uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE*2]; - rocksdb::Range range = get_range(d.index_id, buf, is_reverse_cf?1:0, - is_reverse_cf?0:1); + const rocksdb::Slice key = + rocksdb::Slice((char *)key_buf, sizeof(key_buf)); + uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; + rocksdb::Range range = get_range(d.index_id, buf, is_reverse_cf ? 1 : 0, + is_reverse_cf ? 
0 : 1); rocksdb::CompactRangeOptions compact_range_options; compact_range_options.bottommost_level_compaction = - rocksdb::BottommostLevelCompaction::kForce; + rocksdb::BottommostLevelCompaction::kForce; compact_range_options.exclusive_manual_compaction = false; rocksdb::Status status = DeleteFilesInRange(rdb->GetBaseDB(), cfh, - &range.start, &range.limit); - if (!status.ok()) - { - if (status.IsShutdownInProgress()) - { + &range.start, &range.limit); + if (!status.ok()) { + if (status.IsShutdownInProgress()) { break; } rdb_handle_io_error(status, RDB_IO_ERROR_BG_THREAD); } - status = rdb->CompactRange( - compact_range_options, cfh, &range.start, &range.limit); - if (!status.ok()) - { - if (status.IsShutdownInProgress()) - { + status = rdb->CompactRange(compact_range_options, cfh, &range.start, + &range.limit); + if (!status.ok()) { + if (status.IsShutdownInProgress()) { break; } rdb_handle_io_error(status, RDB_IO_ERROR_BG_THREAD); } - std::unique_ptr<rocksdb::Iterator> it( - rdb->NewIterator(read_opts, cfh)); + std::unique_ptr<rocksdb::Iterator> it(rdb->NewIterator(read_opts, cfh)); it->Seek(key); - if (is_reverse_cf) - { - if (!it->Valid()) - { + if (is_reverse_cf) { + if (!it->Valid()) { it->SeekToLast(); - } - else - { + } else { it->Prev(); } } - if (!it->Valid()) - { - index_removed= true; - } - else - { - if (memcmp(it->key().data(), key_buf, Rdb_key_def::INDEX_NUMBER_SIZE)) - { + if (!it->Valid()) { + index_removed = true; + } else { + if (memcmp(it->key().data(), key_buf, + Rdb_key_def::INDEX_NUMBER_SIZE)) { // Key does not have same prefix - index_removed= true; + index_removed = true; } } - if (index_removed) - { + if (index_removed) { finished.insert(d); } } @@ -9235,14 +8414,11 @@ void Rdb_drop_index_thread::run() mysql_mutex_unlock(&m_signal_mutex); } - -Rdb_tbl_def* ha_rocksdb::get_table_if_exists(const char* const tablename) -{ +Rdb_tbl_def *ha_rocksdb::get_table_if_exists(const char *const tablename) { DBUG_ASSERT(tablename != nullptr); 
std::string str; - if (rdb_normalize_tablename(tablename, &str)) - { + if (rdb_normalize_tablename(tablename, &str)) { // We were not passed table name? DBUG_ASSERT(0); return nullptr; @@ -9258,14 +8434,16 @@ Rdb_tbl_def* ha_rocksdb::get_table_if_exists(const char* const tablename) tablename points to line in form "./dbname/tablename". */ -int ha_rocksdb::delete_table(const char* const tablename) -{ - DBUG_ENTER("ha_rocksdb::delete_table"); - const std::unique_ptr<rocksdb::WriteBatch> wb= dict_manager.begin(); - rocksdb::WriteBatch* const batch= wb.get(); +int ha_rocksdb::delete_table(const char *const tablename) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(tablename != nullptr); + + const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin(); + rocksdb::WriteBatch *const batch = wb.get(); /* Find the table in the hash */ - Rdb_tbl_def* const tbl = get_table_if_exists(tablename); + Rdb_tbl_def *const tbl = get_table_if_exists(tablename); if (!tbl) DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); dict_manager.add_drop_table(tbl->m_key_descr_arr, tbl->m_key_count, batch); @@ -9280,18 +8458,16 @@ int ha_rocksdb::delete_table(const char* const tablename) rdb_drop_idx_thread.signal(); - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } - -void ha_rocksdb::remove_rows(Rdb_tbl_def* const tbl) -{ - const rocksdb::WriteOptions wo= - rdb_get_rocksdb_write_options(handler::ha_thd()); +void ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) { + const rocksdb::WriteOptions wo = + rdb_get_rocksdb_write_options(handler::ha_thd()); rocksdb::ReadOptions opts; - opts.total_order_seek= true; - rocksdb::Iterator* const it= rdb->NewIterator(opts); + opts.total_order_seek = true; + rocksdb::Iterator *const it = rdb->NewIterator(opts); char key_buf[MAX_KEY_LENGTH]; uint key_len; @@ -9300,18 +8476,15 @@ void ha_rocksdb::remove_rows(Rdb_tbl_def* const tbl) (This is is not crash-safe, but it doesn't matter, because bulk row deletion will be handled on rocksdb side) */ - for (uint i= 0; i < 
tbl->m_key_count ; i++) - { - const Rdb_key_def& kd= *tbl->m_key_descr_arr[i]; - kd.get_infimum_key(reinterpret_cast<uchar*>(key_buf), &key_len); + for (uint i = 0; i < tbl->m_key_count; i++) { + const Rdb_key_def &kd = *tbl->m_key_descr_arr[i]; + kd.get_infimum_key(reinterpret_cast<uchar *>(key_buf), &key_len); const rocksdb::Slice table_key(key_buf, key_len); it->Seek(table_key); - while (it->Valid()) - { - const rocksdb::Slice key= it->key(); - if (!kd.covers_key(key)) - { + while (it->Valid()) { + const rocksdb::Slice key = it->key(); + if (!kd.covers_key(key)) { break; } @@ -9325,35 +8498,32 @@ void ha_rocksdb::remove_rows(Rdb_tbl_def* const tbl) delete it; } +int ha_rocksdb::rename_table(const char *const from, const char *const to) { + DBUG_ENTER_FUNC(); -int ha_rocksdb::rename_table(const char* const from, const char* const to) -{ DBUG_ASSERT(from != nullptr); DBUG_ASSERT(to != nullptr); - bool write_err= false; + bool write_err = false; std::string from_str; std::string to_str; std::string from_db; std::string to_db; if (rdb_normalize_tablename(from, &from_str) || - rdb_split_normalized_tablename(from_str, &from_db)) - { - return HA_ERR_INTERNAL_ERROR; + rdb_split_normalized_tablename(from_str, &from_db)) { + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } if (rdb_normalize_tablename(to, &to_str) || - rdb_split_normalized_tablename(to_str, &to_db)) - { - return HA_ERR_INTERNAL_ERROR; + rdb_split_normalized_tablename(to_str, &to_db)) { + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } // If the user changed the database part of the name then validate that the // 'to' database exists. 
- if (from_db != to_db && !rdb_database_exists(to_db)) - { - return HA_ERR_INTERNAL_ERROR; + if (from_db != to_db && !rdb_database_exists(to_db)) { + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } /* Check if any index has a per-index column family */ @@ -9368,30 +8538,28 @@ int ha_rocksdb::rename_table(const char* const from, const char* const to) it has been opened by this statement), and check is_auto_cf there. */ Rdb_tbl_def *tdef; - if (!(tdef= ddl_manager.find(from_str))) - return HA_ERR_INTERNAL_ERROR; + if (!(tdef = ddl_manager.find(from_str))) + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); - for (uint i= 0; i < tdef->m_key_count; i++) - { + for (uint i = 0; i < tdef->m_key_count; i++) { DBUG_ASSERT(tdef->m_key_descr_arr != nullptr); - if (tdef->m_key_descr_arr[i]->m_is_auto_cf) - { + if (tdef->m_key_descr_arr[i]->m_is_auto_cf) { my_error(ER_NOT_SUPPORTED_YET, MYF(0), "ALTER TABLE on table with per-index CF"); - return HA_ERR_INTERNAL_ERROR; + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } } } - const std::unique_ptr<rocksdb::WriteBatch> wb= dict_manager.begin(); - rocksdb::WriteBatch* const batch= wb.get(); + const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin(); + rocksdb::WriteBatch *const batch = wb.get(); dict_manager.lock(); - write_err= ddl_manager.rename(from_str, to_str, batch) - || dict_manager.commit(batch); + write_err = + ddl_manager.rename(from_str, to_str, batch) || dict_manager.commit(batch); dict_manager.unlock(); - return write_err ? HA_ERR_INTERNAL_ERROR : 0; + DBUG_RETURN(write_err ? 
HA_ERR_INTERNAL_ERROR : 0); } /** @@ -9405,25 +8573,25 @@ int ha_rocksdb::rename_table(const char* const from, const char* const to) */ -bool ha_rocksdb::check_if_incompatible_data(HA_CREATE_INFO* const info, - uint table_changes) -{ - DBUG_ENTER("ha_rocksdb::check_if_incompatible_data"); +bool ha_rocksdb::check_if_incompatible_data(HA_CREATE_INFO *const info, + uint table_changes) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(info != nullptr); + // this function is needed only for online alter-table DBUG_RETURN(COMPATIBLE_DATA_NO); } - -int ha_rocksdb::extra(enum ha_extra_function operation) -{ - DBUG_ENTER("ha_rocksdb::extra"); +int ha_rocksdb::extra(enum ha_extra_function operation) { + DBUG_ENTER_FUNC(); switch (operation) { case HA_EXTRA_KEYREAD: - m_keyread_only= true; + m_keyread_only = true; break; case HA_EXTRA_NO_KEYREAD: - m_keyread_only= false; + m_keyread_only = false; break; case HA_EXTRA_FLUSH: /* @@ -9436,17 +8604,17 @@ int ha_rocksdb::extra(enum ha_extra_function operation) break; } - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } /* Given a starting key and an ending key, estimate the number of rows that will exist between the two keys. 
*/ -ha_rows ha_rocksdb::records_in_range(uint inx, key_range* const min_key, - key_range* const max_key) -{ - DBUG_ENTER("ha_rocksdb::records_in_range"); +ha_rows ha_rocksdb::records_in_range(uint inx, key_range *const min_key, + key_range *const max_key) { + DBUG_ENTER_FUNC(); + ha_rows ret = THDVAR(ha_thd(), records_in_range); if (ret) { DBUG_RETURN(ret); @@ -9458,71 +8626,67 @@ ha_rows ha_rocksdb::records_in_range(uint inx, key_range* const min_key, } } - const Rdb_key_def& kd= *m_key_descr_arr[inx]; + const Rdb_key_def &kd = *m_key_descr_arr[inx]; - uint size1=0; + uint size1 = 0; if (min_key) { size1 = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, - min_key->key, min_key->keypart_map); + min_key->key, min_key->keypart_map); if (min_key->flag == HA_READ_PREFIX_LAST_OR_PREV || min_key->flag == HA_READ_PREFIX_LAST || - min_key->flag == HA_READ_AFTER_KEY) - { + min_key->flag == HA_READ_AFTER_KEY) { kd.successor(m_sk_packed_tuple, size1); } } else { kd.get_infimum_key(m_sk_packed_tuple, &size1); } - uint size2=0; + uint size2 = 0; if (max_key) { size2 = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple_old, - max_key->key, max_key->keypart_map); + max_key->key, max_key->keypart_map); if (max_key->flag == HA_READ_PREFIX_LAST_OR_PREV || max_key->flag == HA_READ_PREFIX_LAST || - max_key->flag == HA_READ_AFTER_KEY) - { + max_key->flag == HA_READ_AFTER_KEY) { kd.successor(m_sk_packed_tuple_old, size2); } // pad the upper key with FFFFs to make sure it is more than the lower if (size1 > size2) { - memset(m_sk_packed_tuple_old+size2, 0xff, size1-size2); + memset(m_sk_packed_tuple_old + size2, 0xff, size1 - size2); size2 = size1; } } else { kd.get_supremum_key(m_sk_packed_tuple_old, &size2); } - const rocksdb::Slice slice1((const char*) m_sk_packed_tuple, size1); - const rocksdb::Slice slice2((const char*) m_sk_packed_tuple_old, size2); + const rocksdb::Slice slice1((const char *)m_sk_packed_tuple, size1); + const rocksdb::Slice slice2((const char 
*)m_sk_packed_tuple_old, size2); // slice1 >= slice2 means no row will match - if (slice1.compare(slice2) >= 0) - { - DBUG_RETURN(0); + if (slice1.compare(slice2) >= 0) { + DBUG_RETURN(HA_EXIT_SUCCESS); } - rocksdb::Range r( - kd.m_is_reverse_cf ? slice2 : slice1, - kd.m_is_reverse_cf ? slice1 : slice2 - ); + rocksdb::Range r(kd.m_is_reverse_cf ? slice2 : slice1, + kd.m_is_reverse_cf ? slice1 : slice2); - uint64_t sz=0; + uint64_t sz = 0; auto disk_size = kd.m_stats.m_actual_disk_size; if (disk_size == 0) disk_size = kd.m_stats.m_data_size; auto rows = kd.m_stats.m_rows; - if (rows == 0 || disk_size == 0) - { - rows= 1; - disk_size= ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE; + if (rows == 0 || disk_size == 0) { + rows = 1; + disk_size = ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE; } + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" // Getting statistics, including from Memtables - rdb->GetApproximateSizes( - kd.get_cf(), - &r, 1, - &sz, true); - ret = rows*sz/disk_size; + rdb->GetApproximateSizes(kd.get_cf(), &r, 1, &sz, true); +#pragma GCC diagnostic pop + + ret = rows * sz / disk_size; /* GetApproximateSizes() gives estimates so ret might exceed stats.records. @@ -9531,30 +8695,29 @@ ha_rows ha_rocksdb::records_in_range(uint inx, key_range* const min_key, To prevent this, changing estimated records slightly smaller than stats.records. 
*/ - if (ret >= stats.records) - { + if (ret >= stats.records) { ret = stats.records * 0.99; } - if (rocksdb_debug_optimizer_n_rows > 0) - { - ret= rocksdb_debug_optimizer_n_rows; - } - else if (ret == 0) - { + if (rocksdb_debug_optimizer_n_rows > 0) { + ret = rocksdb_debug_optimizer_n_rows; + } else if (ret == 0) { ret = 1; } DBUG_RETURN(ret); } +void ha_rocksdb::update_create_info(HA_CREATE_INFO *const create_info) { + DBUG_ENTER_FUNC(); -void ha_rocksdb::update_create_info(HA_CREATE_INFO* const create_info) -{ - if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) - { - create_info->auto_increment_value= m_tbl_def->m_auto_incr_val; + DBUG_ASSERT(create_info != nullptr); + + if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) { + create_info->auto_increment_value = m_tbl_def->m_auto_incr_val; } + + DBUG_VOID_RETURN; } /** @@ -9571,85 +8734,81 @@ void ha_rocksdb::update_create_info(HA_CREATE_INFO* const create_info) column family, the first row of the next index id should be the last row of the previous index id. 
*/ -int ha_rocksdb::optimize(THD* const thd, HA_CHECK_OPT* const check_opt) -{ - int rc= 0; - for (uint i= 0; i < table->s->keys; i++) - { - uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE*2]; +int ha_rocksdb::optimize(THD *const thd, HA_CHECK_OPT *const check_opt) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(thd != nullptr); + DBUG_ASSERT(check_opt != nullptr); + + int rc = 0; + for (uint i = 0; i < table->s->keys; i++) { + uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; auto range = get_range(i, buf); if (!rdb->CompactRange(rocksdb::CompactRangeOptions(), - m_key_descr_arr[i]->get_cf(), - &range.start, &range.limit).ok()) - { - rc= 1; + m_key_descr_arr[i]->get_cf(), &range.start, + &range.limit) + .ok()) { + rc = 1; break; } } - return rc; + + DBUG_RETURN(rc); } -int ha_rocksdb::calculate_stats(const TABLE* const table_arg, THD* const thd, - HA_CHECK_OPT* const check_opt) -{ - DBUG_ENTER("ha_rocksdb::calculate_stats"); +int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd, + HA_CHECK_OPT *const check_opt) { + DBUG_ENTER_FUNC(); // find per column family key ranges which need to be queried - std::unordered_map<rocksdb::ColumnFamilyHandle*, std::vector<rocksdb::Range>> - ranges; + std::unordered_map<rocksdb::ColumnFamilyHandle *, std::vector<rocksdb::Range>> + ranges; std::unordered_set<GL_INDEX_ID> ids_to_check; std::vector<uchar> buf(table_arg->s->keys * 2 * - Rdb_key_def::INDEX_NUMBER_SIZE); - for (uint i = 0; i < table_arg->s->keys; i++) - { + Rdb_key_def::INDEX_NUMBER_SIZE); + for (uint i = 0; i < table_arg->s->keys; i++) { const auto bufp = &buf[i * 2 * Rdb_key_def::INDEX_NUMBER_SIZE]; - const Rdb_key_def& kd= *m_key_descr_arr[i]; + const Rdb_key_def &kd = *m_key_descr_arr[i]; ranges[kd.get_cf()].push_back(get_range(i, bufp)); ids_to_check.insert(kd.get_gl_index_id()); } // for analyze statements, force flush on memtable to get accurate cardinality - Rdb_cf_manager& cf_manager= rdb_get_cf_manager(); + Rdb_cf_manager &cf_manager = 
rdb_get_cf_manager(); if (thd != nullptr && THDVAR(thd, flush_memtable_on_analyze) && - !rocksdb_pause_background_work) - { - for (auto it : ids_to_check) - { + !rocksdb_pause_background_work) { + for (auto it : ids_to_check) { rdb->Flush(rocksdb::FlushOptions(), cf_manager.get_cf(it.cf_id)); } } // get RocksDB table properties for these ranges rocksdb::TablePropertiesCollection props; - for (auto it : ranges) - { - const auto old_size __attribute__((__unused__)) = props.size(); + for (auto it : ranges) { + const auto old_size MY_ATTRIBUTE((__unused__)) = props.size(); const auto status = rdb->GetPropertiesOfTablesInRange( - it.first, &it.second[0], it.second.size(), &props); + it.first, &it.second[0], it.second.size(), &props); DBUG_ASSERT(props.size() >= old_size); if (!status.ok()) DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } - int num_sst= 0; + int num_sst = 0; // group stats per index id std::unordered_map<GL_INDEX_ID, Rdb_index_stats> stats; - for (const auto &it : ids_to_check) - { + for (const auto &it : ids_to_check) { // Initialize the stats to 0. If there are no files that contain // this gl_index_id, then 0 should be stored for the cached stats. stats[it] = Rdb_index_stats(it); } - for (const auto &it : props) - { + for (const auto &it : props) { std::vector<Rdb_index_stats> sst_stats; Rdb_tbl_prop_coll::read_stats_from_tbl_props(it.second, &sst_stats); /* sst_stats is a list of index statistics for indexes that have entries in the current SST file. */ - for (const auto &it1 : sst_stats) - { + for (const auto &it1 : sst_stats) { /* Only update statistics for indexes that belong to this SQL table. 
@@ -9661,7 +8820,7 @@ int ha_rocksdb::calculate_stats(const TABLE* const table_arg, THD* const thd, if (ids_to_check.find(it1.m_gl_index_id) == ids_to_check.end()) continue; - auto kd= ddl_manager.safe_find(it1.m_gl_index_id); + auto kd = ddl_manager.safe_find(it1.m_gl_index_id); DBUG_ASSERT(kd != nullptr); stats[it1.m_gl_index_id].merge(it1, true, kd->max_storage_fmt_length()); } @@ -9672,24 +8831,23 @@ int ha_rocksdb::calculate_stats(const TABLE* const table_arg, THD* const thd, ddl_manager.set_stats(stats); ddl_manager.persist_stats(true); - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } -int ha_rocksdb::analyze(THD* const thd, HA_CHECK_OPT* const check_opt) -{ - DBUG_ENTER("ha_rocksdb::analyze"); +int ha_rocksdb::analyze(THD *const thd, HA_CHECK_OPT *const check_opt) { + DBUG_ENTER_FUNC(); - if (!table) - DBUG_RETURN(1); + if (!table) { + DBUG_RETURN(HA_EXIT_FAILURE); + } DBUG_RETURN(calculate_stats(table, thd, check_opt)); } void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc, ulonglong nb_desired_values, - ulonglong* const first_value, - ulonglong* const nb_reserved_values) -{ + ulonglong *const first_value, + ulonglong *const nb_reserved_values) { /* MySQL has a somewhat complicated way of handling the auto-increment value. 
The first time get_auto_increment is called for a statement, @@ -9707,36 +8865,31 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc, */ DEBUG_SYNC(ha_thd(), "rocksdb.autoinc_vars"); - if (off > inc) - { - off= 1; + if (off > inc) { + off = 1; } // Local variable reference to simplify code below - std::atomic<longlong>& auto_incr= m_tbl_def->m_auto_incr_val; + std::atomic<longlong> &auto_incr = m_tbl_def->m_auto_incr_val; - if (inc == 1 && off == 1) - { + if (inc == 1 && off == 1) { // Optimization for the standard case where we are always simply // incrementing from the last position - *first_value= auto_incr++; - } - else - { + *first_value = auto_incr++; + } else { // The next value can be more complicated if either `inc` or 'off' is not 1 longlong new_val; - longlong last_val= auto_incr; + longlong last_val = auto_incr; // Loop until we can correctly update the atomic value - do - { + do { // Calculate the next value in the auto increment series: // offset + N * increment // where N is 0, 1, 2, ... // // For further information please visit: // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html - new_val= ((last_val + (inc - off) - 1) / inc) * inc + off; + new_val = ((last_val + (inc - off) - 1) / inc) * inc + off; // Attempt to store the new value (plus 1 since m_auto_incr_val contains // the next available value) into the atomic value. If the current @@ -9745,26 +8898,24 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc, // with the current value). 
} while (!auto_incr.compare_exchange_weak(last_val, new_val + 1)); - *first_value= new_val; + *first_value = new_val; } - *nb_reserved_values= 1; + *nb_reserved_values = 1; } - #ifndef DBUG_OFF /* Debugger help function */ static char dbug_item_print_buf[512]; -const char *dbug_print_item(Item* const item) -{ - char* const buf= dbug_item_print_buf; +const char *dbug_print_item(Item *const item) { + char *const buf = dbug_item_print_buf; String str(buf, sizeof(dbug_item_print_buf), &my_charset_bin); str.length(0); if (!item) return "(Item*)nullptr"; - item->print(&str ,QT_ORDINARY); + item->print(&str, QT_ORDINARY); if (str.c_ptr() == buf) return buf; else @@ -9773,7 +8924,6 @@ const char *dbug_print_item(Item* const item) #endif /*DBUG_OFF*/ - /** SQL layer calls this function to push an index condition. @@ -9795,8 +8945,9 @@ const char *dbug_print_item(Item* const item) Part of condition we couldn't check (always nullptr). */ -class Item* ha_rocksdb::idx_cond_push(uint keyno, class Item* const idx_cond) -{ +class Item *ha_rocksdb::idx_cond_push(uint keyno, class Item *const idx_cond) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(keyno != MAX_KEY); DBUG_ASSERT(idx_cond != nullptr); @@ -9805,10 +8956,9 @@ class Item* ha_rocksdb::idx_cond_push(uint keyno, class Item* const idx_cond) in_range_check_pushed_down = TRUE; /* We will check the whole condition */ - return nullptr; + DBUG_RETURN(nullptr); } - /* @brief Check the index condition. 
@@ -9827,13 +8977,11 @@ class Item* ha_rocksdb::idx_cond_push(uint keyno, class Item* const idx_cond) and return it) */ -enum icp_result ha_rocksdb::check_index_cond() const -{ +enum icp_result ha_rocksdb::check_index_cond() const { DBUG_ASSERT(pushed_idx_cond); DBUG_ASSERT(pushed_idx_cond_keyno != MAX_KEY); - if (end_range && compare_key_icp(end_range) > 0) - { + if (end_range && compare_key_icp(end_range) > 0) { /* caller should return HA_ERR_END_OF_FILE already */ return ICP_OUT_OF_RANGE; } @@ -9841,7 +8989,6 @@ enum icp_result ha_rocksdb::check_index_cond() const return pushed_idx_cond->val_int() ? ICP_MATCH : ICP_NO_MATCH; } - /** Checking if an index is used for ascending scan or not @@ -9853,37 +9000,32 @@ enum icp_result ha_rocksdb::check_index_cond() const @param kd @param find_flag */ -bool ha_rocksdb::is_ascending(const Rdb_key_def& kd, - enum ha_rkey_function find_flag) const -{ +bool ha_rocksdb::is_ascending(const Rdb_key_def &kd, + enum ha_rkey_function find_flag) const { bool is_ascending; switch (find_flag) { - case HA_READ_KEY_EXACT: - { + case HA_READ_KEY_EXACT: { is_ascending = !kd.m_is_reverse_cf; break; } - case HA_READ_PREFIX: - { - is_ascending= true; + case HA_READ_PREFIX: { + is_ascending = true; break; } case HA_READ_KEY_OR_NEXT: - case HA_READ_AFTER_KEY: - { + case HA_READ_AFTER_KEY: { is_ascending = !kd.m_is_reverse_cf; break; } case HA_READ_KEY_OR_PREV: case HA_READ_BEFORE_KEY: case HA_READ_PREFIX_LAST: - case HA_READ_PREFIX_LAST_OR_PREV: - { + case HA_READ_PREFIX_LAST_OR_PREV: { is_ascending = kd.m_is_reverse_cf; break; } default: - is_ascending= false; + is_ascending = false; } return is_ascending; } @@ -9893,16 +9035,23 @@ bool ha_rocksdb::is_ascending(const Rdb_key_def& kd, */ my_core::enum_alter_inplace_result ha_rocksdb::check_if_supported_inplace_alter( - TABLE* /* altered_table */, - my_core::Alter_inplace_info* const ha_alter_info) -{ - DBUG_ENTER("check_if_supported_alter"); + TABLE *altered_table, my_core::Alter_inplace_info 
*const ha_alter_info) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(ha_alter_info != nullptr); if (ha_alter_info->handler_flags & ~(my_core::Alter_inplace_info::DROP_INDEX | my_core::Alter_inplace_info::DROP_UNIQUE_INDEX | - my_core::Alter_inplace_info::ADD_INDEX)) - { + my_core::Alter_inplace_info::ADD_INDEX | + my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) { + DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED); + } + + /* We don't support unique keys on table w/ no primary keys */ + if ((ha_alter_info->handler_flags & + my_core::Alter_inplace_info::ADD_UNIQUE_INDEX) && + has_hidden_pk(altered_table)) { DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED); } @@ -9938,37 +9087,41 @@ my_core::enum_alter_inplace_result ha_rocksdb::check_if_supported_inplace_alter( @retval false Success */ bool ha_rocksdb::prepare_inplace_alter_table( - TABLE* const altered_table, - my_core::Alter_inplace_info* const ha_alter_info) -{ - DBUG_ENTER("prepare_inplace_alter_table"); - const uint n_dropped_keys= ha_alter_info->index_drop_count; - const uint n_added_keys= ha_alter_info->index_add_count; - const uint old_n_keys= m_tbl_def->m_key_count; - uint new_n_keys= altered_table->s->keys; - if (has_hidden_pk(altered_table)) - { + TABLE *const altered_table, + my_core::Alter_inplace_info *const ha_alter_info) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(altered_table != nullptr); + DBUG_ASSERT(ha_alter_info != nullptr); + + const uint n_dropped_keys = ha_alter_info->index_drop_count; + const uint n_added_keys = ha_alter_info->index_add_count; + const uint old_n_keys = m_tbl_def->m_key_count; + uint new_n_keys = altered_table->s->keys; + + if (has_hidden_pk(altered_table)) { new_n_keys += 1; } - std::shared_ptr<Rdb_key_def>* const old_key_descr= m_tbl_def->m_key_descr_arr; - std::shared_ptr<Rdb_key_def>* const new_key_descr= + std::shared_ptr<Rdb_key_def> *const old_key_descr = + m_tbl_def->m_key_descr_arr; + std::shared_ptr<Rdb_key_def> *const new_key_descr = new 
std::shared_ptr<Rdb_key_def>[new_n_keys]; - Rdb_tbl_def* const new_tdef= new Rdb_tbl_def(m_tbl_def->full_tablename()); - new_tdef->m_key_descr_arr= new_key_descr; - new_tdef->m_key_count= new_n_keys; - new_tdef->m_auto_incr_val= - m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed); - new_tdef->m_hidden_pk_val= - m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed); + Rdb_tbl_def *const new_tdef = new Rdb_tbl_def(m_tbl_def->full_tablename()); + new_tdef->m_key_descr_arr = new_key_descr; + new_tdef->m_key_count = new_n_keys; + new_tdef->m_auto_incr_val = + m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed); + new_tdef->m_hidden_pk_val = + m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed); if (ha_alter_info->handler_flags & - (my_core::Alter_inplace_info::DROP_INDEX | - my_core::Alter_inplace_info::DROP_UNIQUE_INDEX | - my_core::Alter_inplace_info::ADD_INDEX) - && create_key_defs(altered_table, new_tdef, table, m_tbl_def)) - { + (my_core::Alter_inplace_info::DROP_INDEX | + my_core::Alter_inplace_info::DROP_UNIQUE_INDEX | + my_core::Alter_inplace_info::ADD_INDEX | + my_core::Alter_inplace_info::ADD_UNIQUE_INDEX) && + create_key_defs(altered_table, new_tdef, table, m_tbl_def)) { /* Delete the new key descriptors */ delete[] new_key_descr; @@ -9976,9 +9129,13 @@ bool ha_rocksdb::prepare_inplace_alter_table( Explicitly mark as nullptr so we don't accidentally remove entries from data dictionary on cleanup (or cause double delete[]). 
*/ - new_tdef->m_key_descr_arr= nullptr; + new_tdef->m_key_descr_arr = nullptr; delete new_tdef; - DBUG_RETURN(1); + + my_printf_error(ER_UNKNOWN_ERROR, + "MyRocks failed creating new key definitions during alter.", + MYF(0)); + DBUG_RETURN(HA_EXIT_FAILURE); } std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes; @@ -9986,16 +9143,13 @@ bool ha_rocksdb::prepare_inplace_alter_table( uint i; uint j; - const KEY* key; + const KEY *key; /* Determine which(if any) key definition(s) need to be dropped */ - for (i = 0; i < ha_alter_info->index_drop_count; i++) - { - const KEY* const key = ha_alter_info->index_drop_buffer[i]; - for (j = 0; j < old_n_keys; j++) - { - if (!old_key_descr[j]->m_name.compare(key->name)) - { + for (i = 0; i < ha_alter_info->index_drop_count; i++) { + const KEY *const key = ha_alter_info->index_drop_buffer[i]; + for (j = 0; j < old_n_keys; j++) { + if (!old_key_descr[j]->m_name.compare(key->name)) { dropped_index_ids.insert(old_key_descr[j]->get_gl_index_id()); break; } @@ -10005,14 +9159,10 @@ bool ha_rocksdb::prepare_inplace_alter_table( DBUG_ASSERT(dropped_index_ids.size() == ha_alter_info->index_drop_count); /* Determine which(if any) key definitions(s) need to be added */ - for (i = 0; i < ha_alter_info->index_add_count; i++) - { - key = &ha_alter_info->key_info_buffer[ - ha_alter_info->index_add_buffer[i]]; - for (j = 0; j < new_n_keys; j++) - { - if (!new_key_descr[j]->m_name.compare(key->name)) - { + for (i = 0; i < ha_alter_info->index_add_count; i++) { + key = &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]]; + for (j = 0; j < new_n_keys; j++) { + if (!new_key_descr[j]->m_name.compare(key->name)) { added_indexes.insert(new_key_descr[j]); break; } @@ -10021,11 +9171,11 @@ bool ha_rocksdb::prepare_inplace_alter_table( DBUG_ASSERT(added_indexes.size() == ha_alter_info->index_add_count); - ha_alter_info->handler_ctx= new Rdb_inplace_alter_ctx( - new_tdef, old_key_descr, new_key_descr, old_n_keys, new_n_keys, - 
added_indexes, dropped_index_ids, n_added_keys, n_dropped_keys); + ha_alter_info->handler_ctx = new Rdb_inplace_alter_ctx( + new_tdef, old_key_descr, new_key_descr, old_n_keys, new_n_keys, + added_indexes, dropped_index_ids, n_added_keys, n_dropped_keys); - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } /** @@ -10048,53 +9198,74 @@ bool ha_rocksdb::prepare_inplace_alter_table( @retval true Error @retval false Success */ -bool ha_rocksdb::inplace_alter_table(TABLE* const altered_table, - my_core::Alter_inplace_info* const ha_alter_info) -{ - /* Do nothing for now */ - DBUG_ENTER("inplace_alter_table"); - DBUG_ASSERT(ha_alter_info->handler_ctx); +bool ha_rocksdb::inplace_alter_table( + TABLE *const altered_table, + my_core::Alter_inplace_info *const ha_alter_info) { + DBUG_ENTER_FUNC(); - Rdb_inplace_alter_ctx* const ctx= - static_cast<Rdb_inplace_alter_ctx*> (ha_alter_info->handler_ctx); + DBUG_ASSERT(altered_table != nullptr); + DBUG_ASSERT(ha_alter_info != nullptr); + DBUG_ASSERT(ha_alter_info->handler_ctx != nullptr); - if (ha_alter_info->handler_flags & my_core::Alter_inplace_info::ADD_INDEX) - { + Rdb_inplace_alter_ctx *const ctx = + static_cast<Rdb_inplace_alter_ctx *>(ha_alter_info->handler_ctx); + + if (ha_alter_info->handler_flags & + (my_core::Alter_inplace_info::ADD_INDEX | + my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) { /* Buffers need to be set up again to account for new, possibly longer secondary keys. */ free_key_buffers(); - if (alloc_key_buffers(altered_table, ctx->m_new_tdef)) - { - DBUG_RETURN(1); + + DBUG_ASSERT(ctx != nullptr); + + /* + If adding unique index, allocate special buffers for duplicate checking. 
+ */ + int err; + if ((err = alloc_key_buffers( + altered_table, ctx->m_new_tdef, + ha_alter_info->handler_flags & + my_core::Alter_inplace_info::ADD_UNIQUE_INDEX))) { + my_printf_error(ER_UNKNOWN_ERROR, + "MyRocks failed allocating key buffers during alter.", + MYF(0)); + DBUG_RETURN(err); } - /* Populate all new secondary keys by scanning primary key */ - if (inplace_populate_sk(altered_table, ctx->m_added_indexes)) - { - DBUG_RETURN(1); + /* Populate all new secondary keys by scanning the primary key. */ + if ((err = inplace_populate_sk(altered_table, ctx->m_added_indexes))) { + my_printf_error(ER_UNKNOWN_ERROR, + "MyRocks failed populating secondary key during alter.", + MYF(0)); + DBUG_RETURN(HA_EXIT_FAILURE); } } - DBUG_EXECUTE_IF("myrocks_simulate_index_create_rollback", DBUG_RETURN(1);); - DBUG_RETURN(0); + DBUG_EXECUTE_IF("myrocks_simulate_index_create_rollback", { + dbug_create_err_inplace_alter(); + DBUG_RETURN(HA_EXIT_FAILURE); + };); + + DBUG_RETURN(HA_EXIT_SUCCESS); } /** Scan the Primary Key index entries and populate the new secondary keys. 
*/ -int ha_rocksdb::inplace_populate_sk(const TABLE* const new_table_arg, - const std::unordered_set<std::shared_ptr<Rdb_key_def>>& indexes) -{ - DBUG_ENTER("ha_rocksdb::inplace_populate_sk"); - const std::unique_ptr<rocksdb::WriteBatch> wb= dict_manager.begin(); - rocksdb::WriteBatch* const batch= wb.get(); +int ha_rocksdb::inplace_populate_sk( + TABLE *const new_table_arg, + const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes) { + DBUG_ENTER_FUNC(); + + const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin(); + rocksdb::WriteBatch *const batch = wb.get(); /* Update the data dictionary */ std::unordered_set<GL_INDEX_ID> create_index_ids; - for (const auto& index : indexes) - { + for (const auto &index : indexes) { create_index_ids.insert(index->get_gl_index_id()); } dict_manager.add_create_index(create_index_ids, batch); @@ -10102,8 +9273,8 @@ int ha_rocksdb::inplace_populate_sk(const TABLE* const new_table_arg, const bool hidden_pk_exists = has_hidden_pk(table); - int res= 0; - Rdb_transaction *tx= get_or_create_tx(table->in_use); + int res = 0; + Rdb_transaction *tx = get_or_create_tx(table->in_use); /* There is one specific scenario where m_sst_info may not be nullptr. This @@ -10133,28 +9304,26 @@ int ha_rocksdb::inplace_populate_sk(const TABLE* const new_table_arg, open handlers have been closed at this point, and the one we're on is the only one left. 
*/ - if (m_sst_info != nullptr) - { - if ((res= finalize_bulk_load())) - { + if (m_sst_info != nullptr) { + if ((res = finalize_bulk_load())) { DBUG_RETURN(res); } tx->commit(); } - const ulonglong rdb_merge_buf_size= THDVAR(ha_thd(), merge_buf_size); - const ulonglong rdb_merge_combine_read_size= THDVAR(ha_thd(), - merge_combine_read_size); + const ulonglong rdb_merge_buf_size = THDVAR(ha_thd(), merge_buf_size); + const ulonglong rdb_merge_combine_read_size = + THDVAR(ha_thd(), merge_combine_read_size); + + for (const auto &index : indexes) { + const rocksdb::Comparator *index_comp = index->get_cf()->GetComparator(); + bool is_unique_index = + new_table_arg->key_info[index->get_keyno()].flags & HA_NOSAME; - for (const auto& index : indexes) - { - const rocksdb::Comparator* index_comp= index->get_cf()->GetComparator(); Rdb_index_merge rdb_merge(thd_rocksdb_tmpdir(), rdb_merge_buf_size, - rdb_merge_combine_read_size, - index_comp); + rdb_merge_combine_read_size, index_comp); - if ((res= rdb_merge.init())) - { + if ((res = rdb_merge.init())) { DBUG_RETURN(res); } @@ -10163,17 +9332,14 @@ int ha_rocksdb::inplace_populate_sk(const TABLE* const new_table_arg, as the pk index position may have changed in the case of hidden primary keys. 
*/ - const uint pk= pk_index(table, m_tbl_def); + const uint pk = pk_index(table, m_tbl_def); ha_index_init(pk, true); /* Scan each record in the primary key in order */ for (res = index_first(table->record[0]); res == 0; - res = index_next(table->record[0])) - { - longlong hidden_pk_id= 0; - if (hidden_pk_exists && - read_hidden_pk_id_from_rowkey(&hidden_pk_id)) - { + res = index_next(table->record[0])) { + longlong hidden_pk_id = 0; + if (hidden_pk_exists && read_hidden_pk_id_from_rowkey(&hidden_pk_id)) { // NO_LINT_DEBUG sql_print_error("Error retrieving hidden pk id."); ha_index_end(); @@ -10181,32 +9347,27 @@ int ha_rocksdb::inplace_populate_sk(const TABLE* const new_table_arg, } /* Create new secondary index entry */ - const int new_packed_size= index->pack_record( - new_table_arg, m_pack_buffer, - table->record[0], - m_sk_packed_tuple, &m_sk_tails, - should_store_row_debug_checksums(), - hidden_pk_id); - - const rocksdb::Slice key= rocksdb::Slice( - reinterpret_cast<const char*>(m_sk_packed_tuple), new_packed_size); - const rocksdb::Slice val= rocksdb::Slice( - reinterpret_cast<const char*>(m_sk_tails.ptr()), - m_sk_tails.get_current_pos()); + const int new_packed_size = index->pack_record( + new_table_arg, m_pack_buffer, table->record[0], m_sk_packed_tuple, + &m_sk_tails, should_store_row_debug_checksums(), hidden_pk_id); + + const rocksdb::Slice key = rocksdb::Slice( + reinterpret_cast<const char *>(m_sk_packed_tuple), new_packed_size); + const rocksdb::Slice val = + rocksdb::Slice(reinterpret_cast<const char *>(m_sk_tails.ptr()), + m_sk_tails.get_current_pos()); /* Add record to offset tree in preparation for writing out to disk in sorted chunks. 
*/ - if ((res= rdb_merge.add(key, val))) - { + if ((res = rdb_merge.add(key, val))) { ha_index_end(); DBUG_RETURN(res); } } - if (res != HA_ERR_END_OF_FILE) - { + if (res != HA_ERR_END_OF_FILE) { // NO_LINT_DEBUG sql_print_error("Error retrieving index entry from primary key."); ha_index_end(); @@ -10221,13 +9382,39 @@ int ha_rocksdb::inplace_populate_sk(const TABLE* const new_table_arg, */ rocksdb::Slice merge_key; rocksdb::Slice merge_val; - while ((res= rdb_merge.next(&merge_key, &merge_val)) == 0) - { + + struct unique_sk_buf_info sk_info; + sk_info.dup_sk_buf = m_dup_sk_packed_tuple; + sk_info.dup_sk_buf_old = m_dup_sk_packed_tuple_old; + + while ((res = rdb_merge.next(&merge_key, &merge_val)) == 0) { + /* Perform uniqueness check if needed */ + if (is_unique_index) { + if (check_duplicate_sk(new_table_arg, *index, &merge_key, &sk_info)) { + /* + Duplicate entry found when trying to create unique secondary key. + We need to unpack the record into new_table_arg->record[0] as it + is used inside print_keydup_error so that the error message shows + the duplicate record. + */ + if (index->unpack_record(new_table_arg, new_table_arg->record[0], + &merge_key, nullptr, + m_verify_row_debug_checksums)) { + /* Should never reach here */ + DBUG_ASSERT(0); + } + + print_keydup_error(new_table_arg, + &new_table_arg->key_info[index->get_keyno()], + MYF(0)); + DBUG_RETURN(ER_DUP_ENTRY); + } + } + /* Insert key and slice to SST via SSTFileWriter API. */ - if ((res= bulk_load_key(tx, *index, merge_key, merge_val))) - { + if ((res = bulk_load_key(tx, *index, merge_key, merge_val))) { break; } } @@ -10236,15 +9423,13 @@ int ha_rocksdb::inplace_populate_sk(const TABLE* const new_table_arg, Here, res == -1 means that we are finished, while > 0 means an error occurred. 
*/ - if (res > 0) - { + if (res > 0) { // NO_LINT_DEBUG sql_print_error("Error while bulk loading keys in external merge sort."); DBUG_RETURN(res); } - if ((res= tx->finish_bulk_load())) - { + if ((res = tx->finish_bulk_load())) { // NO_LINT_DEBUG sql_print_error("Error finishing bulk load."); DBUG_RETURN(res); @@ -10294,14 +9479,15 @@ int ha_rocksdb::inplace_populate_sk(const TABLE* const new_table_arg, @retval false Success */ bool ha_rocksdb::commit_inplace_alter_table( - my_core::TABLE* const altered_table, - my_core::Alter_inplace_info* const ha_alter_info, - bool commit) -{ - Rdb_inplace_alter_ctx* const ctx0= - static_cast<Rdb_inplace_alter_ctx*> (ha_alter_info->handler_ctx); + my_core::TABLE *const altered_table, + my_core::Alter_inplace_info *const ha_alter_info, bool commit) { + DBUG_ENTER_FUNC(); - DBUG_ENTER("commit_inplace_alter_table"); + DBUG_ASSERT(altered_table != nullptr); + DBUG_ASSERT(ha_alter_info != nullptr); + + Rdb_inplace_alter_ctx *const ctx0 = + static_cast<Rdb_inplace_alter_ctx *>(ha_alter_info->handler_ctx); /* IMPORTANT: When rollback is requested, mysql will abort with @@ -10313,12 +9499,10 @@ bool ha_rocksdb::commit_inplace_alter_table( is done for each partition. A successful commit call only executes once for all partitions. */ - if (!commit) - { + if (!commit) { /* If ctx has not been created yet, nothing to do here */ - if (!ctx0) - { - DBUG_RETURN(0); + if (!ctx0) { + DBUG_RETURN(HA_EXIT_SUCCESS); } /* @@ -10326,12 +9510,10 @@ bool ha_rocksdb::commit_inplace_alter_table( erase the mappings inside the ddl_manager, as the old_key_descr is still using them. 
*/ - if (ctx0->m_new_key_descr) - { + if (ctx0->m_new_key_descr) { /* Delete the new key descriptors */ - for (uint i = 0; i < ctx0->m_new_tdef->m_key_count; i++) - { - ctx0->m_new_key_descr[i]= nullptr; + for (uint i = 0; i < ctx0->m_new_tdef->m_key_count; i++) { + ctx0->m_new_key_descr[i] = nullptr; } delete[] ctx0->m_new_key_descr; @@ -10341,7 +9523,10 @@ bool ha_rocksdb::commit_inplace_alter_table( delete ctx0->m_new_tdef; } - DBUG_RETURN(0); + /* Rollback any partially created indexes */ + dict_manager.rollback_ongoing_index_creation(); + + DBUG_RETURN(HA_EXIT_SUCCESS); } DBUG_ASSERT(ctx0); @@ -10350,17 +9535,13 @@ bool ha_rocksdb::commit_inplace_alter_table( For partitioned tables, we need to commit all changes to all tables at once, unlike in the other inplace alter API methods. */ - inplace_alter_handler_ctx** ctx_array; - inplace_alter_handler_ctx* ctx_single[2]; + inplace_alter_handler_ctx **ctx_array; + inplace_alter_handler_ctx *ctx_single[2]; - if (ha_alter_info->group_commit_ctx) - { - DBUG_EXECUTE_IF("crash_during_index_creation_partition", - DBUG_SUICIDE();); + if (ha_alter_info->group_commit_ctx) { + DBUG_EXECUTE_IF("crash_during_index_creation_partition", DBUG_SUICIDE();); ctx_array = ha_alter_info->group_commit_ctx; - } - else - { + } else { ctx_single[0] = ctx0; ctx_single[1] = nullptr; ctx_array = ctx_single; @@ -10372,32 +9553,29 @@ bool ha_rocksdb::commit_inplace_alter_table( if (ha_alter_info->handler_flags & (my_core::Alter_inplace_info::DROP_INDEX | my_core::Alter_inplace_info::DROP_UNIQUE_INDEX | - my_core::Alter_inplace_info::ADD_INDEX)) - { - const std::unique_ptr<rocksdb::WriteBatch> wb= dict_manager.begin(); - rocksdb::WriteBatch* const batch= wb.get(); + my_core::Alter_inplace_info::ADD_INDEX | + my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) { + const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin(); + rocksdb::WriteBatch *const batch = wb.get(); std::unordered_set<GL_INDEX_ID> create_index_ids; - m_tbl_def= 
ctx0->m_new_tdef; - m_key_descr_arr= m_tbl_def->m_key_descr_arr; - m_pk_descr= m_key_descr_arr[pk_index(altered_table, m_tbl_def)]; + m_tbl_def = ctx0->m_new_tdef; + m_key_descr_arr = m_tbl_def->m_key_descr_arr; + m_pk_descr = m_key_descr_arr[pk_index(altered_table, m_tbl_def)]; dict_manager.lock(); - for (inplace_alter_handler_ctx** pctx = ctx_array; *pctx; pctx++) - { - Rdb_inplace_alter_ctx* const ctx= - static_cast<Rdb_inplace_alter_ctx*> (*pctx); + for (inplace_alter_handler_ctx **pctx = ctx_array; *pctx; pctx++) { + Rdb_inplace_alter_ctx *const ctx = + static_cast<Rdb_inplace_alter_ctx *>(*pctx); /* Mark indexes to be dropped */ dict_manager.add_drop_index(ctx->m_dropped_index_ids, batch); - for (const auto& index : ctx->m_added_indexes) - { + for (const auto &index : ctx->m_added_indexes) { create_index_ids.insert(index->get_gl_index_id()); } - if (ddl_manager.put_and_write(ctx->m_new_tdef, batch)) - { + if (ddl_manager.put_and_write(ctx->m_new_tdef, batch)) { /* Failed to write new entry into data dictionary, this should never happen. @@ -10406,8 +9584,7 @@ bool ha_rocksdb::commit_inplace_alter_table( } } - if (dict_manager.commit(batch)) - { + if (dict_manager.commit(batch)) { /* Should never reach here. We assume MyRocks will abort if commit fails. */ @@ -10417,8 +9594,8 @@ bool ha_rocksdb::commit_inplace_alter_table( dict_manager.unlock(); /* Mark ongoing create indexes as finished/remove from data dictionary */ - dict_manager.finish_indexes_operation(create_index_ids, - Rdb_key_def::DDL_CREATE_INDEX_ONGOING); + dict_manager.finish_indexes_operation( + create_index_ids, Rdb_key_def::DDL_CREATE_INDEX_ONGOING); /* We need to recalculate the index stats here manually. The reason is that @@ -10427,8 +9604,7 @@ bool ha_rocksdb::commit_inplace_alter_table( prevents us from updating the stats normally as the ddl_manager cannot find the proper gl_index_ids yet during adjust_stats calls. 
*/ - if (calculate_stats(altered_table, nullptr, nullptr)) - { + if (calculate_stats(altered_table, nullptr, nullptr)) { /* Failed to update index statistics, should never happen */ DBUG_ASSERT(0); } @@ -10436,29 +9612,28 @@ bool ha_rocksdb::commit_inplace_alter_table( rdb_drop_idx_thread.signal(); } - DBUG_RETURN(0); + DBUG_RETURN(HA_EXIT_SUCCESS); } #define SHOW_FNAME(name) rocksdb_show_##name -#define DEF_SHOW_FUNC(name, key) \ - static int SHOW_FNAME(name)(MYSQL_THD thd, SHOW_VAR *var, char *buff) \ - { \ - rocksdb_status_counters.name = \ - rocksdb_stats->getTickerCount(rocksdb::key); \ - var->type = SHOW_LONGLONG; \ - var->value = (char *)&rocksdb_status_counters.name; \ - return 0; \ +#define DEF_SHOW_FUNC(name, key) \ + static int SHOW_FNAME(name)(MYSQL_THD thd, SHOW_VAR * var, char *buff) { \ + rocksdb_status_counters.name = \ + rocksdb_stats->getTickerCount(rocksdb::key); \ + var->type = SHOW_LONGLONG; \ + var->value = (char *)&rocksdb_status_counters.name; \ + return HA_EXIT_SUCCESS; \ } -#define DEF_STATUS_VAR(name) \ - {"rocksdb_" #name, (char*) &SHOW_FNAME(name), SHOW_FUNC} +#define DEF_STATUS_VAR(name) \ + { "rocksdb_" #name, (char *)&SHOW_FNAME(name), SHOW_FUNC } -#define DEF_STATUS_VAR_PTR(name, ptr, option) \ - {"rocksdb_" name, (char*) ptr, option} +#define DEF_STATUS_VAR_PTR(name, ptr, option) \ + { "rocksdb_" name, (char *)ptr, option } -#define DEF_STATUS_VAR_FUNC(name, ptr, option) \ - {name, reinterpret_cast<char*>(ptr), option} +#define DEF_STATUS_VAR_FUNC(name, ptr, option) \ + { name, reinterpret_cast<char *>(ptr), option } struct rocksdb_status_counters_t { uint64_t block_cache_miss; @@ -10582,113 +9757,108 @@ static void myrocks_update_status() { export_stats.system_rows_updated = global_stats.system_rows[ROWS_UPDATED]; } -static SHOW_VAR myrocks_status_variables[]= { - DEF_STATUS_VAR_FUNC("rows_deleted", &export_stats.rows_deleted, - SHOW_LONGLONG), - DEF_STATUS_VAR_FUNC("rows_inserted", &export_stats.rows_inserted, - SHOW_LONGLONG), - 
DEF_STATUS_VAR_FUNC("rows_read", &export_stats.rows_read, SHOW_LONGLONG), - DEF_STATUS_VAR_FUNC("rows_updated", &export_stats.rows_updated, - SHOW_LONGLONG), - DEF_STATUS_VAR_FUNC("system_rows_deleted", &export_stats.system_rows_deleted, - SHOW_LONGLONG), - DEF_STATUS_VAR_FUNC("system_rows_inserted", - &export_stats.system_rows_inserted, SHOW_LONGLONG), - DEF_STATUS_VAR_FUNC("system_rows_read", &export_stats.system_rows_read, - SHOW_LONGLONG), - DEF_STATUS_VAR_FUNC("system_rows_updated", &export_stats.system_rows_updated, - SHOW_LONGLONG), - - {NullS, NullS, SHOW_LONG} -}; - -static void show_myrocks_vars(THD* thd, SHOW_VAR* var, char* buff) { +static SHOW_VAR myrocks_status_variables[] = { + DEF_STATUS_VAR_FUNC("rows_deleted", &export_stats.rows_deleted, + SHOW_LONGLONG), + DEF_STATUS_VAR_FUNC("rows_inserted", &export_stats.rows_inserted, + SHOW_LONGLONG), + DEF_STATUS_VAR_FUNC("rows_read", &export_stats.rows_read, SHOW_LONGLONG), + DEF_STATUS_VAR_FUNC("rows_updated", &export_stats.rows_updated, + SHOW_LONGLONG), + DEF_STATUS_VAR_FUNC("system_rows_deleted", + &export_stats.system_rows_deleted, SHOW_LONGLONG), + DEF_STATUS_VAR_FUNC("system_rows_inserted", + &export_stats.system_rows_inserted, SHOW_LONGLONG), + DEF_STATUS_VAR_FUNC("system_rows_read", &export_stats.system_rows_read, + SHOW_LONGLONG), + DEF_STATUS_VAR_FUNC("system_rows_updated", + &export_stats.system_rows_updated, SHOW_LONGLONG), + + {NullS, NullS, SHOW_LONG}}; + +static void show_myrocks_vars(THD *thd, SHOW_VAR *var, char *buff) { myrocks_update_status(); var->type = SHOW_ARRAY; - var->value = reinterpret_cast<char*>(&myrocks_status_variables); -} - -static SHOW_VAR rocksdb_status_vars[]= { - DEF_STATUS_VAR(block_cache_miss), - DEF_STATUS_VAR(block_cache_hit), - DEF_STATUS_VAR(block_cache_add), - DEF_STATUS_VAR(block_cache_index_miss), - DEF_STATUS_VAR(block_cache_index_hit), - DEF_STATUS_VAR(block_cache_filter_miss), - DEF_STATUS_VAR(block_cache_filter_hit), - DEF_STATUS_VAR(block_cache_data_miss), 
- DEF_STATUS_VAR(block_cache_data_hit), - DEF_STATUS_VAR(bloom_filter_useful), - DEF_STATUS_VAR(memtable_hit), - DEF_STATUS_VAR(memtable_miss), - DEF_STATUS_VAR(compaction_key_drop_new), - DEF_STATUS_VAR(compaction_key_drop_obsolete), - DEF_STATUS_VAR(compaction_key_drop_user), - DEF_STATUS_VAR(number_keys_written), - DEF_STATUS_VAR(number_keys_read), - DEF_STATUS_VAR(number_keys_updated), - DEF_STATUS_VAR(bytes_written), - DEF_STATUS_VAR(bytes_read), - DEF_STATUS_VAR(no_file_closes), - DEF_STATUS_VAR(no_file_opens), - DEF_STATUS_VAR(no_file_errors), - DEF_STATUS_VAR(l0_slowdown_micros), - DEF_STATUS_VAR(memtable_compaction_micros), - DEF_STATUS_VAR(l0_num_files_stall_micros), - DEF_STATUS_VAR(rate_limit_delay_millis), - DEF_STATUS_VAR(num_iterators), - DEF_STATUS_VAR(number_multiget_get), - DEF_STATUS_VAR(number_multiget_keys_read), - DEF_STATUS_VAR(number_multiget_bytes_read), - DEF_STATUS_VAR(number_deletes_filtered), - DEF_STATUS_VAR(number_merge_failures), - DEF_STATUS_VAR(bloom_filter_prefix_checked), - DEF_STATUS_VAR(bloom_filter_prefix_useful), - DEF_STATUS_VAR(number_reseeks_iteration), - DEF_STATUS_VAR(getupdatessince_calls), - DEF_STATUS_VAR(block_cachecompressed_miss), - DEF_STATUS_VAR(block_cachecompressed_hit), - DEF_STATUS_VAR(wal_synced), - DEF_STATUS_VAR(wal_bytes), - DEF_STATUS_VAR(write_self), - DEF_STATUS_VAR(write_other), - DEF_STATUS_VAR(write_timedout), - DEF_STATUS_VAR(write_wal), - DEF_STATUS_VAR(flush_write_bytes), - DEF_STATUS_VAR(compact_read_bytes), - DEF_STATUS_VAR(compact_write_bytes), - DEF_STATUS_VAR(number_superversion_acquires), - DEF_STATUS_VAR(number_superversion_releases), - DEF_STATUS_VAR(number_superversion_cleanups), - DEF_STATUS_VAR(number_block_not_compressed), - DEF_STATUS_VAR_PTR("snapshot_conflict_errors", - &rocksdb_snapshot_conflict_errors, - SHOW_LONGLONG), - DEF_STATUS_VAR_PTR("wal_group_syncs", - &rocksdb_wal_group_syncs, - SHOW_LONGLONG), - DEF_STATUS_VAR_PTR("number_stat_computes", &rocksdb_number_stat_computes, 
SHOW_LONGLONG), - DEF_STATUS_VAR_PTR("number_sst_entry_put", &rocksdb_num_sst_entry_put, - SHOW_LONGLONG), - DEF_STATUS_VAR_PTR("number_sst_entry_delete", &rocksdb_num_sst_entry_delete, - SHOW_LONGLONG), - DEF_STATUS_VAR_PTR("number_sst_entry_singledelete", - &rocksdb_num_sst_entry_singledelete, SHOW_LONGLONG), - DEF_STATUS_VAR_PTR("number_sst_entry_merge", &rocksdb_num_sst_entry_merge, - SHOW_LONGLONG), - DEF_STATUS_VAR_PTR("number_sst_entry_other", &rocksdb_num_sst_entry_other, - SHOW_LONGLONG), - {"rocksdb", reinterpret_cast<char*>(&show_myrocks_vars), SHOW_FUNC}, - {NullS, NullS, SHOW_LONG} -}; - + var->value = reinterpret_cast<char *>(&myrocks_status_variables); +} + +static SHOW_VAR rocksdb_status_vars[] = { + DEF_STATUS_VAR(block_cache_miss), + DEF_STATUS_VAR(block_cache_hit), + DEF_STATUS_VAR(block_cache_add), + DEF_STATUS_VAR(block_cache_index_miss), + DEF_STATUS_VAR(block_cache_index_hit), + DEF_STATUS_VAR(block_cache_filter_miss), + DEF_STATUS_VAR(block_cache_filter_hit), + DEF_STATUS_VAR(block_cache_data_miss), + DEF_STATUS_VAR(block_cache_data_hit), + DEF_STATUS_VAR(bloom_filter_useful), + DEF_STATUS_VAR(memtable_hit), + DEF_STATUS_VAR(memtable_miss), + DEF_STATUS_VAR(compaction_key_drop_new), + DEF_STATUS_VAR(compaction_key_drop_obsolete), + DEF_STATUS_VAR(compaction_key_drop_user), + DEF_STATUS_VAR(number_keys_written), + DEF_STATUS_VAR(number_keys_read), + DEF_STATUS_VAR(number_keys_updated), + DEF_STATUS_VAR(bytes_written), + DEF_STATUS_VAR(bytes_read), + DEF_STATUS_VAR(no_file_closes), + DEF_STATUS_VAR(no_file_opens), + DEF_STATUS_VAR(no_file_errors), + DEF_STATUS_VAR(l0_slowdown_micros), + DEF_STATUS_VAR(memtable_compaction_micros), + DEF_STATUS_VAR(l0_num_files_stall_micros), + DEF_STATUS_VAR(rate_limit_delay_millis), + DEF_STATUS_VAR(num_iterators), + DEF_STATUS_VAR(number_multiget_get), + DEF_STATUS_VAR(number_multiget_keys_read), + DEF_STATUS_VAR(number_multiget_bytes_read), + DEF_STATUS_VAR(number_deletes_filtered), + 
DEF_STATUS_VAR(number_merge_failures), + DEF_STATUS_VAR(bloom_filter_prefix_checked), + DEF_STATUS_VAR(bloom_filter_prefix_useful), + DEF_STATUS_VAR(number_reseeks_iteration), + DEF_STATUS_VAR(getupdatessince_calls), + DEF_STATUS_VAR(block_cachecompressed_miss), + DEF_STATUS_VAR(block_cachecompressed_hit), + DEF_STATUS_VAR(wal_synced), + DEF_STATUS_VAR(wal_bytes), + DEF_STATUS_VAR(write_self), + DEF_STATUS_VAR(write_other), + DEF_STATUS_VAR(write_timedout), + DEF_STATUS_VAR(write_wal), + DEF_STATUS_VAR(flush_write_bytes), + DEF_STATUS_VAR(compact_read_bytes), + DEF_STATUS_VAR(compact_write_bytes), + DEF_STATUS_VAR(number_superversion_acquires), + DEF_STATUS_VAR(number_superversion_releases), + DEF_STATUS_VAR(number_superversion_cleanups), + DEF_STATUS_VAR(number_block_not_compressed), + DEF_STATUS_VAR_PTR("snapshot_conflict_errors", + &rocksdb_snapshot_conflict_errors, SHOW_LONGLONG), + DEF_STATUS_VAR_PTR("wal_group_syncs", &rocksdb_wal_group_syncs, + SHOW_LONGLONG), + DEF_STATUS_VAR_PTR("number_stat_computes", &rocksdb_number_stat_computes, + SHOW_LONGLONG), + DEF_STATUS_VAR_PTR("number_sst_entry_put", &rocksdb_num_sst_entry_put, + SHOW_LONGLONG), + DEF_STATUS_VAR_PTR("number_sst_entry_delete", &rocksdb_num_sst_entry_delete, + SHOW_LONGLONG), + DEF_STATUS_VAR_PTR("number_sst_entry_singledelete", + &rocksdb_num_sst_entry_singledelete, SHOW_LONGLONG), + DEF_STATUS_VAR_PTR("number_sst_entry_merge", &rocksdb_num_sst_entry_merge, + SHOW_LONGLONG), + DEF_STATUS_VAR_PTR("number_sst_entry_other", &rocksdb_num_sst_entry_other, + SHOW_LONGLONG), + {"rocksdb", reinterpret_cast<char *>(&show_myrocks_vars), SHOW_FUNC}, + {NullS, NullS, SHOW_LONG}}; /* Background thread's main logic */ -void Rdb_background_thread::run() -{ +void Rdb_background_thread::run() { // How many seconds to wait till flushing the WAL next time. 
const int WAKE_UP_INTERVAL = 1; @@ -10696,24 +9866,22 @@ void Rdb_background_thread::run() clock_gettime(CLOCK_REALTIME, &ts_next_sync); ts_next_sync.tv_sec += WAKE_UP_INTERVAL; - for (;;) - { + for (;;) { // Wait until the next timeout or until we receive a signal to stop the // thread. Request to stop the thread should only be triggered when the // storage engine is being unloaded. mysql_mutex_lock(&m_signal_mutex); - const auto ret __attribute__((__unused__)) = mysql_cond_timedwait( - &m_signal_cond, &m_signal_mutex, &ts_next_sync); + const auto ret MY_ATTRIBUTE((__unused__)) = + mysql_cond_timedwait(&m_signal_cond, &m_signal_mutex, &ts_next_sync); // Check that we receive only the expected error codes. DBUG_ASSERT(ret == 0 || ret == ETIMEDOUT); - const bool local_stop= m_stop; - const bool local_save_stats= m_save_stats; + const bool local_stop = m_stop; + const bool local_save_stats = m_save_stats; reset(); mysql_mutex_unlock(&m_signal_mutex); - if (local_stop) - { + if (local_stop) { // If we're here then that's because condition variable was signaled by // another thread and we're shutting down. Break out the loop to make // sure that shutdown thread can proceed. @@ -10723,8 +9891,7 @@ void Rdb_background_thread::run() // This path should be taken only when the timer expired. DBUG_ASSERT(ret == ETIMEDOUT); - if (local_save_stats) - { + if (local_save_stats) { ddl_manager.persist_stats(); } @@ -10732,10 +9899,9 @@ void Rdb_background_thread::run() clock_gettime(CLOCK_REALTIME, &ts); // Flush the WAL. - if (rdb && rocksdb_background_sync) - { + if (rdb && rocksdb_background_sync) { DBUG_ASSERT(!rocksdb_db_options.allow_mmap_writes); - const rocksdb::Status s= rdb->SyncWAL(); + const rocksdb::Status s = rdb->SyncWAL(); if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD); } @@ -10743,14 +9909,13 @@ void Rdb_background_thread::run() // Set the next timestamp for mysql_cond_timedwait() (which ends up calling // pthread_cond_timedwait()) to wait on. 
- ts_next_sync.tv_sec= ts.tv_sec + WAKE_UP_INTERVAL; + ts_next_sync.tv_sec = ts.tv_sec + WAKE_UP_INTERVAL; } // save remaining stats which might've left unsaved ddl_manager.persist_stats(); } - /** Deciding if it is possible to use bloom filter or not. @@ -10769,22 +9934,17 @@ void Rdb_background_thread::run() @param use_all_keys True if all key parts are set with equal conditions. This is aware of extended keys. */ -bool can_use_bloom_filter(THD *thd, - const Rdb_key_def& kd, +bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, const rocksdb::Slice &eq_cond, - const bool use_all_keys, - bool is_ascending) -{ - bool can_use= false; + const bool use_all_keys, bool is_ascending) { + bool can_use = false; - if (THDVAR(thd, skip_bloom_filter_on_read)) - { + if (THDVAR(thd, skip_bloom_filter_on_read)) { return can_use; } - rocksdb::Options opt = rdb->GetOptions(kd.get_cf()); - if (opt.prefix_extractor) - { + const rocksdb::SliceTransform *prefix_extractor = kd.get_extractor(); + if (prefix_extractor) { /* This is an optimized use case for CappedPrefixTransform. If eq_cond length >= prefix extractor length and if @@ -10803,92 +9963,113 @@ bool can_use_bloom_filter(THD *thd, shorter require all parts of the key to be available for the short key match. */ - if (use_all_keys && opt.prefix_extractor->InRange(eq_cond)) - can_use= true; + if (use_all_keys && prefix_extractor->InRange(eq_cond)) + can_use = true; else if (!is_ascending) - can_use= false; - else if (opt.prefix_extractor->SameResultWhenAppended(eq_cond)) - can_use= true; + can_use = false; + else if (prefix_extractor->SameResultWhenAppended(eq_cond)) + can_use = true; else - can_use= false; - } else - { + can_use = false; + } else { /* if prefix extractor is not defined, all key parts have to be used by eq_cond. 
*/ if (use_all_keys) - can_use= true; + can_use = true; else - can_use= false; + can_use = false; } return can_use; } /* For modules that need access to the global data structures */ -rocksdb::TransactionDB *rdb_get_rocksdb_db() -{ - return rdb; -} +rocksdb::TransactionDB *rdb_get_rocksdb_db() { return rdb; } -Rdb_cf_manager& rdb_get_cf_manager() -{ - return cf_manager; -} +Rdb_cf_manager &rdb_get_cf_manager() { return cf_manager; } -rocksdb::BlockBasedTableOptions& rdb_get_table_options() -{ +rocksdb::BlockBasedTableOptions &rdb_get_table_options() { return rocksdb_tbl_options; } - -int rdb_get_table_perf_counters(const char* const tablename, - Rdb_perf_counters* const counters) -{ +int rdb_get_table_perf_counters(const char *const tablename, + Rdb_perf_counters *const counters) { DBUG_ASSERT(counters != nullptr); DBUG_ASSERT(tablename != nullptr); Rdb_table_handler *table_handler; - table_handler= rdb_open_tables.get_table_handler(tablename); - if (table_handler == nullptr) - { + table_handler = rdb_open_tables.get_table_handler(tablename); + if (table_handler == nullptr) { return HA_ERR_INTERNAL_ERROR; } counters->load(table_handler->m_table_perf_context); rdb_open_tables.release_table_handler(table_handler); - return 0; + return HA_EXIT_SUCCESS; +} + +const char *get_rdb_io_error_string(const RDB_IO_ERROR_TYPE err_type) { + // If this assertion fails then this means that a member has been either added + // to or removed from RDB_IO_ERROR_TYPE enum and this function needs to be + // changed to return the appropriate value. 
+ static_assert(RDB_IO_ERROR_LAST == 4, "Please handle all the error types."); + + switch (err_type) { + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_TX_COMMIT: + return "RDB_IO_ERROR_TX_COMMIT"; + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_DICT_COMMIT: + return "RDB_IO_ERROR_DICT_COMMIT"; + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_BG_THREAD: + return "RDB_IO_ERROR_BG_THREAD"; + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_GENERAL: + return "RDB_IO_ERROR_GENERAL"; + default: + DBUG_ASSERT(false); + return "(unknown)"; + } } +// In case of core dump generation we want this function NOT to be optimized +// so that we can capture as much data as possible to debug the root cause +// more efficiently. +#pragma GCC push_options +#pragma GCC optimize("O0") -void rdb_handle_io_error(rocksdb::Status status, RDB_IO_ERROR_TYPE err_type) -{ - if (status.IsIOError()) - { +void rdb_handle_io_error(const rocksdb::Status status, + const RDB_IO_ERROR_TYPE err_type) { + if (status.IsIOError()) { switch (err_type) { case RDB_IO_ERROR_TX_COMMIT: - case RDB_IO_ERROR_DICT_COMMIT: - { - sql_print_error("RocksDB: Failed to write to WAL - status %d, %s", - status.code(), status.ToString().c_str()); - sql_print_error("RocksDB: Aborting on WAL write error."); + case RDB_IO_ERROR_DICT_COMMIT: { + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: failed to write to WAL. Error type = %s, " + "status code = %d, status = %s", + get_rdb_io_error_string(err_type), status.code(), + status.ToString().c_str()); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on WAL write error."); abort_with_stack_traces(); break; } - case RDB_IO_ERROR_BG_THREAD: - { - sql_print_warning("RocksDB: BG Thread failed to write to RocksDB " - "- status %d, %s", status.code(), + case RDB_IO_ERROR_BG_THREAD: { + /* NO_LINT_DEBUG */ + sql_print_warning("MyRocks: BG thread failed to write to RocksDB. 
" + "Error type = %s, status code = %d, status = %s", + get_rdb_io_error_string(err_type), status.code(), status.ToString().c_str()); break; } - case RDB_IO_ERROR_GENERAL: - { - sql_print_error("RocksDB: Failed on I/O - status %d, %s", - status.code(), status.ToString().c_str()); - sql_print_error("RocksDB: Aborting on I/O error."); + case RDB_IO_ERROR_GENERAL: { + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: failed on I/O. Error type = %s, " + "status code = %d, status = %s", + get_rdb_io_error_string(err_type), status.code(), + status.ToString().c_str()); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on I/O error."); abort_with_stack_traces(); break; } @@ -10896,89 +10077,77 @@ void rdb_handle_io_error(rocksdb::Status status, RDB_IO_ERROR_TYPE err_type) DBUG_ASSERT(0); break; } - } - else if (status.IsCorruption()) - { + } else if (status.IsCorruption()) { /* NO_LINT_DEBUG */ - sql_print_error("RocksDB: Data Corruption detected! %d, %s", - status.code(), status.ToString().c_str()); + sql_print_error("MyRocks: data corruption detected! Error type = %s, " + "status code = %d, status = %s", + get_rdb_io_error_string(err_type), status.code(), + status.ToString().c_str()); /* NO_LINT_DEBUG */ - sql_print_error("RocksDB: Aborting because of data corruption."); + sql_print_error("MyRocks: aborting because of data corruption."); abort_with_stack_traces(); - } - else if (!status.ok()) - { + } else if (!status.ok()) { switch (err_type) { - case RDB_IO_ERROR_DICT_COMMIT: - { - sql_print_error("RocksDB: Failed to write to WAL (dictionary) - " - "status %d, %s", - status.code(), status.ToString().c_str()); - sql_print_error("RocksDB: Aborting on WAL write error."); + case RDB_IO_ERROR_DICT_COMMIT: { + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: failed to write to WAL (dictionary). 
" + "Error type = %s, status code = %d, status = %s", + get_rdb_io_error_string(err_type), status.code(), + status.ToString().c_str()); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on WAL write error."); abort_with_stack_traces(); break; } default: - sql_print_warning("RocksDB: Failed to read/write in RocksDB " - "- status %d, %s", status.code(), + /* NO_LINT_DEBUG */ + sql_print_warning("MyRocks: failed to read/write in RocksDB. " + "Error type = %s, status code = %d, status = %s", + get_rdb_io_error_string(err_type), status.code(), status.ToString().c_str()); break; } } } -Rdb_dict_manager *rdb_get_dict_manager(void) -{ - return &dict_manager; -} +#pragma GCC pop_options -Rdb_ddl_manager *rdb_get_ddl_manager(void) -{ - return &ddl_manager; -} +Rdb_dict_manager *rdb_get_dict_manager(void) { return &dict_manager; } -Rdb_binlog_manager *rdb_get_binlog_manager(void) -{ - return &binlog_manager; -} +Rdb_ddl_manager *rdb_get_ddl_manager(void) { return &ddl_manager; } +Rdb_binlog_manager *rdb_get_binlog_manager(void) { return &binlog_manager; } -void -rocksdb_set_compaction_options( - my_core::THD* const thd __attribute__((__unused__)), - my_core::st_mysql_sys_var* const var __attribute__((__unused__)), - void* const var_ptr, - const void* const save) -{ +void rocksdb_set_compaction_options( + my_core::THD *const thd MY_ATTRIBUTE((__unused__)), + my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr, const void *const save) { if (var_ptr && save) { - *(uint64_t*)var_ptr = *(const uint64_t*) save; + *(uint64_t *)var_ptr = *(const uint64_t *)save; } const Rdb_compact_params params = { - (uint64_t)rocksdb_compaction_sequential_deletes, - (uint64_t)rocksdb_compaction_sequential_deletes_window, - (uint64_t)rocksdb_compaction_sequential_deletes_file_size - }; + (uint64_t)rocksdb_compaction_sequential_deletes, + (uint64_t)rocksdb_compaction_sequential_deletes_window, + (uint64_t)rocksdb_compaction_sequential_deletes_file_size}; if 
(properties_collector_factory) { properties_collector_factory->SetCompactionParams(params); } } void rocksdb_set_table_stats_sampling_pct( - my_core::THD* const thd __attribute__((__unused__)), - my_core::st_mysql_sys_var* const var __attribute__((__unused__)), - void* const var_ptr __attribute__((__unused__)), - const void* const save) -{ + my_core::THD *const thd MY_ATTRIBUTE((__unused__)), + my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { mysql_mutex_lock(&rdb_sysvars_mutex); - const uint32_t new_val= *static_cast<const uint32_t*>(save); + const uint32_t new_val = *static_cast<const uint32_t *>(save); if (new_val != rocksdb_table_stats_sampling_pct) { rocksdb_table_stats_sampling_pct = new_val; if (properties_collector_factory) { properties_collector_factory->SetTableStatsSamplingPct( - rocksdb_table_stats_sampling_pct); + rocksdb_table_stats_sampling_pct); } } @@ -10994,105 +10163,113 @@ void rocksdb_set_table_stats_sampling_pct( This is similar to the code in innodb_doublewrite_update (found in storage/innobase/handler/ha_innodb.cc). 
*/ -void -rocksdb_set_rate_limiter_bytes_per_sec( - my_core::THD* const thd, - my_core::st_mysql_sys_var* const var __attribute__((__unused__)), - void* const var_ptr __attribute__((__unused__)), - const void* const save) -{ - const uint64_t new_val= *static_cast<const uint64_t*>(save); - if (new_val == 0 || rocksdb_rate_limiter_bytes_per_sec == 0) - { +void rocksdb_set_rate_limiter_bytes_per_sec( + my_core::THD *const thd, + my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { + const uint64_t new_val = *static_cast<const uint64_t *>(save); + if (new_val == 0 || rocksdb_rate_limiter_bytes_per_sec == 0) { /* If a rate_limiter was not enabled at startup we can't change it nor can we disable it if one was created at startup */ - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_WRONG_ARGUMENTS, "RocksDB: rocksdb_rate_limiter_bytes_per_sec cannot " "be dynamically changed to or from 0. 
Do a clean " "shutdown if you want to change it from or to 0."); - } - else if (new_val != rocksdb_rate_limiter_bytes_per_sec) - { + } else if (new_val != rocksdb_rate_limiter_bytes_per_sec) { /* Apply the new value to the rate limiter and store it locally */ DBUG_ASSERT(rocksdb_rate_limiter != nullptr); - rocksdb_rate_limiter_bytes_per_sec= new_val; + rocksdb_rate_limiter_bytes_per_sec = new_val; rocksdb_rate_limiter->SetBytesPerSecond(new_val); } } -void rdb_set_collation_exception_list(const char* const exception_list) -{ +void rdb_set_collation_exception_list(const char *const exception_list) { DBUG_ASSERT(rdb_collation_exceptions != nullptr); - if (!rdb_collation_exceptions->set_patterns(exception_list)) - { + if (!rdb_collation_exceptions->set_patterns(exception_list)) { my_core::warn_about_bad_patterns(rdb_collation_exceptions, "strict_collation_exceptions"); } } -void -rocksdb_set_collation_exception_list(THD* const thd, - struct st_mysql_sys_var* const var, - void* const var_ptr, - const void* const save) -{ - const char* const val = *static_cast<const char*const*>(save); +void rocksdb_set_collation_exception_list(THD *const thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save) { + const char *const val = *static_cast<const char *const *>(save); rdb_set_collation_exception_list(val); - *static_cast<const char**>(var_ptr) = val; + *static_cast<const char **>(var_ptr) = val; } -void -rocksdb_set_bulk_load(THD* const thd, - struct st_mysql_sys_var* const var __attribute__((__unused__)), - void* const var_ptr, - const void* const save) -{ - Rdb_transaction*& tx= get_tx_from_thd(thd); +void rocksdb_set_bulk_load(THD *const thd, struct st_mysql_sys_var *const var + MY_ATTRIBUTE((__unused__)), + void *const var_ptr, const void *const save) { + Rdb_transaction *&tx = get_tx_from_thd(thd); - if (tx != nullptr) - { - const int rc= tx->finish_bulk_load(); - if (rc != 0) - { + if (tx != nullptr) { + const int rc = 
tx->finish_bulk_load(); + if (rc != 0) { // NO_LINT_DEBUG sql_print_error("RocksDB: Error %d finalizing last SST file while " - "setting bulk loading variable", rc); + "setting bulk loading variable", + rc); abort_with_stack_traces(); } } - *static_cast<bool*>(var_ptr) = *static_cast<const bool*>(save); + *static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save); } -void rdb_queue_save_stats_request() -{ - rdb_bg_thread.request_save_stats(); +static void rocksdb_set_max_background_compactions( + THD *thd, struct st_mysql_sys_var *const var, void *const var_ptr, + const void *const save) { + DBUG_ASSERT(save != nullptr); + + mysql_mutex_lock(&rdb_sysvars_mutex); + rocksdb_db_options.max_background_compactions = + *static_cast<const int *>(save); + rocksdb_db_options.env->SetBackgroundThreads( + rocksdb_db_options.max_background_compactions, + rocksdb::Env::Priority::LOW); + mysql_mutex_unlock(&rdb_sysvars_mutex); } -void ha_rocksdb::rpl_before_delete_rows() -{ - m_in_rpl_delete_rows= true; +void rdb_queue_save_stats_request() { rdb_bg_thread.request_save_stats(); } + +void ha_rocksdb::rpl_before_delete_rows() { + DBUG_ENTER_FUNC(); + + m_in_rpl_delete_rows = true; + + DBUG_VOID_RETURN; } -void ha_rocksdb::rpl_after_delete_rows() -{ - m_in_rpl_delete_rows= false; +void ha_rocksdb::rpl_after_delete_rows() { + DBUG_ENTER_FUNC(); + + m_in_rpl_delete_rows = false; + + DBUG_VOID_RETURN; } -void ha_rocksdb::rpl_before_update_rows() -{ - m_in_rpl_update_rows= true; +void ha_rocksdb::rpl_before_update_rows() { + DBUG_ENTER_FUNC(); + + m_in_rpl_update_rows = true; + + DBUG_VOID_RETURN; } -void ha_rocksdb::rpl_after_update_rows() -{ - m_in_rpl_update_rows= false; +void ha_rocksdb::rpl_after_update_rows() { + DBUG_ENTER_FUNC(); + + m_in_rpl_update_rows = false; + + DBUG_VOID_RETURN; } /** @@ -11102,22 +10279,25 @@ void ha_rocksdb::rpl_after_update_rows() on UPDATE or DELETE row events, and table must have user defined primary key. 
*/ -bool ha_rocksdb::use_read_free_rpl() -{ - return ((m_in_rpl_delete_rows || m_in_rpl_update_rows) && - !has_hidden_pk(table) && m_use_read_free_rpl); +bool ha_rocksdb::use_read_free_rpl() { + DBUG_ENTER_FUNC(); + + DBUG_RETURN((m_in_rpl_delete_rows || m_in_rpl_update_rows) && + !has_hidden_pk(table) && m_use_read_free_rpl); } -double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) -{ +double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) { + DBUG_ENTER_FUNC(); + if (index != table->s->primary_key) { /* Non covering index range scan */ - return(handler::read_time(index, ranges, rows)); + DBUG_RETURN(handler::read_time(index, ranges, rows)); } - return (rows/20.0) + 1; + + DBUG_RETURN((rows / 20.0) + 1); } -} // namespace myrocks +} // namespace myrocks /* Register the storage engine plugin outside of myrocks namespace @@ -11125,33 +10305,27 @@ double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) its name generation. */ -struct st_mysql_storage_engine rocksdb_storage_engine= -{ MYSQL_HANDLERTON_INTERFACE_VERSION }; - -mysql_declare_plugin(rocksdb_se) -{ - MYSQL_STORAGE_ENGINE_PLUGIN, /* Plugin Type */ - &rocksdb_storage_engine, /* Plugin Descriptor */ - "ROCKSDB", /* Plugin Name */ - "Monty Program Ab", /* Plugin Author */ - "RocksDB storage engine", /* Plugin Description */ - PLUGIN_LICENSE_GPL, /* Plugin Licence */ - myrocks::rocksdb_init_func, /* Plugin Entry Point */ - myrocks::rocksdb_done_func, /* Plugin Deinitializer */ - 0x0001, /* version number (0.1) */ - myrocks::rocksdb_status_vars, /* status variables */ - myrocks::rocksdb_system_variables, /* system variables */ - nullptr, /* config options */ - 0, /* flags */ +struct st_mysql_storage_engine rocksdb_storage_engine = { + MYSQL_HANDLERTON_INTERFACE_VERSION}; + +mysql_declare_plugin(rocksdb_se){ + MYSQL_STORAGE_ENGINE_PLUGIN, /* Plugin Type */ + &rocksdb_storage_engine, /* Plugin Descriptor */ + "ROCKSDB", /* Plugin Name */ + "Monty Program Ab", /* Plugin Author 
*/ + "RocksDB storage engine", /* Plugin Description */ + PLUGIN_LICENSE_GPL, /* Plugin Licence */ + myrocks::rocksdb_init_func, /* Plugin Entry Point */ + myrocks::rocksdb_done_func, /* Plugin Deinitializer */ + 0x0001, /* version number (0.1) */ + myrocks::rocksdb_status_vars, /* status variables */ + myrocks::rocksdb_system_variables, /* system variables */ + nullptr, /* config options */ + 0, /* flags */ }, -myrocks::rdb_i_s_cfstats, -myrocks::rdb_i_s_dbstats, -myrocks::rdb_i_s_perf_context, -myrocks::rdb_i_s_perf_context_global, -myrocks::rdb_i_s_cfoptions, -myrocks::rdb_i_s_global_info, -myrocks::rdb_i_s_ddl, -myrocks::rdb_i_s_index_file_map, -myrocks::rdb_i_s_lock_info, -myrocks::rdb_i_s_trx_info -mysql_declare_plugin_end; + myrocks::rdb_i_s_cfstats, myrocks::rdb_i_s_dbstats, + myrocks::rdb_i_s_perf_context, myrocks::rdb_i_s_perf_context_global, + myrocks::rdb_i_s_cfoptions, myrocks::rdb_i_s_compact_stats, + myrocks::rdb_i_s_global_info, myrocks::rdb_i_s_ddl, + myrocks::rdb_i_s_index_file_map, myrocks::rdb_i_s_lock_info, + myrocks::rdb_i_s_trx_info mysql_declare_plugin_end; diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index a15ae6dd443..db95a4d919a 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -16,7 +16,7 @@ #pragma once #ifdef USE_PRAGMA_INTERFACE -#pragma interface /* gcc class implementation */ +#pragma interface /* gcc class implementation */ #endif /* C++ standard header files */ @@ -26,8 +26,8 @@ #include <vector> /* MySQL header files */ -#include "./handler.h" /* handler */ -#include "./my_global.h" /* ulonglong */ +#include "./handler.h" /* handler */ +#include "./my_global.h" /* ulonglong */ #include "./sql_string.h" #include "./ut0counter.h" @@ -93,41 +93,51 @@ std::vector<Rdb_trx_info> rdb_get_all_trx_info(); - the name used to set the default column family parameter for per-cf arguments. 
*/ -const char * const DEFAULT_CF_NAME= "default"; +const char *const DEFAULT_CF_NAME = "default"; /* This is the name of the Column Family used for storing the data dictionary. */ -const char * const DEFAULT_SYSTEM_CF_NAME= "__system__"; +const char *const DEFAULT_SYSTEM_CF_NAME = "__system__"; /* This is the name of the hidden primary key for tables with no pk. */ -const char * const HIDDEN_PK_NAME= "HIDDEN_PK_ID"; +const char *const HIDDEN_PK_NAME = "HIDDEN_PK_ID"; /* Column family name which means "put this index into its own column family". See Rdb_cf_manager::get_per_index_cf_name(). */ -const char * const PER_INDEX_CF_NAME = "$per_index_cf"; +const char *const PER_INDEX_CF_NAME = "$per_index_cf"; + +/* + Name for the background thread. +*/ +const char *const BG_THREAD_NAME = "myrocks-bg"; + +/* + Name for the drop index thread. +*/ +const char *const INDEX_THREAD_NAME = "myrocks-index"; /* Default, minimal valid, and maximum valid sampling rate values when collecting statistics about table. */ -#define RDB_DEFAULT_TBL_STATS_SAMPLE_PCT 10 -#define RDB_TBL_STATS_SAMPLE_PCT_MIN 1 -#define RDB_TBL_STATS_SAMPLE_PCT_MAX 100 +#define RDB_DEFAULT_TBL_STATS_SAMPLE_PCT 10 +#define RDB_TBL_STATS_SAMPLE_PCT_MIN 1 +#define RDB_TBL_STATS_SAMPLE_PCT_MAX 100 /* Default and maximum values for rocksdb-compaction-sequential-deletes and rocksdb-compaction-sequential-deletes-window to add basic boundary checking. 
*/ -#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES 0 -#define MAX_COMPACTION_SEQUENTIAL_DELETES 2000000 +#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES 0 +#define MAX_COMPACTION_SEQUENTIAL_DELETES 2000000 -#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES_WINDOW 0 -#define MAX_COMPACTION_SEQUENTIAL_DELETES_WINDOW 2000000 +#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES_WINDOW 0 +#define MAX_COMPACTION_SEQUENTIAL_DELETES_WINDOW 2000000 /* Default and maximum values for various compaction and flushing related @@ -139,11 +149,11 @@ const char * const PER_INDEX_CF_NAME = "$per_index_cf"; CPU-s and derive the values from there. This however has its own set of problems and we'll choose simplicity for now. */ -#define MAX_BACKGROUND_COMPACTIONS 64 -#define MAX_BACKGROUND_FLUSHES 64 +#define MAX_BACKGROUND_COMPACTIONS 64 +#define MAX_BACKGROUND_FLUSHES 64 -#define DEFAULT_SUBCOMPACTIONS 1 -#define MAX_SUBCOMPACTIONS 64 +#define DEFAULT_SUBCOMPACTIONS 1 +#define MAX_SUBCOMPACTIONS 64 /* Defines the field sizes for serializing XID object to a string representation. @@ -167,7 +177,7 @@ const char * const PER_INDEX_CF_NAME = "$per_index_cf"; The reason behind the cast issue is the lack of unsigned int support in Java. */ -#define MAX_RATE_LIMITER_BYTES_PER_SEC static_cast<uint64_t>(LONGLONG_MAX) +#define MAX_RATE_LIMITER_BYTES_PER_SEC static_cast<uint64_t>(LONGLONG_MAX) /* Hidden PK column (for tables with no primary key) is a longlong (aka 8 bytes). @@ -179,14 +189,13 @@ const char * const PER_INDEX_CF_NAME = "$per_index_cf"; MyRocks specific error codes. NB! Please make sure that you will update HA_ERR_ROCKSDB_LAST when adding new ones. 
*/ -#define HA_ERR_ROCKSDB_UNIQUE_NOT_SUPPORTED (HA_ERR_LAST + 1) -#define HA_ERR_ROCKSDB_PK_REQUIRED (HA_ERR_LAST + 2) -#define HA_ERR_ROCKSDB_TOO_MANY_LOCKS (HA_ERR_LAST + 3) -#define HA_ERR_ROCKSDB_LAST HA_ERR_ROCKSDB_TOO_MANY_LOCKS - -inline bool looks_like_per_index_cf_typo(const char* const name) -{ - return (name && name[0]=='$' && strcmp(name, PER_INDEX_CF_NAME)); +#define HA_ERR_ROCKSDB_UNIQUE_NOT_SUPPORTED (HA_ERR_LAST + 1) +#define HA_ERR_ROCKSDB_PK_REQUIRED (HA_ERR_LAST + 2) +#define HA_ERR_ROCKSDB_TOO_MANY_LOCKS (HA_ERR_LAST + 3) +#define HA_ERR_ROCKSDB_LAST HA_ERR_ROCKSDB_TOO_MANY_LOCKS + +inline bool looks_like_per_index_cf_typo(const char *const name) { + return (name && name[0] == '$' && strcmp(name, PER_INDEX_CF_NAME)); } /** @@ -196,21 +205,18 @@ inline bool looks_like_per_index_cf_typo(const char* const name) //TODO: join this with Rdb_tbl_def ? */ -struct Rdb_table_handler -{ - char *m_table_name; - uint m_table_name_length; - int m_ref_count; +struct Rdb_table_handler { + char *m_table_name; + uint m_table_name_length; + int m_ref_count; - my_core::THR_LOCK m_thr_lock; ///< MySQL latch needed by m_db_lock + my_core::THR_LOCK m_thr_lock; ///< MySQL latch needed by m_db_lock /* Stores cumulative table statistics */ - my_io_perf_atomic_t m_io_perf_read; - Rdb_atomic_perf_counters m_table_perf_context; + my_io_perf_atomic_t m_io_perf_read; + Rdb_atomic_perf_counters m_table_perf_context; }; - - class Rdb_key_def; class Rdb_tbl_def; class Rdb_transaction; @@ -218,38 +224,32 @@ class Rdb_transaction_impl; class Rdb_writebatch_impl; class Rdb_field_encoder; -const char *const rocksdb_hton_name= "ROCKSDB"; +const char *const rocksdb_hton_name = "ROCKSDB"; typedef struct _gl_index_id_s { uint32_t cf_id; uint32_t index_id; - bool operator==(const struct _gl_index_id_s& other) const - { + bool operator==(const struct _gl_index_id_s &other) const { return cf_id == other.cf_id && index_id == other.index_id; } - bool operator!=(const struct _gl_index_id_s& 
other) const - { + bool operator!=(const struct _gl_index_id_s &other) const { return cf_id != other.cf_id || index_id != other.index_id; } - bool operator<(const struct _gl_index_id_s& other) const - { + bool operator<(const struct _gl_index_id_s &other) const { return cf_id < other.cf_id || - (cf_id == other.cf_id && index_id < other.index_id); + (cf_id == other.cf_id && index_id < other.index_id); } - bool operator<=(const struct _gl_index_id_s& other) const - { + bool operator<=(const struct _gl_index_id_s &other) const { return cf_id < other.cf_id || - (cf_id == other.cf_id && index_id <= other.index_id); + (cf_id == other.cf_id && index_id <= other.index_id); } - bool operator>(const struct _gl_index_id_s& other) const - { + bool operator>(const struct _gl_index_id_s &other) const { return cf_id > other.cf_id || - (cf_id == other.cf_id && index_id > other.index_id); + (cf_id == other.cf_id && index_id > other.index_id); } - bool operator>=(const struct _gl_index_id_s& other) const - { + bool operator>=(const struct _gl_index_id_s &other) const { return cf_id > other.cf_id || - (cf_id == other.cf_id && index_id >= other.index_id); + (cf_id == other.cf_id && index_id >= other.index_id); } } GL_INDEX_ID; @@ -289,23 +289,20 @@ struct st_export_stats { ulonglong system_rows_updated; }; -} // namespace myrocks +} // namespace myrocks #include "./rdb_buff.h" /* Provide hash function for GL_INDEX_ID so we can include it in sets */ namespace std { - template <> - struct hash<myrocks::GL_INDEX_ID> - { - std::size_t operator()(const myrocks::GL_INDEX_ID& gl_index_id) const - { - const uint64_t val = ((uint64_t) gl_index_id.cf_id << 32 | - (uint64_t) gl_index_id.index_id); - return std::hash<uint64_t>()(val); - } - }; -} // namespace std +template <> struct hash<myrocks::GL_INDEX_ID> { + std::size_t operator()(const myrocks::GL_INDEX_ID &gl_index_id) const { + const uint64_t val = + ((uint64_t)gl_index_id.cf_id << 32 | (uint64_t)gl_index_id.index_id); + return 
std::hash<uint64_t>()(val); + } +}; +} // namespace std namespace myrocks { @@ -314,11 +311,10 @@ namespace myrocks { Class definition for ROCKSDB storage engine plugin handler */ -class ha_rocksdb: public my_core::handler -{ - my_core::THR_LOCK_DATA m_db_lock; ///< MySQL database lock +class ha_rocksdb : public my_core::handler { + my_core::THR_LOCK_DATA m_db_lock; ///< MySQL database lock - Rdb_table_handler *m_table_handler; ///< Open table handler + Rdb_table_handler *m_table_handler; ///< Open table handler /* Iterator used for range scans and for full table/index scans */ rocksdb::Iterator *m_scan_it; @@ -354,7 +350,7 @@ class ha_rocksdb: public my_core::handler */ bool m_maybe_unpack_info; - uchar *m_pk_tuple; /* Buffer for storing PK in KeyTupleFormat */ + uchar *m_pk_tuple; /* Buffer for storing PK in KeyTupleFormat */ uchar *m_pk_packed_tuple; /* Buffer for storing PK in StorageFormat */ // ^^ todo: change it to 'char*'? TODO: ^ can we join this with last_rowkey? @@ -387,6 +383,10 @@ class ha_rocksdb: public my_core::handler uchar *m_sk_packed_tuple_old; Rdb_string_writer m_sk_tails_old; + /* Buffers used for duplicate checking during unique_index_creation */ + uchar *m_dup_sk_packed_tuple; + uchar *m_dup_sk_packed_tuple_old; + /* Temporary space for packing VARCHARs (we provide it to pack_record()/pack_index_tuple() calls). 
@@ -448,40 +448,37 @@ class ha_rocksdb: public my_core::handler */ int m_dupp_errkey; - int create_key_defs(const TABLE* const table_arg, - Rdb_tbl_def* const tbl_def_arg, - const TABLE* const old_table_arg= nullptr, - const Rdb_tbl_def* const old_tbl_def_arg= nullptr) const - __attribute__((__nonnull__(2, 3), __warn_unused_result__)); - int secondary_index_read(const int keyno, uchar* const buf) - __attribute__((__nonnull__, __warn_unused_result__)); + int create_key_defs(const TABLE *const table_arg, + Rdb_tbl_def *const tbl_def_arg, + const TABLE *const old_table_arg = nullptr, + const Rdb_tbl_def *const old_tbl_def_arg = nullptr) const + MY_ATTRIBUTE((__nonnull__(2, 3), __warn_unused_result__)); + int secondary_index_read(const int keyno, uchar *const buf) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); void setup_iterator_for_rnd_scan(); - void setup_scan_iterator(const Rdb_key_def& kd, rocksdb::Slice* const slice) - __attribute__((__nonnull__)) - { + void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *const slice) + MY_ATTRIBUTE((__nonnull__)) { setup_scan_iterator(kd, slice, false, false, 0); } - bool is_ascending(const Rdb_key_def& keydef, + bool is_ascending(const Rdb_key_def &keydef, enum ha_rkey_function find_flag) const - __attribute__((__nonnull__, __warn_unused_result__)); - void setup_scan_iterator(const Rdb_key_def& kd, - rocksdb::Slice *slice, const bool use_all_keys, - const bool is_ascending, const uint eq_cond_len) - __attribute__((__nonnull__)); + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *slice, + const bool use_all_keys, const bool is_ascending, + const uint eq_cond_len) MY_ATTRIBUTE((__nonnull__)); void release_scan_iterator(void); - rocksdb::Status get_for_update(Rdb_transaction* const tx, - rocksdb::ColumnFamilyHandle* const column_family, - const rocksdb::Slice& key, - std::string* const value) const; + rocksdb::Status + 
get_for_update(Rdb_transaction *const tx, + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, std::string *const value) const; - int get_row_by_rowid(uchar* const buf, const char* const rowid, + int get_row_by_rowid(uchar *const buf, const char *const rowid, const uint rowid_size) - __attribute__((__nonnull__, __warn_unused_result__)); - int get_row_by_rowid(uchar* const buf, const uchar* const rowid, + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int get_row_by_rowid(uchar *const buf, const uchar *const rowid, const uint rowid_size) - __attribute__((__nonnull__, __warn_unused_result__)) - { + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)) { return get_row_by_rowid(buf, reinterpret_cast<const char *>(rowid), rowid_size); } @@ -489,22 +486,22 @@ class ha_rocksdb: public my_core::handler void update_auto_incr_val(); void load_auto_incr_value(); longlong update_hidden_pk_val(); - int load_hidden_pk_value() __attribute__((__warn_unused_result__)); - int read_hidden_pk_id_from_rowkey(longlong* const hidden_pk_id) - __attribute__((__nonnull__, __warn_unused_result__)); + int load_hidden_pk_value() MY_ATTRIBUTE((__warn_unused_result__)); + int read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); bool can_use_single_delete(const uint &index) const - __attribute__((__warn_unused_result__)); - bool skip_unique_check() const __attribute__((__warn_unused_result__)); + MY_ATTRIBUTE((__warn_unused_result__)); + bool skip_unique_check() const MY_ATTRIBUTE((__warn_unused_result__)); void set_force_skip_unique_check(bool skip) override; - bool commit_in_the_middle() __attribute__((__warn_unused_result__)); - bool do_bulk_commit(Rdb_transaction* const tx) - __attribute__((__nonnull__, __warn_unused_result__)); - bool has_hidden_pk(const TABLE* const table) const - __attribute__((__nonnull__, __warn_unused_result__)); + bool commit_in_the_middle() 
MY_ATTRIBUTE((__warn_unused_result__)); + bool do_bulk_commit(Rdb_transaction *const tx) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + bool has_hidden_pk(const TABLE *const table) const + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); void update_row_stats(const operation_type &type); - void set_last_rowkey(const uchar* const old_data); + void set_last_rowkey(const uchar *const old_data); /* Array of table->s->fields elements telling how to store fields in the @@ -513,15 +510,14 @@ class ha_rocksdb: public my_core::handler Rdb_field_encoder *m_encoder_arr; /* Describes instructions on how to decode the field */ - class READ_FIELD - { - public: + class READ_FIELD { + public: /* Points to Rdb_field_encoder describing the field */ - Rdb_field_encoder* m_field_enc; + Rdb_field_encoder *m_field_enc; /* if true, decode the field, otherwise skip it */ - bool m_decode; + bool m_decode; /* Skip this many bytes before reading (or skipping) this field */ - int m_skip; + int m_skip; }; /* @@ -540,11 +536,12 @@ class ha_rocksdb: public my_core::handler */ uint m_null_bytes_in_rec; - void get_storage_type(Rdb_field_encoder* const encoder, const uint &kp); + void get_storage_type(Rdb_field_encoder *const encoder, const uint &kp); void setup_field_converters(); - int alloc_key_buffers(const TABLE* const table_arg, - const Rdb_tbl_def* const tbl_def_arg) - __attribute__((__nonnull__, __warn_unused_result__)); + int alloc_key_buffers(const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg, + bool alloc_alter_buffers = false) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); void free_key_buffers(); // the buffer size should be at least 2*Rdb_key_def::INDEX_NUMBER_SIZE @@ -568,7 +565,8 @@ class ha_rocksdb: public my_core::handler public: /* - Controls whether writes include checksums. This is updated from the session variable + Controls whether writes include checksums. This is updated from the session + variable at the start of each query. 
*/ bool m_store_row_debug_checksums; @@ -577,12 +575,11 @@ public: bool m_verify_row_debug_checksums; int m_checksums_pct; - ha_rocksdb(my_core::handlerton* const hton, - my_core::TABLE_SHARE* const table_arg); - ~ha_rocksdb() - { - int err __attribute__((__unused__)); - err= finalize_bulk_load(); + ha_rocksdb(my_core::handlerton *const hton, + my_core::TABLE_SHARE *const table_arg); + ~ha_rocksdb() { + int err MY_ATTRIBUTE((__unused__)); + err = finalize_bulk_load(); DBUG_ASSERT(err == 0); mysql_mutex_destroy(&m_bulk_load_mutex); } @@ -590,28 +587,37 @@ public: /** @brief The name that will be used for display purposes. */ - const char *table_type() const { return rocksdb_hton_name; } + const char *table_type() const override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(rocksdb_hton_name); + } /* The following is only used by SHOW KEYS: */ - const char *index_type(uint inx) { return "LSMTREE"; } + const char *index_type(uint inx) override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN("LSMTREE"); + } /** @brief The file extensions. */ - const char **bas_ext() const; + const char **bas_ext() const override; /* See if this is the same base table - this should only be true for different partitions of the same table. */ - bool same_table(const ha_rocksdb& other) const; + bool same_table(const ha_rocksdb &other) const; /** @brief This is a list of flags that indicate what functionality the storage engine implements. The current table flags are documented in handler.h */ - ulonglong table_flags() const override - { + ulonglong table_flags() const override { + DBUG_ENTER_FUNC(); + /* HA_BINLOG_STMT_CAPABLE We are saying that this engine is just statement capable to have @@ -621,12 +627,11 @@ public: If we don't set it, filesort crashes, because it assumes rowids are 1..8 byte numbers */ - return HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE | - HA_REC_NOT_IN_SEQ | HA_CAN_INDEX_BLOBS | - (m_pk_can_be_decoded? 
HA_PRIMARY_KEY_IN_READ_INDEX : 0) | - HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | - HA_NULL_IN_KEY | - HA_PARTIAL_COLUMN_READ; + DBUG_RETURN(HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE | + HA_REC_NOT_IN_SEQ | HA_CAN_INDEX_BLOBS | + (m_pk_can_be_decoded ? HA_PRIMARY_KEY_IN_READ_INDEX : 0) | + HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | HA_NULL_IN_KEY | + HA_PARTIAL_COLUMN_READ); } bool init_with_fields() override; @@ -641,61 +646,62 @@ public: If all_parts is set, MySQL wants to know the flags for the combined index, up to and including 'part'. */ - ulong index_flags(uint inx, uint part, bool all_parts) const; + ulong index_flags(uint inx, uint part, bool all_parts) const override; - const key_map * keys_to_use_for_scanning() - { - return &key_map_full; + const key_map *keys_to_use_for_scanning() override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(&key_map_full); } - bool primary_key_is_clustered() - { - return true; + bool primary_key_is_clustered() override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(true); } - bool should_store_row_debug_checksums() const - { + bool should_store_row_debug_checksums() const { return m_store_row_debug_checksums && (rand() % 100 < m_checksums_pct); } - int rename_table(const char* const from, const char* const to) - __attribute__((__nonnull__, __warn_unused_result__)); + int rename_table(const char *const from, const char *const to) override + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int convert_record_from_storage_format(const rocksdb::Slice* const key, - const rocksdb::Slice* const value, - uchar* const buf) - __attribute__((__nonnull__, __warn_unused_result__)); + int convert_record_from_storage_format(const rocksdb::Slice *const key, + const rocksdb::Slice *const value, + uchar *const buf) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int convert_record_from_storage_format(const rocksdb::Slice* const key, - uchar* const buf) - __attribute__((__nonnull__, __warn_unused_result__)); + int 
convert_record_from_storage_format(const rocksdb::Slice *const key, + uchar *const buf) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - void convert_record_to_storage_format(const rocksdb::Slice& pk_packed_slice, - Rdb_string_writer* const pk_unpack_info, - rocksdb::Slice* const packed_rec) - __attribute__((__nonnull__)); + void convert_record_to_storage_format(const rocksdb::Slice &pk_packed_slice, + Rdb_string_writer *const pk_unpack_info, + rocksdb::Slice *const packed_rec) + MY_ATTRIBUTE((__nonnull__)); - static const char* get_key_name(const uint index, - const TABLE* const table_arg, - const Rdb_tbl_def* const tbl_def_arg) - __attribute__((__nonnull__, __warn_unused_result__)); + static const char *get_key_name(const uint index, + const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - static const char* get_key_comment(const uint index, - const TABLE* const table_arg, - const Rdb_tbl_def* const tbl_def_arg) - __attribute__((__nonnull__, __warn_unused_result__)); + static const char *get_key_comment(const uint index, + const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - static bool is_hidden_pk(const uint index, const TABLE* const table_arg, - const Rdb_tbl_def* const tbl_def_arg) - __attribute__((__nonnull__, __warn_unused_result__)); + static bool is_hidden_pk(const uint index, const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - static uint pk_index(const TABLE* const table_arg, - const Rdb_tbl_def* const tbl_def_arg) - __attribute__((__nonnull__, __warn_unused_result__)); + static uint pk_index(const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - static bool is_pk(const uint index, const TABLE* table_arg, - const Rdb_tbl_def* tbl_def_arg) - 
__attribute__((__nonnull__, __warn_unused_result__)); + static bool is_pk(const uint index, const TABLE *table_arg, + const Rdb_tbl_def *tbl_def_arg) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); /** @brief unireg.cc will call max_supported_record_length(), max_supported_keys(), @@ -704,11 +710,30 @@ public: send. Return *real* limits of your storage engine here; MySQL will do min(your_limits, MySQL_limits) automatically. */ - uint max_supported_record_length() const { return HA_MAX_REC_LENGTH; } + uint max_supported_record_length() const override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(HA_MAX_REC_LENGTH); + } + + uint max_supported_keys() const override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(MAX_INDEXES); + } + + uint max_supported_key_parts() const override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(MAX_REF_PARTS); + } + + uint max_supported_key_part_length() const override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(2048); + } - uint max_supported_keys() const { return MAX_INDEXES; } - uint max_supported_key_parts() const { return MAX_REF_PARTS; } - uint max_supported_key_part_length() const { return 2048; } /** @brief unireg.cc will call this to make sure that the storage engine can handle the data it is about to send. Return *real* limits of your storage engine @@ -718,222 +743,253 @@ public: There is no need to implement ..._key_... methods if your engine doesn't support indexes. */ - uint max_supported_key_length() const { return 16*1024; /* just to return something*/ } + uint max_supported_key_length() const override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(16 * 1024); /* just to return something*/ + } /** TODO: return actual upper bound of number of records in the table. (e.g. 
save number of records seen on full table scan and/or use file size as upper bound) */ - ha_rows estimate_rows_upper_bound() { return HA_POS_ERROR; } + ha_rows estimate_rows_upper_bound() override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(HA_POS_ERROR); + } - /* At the moment, we're ok with default handler::index_init() implementation. */ - int index_read_map(uchar* const buf, const uchar* const key, + /* At the moment, we're ok with default handler::index_init() implementation. + */ + int index_read_map(uchar *const buf, const uchar *const key, key_part_map keypart_map, - enum ha_rkey_function find_flag); - __attribute__((__warn_unused_result__)); + enum ha_rkey_function find_flag) override + MY_ATTRIBUTE((__warn_unused_result__)); - int index_read_map_impl(uchar* const buf, const uchar* const key, + int index_read_map_impl(uchar *const buf, const uchar *const key, key_part_map keypart_map, enum ha_rkey_function find_flag, - const key_range* end_key) - __attribute__((__warn_unused_result__));; + const key_range *end_key) + MY_ATTRIBUTE((__warn_unused_result__)); - int index_read_last_map(uchar* const buf, const uchar* const key, - key_part_map keypart_map) - __attribute__((__warn_unused_result__));; + int index_read_last_map(uchar *const buf, const uchar *const key, + key_part_map keypart_map) override + MY_ATTRIBUTE((__warn_unused_result__)); - int read_range_first(const key_range* const start_key, - const key_range* const end_key, - bool eq_range, bool sorted) - __attribute__((__warn_unused_result__));; + int read_range_first(const key_range *const start_key, + const key_range *const end_key, bool eq_range, + bool sorted) override + MY_ATTRIBUTE((__warn_unused_result__)); + + virtual double scan_time() override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN( + static_cast<double>((stats.records + stats.deleted) / 20.0 + 10)); + } - virtual double scan_time() { return (double) (stats.records+stats.deleted) / 20.0+10; } virtual double read_time(uint, uint, ha_rows rows) 
override; - int open(const char* const name, int mode, uint test_if_locked) - __attribute__((__warn_unused_result__)); - int close(void) __attribute__((__warn_unused_result__)); - - int write_row(uchar* const buf) __attribute__((__warn_unused_result__)); - int update_row(const uchar* const old_data, uchar* const new_data) - __attribute__((__warn_unused_result__)); - int delete_row(const uchar* const buf) - __attribute__((__warn_unused_result__)); - rocksdb::Status delete_or_singledelete(uint index, - Rdb_transaction* const tx, - rocksdb::ColumnFamilyHandle* const cf, - const rocksdb::Slice& key) - __attribute__((__warn_unused_result__)); - - int index_next(uchar* const buf) __attribute__((__warn_unused_result__)); - int index_next_with_direction(uchar* const buf, bool move_forward) - __attribute__((__warn_unused_result__)); - int index_prev(uchar* const buf) __attribute__((__warn_unused_result__)); - - int index_first(uchar* const buf) __attribute__((__warn_unused_result__)); - int index_last(uchar* const buf) __attribute__((__warn_unused_result__)); - - class Item* idx_cond_push(uint keyno, class Item* const idx_cond); + int open(const char *const name, int mode, uint test_if_locked) override + MY_ATTRIBUTE((__warn_unused_result__)); + int close(void) override MY_ATTRIBUTE((__warn_unused_result__)); + + int write_row(uchar *const buf) override + MY_ATTRIBUTE((__warn_unused_result__)); + int update_row(const uchar *const old_data, uchar *const new_data) override + MY_ATTRIBUTE((__warn_unused_result__)); + int delete_row(const uchar *const buf) override + MY_ATTRIBUTE((__warn_unused_result__)); + rocksdb::Status delete_or_singledelete(uint index, Rdb_transaction *const tx, + rocksdb::ColumnFamilyHandle *const cf, + const rocksdb::Slice &key) + MY_ATTRIBUTE((__warn_unused_result__)); + + int index_next(uchar *const buf) override + MY_ATTRIBUTE((__warn_unused_result__)); + int index_next_with_direction(uchar *const buf, bool move_forward) + 
MY_ATTRIBUTE((__warn_unused_result__)); + int index_prev(uchar *const buf) override + MY_ATTRIBUTE((__warn_unused_result__)); + + int index_first(uchar *const buf) override + MY_ATTRIBUTE((__warn_unused_result__)); + int index_last(uchar *const buf) override + MY_ATTRIBUTE((__warn_unused_result__)); + + class Item *idx_cond_push(uint keyno, class Item *const idx_cond) override; /* Default implementation from cancel_pushed_idx_cond() suits us */ private: - struct key_def_cf_info - { - rocksdb::ColumnFamilyHandle* cf_handle; + struct key_def_cf_info { + rocksdb::ColumnFamilyHandle *cf_handle; bool is_reverse_cf; bool is_auto_cf; }; - struct update_row_info - { - Rdb_transaction* tx; - const uchar* new_data; - const uchar* old_data; - rocksdb::Slice new_pk_slice; - rocksdb::Slice old_pk_slice; + struct update_row_info { + Rdb_transaction *tx; + const uchar *new_data; + const uchar *old_data; + rocksdb::Slice new_pk_slice; + rocksdb::Slice old_pk_slice; // "unpack_info" data for the new PK value Rdb_string_writer *new_pk_unpack_info; - longlong hidden_pk_id; - bool skip_unique_check; + longlong hidden_pk_id; + bool skip_unique_check; }; - int create_cfs(const TABLE* const table_arg, Rdb_tbl_def* const tbl_def_arg, - std::array<struct key_def_cf_info, MAX_INDEXES + 1>* const cfs) const; - __attribute__((__nonnull__, __warn_unused_result__)); - - int create_key_def(const TABLE* const table_arg, const uint &i, - const Rdb_tbl_def* const tbl_def_arg, - std::shared_ptr<Rdb_key_def>* const new_key_def, - const struct key_def_cf_info& cf_info) const; - __attribute__((__nonnull__, __warn_unused_result__)); - - int create_inplace_key_defs(const TABLE* const table_arg, - Rdb_tbl_def* vtbl_def_arg, - const TABLE* const old_table_arg, - const Rdb_tbl_def* const old_tbl_def_arg, - const std::array<key_def_cf_info, MAX_INDEXES + 1>& cfs) const; - __attribute__((__nonnull__, __warn_unused_result__)); - - std::unordered_map<std::string, uint> get_old_key_positions( - const TABLE* 
table_arg, - const Rdb_tbl_def* tbl_def_arg, - const TABLE* old_table_arg, - const Rdb_tbl_def* old_tbl_def_arg) const - __attribute__((__nonnull__)); - - int compare_key_parts(const KEY* const old_key, - const KEY* const new_key) const; - __attribute__((__nonnull__, __warn_unused_result__)); + /* + Used to check for duplicate entries during fast unique secondary index + creation. + */ + struct unique_sk_buf_info { + bool sk_buf_switch = false; + rocksdb::Slice sk_memcmp_key; + rocksdb::Slice sk_memcmp_key_old; + uchar *dup_sk_buf; + uchar *dup_sk_buf_old; + + /* + This method is meant to be called back to back during inplace creation + of unique indexes. It will switch between two buffers, which + will each store the memcmp form of secondary keys, which are then + converted to slices in sk_memcmp_key or sk_memcmp_key_old. + + Switching buffers on each iteration allows us to retain the + sk_memcmp_key_old value for duplicate comparison. + */ + inline uchar *swap_and_get_sk_buf() { + sk_buf_switch = !sk_buf_switch; + return sk_buf_switch ? 
dup_sk_buf : dup_sk_buf_old; + } + }; + + int create_cfs(const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg, + std::array<struct key_def_cf_info, MAX_INDEXES + 1> *const cfs) + const MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + int create_key_def(const TABLE *const table_arg, const uint &i, + const Rdb_tbl_def *const tbl_def_arg, + std::shared_ptr<Rdb_key_def> *const new_key_def, + const struct key_def_cf_info &cf_info) const + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + int create_inplace_key_defs( + const TABLE *const table_arg, Rdb_tbl_def *vtbl_def_arg, + const TABLE *const old_table_arg, + const Rdb_tbl_def *const old_tbl_def_arg, + const std::array<key_def_cf_info, MAX_INDEXES + 1> &cfs) const + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + std::unordered_map<std::string, uint> + get_old_key_positions(const TABLE *table_arg, const Rdb_tbl_def *tbl_def_arg, + const TABLE *old_table_arg, + const Rdb_tbl_def *old_tbl_def_arg) const + MY_ATTRIBUTE((__nonnull__)); + + int compare_key_parts(const KEY *const old_key, + const KEY *const new_key) const; + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); int index_first_intern(uchar *buf) - __attribute__((__nonnull__, __warn_unused_result__)); + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); int index_last_intern(uchar *buf) - __attribute__((__nonnull__, __warn_unused_result__)); + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); enum icp_result check_index_cond() const; - int find_icp_matching_index_rec(const bool &move_forward, uchar* const buf) - __attribute__((__nonnull__, __warn_unused_result__)); + int find_icp_matching_index_rec(const bool &move_forward, uchar *const buf) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); void calc_updated_indexes(); - int update_write_row(const uchar* const old_data, const uchar* const new_data, + int update_write_row(const uchar *const old_data, const uchar *const new_data, const bool skip_unique_check) - 
__attribute__((__warn_unused_result__)); - int get_pk_for_update(struct update_row_info* const row_info); + MY_ATTRIBUTE((__warn_unused_result__)); + int get_pk_for_update(struct update_row_info *const row_info); int check_and_lock_unique_pk(const uint &key_id, - const struct update_row_info& row_info, - bool* const found, bool* const pk_changed) - __attribute__((__warn_unused_result__)); + const struct update_row_info &row_info, + bool *const found, bool *const pk_changed) + MY_ATTRIBUTE((__warn_unused_result__)); int check_and_lock_sk(const uint &key_id, - const struct update_row_info& row_info, - bool* const found) const - __attribute__((__warn_unused_result__)); - int check_uniqueness_and_lock(const struct update_row_info& row_info, - bool* const pk_changed) - __attribute__((__warn_unused_result__)); - bool over_bulk_load_threshold(int* err) - __attribute__((__warn_unused_result__)); - int bulk_load_key(Rdb_transaction* const tx, - const Rdb_key_def& kd, - const rocksdb::Slice& key, - const rocksdb::Slice& value) - __attribute__((__nonnull__, __warn_unused_result__)); - int update_pk(const Rdb_key_def& kd, - const struct update_row_info& row_info, - const bool &pk_changed) - __attribute__((__warn_unused_result__)); - int update_sk(const TABLE* const table_arg, - const Rdb_key_def& kd, - const struct update_row_info& row_info) - __attribute__((__warn_unused_result__)); - int update_indexes(const struct update_row_info& row_info, + const struct update_row_info &row_info, + bool *const found) const + MY_ATTRIBUTE((__warn_unused_result__)); + int check_uniqueness_and_lock(const struct update_row_info &row_info, + bool *const pk_changed) + MY_ATTRIBUTE((__warn_unused_result__)); + bool over_bulk_load_threshold(int *err) + MY_ATTRIBUTE((__warn_unused_result__)); + int check_duplicate_sk(const TABLE *table_arg, const Rdb_key_def &index, + const rocksdb::Slice *key, + struct unique_sk_buf_info *sk_info) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int 
bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd, + const rocksdb::Slice &key, const rocksdb::Slice &value) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int update_pk(const Rdb_key_def &kd, const struct update_row_info &row_info, + const bool &pk_changed) MY_ATTRIBUTE((__warn_unused_result__)); + int update_sk(const TABLE *const table_arg, const Rdb_key_def &kd, + const struct update_row_info &row_info) + MY_ATTRIBUTE((__warn_unused_result__)); + int update_indexes(const struct update_row_info &row_info, const bool &pk_changed) - __attribute__((__warn_unused_result__)); - - int read_key_exact(const Rdb_key_def& kd, - rocksdb::Iterator* const iter, const bool &using_full_key, - const rocksdb::Slice& key_slice) const - __attribute__((__nonnull__, __warn_unused_result__)); - int read_before_key(const Rdb_key_def& kd, - const bool &using_full_key, - const rocksdb::Slice& key_slice) - __attribute__((__nonnull__, __warn_unused_result__)); - int read_after_key(const Rdb_key_def& kd, + MY_ATTRIBUTE((__warn_unused_result__)); + + int read_key_exact(const Rdb_key_def &kd, rocksdb::Iterator *const iter, const bool &using_full_key, - const rocksdb::Slice& key_slice) - __attribute__((__nonnull__, __warn_unused_result__)); - - int position_to_correct_key(const Rdb_key_def& kd, - const enum ha_rkey_function &find_flag, - const bool &full_key_match, - const uchar* const key, - const key_part_map &keypart_map, - const rocksdb::Slice& key_slice, - bool* const move_forward) - __attribute__((__warn_unused_result__)); - - int read_row_from_primary_key(uchar* const buf) - __attribute__((__nonnull__, __warn_unused_result__)); - int read_row_from_secondary_key(uchar* const buf, - const Rdb_key_def& kd, + const rocksdb::Slice &key_slice) const + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int read_before_key(const Rdb_key_def &kd, const bool &using_full_key, + const rocksdb::Slice &key_slice) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int 
read_after_key(const Rdb_key_def &kd, const bool &using_full_key, + const rocksdb::Slice &key_slice) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + int position_to_correct_key( + const Rdb_key_def &kd, const enum ha_rkey_function &find_flag, + const bool &full_key_match, const uchar *const key, + const key_part_map &keypart_map, const rocksdb::Slice &key_slice, + bool *const move_forward) MY_ATTRIBUTE((__warn_unused_result__)); + + int read_row_from_primary_key(uchar *const buf) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int read_row_from_secondary_key(uchar *const buf, const Rdb_key_def &kd, bool move_forward) - __attribute__((__nonnull__, __warn_unused_result__)); - int try_keyonly_read_from_sk(uchar* buf, - const Rdb_key_def& kd, - const rocksdb::Slice& key, - const rocksdb::Slice& value, - uint rowid_size) - __attribute__((__nonnull__, __warn_unused_result__)); - - int calc_eq_cond_len(const Rdb_key_def& kd, + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + int calc_eq_cond_len(const Rdb_key_def &kd, const enum ha_rkey_function &find_flag, - const rocksdb::Slice& slice, + const rocksdb::Slice &slice, const int &bytes_changed_by_succ, - const key_range* const end_key, - uint* const end_key_packed_size) - __attribute__((__warn_unused_result__)); + const key_range *const end_key, + uint *const end_key_packed_size) + MY_ATTRIBUTE((__warn_unused_result__)); - Rdb_tbl_def* get_table_if_exists(const char* const tablename) - __attribute__((__nonnull__, __warn_unused_result__)); - void read_thd_vars(THD* const thd) - __attribute__((__nonnull__)); - const char* thd_rocksdb_tmpdir() - __attribute__((__nonnull__, __warn_unused_result__)); + Rdb_tbl_def *get_table_if_exists(const char *const tablename) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + void read_thd_vars(THD *const thd) MY_ATTRIBUTE((__nonnull__)); + const char *thd_rocksdb_tmpdir() + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - bool 
contains_foreign_key(THD* const thd) - __attribute__((__nonnull__, __warn_unused_result__)); + bool contains_foreign_key(THD *const thd) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int inplace_populate_sk(const TABLE* const table_arg, - const std::unordered_set<std::shared_ptr<Rdb_key_def>>& indexes); + int inplace_populate_sk( + TABLE *const table_arg, + const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); public: - int index_init(uint idx, bool sorted) __attribute__((__warn_unused_result__)); - int index_end() __attribute__((__warn_unused_result__)); + int index_init(uint idx, bool sorted) override + MY_ATTRIBUTE((__warn_unused_result__)); + int index_end() override MY_ATTRIBUTE((__warn_unused_result__)); - void unlock_row(); + void unlock_row() override; /** @brief Unlike index_init(), rnd_init() can be called two consecutive times @@ -943,107 +999,114 @@ public: cursor to the start of the table; no need to deallocate and allocate it again. This is a required method. 
*/ - int rnd_init(bool scan) __attribute__((__warn_unused_result__)); - int rnd_end() __attribute__((__warn_unused_result__)); + int rnd_init(bool scan) override MY_ATTRIBUTE((__warn_unused_result__)); + int rnd_end() override MY_ATTRIBUTE((__warn_unused_result__)); - int rnd_next(uchar* const buf) __attribute__((__warn_unused_result__)); - int rnd_next_with_direction(uchar* const buf, bool move_forward) - __attribute__((__warn_unused_result__)); + int rnd_next(uchar *const buf) override + MY_ATTRIBUTE((__warn_unused_result__)); + int rnd_next_with_direction(uchar *const buf, bool move_forward) + MY_ATTRIBUTE((__warn_unused_result__)); - int rnd_pos(uchar* const buf, uchar* const pos) - __attribute__((__warn_unused_result__)); - void position(const uchar* const record); + int rnd_pos(uchar *const buf, uchar *const pos) override + MY_ATTRIBUTE((__warn_unused_result__)); + void position(const uchar *const record) override; int info(uint) override; /* This function will always return success, therefore no annotation related * to checking the return value. Can't change the signature because it's * required by the interface. 
*/ - int extra(enum ha_extra_function operation); + int extra(enum ha_extra_function operation) override; - int start_stmt(THD* const thd, thr_lock_type lock_type) - __attribute__((__warn_unused_result__)); - int external_lock(THD* const thd, int lock_type) - __attribute__((__warn_unused_result__)); - int delete_all_rows(ha_rows* const nrows) __attribute__((__warn_unused_result__)); - int truncate() __attribute__((__warn_unused_result__)); + int start_stmt(THD *const thd, thr_lock_type lock_type) override + MY_ATTRIBUTE((__warn_unused_result__)); + int external_lock(THD *const thd, int lock_type) override + MY_ATTRIBUTE((__warn_unused_result__)); + int truncate() override MY_ATTRIBUTE((__warn_unused_result__)); + + int reset() override { + DBUG_ENTER_FUNC(); - int reset() override - { /* Free blob data */ m_retrieved_record.clear(); - return 0; + + DBUG_RETURN(HA_EXIT_SUCCESS); } - int check(THD* const thd, HA_CHECK_OPT* const check_opt) - __attribute__((__warn_unused_result__)); - void remove_rows(Rdb_tbl_def* const tbl); - ha_rows records_in_range(uint inx, key_range* const min_key, - key_range* const max_key) - __attribute__((__warn_unused_result__)); - int delete_table(const char* const from) __attribute__((__warn_unused_result__)); - int create(const char* const name, TABLE* const form, - HA_CREATE_INFO* const create_info) - __attribute__((__warn_unused_result__)); - bool check_if_incompatible_data(HA_CREATE_INFO* const info, - uint table_changes) - __attribute__((__warn_unused_result__)); - - THR_LOCK_DATA **store_lock(THD* const thd, THR_LOCK_DATA **to, - enum thr_lock_type lock_type) - __attribute__((__warn_unused_result__)); - - my_bool register_query_cache_table(THD* const thd, char* const table_key, + int check(THD *const thd, HA_CHECK_OPT *const check_opt) override + MY_ATTRIBUTE((__warn_unused_result__)); + void remove_rows(Rdb_tbl_def *const tbl); + ha_rows records_in_range(uint inx, key_range *const min_key, + key_range *const max_key) override + 
MY_ATTRIBUTE((__warn_unused_result__)); + int delete_table(const char *const from) override + MY_ATTRIBUTE((__warn_unused_result__)); + int create(const char *const name, TABLE *const form, + HA_CREATE_INFO *const create_info) override + MY_ATTRIBUTE((__warn_unused_result__)); + bool check_if_incompatible_data(HA_CREATE_INFO *const info, + uint table_changes) override + MY_ATTRIBUTE((__warn_unused_result__)); + + THR_LOCK_DATA **store_lock(THD *const thd, THR_LOCK_DATA **to, + enum thr_lock_type lock_type) override + MY_ATTRIBUTE((__warn_unused_result__)); + + my_bool register_query_cache_table(THD *const thd, char *const table_key, uint key_length, - qc_engine_callback* const engine_callback, - ulonglong* const engine_data) - { + qc_engine_callback *const engine_callback, + ulonglong *const engine_data) override { + DBUG_ENTER_FUNC(); + /* Currently, we don't support query cache */ - return FALSE; + DBUG_RETURN(FALSE); } - bool get_error_message(const int error, String* const buf) - __attribute__((__nonnull__)); + bool get_error_message(const int error, String *const buf) override + MY_ATTRIBUTE((__nonnull__)); void get_auto_increment(ulonglong offset, ulonglong increment, ulonglong nb_desired_values, - ulonglong* const first_value, - ulonglong* const nb_reserved_values); - void update_create_info(HA_CREATE_INFO* const create_info); - int optimize(THD* const thd, HA_CHECK_OPT* const check_opt) - __attribute__((__warn_unused_result__)); - int analyze(THD* const thd, HA_CHECK_OPT* const check_opt) - __attribute__((__warn_unused_result__)); - int calculate_stats(const TABLE* const table_arg, THD* const thd, - HA_CHECK_OPT* const check_opt) - __attribute__((__warn_unused_result__)); + ulonglong *const first_value, + ulonglong *const nb_reserved_values) override; + void update_create_info(HA_CREATE_INFO *const create_info) override; + int optimize(THD *const thd, HA_CHECK_OPT *const check_opt) override + MY_ATTRIBUTE((__warn_unused_result__)); + int analyze(THD *const 
thd, HA_CHECK_OPT *const check_opt) override + MY_ATTRIBUTE((__warn_unused_result__)); + int calculate_stats(const TABLE *const table_arg, THD *const thd, + HA_CHECK_OPT *const check_opt) + MY_ATTRIBUTE((__warn_unused_result__)); enum_alter_inplace_result check_if_supported_inplace_alter( - TABLE *altered_table, - my_core::Alter_inplace_info* const ha_alter_info) override; + TABLE *altered_table, + my_core::Alter_inplace_info *const ha_alter_info) override; - bool prepare_inplace_alter_table(TABLE* const altered_table, - my_core::Alter_inplace_info* const ha_alter_info); + bool prepare_inplace_alter_table( + TABLE *const altered_table, + my_core::Alter_inplace_info *const ha_alter_info) override; - bool inplace_alter_table(TABLE* const altered_table, - my_core::Alter_inplace_info* const ha_alter_info); + bool inplace_alter_table( + TABLE *const altered_table, + my_core::Alter_inplace_info *const ha_alter_info) override; - bool commit_inplace_alter_table(TABLE* const altered_table, - my_core::Alter_inplace_info* const ha_alter_info, - bool commit); + bool + commit_inplace_alter_table(TABLE *const altered_table, + my_core::Alter_inplace_info *const ha_alter_info, + bool commit) override; - int finalize_bulk_load() __attribute__((__warn_unused_result__)); + int finalize_bulk_load() MY_ATTRIBUTE((__warn_unused_result__)); - void set_use_read_free_rpl(const char* const whitelist); - void set_skip_unique_check_tables(const char* const whitelist); + void set_use_read_free_rpl(const char *const whitelist); + void set_skip_unique_check_tables(const char *const whitelist); - public: +public: virtual void rpl_before_delete_rows() override; virtual void rpl_after_delete_rows() override; virtual void rpl_before_update_rows() override; virtual void rpl_after_update_rows() override; virtual bool use_read_free_rpl(); - private: +private: /* Flags tracking if we are inside different replication operation */ bool m_in_rpl_delete_rows; bool m_in_rpl_update_rows; @@ -1055,16 +1118,15 
@@ public: Helper class for in-place alter, for storing handler context between inplace alter calls */ -struct Rdb_inplace_alter_ctx : public my_core::inplace_alter_handler_ctx -{ +struct Rdb_inplace_alter_ctx : public my_core::inplace_alter_handler_ctx { /* The new table definition */ - Rdb_tbl_def* const m_new_tdef; + Rdb_tbl_def *const m_new_tdef; /* Stores the original key definitions */ - std::shared_ptr<Rdb_key_def>* const m_old_key_descr; + std::shared_ptr<Rdb_key_def> *const m_old_key_descr; /* Stores the new key definitions */ - std::shared_ptr<Rdb_key_def>* m_new_key_descr; + std::shared_ptr<Rdb_key_def> *m_new_key_descr; /* Stores the old number of key definitions */ const uint m_old_n_keys; @@ -1085,28 +1147,24 @@ struct Rdb_inplace_alter_ctx : public my_core::inplace_alter_handler_ctx const uint m_n_dropped_keys; Rdb_inplace_alter_ctx( - Rdb_tbl_def* new_tdef, std::shared_ptr<Rdb_key_def>* old_key_descr, - std::shared_ptr<Rdb_key_def>* new_key_descr, uint old_n_keys, + Rdb_tbl_def *new_tdef, std::shared_ptr<Rdb_key_def> *old_key_descr, + std::shared_ptr<Rdb_key_def> *new_key_descr, uint old_n_keys, uint new_n_keys, std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes, - std::unordered_set<GL_INDEX_ID> dropped_index_ids, - uint n_added_keys, uint n_dropped_keys) : - my_core::inplace_alter_handler_ctx(), m_new_tdef(new_tdef), - m_old_key_descr(old_key_descr), m_new_key_descr(new_key_descr), - m_old_n_keys(old_n_keys), m_new_n_keys(new_n_keys), - m_added_indexes(added_indexes), - m_dropped_index_ids(dropped_index_ids), - m_n_added_keys(n_added_keys), - m_n_dropped_keys(n_dropped_keys) - { - } + std::unordered_set<GL_INDEX_ID> dropped_index_ids, uint n_added_keys, + uint n_dropped_keys) + : my_core::inplace_alter_handler_ctx(), m_new_tdef(new_tdef), + m_old_key_descr(old_key_descr), m_new_key_descr(new_key_descr), + m_old_n_keys(old_n_keys), m_new_n_keys(new_n_keys), + m_added_indexes(added_indexes), m_dropped_index_ids(dropped_index_ids), + 
m_n_added_keys(n_added_keys), m_n_dropped_keys(n_dropped_keys) {} - ~Rdb_inplace_alter_ctx(){} + ~Rdb_inplace_alter_ctx() {} - private: +private: /* Disable Copying */ - Rdb_inplace_alter_ctx(const Rdb_inplace_alter_ctx&); - Rdb_inplace_alter_ctx& operator=(const Rdb_inplace_alter_ctx&); + Rdb_inplace_alter_ctx(const Rdb_inplace_alter_ctx &); + Rdb_inplace_alter_ctx &operator=(const Rdb_inplace_alter_ctx &); }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/ha_rocksdb_proto.h b/storage/rocksdb/ha_rocksdb_proto.h index be7b5518423..b30585f5d37 100644 --- a/storage/rocksdb/ha_rocksdb_proto.h +++ b/storage/rocksdb/ha_rocksdb_proto.h @@ -32,27 +32,31 @@ enum RDB_IO_ERROR_TYPE { RDB_IO_ERROR_TX_COMMIT, RDB_IO_ERROR_DICT_COMMIT, RDB_IO_ERROR_BG_THREAD, - RDB_IO_ERROR_GENERAL + RDB_IO_ERROR_GENERAL, + RDB_IO_ERROR_LAST }; -void rdb_handle_io_error(rocksdb::Status status, RDB_IO_ERROR_TYPE err_type); +const char *get_rdb_io_error_string(const RDB_IO_ERROR_TYPE err_type); -int rdb_normalize_tablename(const std::string& tablename, std::string* str) - __attribute__((__nonnull__, __warn_unused_result__)); +void rdb_handle_io_error(const rocksdb::Status status, + const RDB_IO_ERROR_TYPE err_type); -int rdb_split_normalized_tablename(const std::string& fullname, std::string *db, +int rdb_normalize_tablename(const std::string &tablename, std::string *str) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + +int rdb_split_normalized_tablename(const std::string &fullname, std::string *db, std::string *table = nullptr, std::string *partition = nullptr) - __attribute__((__warn_unused_result__)); + MY_ATTRIBUTE((__warn_unused_result__)); std::vector<std::string> rdb_get_open_table_names(void); int rdb_get_table_perf_counters(const char *tablename, Rdb_perf_counters *counters) - __attribute__((__nonnull__(2))); + MY_ATTRIBUTE((__nonnull__(2))); void rdb_get_global_perf_counters(Rdb_perf_counters *counters) - __attribute__((__nonnull__(1))); + 
MY_ATTRIBUTE((__nonnull__(1))); void rdb_queue_save_stats_request(); @@ -63,20 +67,20 @@ void rdb_queue_save_stats_request(); rocksdb::TransactionDB *rdb_get_rocksdb_db(); class Rdb_cf_manager; -Rdb_cf_manager& rdb_get_cf_manager(); +Rdb_cf_manager &rdb_get_cf_manager(); -rocksdb::BlockBasedTableOptions& rdb_get_table_options(); +rocksdb::BlockBasedTableOptions &rdb_get_table_options(); class Rdb_dict_manager; Rdb_dict_manager *rdb_get_dict_manager(void) - __attribute__((__warn_unused_result__)); + MY_ATTRIBUTE((__warn_unused_result__)); class Rdb_ddl_manager; Rdb_ddl_manager *rdb_get_ddl_manager(void) - __attribute__((__warn_unused_result__)); + MY_ATTRIBUTE((__warn_unused_result__)); class Rdb_binlog_manager; Rdb_binlog_manager *rdb_get_binlog_manager(void) - __attribute__((__warn_unused_result__)); + MY_ATTRIBUTE((__warn_unused_result__)); -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/logger.h b/storage/rocksdb/logger.h index dd6dd138e56..f639f807549 100644 --- a/storage/rocksdb/logger.h +++ b/storage/rocksdb/logger.h @@ -21,13 +21,10 @@ namespace myrocks { -class Rdb_logger : public rocksdb::Logger -{ - public: - void Logv(const rocksdb::InfoLogLevel log_level, - const char* format, - va_list ap) override - { +class Rdb_logger : public rocksdb::Logger { +public: + void Logv(const rocksdb::InfoLogLevel log_level, const char *format, + va_list ap) override { DBUG_ASSERT(format != nullptr); enum loglevel mysql_log_level; @@ -41,11 +38,11 @@ class Rdb_logger : public rocksdb::Logger } if (log_level >= rocksdb::InfoLogLevel::ERROR_LEVEL) { - mysql_log_level= ERROR_LEVEL; + mysql_log_level = ERROR_LEVEL; } else if (log_level >= rocksdb::InfoLogLevel::WARN_LEVEL) { - mysql_log_level= WARNING_LEVEL; + mysql_log_level = WARNING_LEVEL; } else { - mysql_log_level= INFORMATION_LEVEL; + mysql_log_level = INFORMATION_LEVEL; } // log to MySQL @@ -54,20 +51,18 @@ class Rdb_logger : public rocksdb::Logger error_log_print(mysql_log_level, 
f.c_str(), ap); } - void Logv(const char* format, va_list ap) override - { + void Logv(const char *format, va_list ap) override { DBUG_ASSERT(format != nullptr); // If no level is specified, it is by default at information level Logv(rocksdb::InfoLogLevel::INFO_LEVEL, format, ap); } - void SetRocksDBLogger(const std::shared_ptr<rocksdb::Logger> logger) - { + void SetRocksDBLogger(const std::shared_ptr<rocksdb::Logger> logger) { m_logger = logger; } - private: +private: std::shared_ptr<rocksdb::Logger> m_logger; }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result b/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result index bfa06f88011..97238282ebe 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result @@ -4,20 +4,20 @@ CREATE DATABASE mysqlslap; USE mysqlslap; CREATE TABLE t1(id BIGINT AUTO_INCREMENT, value BIGINT, PRIMARY KEY(id)) ENGINE=rocksdb; # 2PC enabled, MyRocks durability enabled -SET GLOBAL rocksdb_disable_2pc=0; +SET GLOBAL rocksdb_enable_2pc=0; SET GLOBAL rocksdb_write_sync=1; ## 2PC + durability + single thread select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs'; select case when variable_value-@c = 1000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs'; case when variable_value-@c = 1000 then 'true' else 'false' end -true +false ## 2PC + durability + group commit select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs'; select case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs'; case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true' else 'false' end -true +false # 2PC 
enabled, MyRocks durability disabled -SET GLOBAL rocksdb_disable_2pc=0; +SET GLOBAL rocksdb_enable_2pc=0; SET GLOBAL rocksdb_write_sync=0; select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs'; select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs'; @@ -28,17 +28,17 @@ select case when variable_value-@c = 0 then 'true' else 'false' end from informa case when variable_value-@c = 0 then 'true' else 'false' end true # 2PC disabled, MyRocks durability enabled -SET GLOBAL rocksdb_disable_2pc=1; +SET GLOBAL rocksdb_enable_2pc=1; SET GLOBAL rocksdb_write_sync=1; select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs'; select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs'; case when variable_value-@c = 0 then 'true' else 'false' end -true +false select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs'; select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs'; case when variable_value-@c = 0 then 'true' else 'false' end -true -SET GLOBAL rocksdb_disable_2pc=1; +false +SET GLOBAL rocksdb_enable_2pc=1; SET GLOBAL rocksdb_write_sync=0; DROP TABLE t1; DROP DATABASE mysqlslap; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_crash.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_crash.result index 987b34948e8..05455e76e5b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_crash.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_crash.result @@ -60,11 +60,8 @@ CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i), KEY(j)) ENGINE = ROCKSDB # 
crash_during_index_creation_partition flush logs; SET SESSION debug="+d,myrocks_simulate_index_create_rollback"; -# expected assertion failure from sql layer here for alter rollback -call mtr.add_suppression("Assertion `0' failed."); -call mtr.add_suppression("Attempting backtrace. You can use the following information to find out"); ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE; -ERROR HY000: Lost connection to MySQL server during query +ERROR HY000: Intentional failure in inplace alter occurred. SET SESSION debug="-d,myrocks_simulate_index_create_rollback"; SHOW CREATE TABLE t1; Table Create Table diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_unique_index_inplace.result b/storage/rocksdb/mysql-test/rocksdb/r/add_unique_index_inplace.result new file mode 100644 index 00000000000..dbd22a9f1f4 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/add_unique_index_inplace.result @@ -0,0 +1,89 @@ +drop table if exists t1; +CREATE TABLE t1 (a INT, b INT, PRIMARY KEY ka(a)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +INSERT INTO t1 (a,b) VALUES (4,5); +ALTER TABLE t1 ADD UNIQUE INDEX kb(b), ALGORITHM=INPLACE; +ERROR 23000: Duplicate entry '5' for key 'kb' +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL DEFAULT '0', + `b` int(11) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +DROP TABLE t1; +CREATE TABLE t1 (a INT, b INT, PRIMARY KEY ka(a)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +ALTER TABLE t1 ADD UNIQUE INDEX kb(b), ALGORITHM=INPLACE; +INSERT INTO t1 (a,b) VALUES (4,5); +ERROR 23000: Duplicate entry '5' for key 'kb' +INSERT INTO t1 (a,b) VALUES (5,8); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL DEFAULT '0', + `b` int(11) DEFAULT NULL, + PRIMARY KEY (`a`), + 
UNIQUE KEY `kb` (`b`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +SELECT * FROM t1 FORCE INDEX(kb); +a b +1 5 +2 6 +3 7 +5 8 +DROP TABLE t1; +CREATE TABLE t1 (a INT, b INT, PRIMARY KEY ka(a)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, NULL); +INSERT INTO t1 (a, b) VALUES (3, NULL); +ALTER TABLE t1 ADD UNIQUE INDEX kb(b), ALGORITHM=INPLACE; +INSERT INTO t1 (a, b) VALUES (4, NULL); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL DEFAULT '0', + `b` int(11) DEFAULT NULL, + PRIMARY KEY (`a`), + UNIQUE KEY `kb` (`b`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +SELECT COUNT(*) FROM t1 FORCE INDEX(kb); +COUNT(*) +4 +DROP TABLE t1; +CREATE TABLE t1 (a INT, b INT, c INT, PRIMARY KEY ka(a)) ENGINE=RocksDB; +INSERT INTO t1 (a,b,c) VALUES (1,1,NULL); +INSERT INTO t1 (a,b,c) VALUES (2,1,NULL); +INSERT INTO t1 (a,b,c) VALUES (3,1,NULL); +INSERT INTO t1 (a,b,c) VALUES (4,1,5); +ALTER TABLE t1 ADD UNIQUE INDEX kbc(b,c), ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL DEFAULT '0', + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + PRIMARY KEY (`a`), + UNIQUE KEY `kbc` (`b`,`c`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +SELECT COUNT(*) FROM t1 FORCE INDEX(kbc); +COUNT(*) +4 +DROP TABLE t1; +CREATE TABLE t1 (a INT, b INT) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +ALTER TABLE t1 ADD UNIQUE INDEX kb(b); +ERROR HY000: Unique index support is disabled when the table has no primary key. 
+SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_file_map.result b/storage/rocksdb/mysql-test/rocksdb/r/index_file_map.result index c3e54a25864..2c7d37c053f 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/index_file_map.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/index_file_map.result @@ -10,19 +10,19 @@ SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP WHERE INDEX_NUMBER = (SELECT INDEX_NUMBER FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME = 't1' AND INDEX_NAME = "PRIMARY"); -COLUMN_FAMILY INDEX_NUMBER SST_NAME NUM_ROWS DATA_SIZE ENTRY_DELETES ENTRY_SINGLEDELETES ENTRY_MERGES ENTRY_OTHERS -# # SSTNAME 5 # # # # # +COLUMN_FAMILY INDEX_NUMBER SST_NAME NUM_ROWS DATA_SIZE ENTRY_DELETES ENTRY_SINGLEDELETES ENTRY_MERGES ENTRY_OTHERS DISTINCT_KEYS_PREFIX +# # SSTNAME 5 # # # # # 5 SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP WHERE INDEX_NUMBER = (SELECT INDEX_NUMBER FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME = 't1' AND INDEX_NAME = "j"); -COLUMN_FAMILY INDEX_NUMBER SST_NAME NUM_ROWS DATA_SIZE ENTRY_DELETES ENTRY_SINGLEDELETES ENTRY_MERGES ENTRY_OTHERS -# # SSTNAME 5 # # # # # +COLUMN_FAMILY INDEX_NUMBER SST_NAME NUM_ROWS DATA_SIZE ENTRY_DELETES ENTRY_SINGLEDELETES ENTRY_MERGES ENTRY_OTHERS DISTINCT_KEYS_PREFIX +# # SSTNAME 5 # # # # # 5,5 SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP WHERE INDEX_NUMBER = (SELECT INDEX_NUMBER FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME = 't2' AND INDEX_NAME = "PRIMARY"); -COLUMN_FAMILY INDEX_NUMBER SST_NAME NUM_ROWS DATA_SIZE ENTRY_DELETES ENTRY_SINGLEDELETES ENTRY_MERGES ENTRY_OTHERS -# # SSTNAME 4 # # # # # +COLUMN_FAMILY INDEX_NUMBER SST_NAME NUM_ROWS DATA_SIZE ENTRY_DELETES ENTRY_SINGLEDELETES ENTRY_MERGES ENTRY_OTHERS DISTINCT_KEYS_PREFIX +# # SSTNAME 4 # # # # # 4 DROP TABLE t1; DROP TABLE 
t2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result b/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result index d6177a3f019..f55662183ca 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result @@ -9,6 +9,7 @@ CF_FLAGS 1 __system__ [0] select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; count(*) 3 +select VALUE into @keysIn from INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS where CF_NAME = 'default' and LEVEL = 'Sum' and TYPE = 'KeyIn'; CREATE TABLE t1 (i1 INT, i2 INT, PRIMARY KEY (i1)) ENGINE = ROCKSDB; INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3); select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; @@ -22,6 +23,11 @@ CF_FLAGS 1 __system__ [0] select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; count(*) 6 +set global rocksdb_force_flush_memtable_now = true; +set global rocksdb_compact_cf='default'; +select case when VALUE-@keysIn >= 3 then 'true' else 'false' end from INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS where CF_NAME = 'default' and LEVEL = 'Sum' and TYPE = 'KeyIn'; +case when VALUE-@keysIn >= 3 then 'true' else 'false' end +true CREATE INDEX tindex1 on t1 (i1); CREATE INDEX tindex2 on t1 (i2); select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where TYPE = 'CF_FLAGS'; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/loaddata.result b/storage/rocksdb/mysql-test/rocksdb/r/loaddata.result index 5f6df197c94..a9f9c0b49e8 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/loaddata.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/loaddata.result @@ -121,7 +121,7 @@ a b 5 loaded 7 test DROP TABLE t1; -set session rocksdb_skip_unique_check=1; +set session unique_checks=0; DROP TABLE IF EXISTS t1; CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb; LOAD DATA INFILE '<DATADIR>/se_loaddata.dat' INTO TABLE t1 diff --git a/storage/rocksdb/mysql-test/rocksdb/r/persistent_cache.result 
b/storage/rocksdb/mysql-test/rocksdb/r/persistent_cache.result new file mode 100644 index 00000000000..bc5739c2d96 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/persistent_cache.result @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 (a int primary key) ENGINE=ROCKSDB; +insert into t1 values (1); +set global rocksdb_force_flush_memtable_now=1; +select * from t1 where a = 1; +a +1 +select * from t1 where a = 1; +a +1 +drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result index b6a17d90221..9fb28791834 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result @@ -890,8 +890,8 @@ rocksdb_deadlock_detect OFF rocksdb_debug_optimizer_no_zero_cardinality ON rocksdb_default_cf_options rocksdb_delete_obsolete_files_period_micros 21600000000 -rocksdb_disable_2pc ON rocksdb_disabledatasync OFF +rocksdb_enable_2pc ON rocksdb_enable_bulk_load_api ON rocksdb_enable_thread_tracking OFF rocksdb_enable_write_thread_adaptive_yield OFF @@ -924,17 +924,17 @@ rocksdb_override_cf_options rocksdb_paranoid_checks ON rocksdb_pause_background_work ON rocksdb_perf_context_level 0 +rocksdb_persistent_cache_path +rocksdb_persistent_cache_size 0 rocksdb_pin_l0_filter_and_index_blocks_in_cache ON rocksdb_print_snapshot_conflict_queries OFF rocksdb_rate_limiter_bytes_per_sec 0 rocksdb_read_free_rpl_tables rocksdb_records_in_range 50 -rocksdb_rpl_skip_tx_api OFF rocksdb_seconds_between_stat_computes 3600 rocksdb_signal_drop_index_thread OFF rocksdb_skip_bloom_filter_on_read OFF rocksdb_skip_fill_cache OFF -rocksdb_skip_unique_check OFF rocksdb_skip_unique_check_tables .* rocksdb_stats_dump_period_sec 600 rocksdb_store_row_debug_checksums OFF @@ -2231,7 +2231,7 @@ DROP DATABASE test_db; # Issue #143: Split rocksdb_bulk_load option into two # CREATE TABLE t1 (id int primary key, value int) engine=RocksDB; -SET 
rocksdb_skip_unique_check=1; +SET unique_checks=0; INSERT INTO t1 VALUES(1, 1); INSERT INTO t1 VALUES(1, 2); INSERT INTO t1 VALUES(1, 3); @@ -2243,7 +2243,7 @@ INSERT INTO t1 VALUES(5, 5) ON DUPLICATE KEY UPDATE value=value+1; ERROR HY000: When unique checking is disabled in MyRocks, INSERT,UPDATE,LOAD statements with clauses that update or replace the key (i.e. INSERT ON DUPLICATE KEY UPDATE, REPLACE) are not allowed. Query: INSERT INTO t1 VALUES(5, 5) ON DUPLICATE KEY UPDATE value=value+1 TRUNCATE TABLE t1; SET @save_rocksdb_bulk_load_size= @@rocksdb_bulk_load_size; -SET rocksdb_skip_unique_check=0; +SET unique_checks=1; SET rocksdb_commit_in_the_middle=1; SET rocksdb_bulk_load_size=10; BEGIN; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/trx_info_rpl.result b/storage/rocksdb/mysql-test/rocksdb/r/trx_info_rpl.result index 1e0c7a5adbf..35147ac7a15 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/trx_info_rpl.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/trx_info_rpl.result @@ -6,9 +6,9 @@ Note #### Storing MySQL user name or password information in the master info rep DROP TABLE IF EXISTS t1; include/stop_slave.inc create table t1 (a int) engine=rocksdb; -show variables like 'rocksdb_rpl_skip_tx_api'; +show variables like 'rpl_skip_tx_api'; Variable_name Value -rocksdb_rpl_skip_tx_api ON +rpl_skip_tx_api ON include/start_slave.inc found DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/unique_sec.result b/storage/rocksdb/mysql-test/rocksdb/r/unique_sec.result index 59ad709a595..64db56ca78e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/unique_sec.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/unique_sec.result @@ -183,3 +183,24 @@ ERROR 23000: Duplicate entry '1-1' for key 'PRIMARY' INSERT INTO t2 VALUES (2,1); ERROR 23000: Duplicate entry '1' for key 'a' DROP TABLE t2; +# +# Issue #491 (https://github.com/facebook/mysql-5.6/issues/491) +# +CREATE TABLE t (a BLOB, PRIMARY KEY(a(2)), UNIQUE KEY (a(1))) engine=rocksdb; +INSERT INTO t 
VALUES('a'); +CHECK TABLE t EXTENDED; +Table Op Msg_type Msg_text +test.t check status OK +DROP TABLE t; +CREATE TABLE t (a VARCHAR(255), PRIMARY KEY(a), UNIQUE KEY (a(1))) engine=rocksdb; +INSERT INTO t VALUES('a'); +CHECK TABLE t EXTENDED; +Table Op Msg_type Msg_text +test.t check status OK +DROP TABLE t; +CREATE TABLE t (a VARCHAR(255), PRIMARY KEY(a(2)), UNIQUE KEY (a(1))) engine=rocksdb; +INSERT INTO t VALUES('a'); +CHECK TABLE t EXTENDED; +Table Op Msg_type Msg_text +test.t check status OK +DROP TABLE t; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test b/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test index c806e46aa4d..90af6617794 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test @@ -13,7 +13,7 @@ USE mysqlslap; CREATE TABLE t1(id BIGINT AUTO_INCREMENT, value BIGINT, PRIMARY KEY(id)) ENGINE=rocksdb; --echo # 2PC enabled, MyRocks durability enabled -SET GLOBAL rocksdb_disable_2pc=0; +SET GLOBAL rocksdb_enable_2pc=0; SET GLOBAL rocksdb_write_sync=1; --echo ## 2PC + durability + single thread @@ -28,7 +28,7 @@ select case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true' --echo # 2PC enabled, MyRocks durability disabled -SET GLOBAL rocksdb_disable_2pc=0; +SET GLOBAL rocksdb_enable_2pc=0; SET GLOBAL rocksdb_write_sync=0; select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs'; @@ -41,7 +41,7 @@ select case when variable_value-@c = 0 then 'true' else 'false' end from informa --echo # 2PC disabled, MyRocks durability enabled -SET GLOBAL rocksdb_disable_2pc=1; +SET GLOBAL rocksdb_enable_2pc=1; SET GLOBAL rocksdb_write_sync=1; select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs'; @@ -58,7 +58,7 @@ select case when variable_value-@c = 0 then 'true' else 'false' end from informa -SET GLOBAL rocksdb_disable_2pc=1; 
+SET GLOBAL rocksdb_enable_2pc=1; SET GLOBAL rocksdb_write_sync=0; DROP TABLE t1; DROP DATABASE mysqlslap; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_crash.test b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_crash.test index ca9122bccd7..11134f16201 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_crash.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_crash.test @@ -89,22 +89,11 @@ while ($i <= $max) { --echo # crash_during_index_creation_partition flush logs; ---exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect SET SESSION debug="+d,myrocks_simulate_index_create_rollback"; ---echo # expected assertion failure from sql layer here for alter rollback -call mtr.add_suppression("Assertion `0' failed."); -call mtr.add_suppression("Attempting backtrace. You can use the following information to find out"); - ---error 2013 - +--error 1105 ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE; - ---enable_reconnect ---source include/wait_until_connected_again.inc - SET SESSION debug="-d,myrocks_simulate_index_create_rollback"; - SHOW CREATE TABLE t1; # here, the index numbers should be higher because previously 4 index numbers diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_unique_index_inplace.test b/storage/rocksdb/mysql-test/rocksdb/t/add_unique_index_inplace.test new file mode 100644 index 00000000000..375a63c3a38 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/add_unique_index_inplace.test @@ -0,0 +1,82 @@ +--source include/have_rocksdb.inc +--source include/have_debug.inc + +--disable_warnings +drop table if exists t1; +--enable_warnings + +# test adding duplicate value before unique index +CREATE TABLE t1 (a INT, b INT, PRIMARY KEY ka(a)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); + +INSERT INTO t1 (a,b) VALUES (4,5); + +# should cause error here, duplicate value on b +--error 1062 +ALTER 
TABLE t1 ADD UNIQUE INDEX kb(b), ALGORITHM=INPLACE; + +SHOW CREATE TABLE t1; +DROP TABLE t1; + +# test dup value AFTER unique index +CREATE TABLE t1 (a INT, b INT, PRIMARY KEY ka(a)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +ALTER TABLE t1 ADD UNIQUE INDEX kb(b), ALGORITHM=INPLACE; + +# should error here, duplicate value on b +--error 1062 +INSERT INTO t1 (a,b) VALUES (4,5); + +# should succeed +INSERT INTO t1 (a,b) VALUES (5,8); + +SHOW CREATE TABLE t1; +SELECT * FROM t1 FORCE INDEX(kb); +DROP TABLE t1; + +# test what happens when duplicate nulls exist +CREATE TABLE t1 (a INT, b INT, PRIMARY KEY ka(a)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, NULL); +INSERT INTO t1 (a, b) VALUES (3, NULL); + +# should pass, because in MySQL we allow multiple NULLS in unique key +ALTER TABLE t1 ADD UNIQUE INDEX kb(b), ALGORITHM=INPLACE; +INSERT INTO t1 (a, b) VALUES (4, NULL); + +SHOW CREATE TABLE t1; +SELECT COUNT(*) FROM t1 FORCE INDEX(kb); +DROP TABLE t1; + +## test case with multi-part key with nulls +CREATE TABLE t1 (a INT, b INT, c INT, PRIMARY KEY ka(a)) ENGINE=RocksDB; +INSERT INTO t1 (a,b,c) VALUES (1,1,NULL); +INSERT INTO t1 (a,b,c) VALUES (2,1,NULL); +INSERT INTO t1 (a,b,c) VALUES (3,1,NULL); +INSERT INTO t1 (a,b,c) VALUES (4,1,5); + +# should pass +ALTER TABLE t1 ADD UNIQUE INDEX kbc(b,c), ALGORITHM=INPLACE; + +SHOW CREATE TABLE t1; +SELECT COUNT(*) FROM t1 FORCE INDEX(kbc); +DROP TABLE t1; + +## test case with table w/ no primary key, and we try to add unique key +CREATE TABLE t1 (a INT, b INT) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); + +# should fail, can't add unique index on table w/ no pk +--error 1105 +ALTER TABLE t1 ADD UNIQUE INDEX kb(b); + +SHOW CREATE TABLE t1; +DROP TABLE t1; + + diff --git 
a/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test b/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test index 39bae56bea6..c20ab17ff6c 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test @@ -12,6 +12,8 @@ DROP TABLE IF EXISTS t3; select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; +select VALUE into @keysIn from INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS where CF_NAME = 'default' and LEVEL = 'Sum' and TYPE = 'KeyIn'; + CREATE TABLE t1 (i1 INT, i2 INT, PRIMARY KEY (i1)) ENGINE = ROCKSDB; INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3); @@ -21,6 +23,10 @@ INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3); select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; +set global rocksdb_force_flush_memtable_now = true; +set global rocksdb_compact_cf='default'; +select case when VALUE-@keysIn >= 3 then 'true' else 'false' end from INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS where CF_NAME = 'default' and LEVEL = 'Sum' and TYPE = 'KeyIn'; + CREATE INDEX tindex1 on t1 (i1); --let $start_max_index_id = query_get_value(SELECT * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type = 'MAX_INDEX_ID', VALUE, 1) diff --git a/storage/rocksdb/mysql-test/rocksdb/t/loaddata.test b/storage/rocksdb/mysql-test/rocksdb/t/loaddata.test index 837fa746ed7..1f59d5ce204 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/loaddata.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/loaddata.test @@ -3,6 +3,5 @@ let $skip_unique_check = 0; --source loaddata.inc let $skip_unique_check = 1; -set session rocksdb_skip_unique_check=1; +set session unique_checks=0; --source loaddata.inc - diff --git a/storage/rocksdb/mysql-test/rocksdb/t/persistent_cache.test b/storage/rocksdb/mysql-test/rocksdb/t/persistent_cache.test new file mode 100644 index 00000000000..ec00ddee5db --- /dev/null +++ 
b/storage/rocksdb/mysql-test/rocksdb/t/persistent_cache.test @@ -0,0 +1,41 @@ +--source include/have_rocksdb.inc + +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings + +--let $_server_id= `SELECT @@server_id` +--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect +--let $_cache_file_name= $MYSQLTEST_VARDIR/tmp/persistent_cache +--exec echo "wait" >$_expect_file_name + +# restart server with correct parameters +shutdown_server 10; +--exec echo "restart:--rocksdb_persistent_cache_path=$_cache_file_name --rocksdb_persistent_cache_size=1000000000" >$_expect_file_name +--sleep 5 +--enable_reconnect +--source include/wait_until_connected_again.inc +--disable_reconnect + + +# insert values and flush out of memtable +CREATE TABLE t1 (a int primary key) ENGINE=ROCKSDB; +insert into t1 values (1); +set global rocksdb_force_flush_memtable_now=1; + +# pull data through cache +select * from t1 where a = 1; + +# restart server to re-read cache +--exec echo "wait" >$_expect_file_name +shutdown_server 10; +--exec echo "restart:--rocksdb_persistent_cache_path=$_cache_file_name --rocksdb_persistent_cache_size=1000000000" >$_expect_file_name +--sleep 5 +--enable_reconnect +--source include/wait_until_connected_again.inc +--disable_reconnect + +# pull values from cache again +select * from t1 where a = 1; + +drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test index 7ec15d157a7..ed26d036e9a 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test @@ -1768,7 +1768,7 @@ DROP DATABASE test_db; --echo # Issue #143: Split rocksdb_bulk_load option into two --echo # CREATE TABLE t1 (id int primary key, value int) engine=RocksDB; -SET rocksdb_skip_unique_check=1; +SET unique_checks=0; INSERT INTO t1 VALUES(1, 1); INSERT INTO t1 VALUES(1, 2); INSERT INTO t1 VALUES(1, 3); @@ -1779,7 +1779,7 @@ REPLACE INTO t1 VALUES(4, 4); 
INSERT INTO t1 VALUES(5, 5) ON DUPLICATE KEY UPDATE value=value+1; TRUNCATE TABLE t1; SET @save_rocksdb_bulk_load_size= @@rocksdb_bulk_load_size; -SET rocksdb_skip_unique_check=0; +SET unique_checks=1; SET rocksdb_commit_in_the_middle=1; SET rocksdb_bulk_load_size=10; BEGIN; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.cnf b/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.cnf index f5b725932e4..f4257d80fdb 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.cnf +++ b/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.cnf @@ -5,4 +5,4 @@ binlog_format=row [mysqld.2] binlog_format=row slave_parallel_workers=1 -rocksdb_rpl_skip_tx_api=ON +rpl_skip_tx_api=ON diff --git a/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test b/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test index 19499765140..452a7989b0b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test @@ -17,7 +17,7 @@ while ($aa < 1000) { --enable_query_log connection slave; -show variables like 'rocksdb_rpl_skip_tx_api'; +show variables like 'rpl_skip_tx_api'; --source include/start_slave.inc --let $it=0 diff --git a/storage/rocksdb/mysql-test/rocksdb/t/unique_sec.test b/storage/rocksdb/mysql-test/rocksdb/t/unique_sec.test index 28b52f262cc..4bc6d6262f7 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/unique_sec.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/unique_sec.test @@ -31,3 +31,21 @@ INSERT INTO t2 VALUES (1,1); --error ER_DUP_ENTRY INSERT INTO t2 VALUES (2,1); DROP TABLE t2; + +--echo # +--echo # Issue #491 (https://github.com/facebook/mysql-5.6/issues/491) +--echo # +CREATE TABLE t (a BLOB, PRIMARY KEY(a(2)), UNIQUE KEY (a(1))) engine=rocksdb; +INSERT INTO t VALUES('a'); +CHECK TABLE t EXTENDED; +DROP TABLE t; + +CREATE TABLE t (a VARCHAR(255), PRIMARY KEY(a), UNIQUE KEY (a(1))) engine=rocksdb; +INSERT INTO t VALUES('a'); +CHECK TABLE t EXTENDED; +DROP TABLE t; + +CREATE TABLE t (a 
VARCHAR(255), PRIMARY KEY(a(2)), UNIQUE KEY (a(1))) engine=rocksdb; +INSERT INTO t VALUES('a'); +CHECK TABLE t EXTENDED; +DROP TABLE t; diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_slocket_socket.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_slocket_socket.sh new file mode 100755 index 00000000000..6174e5d1864 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_slocket_socket.sh @@ -0,0 +1,2 @@ +src_data_dir="${MYSQLTEST_VARDIR}/mysqld.1/data/" +python -c "import socket as s; sock = s.socket(s.AF_UNIX); sock.bind('${src_data_dir}/slocket')" diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data_slocket.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data_slocket.sh new file mode 100755 index 00000000000..ed0b3cb5c1c --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data_slocket.sh @@ -0,0 +1,43 @@ +set -e + +# Insert 10 batches of 10 records each to a table with following schema: +# create table slocket.t1 ( +# `id` int(10) not null auto_increment, +# `k` int(10), +# `data` varchar(2048), +# primary key (`id`), +# key (`k`) +# ) engine=innodb; + +MAX_INSERTS=10 +MAX_ROWS_PER_INSERT=10 + +insertData() { + for ((i=1; i<=$MAX_INSERTS; i++)); + do + stmt='INSERT INTO slocket.t1 values' + for ((j=1; j<=$MAX_ROWS_PER_INSERT; j++)); + do + k=$RANDOM + data=$(head -c 2048 /dev/urandom|tr -cd 'a-zA-Z0-9') + stmt=$stmt' (NULL, '$k', "'$data'")' + if [ $j -lt $MAX_ROWS_PER_INSERT ]; then + stmt=$stmt',' + fi + done + stmt=$stmt';' + $MYSQL --defaults-group-suffix=.1 -e "$stmt" + done +} + +NUM_PARALLEL_INSERTS=25 +pids=() +for ((k=1; k<=$NUM_PARALLEL_INSERTS; k++)); +do + insertData & + pids+=($!) 
+done +for ((k=1; k<=$NUM_PARALLEL_INSERTS; k++)); +do + wait ${pids[k]} +done diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/remove_slocket_socket.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/remove_slocket_socket.sh new file mode 100755 index 00000000000..0c2c71aad68 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/remove_slocket_socket.sh @@ -0,0 +1,2 @@ +src_data_dir="${MYSQLTEST_VARDIR}/mysqld.1/data/" +rm "${src_data_dir}/slocket" diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_slocket.inc b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_slocket.inc new file mode 100644 index 00000000000..ce889164219 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_slocket.inc @@ -0,0 +1,10 @@ +connection server_1; +create database slocket; + +create table slocket.t1 ( + `id` int(10) not null auto_increment, + `k` int(10), + `data` varchar(2048), + primary key (`id`), + key (`k`) +) engine=rocksdb; diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh index b83b957cff0..ef505e4b888 100755 --- a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh @@ -39,6 +39,11 @@ elif [ "$STREAM_TYPE" == 'xbstream' ]; then --stream=xbstream --checkpoint_dir=$checkpoint_dir 2> \ $COPY_LOG | xbstream -x \ --directory=$backup_dir" +elif [ "$STREAM_TYPE" == "xbstream_socket" ]; then + BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --socket=${MASTER_MYSOCK} \ + --stream=xbstream --checkpoint_dir=$checkpoint_dir 2> \ + $COPY_LOG | xbstream -x \ + --directory=$backup_dir" else BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --stream=wdt \ --port=${MASTER_MYPORT} --destination=localhost --backup_dir=$backup_dir \ diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/slocket.result 
b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/slocket.result new file mode 100644 index 00000000000..9accd18b294 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/slocket.result @@ -0,0 +1,41 @@ +include/rpl_init.inc [topology=none] +include/rpl_default_connections.inc +create database db1; +create table db1.t1 ( +`id` int(10) not null auto_increment, +`k` int(10), +`data` varchar(2048), +primary key (`id`), +key (`k`) +) engine=rocksdb; +create database slocket; +create table slocket.t1 ( +`id` int(10) not null auto_increment, +`k` int(10), +`data` varchar(2048), +primary key (`id`), +key (`k`) +) engine=rocksdb; +include/rpl_stop_server.inc [server_number=2] +myrocks_hotbackup copy phase +myrocks_hotbackup move-back phase +include/rpl_start_server.inc [server_number=2] +select count(*) from db1.t1; +count(*) +250000 +select count(*) from slocket.t1; +count(*) +2500 +drop database slocket; +drop database db1; +drop database slocket; +include/rpl_stop_server.inc [server_number=2] +myrocks_hotbackup copy phase +myrocks_hotbackup move-back phase +include/rpl_start_server.inc [server_number=2] +select count(*) from db1.t1; +count(*) +250000 +drop database db1; +drop database db1; +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_socket.result b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_socket.result new file mode 100644 index 00000000000..d3f2ebc4e6f --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_socket.result @@ -0,0 +1,20 @@ +include/rpl_init.inc [topology=none] +include/rpl_default_connections.inc +create database db1; +create table db1.t1 ( +`id` int(10) not null auto_increment, +`k` int(10), +`data` varchar(2048), +primary key (`id`), +key (`k`) +) engine=rocksdb; +include/rpl_stop_server.inc [server_number=2] +myrocks_hotbackup copy phase +myrocks_hotbackup move-back phase +include/rpl_start_server.inc [server_number=2] +select count(*) from db1.t1; +count(*) 
+250000 +drop database db1; +drop database db1; +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/slocket.test b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/slocket.test new file mode 100644 index 00000000000..14ad8d23376 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/slocket.test @@ -0,0 +1,46 @@ +source suite/rocksdb_hotbackup/include/setup.inc; +source suite/rocksdb_hotbackup/include/setup_slocket.inc; + +--exec suite/rocksdb_hotbackup/include/load_data.sh 2>&1 +--exec suite/rocksdb_hotbackup/include/load_data_slocket.sh 2>&1 + +--let $rpl_server_number= 2 +--source include/rpl_stop_server.inc + +--exec suite/rocksdb_hotbackup/include/stream_run.sh 2>&1 + +--let $rpl_server_number= 2 +--source include/rpl_start_server.inc + +connection server_2; +select count(*) from db1.t1; +select count(*) from slocket.t1; + +connection server_1; +drop database slocket; +connection server_2; +drop database db1; +drop database slocket; + +--exec sleep 2 +--exec suite/rocksdb_hotbackup/include/create_slocket_socket.sh 2>&1 + +--let $rpl_server_number= 2 +--source include/rpl_stop_server.inc + +--exec suite/rocksdb_hotbackup/include/stream_run.sh 2>&1 + +--let $rpl_server_number= 2 +--source include/rpl_start_server.inc + +connection server_2; +select count(*) from db1.t1; + +connection server_1; +drop database db1; +connection server_2; +drop database db1; + +--exec suite/rocksdb_hotbackup/include/remove_slocket_socket.sh 2>&1 + +source suite/rocksdb_hotbackup/include/cleanup.inc; diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_socket.test b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_socket.test new file mode 100644 index 00000000000..28edff072e7 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_socket.test @@ -0,0 +1,22 @@ + +source suite/rocksdb_hotbackup/include/setup.inc; + +--exec suite/rocksdb_hotbackup/include/load_data.sh 2>&1 +--let $rpl_server_number= 2 
+--source include/rpl_stop_server.inc + +--exec STREAM_TYPE=xbstream_socket suite/rocksdb_hotbackup/include/stream_run.sh 2>&1 + +--let $rpl_server_number= 2 +--source include/rpl_start_server.inc + +connection server_2; +select count(*) from db1.t1; + +connection server_1; +drop database db1; +connection server_2; +drop database db1; + +source suite/rocksdb_hotbackup/include/cleanup.inc; + diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result index 71c0d6d5dbf..7a7400f17e1 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS t1; -SET GLOBAL ROCKSDB_DISABLE_2PC = OFF; +SET GLOBAL ROCKSDB_ENABLE_2PC = ON; create table t1 (a int primary key, b int, c varchar(255)) engine=rocksdb; 'con1' SET SESSION debug="d,crash_commit_after_log"; @@ -7,11 +7,11 @@ SET DEBUG_SYNC='rocksdb.prepared SIGNAL parked WAIT_FOR go'; insert into t1 values (1, 1, "iamtheogthealphaandomega");; 'con2' insert into t1 values (2, 1, "i_am_just_here_to_trigger_a_flush"); -SET GLOBAL ROCKSDB_DISABLE_2PC = ON; +SET GLOBAL ROCKSDB_ENABLE_2PC = OFF; SET GLOBAL ROCKSDB_WRITE_SYNC = OFF; SET GLOBAL SYNC_BINLOG = 0; SET DEBUG_SYNC='now WAIT_FOR parked'; -SET GLOBAL ROCKSDB_DISABLE_2PC = OFF; +SET GLOBAL ROCKSDB_ENABLE_2PC = ON; SET GLOBAL ROCKSDB_WRITE_SYNC = ON; SET GLOBAL SYNC_BINLOG = 1; insert into t1 values (1000000, 1, "i_am_just_here_to_trigger_a_flush"); diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_2pc_crash_recover.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_2pc_crash_recover.result index 325df314216..59d1a231327 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_2pc_crash_recover.result +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_2pc_crash_recover.result @@ -1,18 +1,18 @@ DROP TABLE IF EXISTS t1; create table t1 
(a int primary key, msg varchar(255)) engine=rocksdb; -SET GLOBAL ROCKSDB_DISABLE_2PC = OFF; +SET GLOBAL ROCKSDB_ENABLE_2PC = ON; SET SESSION debug="d,crash_commit_after_prepare"; insert into t1 values (1, 'dogz'); select * from t1; a msg -SET GLOBAL ROCKSDB_DISABLE_2PC = OFF; +SET GLOBAL ROCKSDB_ENABLE_2PC = ON; SET SESSION debug="d,crash_commit_after_log"; insert into t1 values (2, 'catz'), (3, 'men'); select * from t1; a msg 2 catz 3 men -SET GLOBAL ROCKSDB_DISABLE_2PC = OFF; +SET GLOBAL ROCKSDB_ENABLE_2PC = ON; SET SESSION debug="d,crash_commit_after"; insert into t1 values (4, 'cars'), (5, 'foo'); select * from t1; @@ -21,7 +21,7 @@ a msg 3 men 4 cars 5 foo -SET GLOBAL ROCKSDB_DISABLE_2PC = ON; +SET GLOBAL ROCKSDB_ENABLE_2PC = OFF; SET SESSION debug="d,crash_commit_after_log"; insert into t1 values (6, 'shipz'), (7, 'tankz'); select * from t1; @@ -30,7 +30,7 @@ a msg 3 men 4 cars 5 foo -SET GLOBAL ROCKSDB_DISABLE_2PC = ON; +SET GLOBAL ROCKSDB_ENABLE_2PC = OFF; SET SESSION debug="d,crash_commit_after"; insert into t1 values (8, 'space'), (9, 'time'); select * from t1; diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_skip_trx_api_binlog_format.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_skip_trx_api_binlog_format.result new file mode 100644 index 00000000000..e0dbc92cdf5 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_skip_trx_api_binlog_format.result @@ -0,0 +1,27 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. 
+[connection master] +call mtr.add_suppression("Master's binlog format is not ROW but rpl_skip_tx_api is enabled on the slave"); +set global rpl_skip_tx_api=ON; +set global rocksdb_unsafe_for_binlog=1; +create table t1(a int); +set session binlog_format=STATEMENT; +insert into t1 values(1); +include/wait_for_slave_sql_error.inc [errno=1756] +Last_SQL_Error = 'Master's binlog format is not ROW but rpl_skip_tx_api is enabled on the slave. rpl_skip_tx_api recovery should only be used when master's binlog format is ROW.' +"Table after error" +select * from t1; +a +set global rpl_skip_tx_api=OFF; +include/start_slave.inc +include/sync_slave_sql_with_master.inc +"Table after error fixed" +select * from t1; +a +1 +drop table t1; +set global rocksdb_unsafe_for_binlog=0; +set global rpl_skip_tx_api=0; +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test index 69d2e87e40e..f47f83b0bd2 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test @@ -10,7 +10,7 @@ DROP TABLE IF EXISTS t1; --enable_warnings -SET GLOBAL ROCKSDB_DISABLE_2PC = OFF; +SET GLOBAL ROCKSDB_ENABLE_2PC = ON; create table t1 (a int primary key, b int, c varchar(255)) engine=rocksdb; connect (con1, localhost, root,,); @@ -35,7 +35,7 @@ insert into t1 values (2, 1, "i_am_just_here_to_trigger_a_flush"); # Disable 2PC and syncing for faster inserting of dummy rows # These rows only purpose is to rotate the binlog -SET GLOBAL ROCKSDB_DISABLE_2PC = ON; +SET GLOBAL ROCKSDB_ENABLE_2PC = ON; SET GLOBAL ROCKSDB_WRITE_SYNC = OFF; SET GLOBAL SYNC_BINLOG = 0; @@ -50,7 +50,7 @@ while ($pk < 1000000) { # re-enable 2PC an syncing then write to trigger a flush # before we trigger the crash to simulate full-durability -SET GLOBAL ROCKSDB_DISABLE_2PC = OFF; +SET GLOBAL ROCKSDB_ENABLE_2PC = ON; SET GLOBAL ROCKSDB_WRITE_SYNC = ON; SET 
GLOBAL SYNC_BINLOG = 1; diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.cnf b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.cnf index 454c9eb887a..71c81a892ed 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.cnf +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.cnf @@ -2,8 +2,10 @@ [mysqld.1] log_slave_updates +rocksdb_enable_2pc=OFF [mysqld.2] relay_log_recovery=1 relay_log_info_repository=TABLE log_slave_updates +rocksdb_enable_2pc=OFF diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.cnf b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.cnf index b6e8beb8fcb..c69c987b0d9 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.cnf +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.cnf @@ -4,6 +4,7 @@ log_slave_updates gtid_mode=ON enforce_gtid_consistency=ON +rocksdb_enable_2pc=OFF [mysqld.2] sync_relay_log_info=100 @@ -12,3 +13,4 @@ relay_log_info_repository=FILE log_slave_updates gtid_mode=ON enforce_gtid_consistency=ON +rocksdb_enable_2pc=OFF diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover.test index 5f99e1aabd1..ea1fe3e34d6 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover.test +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover.test @@ -8,7 +8,7 @@ DROP TABLE IF EXISTS t1; create table t1 (a int primary key, msg varchar(255)) engine=rocksdb; -SET GLOBAL ROCKSDB_DISABLE_2PC = OFF; +SET GLOBAL ROCKSDB_ENABLE_2PC = ON; --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect SET SESSION debug="d,crash_commit_after_prepare"; --error 0,2013 @@ -17,7 +17,7 @@ insert into t1 values (1, 'dogz'); --source include/wait_until_connected_again.inc select * from t1; -SET 
GLOBAL ROCKSDB_DISABLE_2PC = OFF; +SET GLOBAL ROCKSDB_ENABLE_2PC = ON; --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect SET SESSION debug="d,crash_commit_after_log"; --error 0,2013 @@ -26,7 +26,7 @@ insert into t1 values (2, 'catz'), (3, 'men'); --source include/wait_until_connected_again.inc select * from t1; -SET GLOBAL ROCKSDB_DISABLE_2PC = OFF; +SET GLOBAL ROCKSDB_ENABLE_2PC = ON; --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect SET SESSION debug="d,crash_commit_after"; --error 0,2013 @@ -35,7 +35,7 @@ insert into t1 values (4, 'cars'), (5, 'foo'); --source include/wait_until_connected_again.inc select * from t1; -SET GLOBAL ROCKSDB_DISABLE_2PC = ON; +SET GLOBAL ROCKSDB_ENABLE_2PC = OFF; --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect SET SESSION debug="d,crash_commit_after_log"; --error 0,2013 @@ -44,7 +44,7 @@ insert into t1 values (6, 'shipz'), (7, 'tankz'); --source include/wait_until_connected_again.inc select * from t1; -SET GLOBAL ROCKSDB_DISABLE_2PC = ON; +SET GLOBAL ROCKSDB_ENABLE_2PC = OFF; --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect SET SESSION debug="d,crash_commit_after"; --error 0,2013 diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-master.opt new file mode 100644 index 00000000000..39bb3238861 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-master.opt @@ -0,0 +1,2 @@ +--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates +--binlog_format=STATEMENT --default-storage-engine=rocksdb diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-slave.opt new file mode 100644 index 00000000000..826f1ee9cb6 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-slave.opt @@ -0,0 +1,2 
@@ +--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates +--sync_binlog=1000 --relay_log_recovery=1 --default-storage-engine=rocksdb diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format.test new file mode 100644 index 00000000000..22151d14547 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format.test @@ -0,0 +1,51 @@ +# Checks if the slave stops executing transactions when master's binlog format +# is STATEMENT but rpl_skip_tx_api is enabled +-- source include/master-slave.inc + +call mtr.add_suppression("Master's binlog format is not ROW but rpl_skip_tx_api is enabled on the slave"); + +connection slave; +let $old_rpl_skip_tx_api= `SELECT @@global.rpl_skip_tx_api`; +set global rpl_skip_tx_api=ON; + +connection master; +let $old_rocksdb_unsafe_for_binlog= `SELECT @@global.rocksdb_unsafe_for_binlog`; +set global rocksdb_unsafe_for_binlog=1; +create table t1(a int); +set session binlog_format=STATEMENT; +insert into t1 values(1); + +# Wait till we hit the binlog format mismatch error +connection slave; +let $slave_sql_errno= convert_error(ER_MTS_INCONSISTENT_DATA); # 1756 +let $show_slave_sql_error= 1; +source include/wait_for_slave_sql_error.inc; + +# Print table +connection slave; +echo "Table after error"; +select * from t1; + +connection slave; +# Turn off rpl_skip_tx_api and start the slave again +set global rpl_skip_tx_api=OFF; +source include/start_slave.inc; + +connection slave; +source include/sync_slave_sql_with_master.inc; + +connection slave; +# Print table again +echo "Table after error fixed"; +select * from t1; + +# Cleanup +connection master; +drop table t1; +eval set global rocksdb_unsafe_for_binlog=$old_rocksdb_unsafe_for_binlog; +sync_slave_with_master; + +connection slave; +eval set global rpl_skip_tx_api=$old_rpl_skip_tx_api; + +-- source include/rpl_end.inc diff --git 
a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/all_vars.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/all_vars.result index 159d6a983c8..9f21825d262 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/all_vars.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/all_vars.result @@ -9,5 +9,7 @@ There should be *no* long test name listed below: select variable_name as `There should be *no* variables listed below:` from t2 left join t1 on variable_name=test_name where test_name is null ORDER BY variable_name; There should be *no* variables listed below: +ROCKSDB_ENABLE_2PC +ROCKSDB_ENABLE_2PC drop table t1; drop table t2; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_disable_2pc_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_disable_2pc_basic.result index 708dd462dfe..686f8bcd39a 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_disable_2pc_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_disable_2pc_basic.result @@ -6,70 +6,70 @@ INSERT INTO valid_values VALUES('off'); CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; INSERT INTO invalid_values VALUES('\'aaa\''); INSERT INTO invalid_values VALUES('\'bbb\''); -SET @start_global_value = @@global.ROCKSDB_DISABLE_2PC; +SET @start_global_value = @@global.ROCKSDB_ENABLE_2PC; SELECT @start_global_value; @start_global_value 1 '# Setting to valid values in global scope#' -"Trying to set variable @@global.ROCKSDB_DISABLE_2PC to 1" -SET @@global.ROCKSDB_DISABLE_2PC = 1; -SELECT @@global.ROCKSDB_DISABLE_2PC; -@@global.ROCKSDB_DISABLE_2PC +"Trying to set variable @@global.ROCKSDB_ENABLE_2PC to 1" +SET @@global.ROCKSDB_ENABLE_2PC = 1; +SELECT @@global.ROCKSDB_ENABLE_2PC; +@@global.ROCKSDB_ENABLE_2PC 1 "Setting the global scope variable back to default" -SET @@global.ROCKSDB_DISABLE_2PC = DEFAULT; -SELECT @@global.ROCKSDB_DISABLE_2PC; -@@global.ROCKSDB_DISABLE_2PC +SET @@global.ROCKSDB_ENABLE_2PC = DEFAULT; 
+SELECT @@global.ROCKSDB_ENABLE_2PC; +@@global.ROCKSDB_ENABLE_2PC 1 -"Trying to set variable @@global.ROCKSDB_DISABLE_2PC to 0" -SET @@global.ROCKSDB_DISABLE_2PC = 0; -SELECT @@global.ROCKSDB_DISABLE_2PC; -@@global.ROCKSDB_DISABLE_2PC +"Trying to set variable @@global.ROCKSDB_ENABLE_2PC to 0" +SET @@global.ROCKSDB_ENABLE_2PC = 0; +SELECT @@global.ROCKSDB_ENABLE_2PC; +@@global.ROCKSDB_ENABLE_2PC 0 "Setting the global scope variable back to default" -SET @@global.ROCKSDB_DISABLE_2PC = DEFAULT; -SELECT @@global.ROCKSDB_DISABLE_2PC; -@@global.ROCKSDB_DISABLE_2PC +SET @@global.ROCKSDB_ENABLE_2PC = DEFAULT; +SELECT @@global.ROCKSDB_ENABLE_2PC; +@@global.ROCKSDB_ENABLE_2PC 1 -"Trying to set variable @@global.ROCKSDB_DISABLE_2PC to on" -SET @@global.ROCKSDB_DISABLE_2PC = on; -SELECT @@global.ROCKSDB_DISABLE_2PC; -@@global.ROCKSDB_DISABLE_2PC +"Trying to set variable @@global.ROCKSDB_ENABLE_2PC to on" +SET @@global.ROCKSDB_ENABLE_2PC = on; +SELECT @@global.ROCKSDB_ENABLE_2PC; +@@global.ROCKSDB_ENABLE_2PC 1 "Setting the global scope variable back to default" -SET @@global.ROCKSDB_DISABLE_2PC = DEFAULT; -SELECT @@global.ROCKSDB_DISABLE_2PC; -@@global.ROCKSDB_DISABLE_2PC +SET @@global.ROCKSDB_ENABLE_2PC = DEFAULT; +SELECT @@global.ROCKSDB_ENABLE_2PC; +@@global.ROCKSDB_ENABLE_2PC 1 -"Trying to set variable @@global.ROCKSDB_DISABLE_2PC to off" -SET @@global.ROCKSDB_DISABLE_2PC = off; -SELECT @@global.ROCKSDB_DISABLE_2PC; -@@global.ROCKSDB_DISABLE_2PC +"Trying to set variable @@global.ROCKSDB_ENABLE_2PC to off" +SET @@global.ROCKSDB_ENABLE_2PC = off; +SELECT @@global.ROCKSDB_ENABLE_2PC; +@@global.ROCKSDB_ENABLE_2PC 0 "Setting the global scope variable back to default" -SET @@global.ROCKSDB_DISABLE_2PC = DEFAULT; -SELECT @@global.ROCKSDB_DISABLE_2PC; -@@global.ROCKSDB_DISABLE_2PC +SET @@global.ROCKSDB_ENABLE_2PC = DEFAULT; +SELECT @@global.ROCKSDB_ENABLE_2PC; +@@global.ROCKSDB_ENABLE_2PC 1 -"Trying to set variable @@session.ROCKSDB_DISABLE_2PC to 444. 
It should fail because it is not session." -SET @@session.ROCKSDB_DISABLE_2PC = 444; -ERROR HY000: Variable 'rocksdb_disable_2pc' is a GLOBAL variable and should be set with SET GLOBAL +"Trying to set variable @@session.ROCKSDB_ENABLE_2PC to 444. It should fail because it is not session." +SET @@session.ROCKSDB_ENABLE_2PC = 444; +ERROR HY000: Variable 'rocksdb_enable_2pc' is a GLOBAL variable and should be set with SET GLOBAL '# Testing with invalid values in global scope #' -"Trying to set variable @@global.ROCKSDB_DISABLE_2PC to 'aaa'" -SET @@global.ROCKSDB_DISABLE_2PC = 'aaa'; +"Trying to set variable @@global.ROCKSDB_ENABLE_2PC to 'aaa'" +SET @@global.ROCKSDB_ENABLE_2PC = 'aaa'; Got one of the listed errors -SELECT @@global.ROCKSDB_DISABLE_2PC; -@@global.ROCKSDB_DISABLE_2PC +SELECT @@global.ROCKSDB_ENABLE_2PC; +@@global.ROCKSDB_ENABLE_2PC 1 -"Trying to set variable @@global.ROCKSDB_DISABLE_2PC to 'bbb'" -SET @@global.ROCKSDB_DISABLE_2PC = 'bbb'; +"Trying to set variable @@global.ROCKSDB_ENABLE_2PC to 'bbb'" +SET @@global.ROCKSDB_ENABLE_2PC = 'bbb'; Got one of the listed errors -SELECT @@global.ROCKSDB_DISABLE_2PC; -@@global.ROCKSDB_DISABLE_2PC +SELECT @@global.ROCKSDB_ENABLE_2PC; +@@global.ROCKSDB_ENABLE_2PC 1 -SET @@global.ROCKSDB_DISABLE_2PC = @start_global_value; -SELECT @@global.ROCKSDB_DISABLE_2PC; -@@global.ROCKSDB_DISABLE_2PC +SET @@global.ROCKSDB_ENABLE_2PC = @start_global_value; +SELECT @@global.ROCKSDB_ENABLE_2PC; +@@global.ROCKSDB_ENABLE_2PC 1 DROP TABLE valid_values; DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_compactions_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_compactions_basic.result index 903e393d5ea..714f2101127 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_compactions_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_compactions_basic.result @@ -1,7 +1,46 @@ 
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(64); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'abc\''); SET @start_global_value = @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS; SELECT @start_global_value; @start_global_value 1 -"Trying to set variable @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS to 444. It should fail because it is readonly." -SET @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = 444; -ERROR HY000: Variable 'rocksdb_max_background_compactions' is a read only variable +'# Setting to valid values in global scope#' +"Trying to set variable @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS to 1" +SET @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = 1; +SELECT @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS; +@@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS +1 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = DEFAULT; +SELECT @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS; +@@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS +1 +"Trying to set variable @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS to 64" +SET @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = 64; +SELECT @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS; +@@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS +64 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = DEFAULT; +SELECT @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS; +@@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS +1 +"Trying to set variable @@session.ROCKSDB_MAX_BACKGROUND_COMPACTIONS to 444. It should fail because it is not session." 
+SET @@session.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = 444; +ERROR HY000: Variable 'rocksdb_max_background_compactions' is a GLOBAL variable and should be set with SET GLOBAL +'# Testing with invalid values in global scope #' +"Trying to set variable @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS to 'abc'" +SET @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = 'abc'; +Got one of the listed errors +SELECT @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS; +@@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS +1 +SET @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = @start_global_value; +SELECT @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS; +@@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS +1 +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_path_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_path_basic.result new file mode 100644 index 00000000000..10b187d44e9 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_path_basic.result @@ -0,0 +1,13 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES('abc'); +INSERT INTO valid_values VALUES('def'); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +SET @start_global_value = @@global.ROCKSDB_PERSISTENT_CACHE_PATH; +SELECT @start_global_value; +@start_global_value + +"Trying to set variable @@global.ROCKSDB_PERSISTENT_CACHE_PATH to 444. It should fail because it is readonly." 
+SET @@global.ROCKSDB_PERSISTENT_CACHE_PATH = 444; +ERROR HY000: Variable 'rocksdb_persistent_cache_path' is a read only variable +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_size_basic.result new file mode 100644 index 00000000000..87440ae0bcb --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_size_basic.result @@ -0,0 +1,14 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(1024); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +SET @start_global_value = @@global.ROCKSDB_PERSISTENT_CACHE_SIZE; +SELECT @start_global_value; +@start_global_value +0 +"Trying to set variable @@global.ROCKSDB_PERSISTENT_CACHE_SIZE to 444. It should fail because it is readonly." 
+SET @@global.ROCKSDB_PERSISTENT_CACHE_SIZE = 444; +ERROR HY000: Variable 'rocksdb_persistent_cache_size' is a read only variable +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rpl_skip_tx_api_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rpl_skip_tx_api_basic.result deleted file mode 100644 index 5f6522e4488..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rpl_skip_tx_api_basic.result +++ /dev/null @@ -1,68 +0,0 @@ -CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; -INSERT INTO valid_values VALUES(1); -INSERT INTO valid_values VALUES(0); -INSERT INTO valid_values VALUES('on'); -INSERT INTO valid_values VALUES('off'); -CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; -INSERT INTO invalid_values VALUES('\'aaa\''); -SET @start_global_value = @@global.ROCKSDB_RPL_SKIP_TX_API; -SELECT @start_global_value; -@start_global_value -1 -'# Setting to valid values in global scope#' -"Trying to set variable @@global.ROCKSDB_RPL_SKIP_TX_API to 1" -SET @@global.ROCKSDB_RPL_SKIP_TX_API = 1; -SELECT @@global.ROCKSDB_RPL_SKIP_TX_API; -@@global.ROCKSDB_RPL_SKIP_TX_API -1 -"Setting the global scope variable back to default" -SET @@global.ROCKSDB_RPL_SKIP_TX_API = DEFAULT; -SELECT @@global.ROCKSDB_RPL_SKIP_TX_API; -@@global.ROCKSDB_RPL_SKIP_TX_API -1 -"Trying to set variable @@global.ROCKSDB_RPL_SKIP_TX_API to 0" -SET @@global.ROCKSDB_RPL_SKIP_TX_API = 0; -SELECT @@global.ROCKSDB_RPL_SKIP_TX_API; -@@global.ROCKSDB_RPL_SKIP_TX_API -0 -"Setting the global scope variable back to default" -SET @@global.ROCKSDB_RPL_SKIP_TX_API = DEFAULT; -SELECT @@global.ROCKSDB_RPL_SKIP_TX_API; -@@global.ROCKSDB_RPL_SKIP_TX_API -1 -"Trying to set variable @@global.ROCKSDB_RPL_SKIP_TX_API to on" -SET @@global.ROCKSDB_RPL_SKIP_TX_API = on; -SELECT @@global.ROCKSDB_RPL_SKIP_TX_API; -@@global.ROCKSDB_RPL_SKIP_TX_API -1 -"Setting the global scope variable back to 
default" -SET @@global.ROCKSDB_RPL_SKIP_TX_API = DEFAULT; -SELECT @@global.ROCKSDB_RPL_SKIP_TX_API; -@@global.ROCKSDB_RPL_SKIP_TX_API -1 -"Trying to set variable @@global.ROCKSDB_RPL_SKIP_TX_API to off" -SET @@global.ROCKSDB_RPL_SKIP_TX_API = off; -SELECT @@global.ROCKSDB_RPL_SKIP_TX_API; -@@global.ROCKSDB_RPL_SKIP_TX_API -0 -"Setting the global scope variable back to default" -SET @@global.ROCKSDB_RPL_SKIP_TX_API = DEFAULT; -SELECT @@global.ROCKSDB_RPL_SKIP_TX_API; -@@global.ROCKSDB_RPL_SKIP_TX_API -1 -"Trying to set variable @@session.ROCKSDB_RPL_SKIP_TX_API to 444. It should fail because it is not session." -SET @@session.ROCKSDB_RPL_SKIP_TX_API = 444; -ERROR HY000: Variable 'rocksdb_rpl_skip_tx_api' is a GLOBAL variable and should be set with SET GLOBAL -'# Testing with invalid values in global scope #' -"Trying to set variable @@global.ROCKSDB_RPL_SKIP_TX_API to 'aaa'" -SET @@global.ROCKSDB_RPL_SKIP_TX_API = 'aaa'; -Got one of the listed errors -SELECT @@global.ROCKSDB_RPL_SKIP_TX_API; -@@global.ROCKSDB_RPL_SKIP_TX_API -1 -SET @@global.ROCKSDB_RPL_SKIP_TX_API = @start_global_value; -SELECT @@global.ROCKSDB_RPL_SKIP_TX_API; -@@global.ROCKSDB_RPL_SKIP_TX_API -1 -DROP TABLE valid_values; -DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_basic.result deleted file mode 100644 index a1244723b05..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_basic.result +++ /dev/null @@ -1,163 +0,0 @@ -CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; -INSERT INTO valid_values VALUES(0); -INSERT INTO valid_values VALUES(1); -INSERT INTO valid_values VALUES('on'); -INSERT INTO valid_values VALUES('off'); -INSERT INTO valid_values VALUES('true'); -INSERT INTO valid_values VALUES('false'); -CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; -INSERT INTO invalid_values 
VALUES('\'aaa\''); -INSERT INTO invalid_values VALUES('\'bbb\''); -SET @start_global_value = @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -SELECT @start_global_value; -@start_global_value -0 -SET @start_session_value = @@session.ROCKSDB_SKIP_UNIQUE_CHECK; -SELECT @start_session_value; -@start_session_value -0 -'# Setting to valid values in global scope#' -"Trying to set variable @@global.ROCKSDB_SKIP_UNIQUE_CHECK to 0" -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = 0; -SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Setting the global scope variable back to default" -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = DEFAULT; -SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Trying to set variable @@global.ROCKSDB_SKIP_UNIQUE_CHECK to 1" -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = 1; -SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -1 -"Setting the global scope variable back to default" -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = DEFAULT; -SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Trying to set variable @@global.ROCKSDB_SKIP_UNIQUE_CHECK to on" -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = on; -SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -1 -"Setting the global scope variable back to default" -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = DEFAULT; -SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Trying to set variable @@global.ROCKSDB_SKIP_UNIQUE_CHECK to off" -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = off; -SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Setting the global scope variable back to default" -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = DEFAULT; -SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Trying to set variable @@global.ROCKSDB_SKIP_UNIQUE_CHECK to true" -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = true; 
-SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -1 -"Setting the global scope variable back to default" -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = DEFAULT; -SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Trying to set variable @@global.ROCKSDB_SKIP_UNIQUE_CHECK to false" -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = false; -SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Setting the global scope variable back to default" -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = DEFAULT; -SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -0 -'# Setting to valid values in session scope#' -"Trying to set variable @@session.ROCKSDB_SKIP_UNIQUE_CHECK to 0" -SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK = 0; -SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK; -@@session.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Setting the session scope variable back to default" -SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK = DEFAULT; -SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK; -@@session.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Trying to set variable @@session.ROCKSDB_SKIP_UNIQUE_CHECK to 1" -SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK = 1; -SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK; -@@session.ROCKSDB_SKIP_UNIQUE_CHECK -1 -"Setting the session scope variable back to default" -SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK = DEFAULT; -SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK; -@@session.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Trying to set variable @@session.ROCKSDB_SKIP_UNIQUE_CHECK to on" -SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK = on; -SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK; -@@session.ROCKSDB_SKIP_UNIQUE_CHECK -1 -"Setting the session scope variable back to default" -SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK = DEFAULT; -SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK; -@@session.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Trying to set variable @@session.ROCKSDB_SKIP_UNIQUE_CHECK to off" -SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK = off; -SELECT 
@@session.ROCKSDB_SKIP_UNIQUE_CHECK; -@@session.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Setting the session scope variable back to default" -SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK = DEFAULT; -SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK; -@@session.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Trying to set variable @@session.ROCKSDB_SKIP_UNIQUE_CHECK to true" -SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK = true; -SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK; -@@session.ROCKSDB_SKIP_UNIQUE_CHECK -1 -"Setting the session scope variable back to default" -SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK = DEFAULT; -SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK; -@@session.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Trying to set variable @@session.ROCKSDB_SKIP_UNIQUE_CHECK to false" -SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK = false; -SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK; -@@session.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Setting the session scope variable back to default" -SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK = DEFAULT; -SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK; -@@session.ROCKSDB_SKIP_UNIQUE_CHECK -0 -'# Testing with invalid values in global scope #' -"Trying to set variable @@global.ROCKSDB_SKIP_UNIQUE_CHECK to 'aaa'" -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = 'aaa'; -Got one of the listed errors -SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -0 -"Trying to set variable @@global.ROCKSDB_SKIP_UNIQUE_CHECK to 'bbb'" -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = 'bbb'; -Got one of the listed errors -SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -0 -SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK = @start_global_value; -SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK; -@@global.ROCKSDB_SKIP_UNIQUE_CHECK -0 -SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK = @start_session_value; -SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK; -@@session.ROCKSDB_SKIP_UNIQUE_CHECK -0 -DROP TABLE valid_values; -DROP TABLE invalid_values; diff --git 
a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_disable_2pc_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_disable_2pc_basic.test index 061a4c902b5..1badcef0347 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_disable_2pc_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_disable_2pc_basic.test @@ -10,7 +10,7 @@ CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; INSERT INTO invalid_values VALUES('\'aaa\''); INSERT INTO invalid_values VALUES('\'bbb\''); ---let $sys_var=ROCKSDB_DISABLE_2PC +--let $sys_var=ROCKSDB_ENABLE_2PC --let $read_only=0 --let $session=0 --let $sticky=1 diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_compactions_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_compactions_basic.test index 441c0577c10..5fcc4e6ef25 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_compactions_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_compactions_basic.test @@ -1,7 +1,16 @@ --source include/have_rocksdb.inc +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(64); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'abc\''); + --let $sys_var=ROCKSDB_MAX_BACKGROUND_COMPACTIONS ---let $read_only=1 +--let $read_only=0 --let $session=0 --source suite/sys_vars/inc/rocksdb_sys_var.inc +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_path_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_path_basic.test new file mode 100644 index 00000000000..c0840274253 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_path_basic.test @@ -0,0 +1,16 @@ +--source 
include/have_rocksdb.inc + +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES('abc'); +INSERT INTO valid_values VALUES('def'); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; + +--let $sys_var=ROCKSDB_PERSISTENT_CACHE_PATH +--let $read_only=1 +--let $session=0 +--let $sticky=1 +--source suite/sys_vars/inc/rocksdb_sys_var.inc + +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rpl_skip_tx_api_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_size_basic.test index f6c0a219a9f..32fafcaf232 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rpl_skip_tx_api_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_size_basic.test @@ -2,15 +2,13 @@ CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; INSERT INTO valid_values VALUES(1); -INSERT INTO valid_values VALUES(0); -INSERT INTO valid_values VALUES('on'); -INSERT INTO valid_values VALUES('off'); +INSERT INTO valid_values VALUES(1024); CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; INSERT INTO invalid_values VALUES('\'aaa\''); ---let $sys_var=ROCKSDB_RPL_SKIP_TX_API ---let $read_only=0 +--let $sys_var=ROCKSDB_PERSISTENT_CACHE_SIZE +--let $read_only=1 --let $session=0 --source suite/sys_vars/inc/rocksdb_sys_var.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_basic.test deleted file mode 100644 index fe90a49365b..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_basic.test +++ /dev/null @@ -1,21 +0,0 @@ ---source include/have_rocksdb.inc - -CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; -INSERT INTO valid_values VALUES(0); -INSERT INTO valid_values VALUES(1); -INSERT INTO valid_values VALUES('on'); 
-INSERT INTO valid_values VALUES('off'); -INSERT INTO valid_values VALUES('true'); -INSERT INTO valid_values VALUES('false'); - -CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; -INSERT INTO invalid_values VALUES('\'aaa\''); -INSERT INTO invalid_values VALUES('\'bbb\''); - ---let $sys_var=ROCKSDB_SKIP_UNIQUE_CHECK ---let $read_only=0 ---let $session=1 ---source suite/sys_vars/inc/rocksdb_sys_var.inc - -DROP TABLE valid_values; -DROP TABLE invalid_values; diff --git a/storage/rocksdb/properties_collector.cc b/storage/rocksdb/properties_collector.cc index e5b5b25792d..26a967bc088 100644 --- a/storage/rocksdb/properties_collector.cc +++ b/storage/rocksdb/properties_collector.cc @@ -41,21 +41,15 @@ std::atomic<uint64_t> rocksdb_num_sst_entry_merge(0); std::atomic<uint64_t> rocksdb_num_sst_entry_other(0); my_bool rocksdb_compaction_sequential_deletes_count_sd = false; -Rdb_tbl_prop_coll::Rdb_tbl_prop_coll( - Rdb_ddl_manager* const ddl_manager, - const Rdb_compact_params ¶ms, - const uint32_t &cf_id, - const uint8_t &table_stats_sampling_pct -) : - m_cf_id(cf_id), - m_ddl_manager(ddl_manager), - m_last_stats(nullptr), - m_rows(0l), m_window_pos(0l), m_deleted_rows(0l), m_max_deleted_rows(0l), - m_file_size(0), m_params(params), - m_table_stats_sampling_pct(table_stats_sampling_pct), - m_seed(time(nullptr)), - m_card_adj_extra(1.) -{ +Rdb_tbl_prop_coll::Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager, + const Rdb_compact_params ¶ms, + const uint32_t &cf_id, + const uint8_t &table_stats_sampling_pct) + : m_cf_id(cf_id), m_ddl_manager(ddl_manager), m_last_stats(nullptr), + m_rows(0l), m_window_pos(0l), m_deleted_rows(0l), m_max_deleted_rows(0l), + m_file_size(0), m_params(params), + m_table_stats_sampling_pct(table_stats_sampling_pct), + m_seed(time(nullptr)), m_card_adj_extra(1.) 
{ DBUG_ASSERT(ddl_manager != nullptr); // We need to adjust the index cardinality numbers based on the sampling @@ -71,12 +65,11 @@ Rdb_tbl_prop_coll::Rdb_tbl_prop_coll( /* This function is called by RocksDB for every key in the SST file */ -rocksdb::Status -Rdb_tbl_prop_coll::AddUserKey( - const rocksdb::Slice& key, const rocksdb::Slice& value, - rocksdb::EntryType type, rocksdb::SequenceNumber seq, - uint64_t file_size -) { +rocksdb::Status Rdb_tbl_prop_coll::AddUserKey(const rocksdb::Slice &key, + const rocksdb::Slice &value, + rocksdb::EntryType type, + rocksdb::SequenceNumber seq, + uint64_t file_size) { if (key.size() >= 4) { AdjustDeletedRows(type); @@ -88,10 +81,8 @@ Rdb_tbl_prop_coll::AddUserKey( return rocksdb::Status::OK(); } -void Rdb_tbl_prop_coll::AdjustDeletedRows(rocksdb::EntryType type) -{ - if (m_params.m_window > 0) - { +void Rdb_tbl_prop_coll::AdjustDeletedRows(rocksdb::EntryType type) { + if (m_params.m_window > 0) { // record the "is deleted" flag into the sliding window // the sliding window is implemented as a circular buffer // in m_deleted_rows_window vector @@ -99,42 +90,33 @@ void Rdb_tbl_prop_coll::AdjustDeletedRows(rocksdb::EntryType type) // m_rows % m_deleted_rows_window.size() // m_deleted_rows is the current number of 1's in the vector // --update the counter for the element which will be overridden - const bool is_delete= (type == rocksdb::kEntryDelete || - (type == rocksdb::kEntrySingleDelete && - rocksdb_compaction_sequential_deletes_count_sd)); + const bool is_delete = (type == rocksdb::kEntryDelete || + (type == rocksdb::kEntrySingleDelete && + rocksdb_compaction_sequential_deletes_count_sd)); // Only make changes if the value at the current position needs to change - if (is_delete != m_deleted_rows_window[m_window_pos]) - { + if (is_delete != m_deleted_rows_window[m_window_pos]) { // Set or clear the flag at the current position as appropriate - m_deleted_rows_window[m_window_pos]= is_delete; - if (!is_delete) - { + 
m_deleted_rows_window[m_window_pos] = is_delete; + if (!is_delete) { m_deleted_rows--; - } - else if (++m_deleted_rows > m_max_deleted_rows) - { + } else if (++m_deleted_rows > m_max_deleted_rows) { m_max_deleted_rows = m_deleted_rows; } } - if (++m_window_pos == m_params.m_window) - { + if (++m_window_pos == m_params.m_window) { m_window_pos = 0; } } } -Rdb_index_stats* Rdb_tbl_prop_coll::AccessStats( - const rocksdb::Slice& key) -{ - GL_INDEX_ID gl_index_id = { - .cf_id = m_cf_id, - .index_id = rdb_netbuf_to_uint32(reinterpret_cast<const uchar*>(key.data())) - }; +Rdb_index_stats *Rdb_tbl_prop_coll::AccessStats(const rocksdb::Slice &key) { + GL_INDEX_ID gl_index_id = {.cf_id = m_cf_id, + .index_id = rdb_netbuf_to_uint32( + reinterpret_cast<const uchar *>(key.data()))}; - if (m_last_stats == nullptr || m_last_stats->m_gl_index_id != gl_index_id) - { + if (m_last_stats == nullptr || m_last_stats->m_gl_index_id != gl_index_id) { m_keydef = nullptr; // starting a new table @@ -142,8 +124,7 @@ Rdb_index_stats* Rdb_tbl_prop_coll::AccessStats( m_stats.emplace_back(gl_index_id); m_last_stats = &m_stats.back(); - if (m_ddl_manager) - { + if (m_ddl_manager) { // safe_find() returns a std::shared_ptr<Rdb_key_def> with the count // incremented (so it can't be deleted out from under us) and with // the mutex locked (if setup has not occurred yet). We must make @@ -152,8 +133,7 @@ Rdb_index_stats* Rdb_tbl_prop_coll::AccessStats( // when we are switching to a new Rdb_key_def and when this object // is destructed. m_keydef = m_ddl_manager->safe_find(gl_index_id); - if (m_keydef != nullptr) - { + if (m_keydef != nullptr) { // resize the array to the number of columns. 
// It will be initialized with zeroes m_last_stats->m_distinct_keys_per_prefix.resize( @@ -167,13 +147,13 @@ Rdb_index_stats* Rdb_tbl_prop_coll::AccessStats( return m_last_stats; } -void Rdb_tbl_prop_coll::CollectStatsForRow( - const rocksdb::Slice& key, const rocksdb::Slice& value, - const rocksdb::EntryType &type, const uint64_t &file_size) -{ +void Rdb_tbl_prop_coll::CollectStatsForRow(const rocksdb::Slice &key, + const rocksdb::Slice &value, + const rocksdb::EntryType &type, + const uint64_t &file_size) { const auto stats = AccessStats(key); - stats->m_data_size += key.size()+value.size(); + stats->m_data_size += key.size() + value.size(); // Incrementing per-index entry-type statistics switch (type) { @@ -195,7 +175,8 @@ void Rdb_tbl_prop_coll::CollectStatsForRow( default: // NO_LINT_DEBUG sql_print_error("RocksDB: Unexpected entry type found: %u. " - "This should not happen so aborting the system.", type); + "This should not happen so aborting the system.", + type); abort_with_stack_traces(); break; } @@ -203,23 +184,19 @@ void Rdb_tbl_prop_coll::CollectStatsForRow( stats->m_actual_disk_size += file_size - m_file_size; m_file_size = file_size; - if (m_keydef != nullptr && ShouldCollectStats()) - { + if (m_keydef != nullptr && ShouldCollectStats()) { std::size_t column = 0; bool new_key = true; - if (!m_last_key.empty()) - { + if (!m_last_key.empty()) { rocksdb::Slice last(m_last_key.data(), m_last_key.size()); new_key = (m_keydef->compare_keys(&last, &key, &column) == 0); } - if (new_key) - { + if (new_key) { DBUG_ASSERT(column <= stats->m_distinct_keys_per_prefix.size()); - for (auto i = column; i < stats->m_distinct_keys_per_prefix.size(); i++) - { + for (auto i = column; i < stats->m_distinct_keys_per_prefix.size(); i++) { stats->m_distinct_keys_per_prefix[i]++; } @@ -228,23 +205,20 @@ void Rdb_tbl_prop_coll::CollectStatsForRow( // if one of the first n-1 columns is different // If the n-1 prefix is the same, no sense in storing // the new key - if (column 
< stats->m_distinct_keys_per_prefix.size()) - { + if (column < stats->m_distinct_keys_per_prefix.size()) { m_last_key.assign(key.data(), key.size()); } } } } -const char* Rdb_tbl_prop_coll::INDEXSTATS_KEY = "__indexstats__"; +const char *Rdb_tbl_prop_coll::INDEXSTATS_KEY = "__indexstats__"; /* This function is called by RocksDB to compute properties to store in sst file */ rocksdb::Status -Rdb_tbl_prop_coll::Finish( - rocksdb::UserCollectedProperties* const properties -) { +Rdb_tbl_prop_coll::Finish(rocksdb::UserCollectedProperties *const properties) { uint64_t num_sst_entry_put = 0; uint64_t num_sst_entry_delete = 0; uint64_t num_sst_entry_singledelete = 0; @@ -253,8 +227,7 @@ Rdb_tbl_prop_coll::Finish( DBUG_ASSERT(properties != nullptr); - for (auto it = m_stats.begin(); it != m_stats.end(); it++) - { + for (auto it = m_stats.begin(); it != m_stats.end(); it++) { num_sst_entry_put += it->m_rows; num_sst_entry_delete += it->m_entry_deletes; num_sst_entry_singledelete += it->m_entry_single_deletes; @@ -262,42 +235,35 @@ Rdb_tbl_prop_coll::Finish( num_sst_entry_other += it->m_entry_others; } - if (num_sst_entry_put > 0) - { + if (num_sst_entry_put > 0) { rocksdb_num_sst_entry_put += num_sst_entry_put; } - if (num_sst_entry_delete > 0) - { + if (num_sst_entry_delete > 0) { rocksdb_num_sst_entry_delete += num_sst_entry_delete; } - if (num_sst_entry_singledelete > 0) - { + if (num_sst_entry_singledelete > 0) { rocksdb_num_sst_entry_singledelete += num_sst_entry_singledelete; } - if (num_sst_entry_merge > 0) - { + if (num_sst_entry_merge > 0) { rocksdb_num_sst_entry_merge += num_sst_entry_merge; } - if (num_sst_entry_other > 0) - { + if (num_sst_entry_other > 0) { rocksdb_num_sst_entry_other += num_sst_entry_other; } properties->insert({INDEXSTATS_KEY, - Rdb_index_stats::materialize(m_stats, m_card_adj_extra)}); + Rdb_index_stats::materialize(m_stats, m_card_adj_extra)}); return rocksdb::Status::OK(); } bool Rdb_tbl_prop_coll::NeedCompact() const { - return - 
m_params.m_deletes && - (m_params.m_window > 0) && - (m_file_size > m_params.m_file_size) && - (m_max_deleted_rows > m_params.m_deletes); + return m_params.m_deletes && (m_params.m_window > 0) && + (m_file_size > m_params.m_file_size) && + (m_max_deleted_rows > m_params.m_deletes); } bool Rdb_tbl_prop_coll::ShouldCollectStats() { @@ -307,9 +273,9 @@ bool Rdb_tbl_prop_coll::ShouldCollectStats() { return true; } - const int val = rand_r(&m_seed) % - (RDB_TBL_STATS_SAMPLE_PCT_MAX - RDB_TBL_STATS_SAMPLE_PCT_MIN + 1) + - RDB_TBL_STATS_SAMPLE_PCT_MIN; + const int val = rand_r(&m_seed) % (RDB_TBL_STATS_SAMPLE_PCT_MAX - + RDB_TBL_STATS_SAMPLE_PCT_MIN + 1) + + RDB_TBL_STATS_SAMPLE_PCT_MIN; DBUG_ASSERT(val >= RDB_TBL_STATS_SAMPLE_PCT_MIN); DBUG_ASSERT(val <= RDB_TBL_STATS_SAMPLE_PCT_MAX); @@ -337,14 +303,11 @@ Rdb_tbl_prop_coll::GetReadableProperties() const { } s.append(GetReadableStats(it)); } - #endif +#endif return rocksdb::UserCollectedProperties{{INDEXSTATS_KEY, s}}; } -std::string -Rdb_tbl_prop_coll::GetReadableStats( - const Rdb_index_stats& it -) { +std::string Rdb_tbl_prop_coll::GetReadableStats(const Rdb_index_stats &it) { std::string s; s.append("("); s.append(std::to_string(it.m_gl_index_id.cf_id)); @@ -380,28 +343,24 @@ Rdb_tbl_prop_coll::GetReadableStats( */ void Rdb_tbl_prop_coll::read_stats_from_tbl_props( - const std::shared_ptr<const rocksdb::TableProperties>& table_props, - std::vector<Rdb_index_stats>* const out_stats_vector) -{ + const std::shared_ptr<const rocksdb::TableProperties> &table_props, + std::vector<Rdb_index_stats> *const out_stats_vector) { DBUG_ASSERT(out_stats_vector != nullptr); - const auto& user_properties = table_props->user_collected_properties; + const auto &user_properties = table_props->user_collected_properties; const auto it2 = user_properties.find(std::string(INDEXSTATS_KEY)); - if (it2 != user_properties.end()) - { - auto result __attribute__((__unused__)) = + if (it2 != user_properties.end()) { + auto result 
MY_ATTRIBUTE((__unused__)) = Rdb_index_stats::unmaterialize(it2->second, out_stats_vector); DBUG_ASSERT(result == 0); } } - /* Serializes an array of Rdb_index_stats into a network string. */ -std::string Rdb_index_stats::materialize( - const std::vector<Rdb_index_stats>& stats, - const float card_adj_extra) -{ +std::string +Rdb_index_stats::materialize(const std::vector<Rdb_index_stats> &stats, + const float card_adj_extra) { String ret; rdb_netstr_append_uint16(&ret, INDEX_STATS_VERSION_ENTRY_TYPES); for (const auto &i : stats) { @@ -422,105 +381,92 @@ std::string Rdb_index_stats::materialize( } } - return std::string((char*) ret.ptr(), ret.length()); + return std::string((char *)ret.ptr(), ret.length()); } /** @brief Reads an array of Rdb_index_stats from a string. - @return 1 if it detects any inconsistency in the input - @return 0 if completes successfully + @return HA_EXIT_FAILURE if it detects any inconsistency in the input + @return HA_EXIT_SUCCESS if completes successfully */ -int Rdb_index_stats::unmaterialize( - const std::string& s, std::vector<Rdb_index_stats>* const ret) -{ - const uchar* p= rdb_std_str_to_uchar_ptr(s); - const uchar* const p2= p + s.size(); +int Rdb_index_stats::unmaterialize(const std::string &s, + std::vector<Rdb_index_stats> *const ret) { + const uchar *p = rdb_std_str_to_uchar_ptr(s); + const uchar *const p2 = p + s.size(); DBUG_ASSERT(ret != nullptr); - if (p+2 > p2) - { - return 1; + if (p + 2 > p2) { + return HA_EXIT_FAILURE; } - const int version= rdb_netbuf_read_uint16(&p); + const int version = rdb_netbuf_read_uint16(&p); Rdb_index_stats stats; // Make sure version is within supported range. if (version < INDEX_STATS_VERSION_INITIAL || - version > INDEX_STATS_VERSION_ENTRY_TYPES) - { + version > INDEX_STATS_VERSION_ENTRY_TYPES) { // NO_LINT_DEBUG sql_print_error("Index stats version %d was outside of supported range. 
" - "This should not happen so aborting the system.", version); + "This should not happen so aborting the system.", + version); abort_with_stack_traces(); } - size_t needed = sizeof(stats.m_gl_index_id.cf_id)+ - sizeof(stats.m_gl_index_id.index_id)+ - sizeof(stats.m_data_size)+ - sizeof(stats.m_rows)+ - sizeof(stats.m_actual_disk_size)+ - sizeof(uint64); - if (version >= INDEX_STATS_VERSION_ENTRY_TYPES) - { - needed += sizeof(stats.m_entry_deletes)+ - sizeof(stats.m_entry_single_deletes)+ - sizeof(stats.m_entry_merges)+ - sizeof(stats.m_entry_others); + size_t needed = sizeof(stats.m_gl_index_id.cf_id) + + sizeof(stats.m_gl_index_id.index_id) + + sizeof(stats.m_data_size) + sizeof(stats.m_rows) + + sizeof(stats.m_actual_disk_size) + sizeof(uint64); + if (version >= INDEX_STATS_VERSION_ENTRY_TYPES) { + needed += sizeof(stats.m_entry_deletes) + + sizeof(stats.m_entry_single_deletes) + + sizeof(stats.m_entry_merges) + sizeof(stats.m_entry_others); } - while (p < p2) - { - if (p+needed > p2) - { - return 1; + while (p < p2) { + if (p + needed > p2) { + return HA_EXIT_FAILURE; } rdb_netbuf_read_gl_index(&p, &stats.m_gl_index_id); - stats.m_data_size= rdb_netbuf_read_uint64(&p); - stats.m_rows= rdb_netbuf_read_uint64(&p); - stats.m_actual_disk_size= rdb_netbuf_read_uint64(&p); + stats.m_data_size = rdb_netbuf_read_uint64(&p); + stats.m_rows = rdb_netbuf_read_uint64(&p); + stats.m_actual_disk_size = rdb_netbuf_read_uint64(&p); stats.m_distinct_keys_per_prefix.resize(rdb_netbuf_read_uint64(&p)); - if (version >= INDEX_STATS_VERSION_ENTRY_TYPES) - { - stats.m_entry_deletes= rdb_netbuf_read_uint64(&p); - stats.m_entry_single_deletes= rdb_netbuf_read_uint64(&p); - stats.m_entry_merges= rdb_netbuf_read_uint64(&p); - stats.m_entry_others= rdb_netbuf_read_uint64(&p); + if (version >= INDEX_STATS_VERSION_ENTRY_TYPES) { + stats.m_entry_deletes = rdb_netbuf_read_uint64(&p); + stats.m_entry_single_deletes = rdb_netbuf_read_uint64(&p); + stats.m_entry_merges = 
rdb_netbuf_read_uint64(&p); + stats.m_entry_others = rdb_netbuf_read_uint64(&p); } - if (p+stats.m_distinct_keys_per_prefix.size() - *sizeof(stats.m_distinct_keys_per_prefix[0]) > p2) - { - return 1; + if (p + + stats.m_distinct_keys_per_prefix.size() * + sizeof(stats.m_distinct_keys_per_prefix[0]) > + p2) { + return HA_EXIT_FAILURE; } - for (std::size_t i= 0; i < stats.m_distinct_keys_per_prefix.size(); i++) - { - stats.m_distinct_keys_per_prefix[i]= rdb_netbuf_read_uint64(&p); + for (std::size_t i = 0; i < stats.m_distinct_keys_per_prefix.size(); i++) { + stats.m_distinct_keys_per_prefix[i] = rdb_netbuf_read_uint64(&p); } ret->push_back(stats); } - return 0; + return HA_EXIT_SUCCESS; } /* Merges one Rdb_index_stats into another. Can be used to come up with the stats for the index based on stats for each sst */ -void Rdb_index_stats::merge( - const Rdb_index_stats& s, const bool &increment, - const int64_t &estimated_data_len) -{ +void Rdb_index_stats::merge(const Rdb_index_stats &s, const bool &increment, + const int64_t &estimated_data_len) { std::size_t i; DBUG_ASSERT(estimated_data_len >= 0); m_gl_index_id = s.m_gl_index_id; - if (m_distinct_keys_per_prefix.size() < s.m_distinct_keys_per_prefix.size()) - { + if (m_distinct_keys_per_prefix.size() < s.m_distinct_keys_per_prefix.size()) { m_distinct_keys_per_prefix.resize(s.m_distinct_keys_per_prefix.size()); } - if (increment) - { + if (increment) { m_rows += s.m_rows; m_data_size += s.m_data_size; @@ -531,32 +477,28 @@ void Rdb_index_stats::merge( we make a reasoned estimate for the data_file_length for the index in the current SST. */ - m_actual_disk_size += s.m_actual_disk_size ? s.m_actual_disk_size : - estimated_data_len * s.m_rows; + m_actual_disk_size += s.m_actual_disk_size ? 
s.m_actual_disk_size + : estimated_data_len * s.m_rows; m_entry_deletes += s.m_entry_deletes; m_entry_single_deletes += s.m_entry_single_deletes; m_entry_merges += s.m_entry_merges; m_entry_others += s.m_entry_others; - for (i = 0; i < s.m_distinct_keys_per_prefix.size(); i++) - { + for (i = 0; i < s.m_distinct_keys_per_prefix.size(); i++) { m_distinct_keys_per_prefix[i] += s.m_distinct_keys_per_prefix[i]; } - } - else - { + } else { m_rows -= s.m_rows; m_data_size -= s.m_data_size; - m_actual_disk_size -= s.m_actual_disk_size ? s.m_actual_disk_size : - estimated_data_len * s.m_rows; + m_actual_disk_size -= s.m_actual_disk_size ? s.m_actual_disk_size + : estimated_data_len * s.m_rows; m_entry_deletes -= s.m_entry_deletes; m_entry_single_deletes -= s.m_entry_single_deletes; m_entry_merges -= s.m_entry_merges; m_entry_others -= s.m_entry_others; - for (i = 0; i < s.m_distinct_keys_per_prefix.size(); i++) - { + for (i = 0; i < s.m_distinct_keys_per_prefix.size(); i++) { m_distinct_keys_per_prefix[i] -= s.m_distinct_keys_per_prefix[i]; } } } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/properties_collector.h b/storage/rocksdb/properties_collector.h index b0f8e5deb59..9ae519d95c7 100644 --- a/storage/rocksdb/properties_collector.h +++ b/storage/rocksdb/properties_collector.h @@ -40,102 +40,87 @@ extern std::atomic<uint64_t> rocksdb_num_sst_entry_merge; extern std::atomic<uint64_t> rocksdb_num_sst_entry_other; extern my_bool rocksdb_compaction_sequential_deletes_count_sd; - -struct Rdb_compact_params -{ +struct Rdb_compact_params { uint64_t m_deletes, m_window, m_file_size; }; - -struct Rdb_index_stats -{ - enum { - INDEX_STATS_VERSION_INITIAL= 1, - INDEX_STATS_VERSION_ENTRY_TYPES= 2, +struct Rdb_index_stats { + enum { + INDEX_STATS_VERSION_INITIAL = 1, + INDEX_STATS_VERSION_ENTRY_TYPES = 2, }; GL_INDEX_ID m_gl_index_id; int64_t m_data_size, m_rows, m_actual_disk_size; int64_t m_entry_deletes, m_entry_single_deletes; int64_t 
m_entry_merges, m_entry_others; std::vector<int64_t> m_distinct_keys_per_prefix; - std::string m_name; // name is not persisted + std::string m_name; // name is not persisted - static std::string materialize(const std::vector<Rdb_index_stats>& stats, + static std::string materialize(const std::vector<Rdb_index_stats> &stats, const float card_adj_extra); - static int unmaterialize(const std::string& s, - std::vector<Rdb_index_stats>* const ret); + static int unmaterialize(const std::string &s, + std::vector<Rdb_index_stats> *const ret); Rdb_index_stats() : Rdb_index_stats({0, 0}) {} - explicit Rdb_index_stats(GL_INDEX_ID gl_index_id) : - m_gl_index_id(gl_index_id), - m_data_size(0), - m_rows(0), - m_actual_disk_size(0), - m_entry_deletes(0), - m_entry_single_deletes(0), - m_entry_merges(0), - m_entry_others(0) {} - - void merge(const Rdb_index_stats& s, const bool &increment = true, + explicit Rdb_index_stats(GL_INDEX_ID gl_index_id) + : m_gl_index_id(gl_index_id), m_data_size(0), m_rows(0), + m_actual_disk_size(0), m_entry_deletes(0), m_entry_single_deletes(0), + m_entry_merges(0), m_entry_others(0) {} + + void merge(const Rdb_index_stats &s, const bool &increment = true, const int64_t &estimated_data_len = 0); }; - -class Rdb_tbl_prop_coll : public rocksdb::TablePropertiesCollector -{ - public: - Rdb_tbl_prop_coll( - Rdb_ddl_manager* const ddl_manager, - const Rdb_compact_params ¶ms, - const uint32_t &cf_id, - const uint8_t &table_stats_sampling_pct - ); +class Rdb_tbl_prop_coll : public rocksdb::TablePropertiesCollector { +public: + Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager, + const Rdb_compact_params ¶ms, const uint32_t &cf_id, + const uint8_t &table_stats_sampling_pct); /* Override parent class's virtual methods of interest. 
*/ - virtual rocksdb::Status AddUserKey( - const rocksdb::Slice& key, const rocksdb::Slice& value, - rocksdb::EntryType type, rocksdb::SequenceNumber seq, - uint64_t file_size); + virtual rocksdb::Status AddUserKey(const rocksdb::Slice &key, + const rocksdb::Slice &value, + rocksdb::EntryType type, + rocksdb::SequenceNumber seq, + uint64_t file_size); - virtual rocksdb::Status Finish(rocksdb::UserCollectedProperties* properties) override; + virtual rocksdb::Status + Finish(rocksdb::UserCollectedProperties *properties) override; - virtual const char* Name() const override { - return "Rdb_tbl_prop_coll"; - } + virtual const char *Name() const override { return "Rdb_tbl_prop_coll"; } rocksdb::UserCollectedProperties GetReadableProperties() const override; bool NeedCompact() const override; - public: - uint64_t GetMaxDeletedRows() const { - return m_max_deleted_rows; - } +public: + uint64_t GetMaxDeletedRows() const { return m_max_deleted_rows; } static void read_stats_from_tbl_props( - const std::shared_ptr<const rocksdb::TableProperties>& table_props, - std::vector<Rdb_index_stats>* out_stats_vector); + const std::shared_ptr<const rocksdb::TableProperties> &table_props, + std::vector<Rdb_index_stats> *out_stats_vector); - private: - static std::string GetReadableStats(const Rdb_index_stats& it); +private: + static std::string GetReadableStats(const Rdb_index_stats &it); bool ShouldCollectStats(); - void CollectStatsForRow(const rocksdb::Slice& key, - const rocksdb::Slice& value, const rocksdb::EntryType &type, - const uint64_t &file_size); - Rdb_index_stats* AccessStats(const rocksdb::Slice& key); + void CollectStatsForRow(const rocksdb::Slice &key, + const rocksdb::Slice &value, + const rocksdb::EntryType &type, + const uint64_t &file_size); + Rdb_index_stats *AccessStats(const rocksdb::Slice &key); void AdjustDeletedRows(rocksdb::EntryType type); - private: +private: uint32_t m_cf_id; std::shared_ptr<const Rdb_key_def> m_keydef; - Rdb_ddl_manager* m_ddl_manager; + 
Rdb_ddl_manager *m_ddl_manager; std::vector<Rdb_index_stats> m_stats; - Rdb_index_stats* m_last_stats; - static const char* INDEXSTATS_KEY; + Rdb_index_stats *m_last_stats; + static const char *INDEXSTATS_KEY; // last added key std::string m_last_key; @@ -150,34 +135,33 @@ class Rdb_tbl_prop_coll : public rocksdb::TablePropertiesCollector float m_card_adj_extra; }; - class Rdb_tbl_prop_coll_factory : public rocksdb::TablePropertiesCollectorFactory { - public: - Rdb_tbl_prop_coll_factory(const Rdb_tbl_prop_coll_factory&) = delete; - Rdb_tbl_prop_coll_factory& operator=(const Rdb_tbl_prop_coll_factory&) = delete; +public: + Rdb_tbl_prop_coll_factory(const Rdb_tbl_prop_coll_factory &) = delete; + Rdb_tbl_prop_coll_factory & + operator=(const Rdb_tbl_prop_coll_factory &) = delete; - explicit Rdb_tbl_prop_coll_factory(Rdb_ddl_manager* ddl_manager) - : m_ddl_manager(ddl_manager) { - } + explicit Rdb_tbl_prop_coll_factory(Rdb_ddl_manager *ddl_manager) + : m_ddl_manager(ddl_manager) {} /* Override parent class's virtual methods of interest. 
*/ - virtual rocksdb::TablePropertiesCollector* CreateTablePropertiesCollector( + virtual rocksdb::TablePropertiesCollector *CreateTablePropertiesCollector( rocksdb::TablePropertiesCollectorFactory::Context context) override { - return new Rdb_tbl_prop_coll( - m_ddl_manager, m_params, context.column_family_id, - m_table_stats_sampling_pct); + return new Rdb_tbl_prop_coll(m_ddl_manager, m_params, + context.column_family_id, + m_table_stats_sampling_pct); } - virtual const char* Name() const override { + virtual const char *Name() const override { return "Rdb_tbl_prop_coll_factory"; } - public: - void SetCompactionParams(const Rdb_compact_params& params) { +public: + void SetCompactionParams(const Rdb_compact_params ¶ms) { m_params = params; } @@ -185,10 +169,10 @@ class Rdb_tbl_prop_coll_factory m_table_stats_sampling_pct = table_stats_sampling_pct; } - private: - Rdb_ddl_manager* const m_ddl_manager; +private: + Rdb_ddl_manager *const m_ddl_manager; Rdb_compact_params m_params; uint8_t m_table_stats_sampling_pct; }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_buff.h b/storage/rocksdb/rdb_buff.h index deb718e88c9..f035085cae6 100644 --- a/storage/rocksdb/rdb_buff.h +++ b/storage/rocksdb/rdb_buff.h @@ -27,97 +27,86 @@ namespace myrocks { ("netstr") which stores data in Network Byte Order (Big Endian). */ -inline void rdb_netstr_append_uint64(my_core::String* const out_netstr, - const uint64 &val) -{ +inline void rdb_netstr_append_uint64(my_core::String *const out_netstr, + const uint64 &val) { DBUG_ASSERT(out_netstr != nullptr); // Convert from host machine byte order (usually Little Endian) to network // byte order (Big Endian). 
- uint64 net_val= htobe64(val); - out_netstr->append(reinterpret_cast<char*>(&net_val), sizeof(net_val)); + uint64 net_val = htobe64(val); + out_netstr->append(reinterpret_cast<char *>(&net_val), sizeof(net_val)); } -inline void rdb_netstr_append_uint32(my_core::String* const out_netstr, - const uint32 &val) -{ +inline void rdb_netstr_append_uint32(my_core::String *const out_netstr, + const uint32 &val) { DBUG_ASSERT(out_netstr != nullptr); // Convert from host machine byte order (usually Little Endian) to network // byte order (Big Endian). - uint32 net_val= htobe32(val); - out_netstr->append(reinterpret_cast<char*>(&net_val), sizeof(net_val)); + uint32 net_val = htobe32(val); + out_netstr->append(reinterpret_cast<char *>(&net_val), sizeof(net_val)); } -inline void rdb_netstr_append_uint16(my_core::String* const out_netstr, - const uint16 &val) -{ +inline void rdb_netstr_append_uint16(my_core::String *const out_netstr, + const uint16 &val) { DBUG_ASSERT(out_netstr != nullptr); // Convert from host machine byte order (usually Little Endian) to network // byte order (Big Endian). - uint16 net_val= htobe16(val); - out_netstr->append(reinterpret_cast<char*>(&net_val), sizeof(net_val)); + uint16 net_val = htobe16(val); + out_netstr->append(reinterpret_cast<char *>(&net_val), sizeof(net_val)); } - /* Basic network buffer ("netbuf") write helper functions. */ -inline void rdb_netbuf_store_uint64(uchar* const dst_netbuf, const uint64 &n) -{ +inline void rdb_netbuf_store_uint64(uchar *const dst_netbuf, const uint64 &n) { DBUG_ASSERT(dst_netbuf != nullptr); // Convert from host byte order (usually Little Endian) to network byte order // (Big Endian). 
- uint64 net_val= htobe64(n); + uint64 net_val = htobe64(n); memcpy(dst_netbuf, &net_val, sizeof(net_val)); } -inline void rdb_netbuf_store_uint32(uchar* const dst_netbuf, const uint32 &n) -{ +inline void rdb_netbuf_store_uint32(uchar *const dst_netbuf, const uint32 &n) { DBUG_ASSERT(dst_netbuf != nullptr); // Convert from host byte order (usually Little Endian) to network byte order // (Big Endian). - uint32 net_val= htobe32(n); + uint32 net_val = htobe32(n); memcpy(dst_netbuf, &net_val, sizeof(net_val)); } -inline void rdb_netbuf_store_uint16(uchar* const dst_netbuf, const uint16 &n) -{ +inline void rdb_netbuf_store_uint16(uchar *const dst_netbuf, const uint16 &n) { DBUG_ASSERT(dst_netbuf != nullptr); // Convert from host byte order (usually Little Endian) to network byte order // (Big Endian). - uint16 net_val= htobe16(n); + uint16 net_val = htobe16(n); memcpy(dst_netbuf, &net_val, sizeof(net_val)); } -inline void rdb_netbuf_store_byte(uchar* const dst_netbuf, const uchar &c) -{ +inline void rdb_netbuf_store_byte(uchar *const dst_netbuf, const uchar &c) { DBUG_ASSERT(dst_netbuf != nullptr); - *dst_netbuf= c; + *dst_netbuf = c; } -inline void rdb_netbuf_store_index(uchar* const dst_netbuf, - const uint32 &number) -{ +inline void rdb_netbuf_store_index(uchar *const dst_netbuf, + const uint32 &number) { DBUG_ASSERT(dst_netbuf != nullptr); rdb_netbuf_store_uint32(dst_netbuf, number); } - /* Basic conversion helper functions from network byte order (Big Endian) to host machine byte order (usually Little Endian). 
*/ -inline uint64 rdb_netbuf_to_uint64(const uchar* const netbuf) -{ +inline uint64 rdb_netbuf_to_uint64(const uchar *const netbuf) { DBUG_ASSERT(netbuf != nullptr); uint64 net_val; @@ -128,8 +117,7 @@ inline uint64 rdb_netbuf_to_uint64(const uchar* const netbuf) return be64toh(net_val); } -inline uint32 rdb_netbuf_to_uint32(const uchar* const netbuf) -{ +inline uint32 rdb_netbuf_to_uint32(const uchar *const netbuf) { DBUG_ASSERT(netbuf != nullptr); uint32 net_val; @@ -140,8 +128,7 @@ inline uint32 rdb_netbuf_to_uint32(const uchar* const netbuf) return be32toh(net_val); } -inline uint16 rdb_netbuf_to_uint16(const uchar* const netbuf) -{ +inline uint16 rdb_netbuf_to_uint16(const uchar *const netbuf) { DBUG_ASSERT(netbuf != nullptr); uint16 net_val; @@ -152,14 +139,12 @@ inline uint16 rdb_netbuf_to_uint16(const uchar* const netbuf) return be16toh(net_val); } -inline uchar rdb_netbuf_to_byte(const uchar* const netbuf) -{ +inline uchar rdb_netbuf_to_byte(const uchar *const netbuf) { DBUG_ASSERT(netbuf != nullptr); - return(uchar)netbuf[0]; + return (uchar)netbuf[0]; } - /* Basic network buffer ("netbuf") read helper functions. Network buffer stores data in Network Byte Order (Big Endian). @@ -167,13 +152,12 @@ inline uchar rdb_netbuf_to_byte(const uchar* const netbuf) the netbuf pointer gets advanced to the following byte. */ -inline uint64 rdb_netbuf_read_uint64(const uchar **netbuf_ptr) -{ +inline uint64 rdb_netbuf_read_uint64(const uchar **netbuf_ptr) { DBUG_ASSERT(netbuf_ptr != nullptr); // Convert from network byte order (Big Endian) to host machine byte order // (usually Little Endian). - const uint64 host_val= rdb_netbuf_to_uint64(*netbuf_ptr); + const uint64 host_val = rdb_netbuf_to_uint64(*netbuf_ptr); // Advance pointer. 
*netbuf_ptr += sizeof(host_val); @@ -181,13 +165,12 @@ inline uint64 rdb_netbuf_read_uint64(const uchar **netbuf_ptr) return host_val; } -inline uint32 rdb_netbuf_read_uint32(const uchar **netbuf_ptr) -{ +inline uint32 rdb_netbuf_read_uint32(const uchar **netbuf_ptr) { DBUG_ASSERT(netbuf_ptr != nullptr); // Convert from network byte order (Big Endian) to host machine byte order // (usually Little Endian). - const uint32 host_val= rdb_netbuf_to_uint32(*netbuf_ptr); + const uint32 host_val = rdb_netbuf_to_uint32(*netbuf_ptr); // Advance pointer. *netbuf_ptr += sizeof(host_val); @@ -195,13 +178,12 @@ inline uint32 rdb_netbuf_read_uint32(const uchar **netbuf_ptr) return host_val; } -inline uint16 rdb_netbuf_read_uint16(const uchar **netbuf_ptr) -{ +inline uint16 rdb_netbuf_read_uint16(const uchar **netbuf_ptr) { DBUG_ASSERT(netbuf_ptr != nullptr); // Convert from network byte order (Big Endian) to host machine byte order // (usually Little Endian). - const uint16 host_val= rdb_netbuf_to_uint16(*netbuf_ptr); + const uint16 host_val = rdb_netbuf_to_uint16(*netbuf_ptr); // Advance pointer. *netbuf_ptr += sizeof(host_val); @@ -210,13 +192,12 @@ inline uint16 rdb_netbuf_read_uint16(const uchar **netbuf_ptr) } inline void rdb_netbuf_read_gl_index(const uchar **netbuf_ptr, - GL_INDEX_ID* const gl_index_id) -{ + GL_INDEX_ID *const gl_index_id) { DBUG_ASSERT(gl_index_id != nullptr); DBUG_ASSERT(netbuf_ptr != nullptr); - gl_index_id->cf_id= rdb_netbuf_read_uint32(netbuf_ptr); - gl_index_id->index_id= rdb_netbuf_read_uint32(netbuf_ptr); + gl_index_id->cf_id = rdb_netbuf_read_uint32(netbuf_ptr); + gl_index_id->index_id = rdb_netbuf_read_uint32(netbuf_ptr); } /* @@ -225,17 +206,17 @@ inline void rdb_netbuf_read_gl_index(const uchar **netbuf_ptr, - it prevents one from reading beyond the end of the string. 
*/ -class Rdb_string_reader -{ - const char* m_ptr; +class Rdb_string_reader { + const char *m_ptr; uint m_len; - private: - Rdb_string_reader& operator=(const Rdb_string_reader&) = default; - public: - Rdb_string_reader(const Rdb_string_reader&) = default; + +private: + Rdb_string_reader &operator=(const Rdb_string_reader &) = default; + +public: + Rdb_string_reader(const Rdb_string_reader &) = default; /* named constructor */ - static Rdb_string_reader read_or_empty(const rocksdb::Slice* const slice) - { + static Rdb_string_reader read_or_empty(const rocksdb::Slice *const slice) { if (!slice) { return Rdb_string_reader(""); } else { @@ -243,72 +224,59 @@ class Rdb_string_reader } } - explicit Rdb_string_reader(const std::string &str) - { - m_len= str.length(); - if (m_len) - { - m_ptr= &str.at(0); - } - else - { + explicit Rdb_string_reader(const std::string &str) { + m_len = str.length(); + if (m_len) { + m_ptr = &str.at(0); + } else { /* One can a create a Rdb_string_reader for reading from an empty string (although attempts to read anything will fail). We must not access str.at(0), since len==0, we can set ptr to any value. */ - m_ptr= nullptr; + m_ptr = nullptr; } } - explicit Rdb_string_reader(const rocksdb::Slice* const slice) - { - m_ptr= slice->data(); - m_len= slice->size(); + explicit Rdb_string_reader(const rocksdb::Slice *const slice) { + m_ptr = slice->data(); + m_len = slice->size(); } /* Read the next @param size bytes. Returns pointer to the bytes read, or nullptr if the remaining string doesn't have that many bytes. 
*/ - const char *read(const uint &size) - { + const char *read(const uint &size) { const char *res; - if (m_len < size) - { - res= nullptr; - } - else - { - res= m_ptr; + if (m_len < size) { + res = nullptr; + } else { + res = m_ptr; m_ptr += size; m_len -= size; } return res; } - bool read_uint8(uint* const res) - { + bool read_uint8(uint *const res) { const uchar *p; - if (!(p= reinterpret_cast<const uchar*>(read(1)))) - return true; // error - else - { - *res= *p; - return false; // Ok + if (!(p = reinterpret_cast<const uchar *>(read(1)))) + return true; // error + else { + *res = *p; + return false; // Ok } } - bool read_uint16(uint* const res) - { + bool read_uint16(uint *const res) { const uchar *p; - if (!(p= reinterpret_cast<const uchar*>(read(2)))) - return true; // error - else - { - *res= rdb_netbuf_to_uint16(p); - return false; // Ok + if (!(p = reinterpret_cast<const uchar *>(read(2)))) + return true; // error + else { + *res = rdb_netbuf_to_uint16(p); + return false; // Ok } } @@ -322,7 +290,6 @@ class Rdb_string_reader const char *get_current_ptr() const { return m_ptr; } }; - /* @brief A buffer one can write the data to. 
@@ -338,132 +305,112 @@ class Rdb_string_reader */ -class Rdb_string_writer -{ +class Rdb_string_writer { std::vector<uchar> m_data; - public: - Rdb_string_writer(const Rdb_string_writer&) = delete; - Rdb_string_writer& operator=(const Rdb_string_writer&) = delete; + +public: + Rdb_string_writer(const Rdb_string_writer &) = delete; + Rdb_string_writer &operator=(const Rdb_string_writer &) = delete; Rdb_string_writer() = default; void clear() { m_data.clear(); } - void write_uint8(const uint &val) - { + void write_uint8(const uint &val) { m_data.push_back(static_cast<uchar>(val)); } - void write_uint16(const uint &val) - { - const auto size= m_data.size(); + void write_uint16(const uint &val) { + const auto size = m_data.size(); m_data.resize(size + 2); rdb_netbuf_store_uint16(m_data.data() + size, val); } - void write_uint32(const uint &val) - { - const auto size= m_data.size(); + void write_uint32(const uint &val) { + const auto size = m_data.size(); m_data.resize(size + 4); rdb_netbuf_store_uint32(m_data.data() + size, val); } - void write(const uchar* const new_data, const size_t &len) - { + void write(const uchar *const new_data, const size_t &len) { DBUG_ASSERT(new_data != nullptr); m_data.insert(m_data.end(), new_data, new_data + len); } - uchar* ptr() { return m_data.data(); } + uchar *ptr() { return m_data.data(); } size_t get_current_pos() const { return m_data.size(); } - void write_uint8_at(const size_t &pos, const uint &new_val) - { + void write_uint8_at(const size_t &pos, const uint &new_val) { // This function will only overwrite what was written DBUG_ASSERT(pos < get_current_pos()); - m_data.data()[pos]= new_val; + m_data.data()[pos] = new_val; } - void write_uint16_at(const size_t &pos, const uint &new_val) - { + void write_uint16_at(const size_t &pos, const uint &new_val) { // This function will only overwrite what was written DBUG_ASSERT(pos < get_current_pos() && (pos + 1) < get_current_pos()); rdb_netbuf_store_uint16(m_data.data() + pos, 
new_val); } }; - /* A helper class for writing bits into Rdb_string_writer. The class assumes (but doesn't check) that nobody tries to write anything to the Rdb_string_writer that it is writing to. */ -class Rdb_bit_writer -{ +class Rdb_bit_writer { Rdb_string_writer *m_writer; uchar m_offset; - public: - Rdb_bit_writer(const Rdb_bit_writer&) = delete; - Rdb_bit_writer& operator=(const Rdb_bit_writer&) = delete; - - explicit Rdb_bit_writer(Rdb_string_writer* writer_arg) - : m_writer(writer_arg), - m_offset(0) - { - } - void write(uint size, const uint &value) - { +public: + Rdb_bit_writer(const Rdb_bit_writer &) = delete; + Rdb_bit_writer &operator=(const Rdb_bit_writer &) = delete; + + explicit Rdb_bit_writer(Rdb_string_writer *writer_arg) + : m_writer(writer_arg), m_offset(0) {} + + void write(uint size, const uint &value) { DBUG_ASSERT((value & ((1 << size) - 1)) == value); - while (size > 0) - { - if (m_offset == 0) - { + while (size > 0) { + if (m_offset == 0) { m_writer->write_uint8(0); } // number of bits to put in this byte const uint bits = std::min(size, (uint)(8 - m_offset)); - uchar* const last_byte= m_writer->ptr() + m_writer->get_current_pos() - 1; - *last_byte |= - (uchar) ((value >> (size - bits)) & ((1 << bits) - 1)) << m_offset; + uchar *const last_byte = + m_writer->ptr() + m_writer->get_current_pos() - 1; + *last_byte |= (uchar)((value >> (size - bits)) & ((1 << bits) - 1)) + << m_offset; size -= bits; m_offset = (m_offset + bits) & 0x7; } } }; -class Rdb_bit_reader -{ +class Rdb_bit_reader { const uchar *m_cur; uchar m_offset; uint m_ret; - Rdb_string_reader* const m_reader; - public: - Rdb_bit_reader(const Rdb_bit_reader&) = delete; - Rdb_bit_reader& operator=(const Rdb_bit_reader&) = delete; - - explicit Rdb_bit_reader(Rdb_string_reader* const reader) - : m_cur(nullptr), - m_offset(0), - m_reader(reader) - { - } + Rdb_string_reader *const m_reader; + +public: + Rdb_bit_reader(const Rdb_bit_reader &) = delete; + Rdb_bit_reader &operator=(const 
Rdb_bit_reader &) = delete; + + explicit Rdb_bit_reader(Rdb_string_reader *const reader) + : m_cur(nullptr), m_offset(0), m_reader(reader) {} // Returns a pointer to an uint containing the bits read. On subsequent // reads, the value being pointed to will be overwritten. Returns nullptr // on failure. - uint *read(uint size) - { - m_ret= 0; + uint *read(uint size) { + m_ret = 0; DBUG_ASSERT(size <= 32); - while (size > 0) - { - if (m_offset == 0) - { - m_cur= (const uchar *)m_reader->read(1); - if (m_cur == nullptr) - { + while (size > 0) { + if (m_offset == 0) { + m_cur = (const uchar *)m_reader->read(1); + if (m_cur == nullptr) { return nullptr; } } @@ -479,4 +426,4 @@ class Rdb_bit_reader } }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_cf_manager.cc b/storage/rocksdb/rdb_cf_manager.cc index 7ae7d362159..2bd89fffee5 100644 --- a/storage/rocksdb/rdb_cf_manager.cc +++ b/storage/rocksdb/rdb_cf_manager.cc @@ -15,7 +15,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation +#pragma implementation // gcc: Class implementation #endif /* This C++ files header file */ @@ -28,8 +28,7 @@ namespace myrocks { /* Check if ColumnFamily name says it's a reverse-ordered CF */ -bool Rdb_cf_manager::is_cf_name_reverse(const char* const name) -{ +bool Rdb_cf_manager::is_cf_name_reverse(const char *const name) { /* nullptr means the default CF is used.. (TODO: can the default CF be * reverse?) 
*/ if (name && !strncmp(name, "rev:", 4)) @@ -43,9 +42,8 @@ static PSI_mutex_key ex_key_cfm; #endif void Rdb_cf_manager::init( - Rdb_cf_options* const cf_options, - std::vector<rocksdb::ColumnFamilyHandle*>* const handles) -{ + Rdb_cf_options *const cf_options, + std::vector<rocksdb::ColumnFamilyHandle *> *const handles) { mysql_mutex_init(ex_key_cfm, &m_mutex, MY_MUTEX_INIT_FAST); DBUG_ASSERT(cf_options != nullptr); @@ -61,33 +59,28 @@ void Rdb_cf_manager::init( } } - -void Rdb_cf_manager::cleanup() -{ +void Rdb_cf_manager::cleanup() { for (auto it : m_cf_name_map) { delete it.second; } mysql_mutex_destroy(&m_mutex); } - /** Generate Column Family name for per-index column families @param res OUT Column Family name */ -void Rdb_cf_manager::get_per_index_cf_name(const std::string& db_table_name, - const char* const index_name, - std::string* const res) -{ +void Rdb_cf_manager::get_per_index_cf_name(const std::string &db_table_name, + const char *const index_name, + std::string *const res) { DBUG_ASSERT(index_name != nullptr); DBUG_ASSERT(res != nullptr); *res = db_table_name + "." + index_name; } - /* @brief Find column family by name. 
If it doesn't exist, create it @@ -95,53 +88,50 @@ void Rdb_cf_manager::get_per_index_cf_name(const std::string& db_table_name, @detail See Rdb_cf_manager::get_cf */ -rocksdb::ColumnFamilyHandle* -Rdb_cf_manager::get_or_create_cf(rocksdb::DB* const rdb, - const char *cf_name, - const std::string& db_table_name, - const char* const index_name, - bool* const is_automatic) -{ +rocksdb::ColumnFamilyHandle * +Rdb_cf_manager::get_or_create_cf(rocksdb::DB *const rdb, const char *cf_name, + const std::string &db_table_name, + const char *const index_name, + bool *const is_automatic) { DBUG_ASSERT(rdb != nullptr); DBUG_ASSERT(is_automatic != nullptr); - rocksdb::ColumnFamilyHandle* cf_handle; + rocksdb::ColumnFamilyHandle *cf_handle; mysql_mutex_lock(&m_mutex); - *is_automatic= false; + *is_automatic = false; if (cf_name == nullptr) - cf_name= DEFAULT_CF_NAME; + cf_name = DEFAULT_CF_NAME; std::string per_index_name; - if (!strcmp(cf_name, PER_INDEX_CF_NAME)) - { + if (!strcmp(cf_name, PER_INDEX_CF_NAME)) { get_per_index_cf_name(db_table_name, index_name, &per_index_name); - cf_name= per_index_name.c_str(); - *is_automatic= true; + cf_name = per_index_name.c_str(); + *is_automatic = true; } const auto it = m_cf_name_map.find(cf_name); if (it != m_cf_name_map.end()) - cf_handle= it->second; - else - { + cf_handle = it->second; + else { /* Create a Column Family. 
*/ const std::string cf_name_str(cf_name); rocksdb::ColumnFamilyOptions opts; m_cf_options->get_cf_options(cf_name_str, &opts); - sql_print_information("RocksDB: creating column family %s", cf_name_str.c_str()); - sql_print_information(" write_buffer_size=%ld", opts.write_buffer_size); + sql_print_information("RocksDB: creating column family %s", + cf_name_str.c_str()); + sql_print_information(" write_buffer_size=%ld", opts.write_buffer_size); sql_print_information(" target_file_size_base=%" PRIu64, opts.target_file_size_base); - const rocksdb::Status s= - rdb->CreateColumnFamily(opts, cf_name_str, &cf_handle); + const rocksdb::Status s = + rdb->CreateColumnFamily(opts, cf_name_str, &cf_handle); if (s.ok()) { m_cf_name_map[cf_handle->GetName()] = cf_handle; m_cf_id_map[cf_handle->GetID()] = cf_handle; } else { - cf_handle= nullptr; + cf_handle = nullptr; } } mysql_mutex_unlock(&m_mutex); @@ -149,7 +139,6 @@ Rdb_cf_manager::get_or_create_cf(rocksdb::DB* const rdb, return cf_handle; } - /* Find column family by its cf_name. @@ -162,27 +151,24 @@ Rdb_cf_manager::get_or_create_cf(rocksdb::DB* const rdb, db_table_name and index_name. 
*/ -rocksdb::ColumnFamilyHandle* -Rdb_cf_manager::get_cf(const char *cf_name, - const std::string& db_table_name, - const char* const index_name, - bool* const is_automatic) const -{ +rocksdb::ColumnFamilyHandle * +Rdb_cf_manager::get_cf(const char *cf_name, const std::string &db_table_name, + const char *const index_name, + bool *const is_automatic) const { DBUG_ASSERT(is_automatic != nullptr); - rocksdb::ColumnFamilyHandle* cf_handle; + rocksdb::ColumnFamilyHandle *cf_handle; - *is_automatic= false; + *is_automatic = false; mysql_mutex_lock(&m_mutex); if (cf_name == nullptr) - cf_name= DEFAULT_CF_NAME; + cf_name = DEFAULT_CF_NAME; std::string per_index_name; - if (!strcmp(cf_name, PER_INDEX_CF_NAME)) - { + if (!strcmp(cf_name, PER_INDEX_CF_NAME)) { get_per_index_cf_name(db_table_name, index_name, &per_index_name); - cf_name= per_index_name.c_str(); - *is_automatic= true; + cf_name = per_index_name.c_str(); + *is_automatic = true; } const auto it = m_cf_name_map.find(cf_name); @@ -193,9 +179,8 @@ Rdb_cf_manager::get_cf(const char *cf_name, return cf_handle; } -rocksdb::ColumnFamilyHandle* Rdb_cf_manager::get_cf(const uint32_t &id) const -{ - rocksdb::ColumnFamilyHandle* cf_handle = nullptr; +rocksdb::ColumnFamilyHandle *Rdb_cf_manager::get_cf(const uint32_t &id) const { + rocksdb::ColumnFamilyHandle *cf_handle = nullptr; mysql_mutex_lock(&m_mutex); const auto it = m_cf_id_map.find(id); @@ -206,9 +191,7 @@ rocksdb::ColumnFamilyHandle* Rdb_cf_manager::get_cf(const uint32_t &id) const return cf_handle; } -std::vector<std::string> -Rdb_cf_manager::get_cf_names(void) const -{ +std::vector<std::string> Rdb_cf_manager::get_cf_names(void) const { std::vector<std::string> names; mysql_mutex_lock(&m_mutex); @@ -219,10 +202,9 @@ Rdb_cf_manager::get_cf_names(void) const return names; } -std::vector<rocksdb::ColumnFamilyHandle*> -Rdb_cf_manager::get_all_cf(void) const -{ - std::vector<rocksdb::ColumnFamilyHandle*> list; +std::vector<rocksdb::ColumnFamilyHandle *> 
+Rdb_cf_manager::get_all_cf(void) const { + std::vector<rocksdb::ColumnFamilyHandle *> list; mysql_mutex_lock(&m_mutex); for (auto it : m_cf_id_map) { @@ -233,4 +215,4 @@ Rdb_cf_manager::get_all_cf(void) const return list; } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_cf_manager.h b/storage/rocksdb/rdb_cf_manager.h index 4fb5f7437e8..7b9654f3537 100644 --- a/storage/rocksdb/rdb_cf_manager.h +++ b/storage/rocksdb/rdb_cf_manager.h @@ -46,33 +46,32 @@ namespace myrocks { - CFs are created in a synchronized way. We can't remove them, yet. */ -class Rdb_cf_manager -{ - std::map<std::string, rocksdb::ColumnFamilyHandle*> m_cf_name_map; - std::map<uint32_t, rocksdb::ColumnFamilyHandle*> m_cf_id_map; +class Rdb_cf_manager { + std::map<std::string, rocksdb::ColumnFamilyHandle *> m_cf_name_map; + std::map<uint32_t, rocksdb::ColumnFamilyHandle *> m_cf_id_map; mutable mysql_mutex_t m_mutex; - static - void get_per_index_cf_name(const std::string& db_table_name, - const char* const index_name, - std::string* const res); + static void get_per_index_cf_name(const std::string &db_table_name, + const char *const index_name, + std::string *const res); - Rdb_cf_options* m_cf_options= nullptr; + Rdb_cf_options *m_cf_options = nullptr; public: - Rdb_cf_manager(const Rdb_cf_manager&) = delete; - Rdb_cf_manager& operator=(const Rdb_cf_manager&) = delete; + Rdb_cf_manager(const Rdb_cf_manager &) = delete; + Rdb_cf_manager &operator=(const Rdb_cf_manager &) = delete; Rdb_cf_manager() = default; - static bool is_cf_name_reverse(const char* const name); + static bool is_cf_name_reverse(const char *const name); /* - This is called right after the DB::Open() call. The parameters describe column + This is called right after the DB::Open() call. The parameters describe + column families that are present in the database. The first CF is the default CF. 
*/ - void init(Rdb_cf_options* cf_options, - std::vector<rocksdb::ColumnFamilyHandle*>* const handles); + void init(Rdb_cf_options *cf_options, + std::vector<rocksdb::ColumnFamilyHandle *> *const handles); void cleanup(); /* @@ -80,33 +79,33 @@ public: - cf_name=nullptr means use default column family - cf_name=_auto_ means use 'dbname.tablename.indexname' */ - rocksdb::ColumnFamilyHandle* get_or_create_cf( - rocksdb::DB* const rdb, const char *cf_name, - const std::string& db_table_name, const char* const index_name, - bool* const is_automatic); + rocksdb::ColumnFamilyHandle * + get_or_create_cf(rocksdb::DB *const rdb, const char *cf_name, + const std::string &db_table_name, + const char *const index_name, bool *const is_automatic); /* Used by table open */ - rocksdb::ColumnFamilyHandle* get_cf(const char *cf_name, - const std::string& db_table_name, - const char* const index_name, - bool* const is_automatic) const; + rocksdb::ColumnFamilyHandle *get_cf(const char *cf_name, + const std::string &db_table_name, + const char *const index_name, + bool *const is_automatic) const; /* Look up cf by id; used by datadic */ - rocksdb::ColumnFamilyHandle* get_cf(const uint32_t &id) const; + rocksdb::ColumnFamilyHandle *get_cf(const uint32_t &id) const; /* Used to iterate over column families for show status */ std::vector<std::string> get_cf_names(void) const; /* Used to iterate over column families */ - std::vector<rocksdb::ColumnFamilyHandle*> get_all_cf(void) const; + std::vector<rocksdb::ColumnFamilyHandle *> get_all_cf(void) const; // void drop_cf(); -- not implemented so far. 
- void get_cf_options( - const std::string &cf_name, - rocksdb::ColumnFamilyOptions* const opts) __attribute__((__nonnull__)) { - m_cf_options->get_cf_options(cf_name, opts); + void get_cf_options(const std::string &cf_name, + rocksdb::ColumnFamilyOptions *const opts) + MY_ATTRIBUTE((__nonnull__)) { + m_cf_options->get_cf_options(cf_name, opts); } }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_cf_options.cc b/storage/rocksdb/rdb_cf_options.cc index bd4d78d0796..97dc16fe4e6 100644 --- a/storage/rocksdb/rdb_cf_options.cc +++ b/storage/rocksdb/rdb_cf_options.cc @@ -15,7 +15,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation +#pragma implementation // gcc: Class implementation #endif /* This C++ files header file */ @@ -41,24 +41,23 @@ Rdb_pk_comparator Rdb_cf_options::s_pk_comparator; Rdb_rev_comparator Rdb_cf_options::s_rev_pk_comparator; bool Rdb_cf_options::init( - const rocksdb::BlockBasedTableOptions& table_options, - std::shared_ptr<rocksdb::TablePropertiesCollectorFactory> prop_coll_factory, - const char* const default_cf_options, - const char* const override_cf_options) -{ + const rocksdb::BlockBasedTableOptions &table_options, + std::shared_ptr<rocksdb::TablePropertiesCollectorFactory> prop_coll_factory, + const char *const default_cf_options, + const char *const override_cf_options) { DBUG_ASSERT(default_cf_options != nullptr); DBUG_ASSERT(override_cf_options != nullptr); m_default_cf_opts.comparator = &s_pk_comparator; m_default_cf_opts.compaction_filter_factory.reset( - new Rdb_compact_filter_factory); + new Rdb_compact_filter_factory); m_default_cf_opts.table_factory.reset( - rocksdb::NewBlockBasedTableFactory(table_options)); + rocksdb::NewBlockBasedTableFactory(table_options)); if (prop_coll_factory) { m_default_cf_opts.table_properties_collector_factories.push_back( - prop_coll_factory); + 
prop_coll_factory); } if (!set_default(std::string(default_cf_options)) || @@ -70,34 +69,27 @@ bool Rdb_cf_options::init( } void Rdb_cf_options::get(const std::string &cf_name, - rocksdb::ColumnFamilyOptions* const opts) -{ + rocksdb::ColumnFamilyOptions *const opts) { DBUG_ASSERT(opts != nullptr); // set defaults - rocksdb::GetColumnFamilyOptionsFromString(*opts, - m_default_config, - opts); + rocksdb::GetColumnFamilyOptionsFromString(*opts, m_default_config, opts); // set per-cf config if we have one Name_to_config_t::iterator it = m_name_map.find(cf_name); if (it != m_name_map.end()) { - rocksdb::GetColumnFamilyOptionsFromString(*opts, - it->second, - opts); + rocksdb::GetColumnFamilyOptionsFromString(*opts, it->second, opts); } } -bool Rdb_cf_options::set_default(const std::string &default_config) -{ +bool Rdb_cf_options::set_default(const std::string &default_config) { rocksdb::ColumnFamilyOptions options; if (!default_config.empty() && - !rocksdb::GetColumnFamilyOptionsFromString(options, - default_config, - &options).ok()) { - fprintf(stderr, - "Invalid default column family config: %s\n", + !rocksdb::GetColumnFamilyOptionsFromString(options, default_config, + &options) + .ok()) { + fprintf(stderr, "Invalid default column family config: %s\n", default_config.c_str()); return false; } @@ -107,8 +99,7 @@ bool Rdb_cf_options::set_default(const std::string &default_config) } // Skip over any spaces in the input string. -void Rdb_cf_options::skip_spaces(const std::string& input, size_t* const pos) -{ +void Rdb_cf_options::skip_spaces(const std::string &input, size_t *const pos) { DBUG_ASSERT(pos != nullptr); while (*pos < input.size() && isspace(input[*pos])) @@ -118,10 +109,9 @@ void Rdb_cf_options::skip_spaces(const std::string& input, size_t* const pos) // Find a valid column family name. Note that all characters except a // semicolon are valid (should this change?) 
and all spaces are trimmed from // the beginning and end but are not removed between other characters. -bool Rdb_cf_options::find_column_family(const std::string& input, - size_t* const pos, - std::string* const key) -{ +bool Rdb_cf_options::find_column_family(const std::string &input, + size_t *const pos, + std::string *const key) { DBUG_ASSERT(pos != nullptr); DBUG_ASSERT(key != nullptr); @@ -129,15 +119,13 @@ bool Rdb_cf_options::find_column_family(const std::string& input, size_t end_pos = *pos - 1; // Loop through the characters in the string until we see a '='. - for ( ; *pos < input.size() && input[*pos] != '='; ++(*pos)) - { + for (; *pos < input.size() && input[*pos] != '='; ++(*pos)) { // If this is not a space, move the end position to the current position. if (input[*pos] != ' ') end_pos = *pos; } - if (end_pos == beg_pos - 1) - { + if (end_pos == beg_pos - 1) { // NO_LINT_DEBUG sql_print_warning("No column family found (options: %s)", input.c_str()); return false; @@ -150,18 +138,16 @@ bool Rdb_cf_options::find_column_family(const std::string& input, // Find a valid options portion. Everything is deemed valid within the options // portion until we hit as many close curly braces as we have seen open curly // braces. -bool Rdb_cf_options::find_options(const std::string& input, size_t* const pos, - std::string* const options) -{ +bool Rdb_cf_options::find_options(const std::string &input, size_t *const pos, + std::string *const options) { DBUG_ASSERT(pos != nullptr); DBUG_ASSERT(options != nullptr); // Make sure we have an open curly brace at the current position. 
- if (*pos < input.size() && input[*pos] != '{') - { + if (*pos < input.size() && input[*pos] != '{') { // NO_LINT_DEBUG sql_print_warning("Invalid cf options, '{' expected (options: %s)", - input.c_str()); + input.c_str()); return false; } @@ -175,29 +161,26 @@ bool Rdb_cf_options::find_options(const std::string& input, size_t* const pos, // Loop through the characters in the string until we find the appropriate // number of closing curly braces. - while (*pos < input.size()) - { - switch (input[*pos]) - { - case '}': - // If this is a closing curly brace and we bring the count down to zero - // we can exit the loop with a valid options string. - if (--brace_count == 0) - { - *options = input.substr(beg_pos, *pos - beg_pos); - ++(*pos); // Move past the last closing curly brace - return true; - } - - break; - - case '{': - // If this is an open curly brace increment the count. - ++brace_count; - break; - - default: - break; + while (*pos < input.size()) { + switch (input[*pos]) { + case '}': + // If this is a closing curly brace and we bring the count down to zero + // we can exit the loop with a valid options string. + if (--brace_count == 0) { + *options = input.substr(beg_pos, *pos - beg_pos); + ++(*pos); // Move past the last closing curly brace + return true; + } + + break; + + case '{': + // If this is an open curly brace increment the count. + ++brace_count; + break; + + default: + break; } // Move to the next character. @@ -208,15 +191,14 @@ bool Rdb_cf_options::find_options(const std::string& input, size_t* const pos, // Generate an error. 
// NO_LINT_DEBUG sql_print_warning("Mismatched cf options, '}' expected (options: %s)", - input.c_str()); + input.c_str()); return false; } -bool Rdb_cf_options::find_cf_options_pair(const std::string& input, - size_t* const pos, - std::string* const cf, - std::string* const opt_str) -{ +bool Rdb_cf_options::find_cf_options_pair(const std::string &input, + size_t *const pos, + std::string *const cf, + std::string *const opt_str) { DBUG_ASSERT(pos != nullptr); DBUG_ASSERT(cf != nullptr); DBUG_ASSERT(opt_str != nullptr); @@ -229,11 +211,10 @@ bool Rdb_cf_options::find_cf_options_pair(const std::string& input, return false; // If we are at the end of the input then we generate an error. - if (*pos == input.size()) - { + if (*pos == input.size()) { // NO_LINT_DEBUG sql_print_warning("Invalid cf options, '=' expected (options: %s)", - input.c_str()); + input.c_str()); return false; } @@ -250,13 +231,11 @@ bool Rdb_cf_options::find_cf_options_pair(const std::string& input, skip_spaces(input, pos); // We should either be at the end of the input string or at a semicolon. - if (*pos < input.size()) - { - if (input[*pos] != ';') - { + if (*pos < input.size()) { + if (input[*pos] != ';') { // NO_LINT_DEBUG sql_print_warning("Invalid cf options, ';' expected (options: %s)", - input.c_str()); + input.c_str()); return false; } @@ -266,8 +245,7 @@ bool Rdb_cf_options::find_cf_options_pair(const std::string& input, return true; } -bool Rdb_cf_options::set_override(const std::string &override_config) -{ +bool Rdb_cf_options::set_override(const std::string &override_config) { // TODO(???): support updates? std::string cf; @@ -277,15 +255,13 @@ bool Rdb_cf_options::set_override(const std::string &override_config) // Loop through the characters of the string until we reach the end. size_t pos = 0; - while (pos < override_config.size()) - { + while (pos < override_config.size()) { // Attempt to find <cf>={<opt_str>}. 
if (!find_cf_options_pair(override_config, &pos, &cf, &opt_str)) return false; // Generate an error if we have already seen this column family. - if (configs.find(cf) != configs.end()) - { + if (configs.find(cf) != configs.end()) { // NO_LINT_DEBUG sql_print_warning( "Duplicate entry for %s in override options (options: %s)", @@ -294,9 +270,8 @@ bool Rdb_cf_options::set_override(const std::string &override_config) } // Generate an error if the <opt_str> is not valid according to RocksDB. - if (!rocksdb::GetColumnFamilyOptionsFromString( - options, opt_str, &options).ok()) - { + if (!rocksdb::GetColumnFamilyOptionsFromString(options, opt_str, &options) + .ok()) { // NO_LINT_DEBUG sql_print_warning( "Invalid cf config for %s in override options (options: %s)", @@ -314,29 +289,24 @@ bool Rdb_cf_options::set_override(const std::string &override_config) return true; } -const rocksdb::Comparator* Rdb_cf_options::get_cf_comparator( - const std::string& cf_name) -{ - if (Rdb_cf_manager::is_cf_name_reverse(cf_name.c_str())) - { +const rocksdb::Comparator * +Rdb_cf_options::get_cf_comparator(const std::string &cf_name) { + if (Rdb_cf_manager::is_cf_name_reverse(cf_name.c_str())) { return &s_rev_pk_comparator; - } - else - { + } else { return &s_pk_comparator; } } void Rdb_cf_options::get_cf_options(const std::string &cf_name, - rocksdb::ColumnFamilyOptions* const opts) -{ + rocksdb::ColumnFamilyOptions *const opts) { DBUG_ASSERT(opts != nullptr); *opts = m_default_cf_opts; get(cf_name, opts); // Set the comparator according to 'rev:' - opts->comparator= get_cf_comparator(cf_name); + opts->comparator = get_cf_comparator(cf_name); } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_cf_options.h b/storage/rocksdb/rdb_cf_options.h index 8151d907eb7..1cd80a131ad 100644 --- a/storage/rocksdb/rdb_cf_options.h +++ b/storage/rocksdb/rdb_cf_options.h @@ -38,47 +38,47 @@ namespace myrocks { and also there is a default value which applies to column 
families not found in the map. */ -class Rdb_cf_options -{ - public: - Rdb_cf_options(const Rdb_cf_options&) = delete; - Rdb_cf_options& operator=(const Rdb_cf_options&) = delete; +class Rdb_cf_options { +public: + Rdb_cf_options(const Rdb_cf_options &) = delete; + Rdb_cf_options &operator=(const Rdb_cf_options &) = delete; Rdb_cf_options() = default; - void get(const std::string &cf_name, rocksdb::ColumnFamilyOptions* const opts); + void get(const std::string &cf_name, + rocksdb::ColumnFamilyOptions *const opts); - bool init( - const rocksdb::BlockBasedTableOptions& table_options, - std::shared_ptr<rocksdb::TablePropertiesCollectorFactory> prop_coll_factory, - const char* const default_cf_options, - const char* const override_cf_options); + bool init(const rocksdb::BlockBasedTableOptions &table_options, + std::shared_ptr<rocksdb::TablePropertiesCollectorFactory> + prop_coll_factory, + const char *const default_cf_options, + const char *const override_cf_options); - const rocksdb::ColumnFamilyOptions& get_defaults() const { + const rocksdb::ColumnFamilyOptions &get_defaults() const { return m_default_cf_opts; } - static const rocksdb::Comparator* get_cf_comparator( - const std::string& cf_name); + static const rocksdb::Comparator * + get_cf_comparator(const std::string &cf_name); - void get_cf_options( - const std::string &cf_name, - rocksdb::ColumnFamilyOptions* const opts) __attribute__((__nonnull__)); + void get_cf_options(const std::string &cf_name, + rocksdb::ColumnFamilyOptions *const opts) + MY_ATTRIBUTE((__nonnull__)); - private: +private: bool set_default(const std::string &default_config); bool set_override(const std::string &overide_config); /* Helper string manipulation functions */ - static void skip_spaces(const std::string& input, size_t* const pos); - static bool find_column_family(const std::string& input, size_t* const pos, - std::string* const key); - static bool find_options(const std::string& input, size_t* const pos, - std::string* const 
options); - static bool find_cf_options_pair(const std::string& input, size_t* const pos, - std::string* const cf, - std::string* const opt_str); - - private: + static void skip_spaces(const std::string &input, size_t *const pos); + static bool find_column_family(const std::string &input, size_t *const pos, + std::string *const key); + static bool find_options(const std::string &input, size_t *const pos, + std::string *const options); + static bool find_cf_options_pair(const std::string &input, size_t *const pos, + std::string *const cf, + std::string *const opt_str); + +private: static Rdb_pk_comparator s_pk_comparator; static Rdb_rev_comparator s_rev_pk_comparator; @@ -93,4 +93,4 @@ class Rdb_cf_options rocksdb::ColumnFamilyOptions m_default_cf_opts; }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_compact_filter.h b/storage/rocksdb/rdb_compact_filter.h index ca634f74d43..9e0d69597ff 100644 --- a/storage/rocksdb/rdb_compact_filter.h +++ b/storage/rocksdb/rdb_compact_filter.h @@ -17,7 +17,7 @@ #pragma once #ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation +#pragma implementation // gcc: Class implementation #endif /* C++ system header files */ @@ -32,11 +32,10 @@ namespace myrocks { -class Rdb_compact_filter : public rocksdb::CompactionFilter -{ - public: - Rdb_compact_filter(const Rdb_compact_filter&) = delete; - Rdb_compact_filter& operator=(const Rdb_compact_filter&) = delete; +class Rdb_compact_filter : public rocksdb::CompactionFilter { +public: + Rdb_compact_filter(const Rdb_compact_filter &) = delete; + Rdb_compact_filter &operator=(const Rdb_compact_filter &) = delete; explicit Rdb_compact_filter(uint32_t _cf_id) : m_cf_id(_cf_id) {} ~Rdb_compact_filter() {} @@ -45,32 +44,28 @@ class Rdb_compact_filter : public rocksdb::CompactionFilter // V1 Filter is thread safe on our usage (creating from Factory). 
// Make sure to protect instance variables when switching to thread // unsafe in the future. - virtual bool Filter(int level, - const rocksdb::Slice& key, - const rocksdb::Slice& existing_value, - std::string* new_value, - bool* value_changed) const override - { + virtual bool Filter(int level, const rocksdb::Slice &key, + const rocksdb::Slice &existing_value, + std::string *new_value, + bool *value_changed) const override { DBUG_ASSERT(key.size() >= sizeof(uint32)); GL_INDEX_ID gl_index_id; - gl_index_id.cf_id= m_cf_id; - gl_index_id.index_id= rdb_netbuf_to_uint32((const uchar*)key.data()); + gl_index_id.cf_id = m_cf_id; + gl_index_id.index_id = rdb_netbuf_to_uint32((const uchar *)key.data()); DBUG_ASSERT(gl_index_id.index_id >= 1); - if (gl_index_id != m_prev_index) // processing new index id + if (gl_index_id != m_prev_index) // processing new index id { - if (m_num_deleted > 0) - { - m_num_deleted= 0; + if (m_num_deleted > 0) { + m_num_deleted = 0; } - m_should_delete= + m_should_delete = rdb_get_dict_manager()->is_drop_index_ongoing(gl_index_id); - m_prev_index= gl_index_id; + m_prev_index = gl_index_id; } - if (m_should_delete) - { + if (m_should_delete) { m_num_deleted++; } @@ -79,42 +74,35 @@ class Rdb_compact_filter : public rocksdb::CompactionFilter virtual bool IgnoreSnapshots() const override { return true; } - virtual const char* Name() const override - { - return "Rdb_compact_filter"; - } + virtual const char *Name() const override { return "Rdb_compact_filter"; } - private: +private: // Column family for this compaction filter const uint32_t m_cf_id; // Index id of the previous record - mutable GL_INDEX_ID m_prev_index= {0, 0}; + mutable GL_INDEX_ID m_prev_index = {0, 0}; // Number of rows deleted for the same index id - mutable uint64 m_num_deleted= 0; + mutable uint64 m_num_deleted = 0; // Current index id should be deleted or not (should be deleted if true) - mutable bool m_should_delete= false; + mutable bool m_should_delete = false; }; -class 
Rdb_compact_filter_factory : public rocksdb::CompactionFilterFactory -{ - public: - Rdb_compact_filter_factory(const Rdb_compact_filter_factory&) = delete; - Rdb_compact_filter_factory& operator=(const Rdb_compact_filter_factory&) = delete; +class Rdb_compact_filter_factory : public rocksdb::CompactionFilterFactory { +public: + Rdb_compact_filter_factory(const Rdb_compact_filter_factory &) = delete; + Rdb_compact_filter_factory & + operator=(const Rdb_compact_filter_factory &) = delete; Rdb_compact_filter_factory() {} ~Rdb_compact_filter_factory() {} - const char* Name() const override - { - return "Rdb_compact_filter_factory"; - } + const char *Name() const override { return "Rdb_compact_filter_factory"; } std::unique_ptr<rocksdb::CompactionFilter> CreateCompactionFilter( - const rocksdb::CompactionFilter::Context& context) override - { + const rocksdb::CompactionFilter::Context &context) override { return std::unique_ptr<rocksdb::CompactionFilter>( - new Rdb_compact_filter(context.column_family_id)); + new Rdb_compact_filter(context.column_family_id)); } }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_comparator.h b/storage/rocksdb/rdb_comparator.h index 0e47556a778..47b83abc70a 100644 --- a/storage/rocksdb/rdb_comparator.h +++ b/storage/rocksdb/rdb_comparator.h @@ -24,6 +24,9 @@ /* RocksDB header files */ #include "rocksdb/comparator.h" +/* MyRocks header files */ +#include "./rdb_utils.h" + namespace myrocks { /* @@ -32,70 +35,65 @@ namespace myrocks { (todo: knowledge about this format is shared between this class and Rdb_key_def) */ -class Rdb_pk_comparator : public rocksdb::Comparator -{ - public: - Rdb_pk_comparator(const Rdb_pk_comparator&) = delete; - Rdb_pk_comparator& operator=(const Rdb_pk_comparator&) = delete; +class Rdb_pk_comparator : public rocksdb::Comparator { +public: + Rdb_pk_comparator(const Rdb_pk_comparator &) = delete; + Rdb_pk_comparator &operator=(const Rdb_pk_comparator &) = delete; 
Rdb_pk_comparator() = default; - static int bytewise_compare(const rocksdb::Slice& a, const rocksdb::Slice& b) - { - const size_t a_size= a.size(); - const size_t b_size= b.size(); - const size_t len= (a_size < b_size) ? a_size : b_size; + static int bytewise_compare(const rocksdb::Slice &a, + const rocksdb::Slice &b) { + const size_t a_size = a.size(); + const size_t b_size = b.size(); + const size_t len = (a_size < b_size) ? a_size : b_size; int res; - if ((res= memcmp(a.data(), b.data(), len))) + if ((res = memcmp(a.data(), b.data(), len))) return res; /* Ok, res== 0 */ - if (a_size != b_size) - { - return a_size < b_size? -1 : 1; + if (a_size != b_size) { + return a_size < b_size ? -1 : 1; } - return 0; + return HA_EXIT_SUCCESS; } /* Override virtual methods of interest */ - int Compare(const rocksdb::Slice& a, const rocksdb::Slice& b) const override - { - return bytewise_compare(a,b); + int Compare(const rocksdb::Slice &a, const rocksdb::Slice &b) const override { + return bytewise_compare(a, b); } - const char* Name() const override { return "RocksDB_SE_v3.10"; } + const char *Name() const override { return "RocksDB_SE_v3.10"; } - //TODO: advanced funcs: + // TODO: advanced funcs: // - FindShortestSeparator // - FindShortSuccessor // for now, do-nothing implementations: - void FindShortestSeparator(std::string* start, - const rocksdb::Slice& limit) const override {} - void FindShortSuccessor(std::string* key) const override {} + void FindShortestSeparator(std::string *start, + const rocksdb::Slice &limit) const override {} + void FindShortSuccessor(std::string *key) const override {} }; -class Rdb_rev_comparator : public rocksdb::Comparator -{ - public: - Rdb_rev_comparator(const Rdb_rev_comparator&) = delete; - Rdb_rev_comparator& operator=(const Rdb_rev_comparator&) = delete; +class Rdb_rev_comparator : public rocksdb::Comparator { +public: + Rdb_rev_comparator(const Rdb_rev_comparator &) = delete; + Rdb_rev_comparator &operator=(const Rdb_rev_comparator &) 
= delete; Rdb_rev_comparator() = default; - static int bytewise_compare(const rocksdb::Slice& a, const rocksdb::Slice& b) - { + static int bytewise_compare(const rocksdb::Slice &a, + const rocksdb::Slice &b) { return -Rdb_pk_comparator::bytewise_compare(a, b); } - int Compare(const rocksdb::Slice& a, const rocksdb::Slice& b) const override - { + int Compare(const rocksdb::Slice &a, const rocksdb::Slice &b) const override { return -Rdb_pk_comparator::bytewise_compare(a, b); } - const char* Name() const override { return "rev:RocksDB_SE_v3.10"; } - void FindShortestSeparator(std::string* start, - const rocksdb::Slice& limit) const override {} - void FindShortSuccessor(std::string* key) const override {} + const char *Name() const override { return "rev:RocksDB_SE_v3.10"; } + void FindShortestSeparator(std::string *start, + const rocksdb::Slice &limit) const override {} + void FindShortSuccessor(std::string *key) const override {} }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc index 3fa5f07844b..bd6bd2a0834 100644 --- a/storage/rocksdb/rdb_datadic.cc +++ b/storage/rocksdb/rdb_datadic.cc @@ -15,7 +15,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation +#pragma implementation // gcc: Class implementation #endif /* This C++ file's header file */ @@ -44,76 +44,56 @@ namespace myrocks { void get_mem_comparable_space(const CHARSET_INFO *cs, - const std::vector<uchar> **xfrm, - size_t *xfrm_len, + const std::vector<uchar> **xfrm, size_t *xfrm_len, size_t *mb_len); /* Rdb_key_def class implementation */ -Rdb_key_def::Rdb_key_def( - uint indexnr_arg, uint keyno_arg, - rocksdb::ColumnFamilyHandle* cf_handle_arg, - uint16_t index_dict_version_arg, - uchar index_type_arg, - uint16_t kv_format_version_arg, - bool is_reverse_cf_arg, bool is_auto_cf_arg, - const char* _name, - 
Rdb_index_stats _stats -) : - m_index_number(indexnr_arg), - m_cf_handle(cf_handle_arg), - m_index_dict_version(index_dict_version_arg), - m_index_type(index_type_arg), - m_kv_format_version(kv_format_version_arg), - m_is_reverse_cf(is_reverse_cf_arg), - m_is_auto_cf(is_auto_cf_arg), - m_name(_name), - m_stats(_stats), - m_pk_part_no(nullptr), - m_pack_info(nullptr), - m_keyno(keyno_arg), - m_key_parts(0), - m_maxlength(0) // means 'not intialized' +Rdb_key_def::Rdb_key_def(uint indexnr_arg, uint keyno_arg, + rocksdb::ColumnFamilyHandle *cf_handle_arg, + uint16_t index_dict_version_arg, uchar index_type_arg, + uint16_t kv_format_version_arg, bool is_reverse_cf_arg, + bool is_auto_cf_arg, const char *_name, + Rdb_index_stats _stats) + : m_index_number(indexnr_arg), m_cf_handle(cf_handle_arg), + m_index_dict_version(index_dict_version_arg), + m_index_type(index_type_arg), m_kv_format_version(kv_format_version_arg), + m_is_reverse_cf(is_reverse_cf_arg), m_is_auto_cf(is_auto_cf_arg), + m_name(_name), m_stats(_stats), m_pk_part_no(nullptr), + m_pack_info(nullptr), m_keyno(keyno_arg), m_key_parts(0), + m_prefix_extractor(nullptr), m_maxlength(0) // means 'not intialized' { mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST); rdb_netbuf_store_index(m_index_number_storage_form, m_index_number); DBUG_ASSERT(m_cf_handle != nullptr); } -Rdb_key_def::Rdb_key_def(const Rdb_key_def& k) : - m_index_number(k.m_index_number), - m_cf_handle(k.m_cf_handle), - m_is_reverse_cf(k.m_is_reverse_cf), - m_is_auto_cf(k.m_is_auto_cf), - m_name(k.m_name), - m_stats(k.m_stats), - m_pk_part_no(k.m_pk_part_no), - m_pack_info(k.m_pack_info), - m_keyno(k.m_keyno), - m_key_parts(k.m_key_parts), - m_maxlength(k.m_maxlength) -{ +Rdb_key_def::Rdb_key_def(const Rdb_key_def &k) + : m_index_number(k.m_index_number), m_cf_handle(k.m_cf_handle), + m_is_reverse_cf(k.m_is_reverse_cf), m_is_auto_cf(k.m_is_auto_cf), + m_name(k.m_name), m_stats(k.m_stats), m_pk_part_no(k.m_pk_part_no), + 
m_pack_info(k.m_pack_info), m_keyno(k.m_keyno), + m_key_parts(k.m_key_parts), m_prefix_extractor(k.m_prefix_extractor), + m_maxlength(k.m_maxlength) { mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST); rdb_netbuf_store_index(m_index_number_storage_form, m_index_number); - if (k.m_pack_info) - { - const size_t size= sizeof(Rdb_field_packing) * k.m_key_parts; - m_pack_info= reinterpret_cast<Rdb_field_packing*>(my_malloc(size, MYF(0))); + if (k.m_pack_info) { + const size_t size = sizeof(Rdb_field_packing) * k.m_key_parts; + m_pack_info = + reinterpret_cast<Rdb_field_packing *>(my_malloc(size, MYF(0))); memcpy(m_pack_info, k.m_pack_info, size); } - if (k.m_pk_part_no) - { - const size_t size = sizeof(uint)*m_key_parts; - m_pk_part_no= reinterpret_cast<uint*>(my_malloc(size, MYF(0))); + if (k.m_pk_part_no) { + const size_t size = sizeof(uint) * m_key_parts; + m_pk_part_no = reinterpret_cast<uint *>(my_malloc(size, MYF(0))); memcpy(m_pk_part_no, k.m_pk_part_no, size); } } -Rdb_key_def::~Rdb_key_def() -{ +Rdb_key_def::~Rdb_key_def() { mysql_mutex_destroy(&m_mutex); my_free(m_pk_part_no); @@ -123,9 +103,8 @@ Rdb_key_def::~Rdb_key_def() m_pack_info = nullptr; } -void Rdb_key_def::setup(const TABLE* const tbl, - const Rdb_tbl_def* const tbl_def) -{ +void Rdb_key_def::setup(const TABLE *const tbl, + const Rdb_tbl_def *const tbl_def) { DBUG_ASSERT(tbl != nullptr); DBUG_ASSERT(tbl_def != nullptr); @@ -133,45 +112,38 @@ void Rdb_key_def::setup(const TABLE* const tbl, Set max_length based on the table. This can be called concurrently from multiple threads, so there is a mutex to protect this code. 
*/ - const bool is_hidden_pk= (m_index_type == INDEX_TYPE_HIDDEN_PRIMARY); - const bool hidden_pk_exists= table_has_hidden_pk(tbl); - const bool secondary_key= (m_index_type == INDEX_TYPE_SECONDARY); - if (!m_maxlength) - { + const bool is_hidden_pk = (m_index_type == INDEX_TYPE_HIDDEN_PRIMARY); + const bool hidden_pk_exists = table_has_hidden_pk(tbl); + const bool secondary_key = (m_index_type == INDEX_TYPE_SECONDARY); + if (!m_maxlength) { mysql_mutex_lock(&m_mutex); - if (m_maxlength != 0) - { + if (m_maxlength != 0) { mysql_mutex_unlock(&m_mutex); return; } - KEY *key_info= nullptr; - KEY *pk_info= nullptr; - if (!is_hidden_pk) - { - key_info= &tbl->key_info[m_keyno]; + KEY *key_info = nullptr; + KEY *pk_info = nullptr; + if (!is_hidden_pk) { + key_info = &tbl->key_info[m_keyno]; if (!hidden_pk_exists) - pk_info= &tbl->key_info[tbl->s->primary_key]; - m_name= std::string(key_info->name); - } - else - { - m_name= HIDDEN_PK_NAME; + pk_info = &tbl->key_info[tbl->s->primary_key]; + m_name = std::string(key_info->name); + } else { + m_name = HIDDEN_PK_NAME; } if (secondary_key) - m_pk_key_parts= hidden_pk_exists ? 1 : pk_info->actual_key_parts; - else - { - pk_info= nullptr; - m_pk_key_parts= 0; + m_pk_key_parts = hidden_pk_exists ? 1 : pk_info->actual_key_parts; + else { + pk_info = nullptr; + m_pk_key_parts = 0; } // "unique" secondary keys support: - m_key_parts= is_hidden_pk ? 1 : key_info->actual_key_parts; + m_key_parts = is_hidden_pk ? 1 : key_info->actual_key_parts; - if (secondary_key) - { + if (secondary_key) { /* In most cases, SQL layer puts PK columns as invisible suffix at the end of secondary key. 
There are cases where this doesn't happen: @@ -190,115 +162,102 @@ void Rdb_key_def::setup(const TABLE* const tbl, } if (secondary_key) - m_pk_part_no= reinterpret_cast<uint*>(my_malloc(sizeof(uint)*m_key_parts, - MYF(0))); + m_pk_part_no = reinterpret_cast<uint *>( + my_malloc(sizeof(uint) * m_key_parts, MYF(0))); else - m_pk_part_no= nullptr; + m_pk_part_no = nullptr; - const size_t size= sizeof(Rdb_field_packing) * m_key_parts; - m_pack_info= reinterpret_cast<Rdb_field_packing*>(my_malloc(size, MYF(0))); + const size_t size = sizeof(Rdb_field_packing) * m_key_parts; + m_pack_info = + reinterpret_cast<Rdb_field_packing *>(my_malloc(size, MYF(0))); - size_t max_len= INDEX_NUMBER_SIZE; - int unpack_len= 0; - int max_part_len= 0; - bool simulating_extkey= false; - uint dst_i= 0; + size_t max_len = INDEX_NUMBER_SIZE; + int unpack_len = 0; + int max_part_len = 0; + bool simulating_extkey = false; + uint dst_i = 0; - uint keyno_to_set= m_keyno; - uint keypart_to_set= 0; + uint keyno_to_set = m_keyno; + uint keypart_to_set = 0; - if (is_hidden_pk) - { - Field *field= nullptr; + if (is_hidden_pk) { + Field *field = nullptr; m_pack_info[dst_i].setup(this, field, keyno_to_set, 0, 0); - m_pack_info[dst_i].m_unpack_data_offset= unpack_len; - max_len += m_pack_info[dst_i].m_max_image_len; - max_part_len= std::max(max_part_len, m_pack_info[dst_i].m_max_image_len); + m_pack_info[dst_i].m_unpack_data_offset = unpack_len; + max_len += m_pack_info[dst_i].m_max_image_len; + max_part_len = std::max(max_part_len, m_pack_info[dst_i].m_max_image_len); dst_i++; - } - else - { - KEY_PART_INFO *key_part= key_info->key_part; + } else { + KEY_PART_INFO *key_part = key_info->key_part; /* this loop also loops over the 'extended key' tail */ - for (uint src_i= 0; src_i < m_key_parts; src_i++, keypart_to_set++) - { - Field* const field= key_part ? key_part->field : nullptr; + for (uint src_i = 0; src_i < m_key_parts; src_i++, keypart_to_set++) { + Field *const field = key_part ? 
key_part->field : nullptr; - if (simulating_extkey && !hidden_pk_exists) - { + if (simulating_extkey && !hidden_pk_exists) { + DBUG_ASSERT(secondary_key); /* Check if this field is already present in the key definition */ - bool found= false; - for (uint j= 0; j < key_info->actual_key_parts; j++) - { - if (field->field_index == key_info->key_part[j].field->field_index) - { - found= true; + bool found = false; + for (uint j = 0; j < key_info->actual_key_parts; j++) { + if (field->field_index == + key_info->key_part[j].field->field_index && + key_part->length == key_info->key_part[j].length) { + found = true; break; } } - if (found) - { + if (found) { key_part++; continue; } } if (field && field->real_maybe_null()) - max_len +=1; // NULL-byte + max_len += 1; // NULL-byte m_pack_info[dst_i].setup(this, field, keyno_to_set, keypart_to_set, key_part ? key_part->length : 0); - m_pack_info[dst_i].m_unpack_data_offset= unpack_len; - - if (pk_info) - { - m_pk_part_no[dst_i]= -1; - for (uint j= 0; j < m_pk_key_parts; j++) - { - if (field->field_index == pk_info->key_part[j].field->field_index) - { - m_pk_part_no[dst_i]= j; + m_pack_info[dst_i].m_unpack_data_offset = unpack_len; + + if (pk_info) { + m_pk_part_no[dst_i] = -1; + for (uint j = 0; j < m_pk_key_parts; j++) { + if (field->field_index == pk_info->key_part[j].field->field_index) { + m_pk_part_no[dst_i] = j; break; } } - } - else if (secondary_key && hidden_pk_exists) - { + } else if (secondary_key && hidden_pk_exists) { /* The hidden pk can never be part of the sk. So it is always appended to the end of the sk. 
*/ - m_pk_part_no[dst_i]= -1; + m_pk_part_no[dst_i] = -1; if (simulating_extkey) - m_pk_part_no[dst_i]= 0; + m_pk_part_no[dst_i] = 0; } - max_len += m_pack_info[dst_i].m_max_image_len; + max_len += m_pack_info[dst_i].m_max_image_len; - max_part_len= std::max(max_part_len, - m_pack_info[dst_i].m_max_image_len); + max_part_len = + std::max(max_part_len, m_pack_info[dst_i].m_max_image_len); key_part++; /* For "unique" secondary indexes, pretend they have "index extensions" */ - if (secondary_key && src_i+1 == key_info->actual_key_parts) - { - simulating_extkey= true; - if (!hidden_pk_exists) - { - keyno_to_set= tbl->s->primary_key; - key_part= pk_info->key_part; - keypart_to_set= (uint)-1; - } - else - { - keyno_to_set= tbl_def->m_key_count - 1; - key_part= nullptr; - keypart_to_set= 0; + if (secondary_key && src_i + 1 == key_info->actual_key_parts) { + simulating_extkey = true; + if (!hidden_pk_exists) { + keyno_to_set = tbl->s->primary_key; + key_part = pk_info->key_part; + keypart_to_set = (uint)-1; + } else { + keyno_to_set = tbl_def->m_key_count - 1; + key_part = nullptr; + keypart_to_set = 0; } } @@ -306,21 +265,61 @@ void Rdb_key_def::setup(const TABLE* const tbl, } } - m_key_parts= dst_i; + m_key_parts = dst_i; /* Initialize the memory needed by the stats structure */ m_stats.m_distinct_keys_per_prefix.resize(get_key_parts()); + /* Cache prefix extractor for bloom filter usage later */ + rocksdb::Options opt = rdb_get_rocksdb_db()->GetOptions(get_cf()); + m_prefix_extractor = opt.prefix_extractor; + /* This should be the last member variable set before releasing the mutex so that other threads can't see the object partially set up. */ - m_maxlength= max_len; + m_maxlength = max_len; mysql_mutex_unlock(&m_mutex); } } +/** + Read a memcmp key part from a slice using the passed in reader. + + Returns -1 if field was null, 1 if error, 0 otherwise. 
+*/ +int Rdb_key_def::read_memcmp_key_part(const TABLE *table_arg, + Rdb_string_reader *reader, + const uint part_num) const { + /* It is impossible to unpack the column. Skip it. */ + if (m_pack_info[part_num].m_maybe_null) { + const char *nullp; + if (!(nullp = reader->read(1))) + return 1; + if (*nullp == 0) { + /* This is a NULL value */ + return -1; + } else { + /* If NULL marker is not '0', it can be only '1' */ + if (*nullp != 1) + return 1; + } + } + + Rdb_field_packing *fpi = &m_pack_info[part_num]; + DBUG_ASSERT(table_arg->s != nullptr); + + bool is_hidden_pk_part = (part_num + 1 == m_key_parts) && + (table_arg->s->primary_key == MAX_INDEXES); + Field *field = nullptr; + if (!is_hidden_pk_part) + field = fpi->get_field_in_table(table_arg); + if (fpi->m_skip_func(fpi, field, reader)) + return 1; + + return 0; +} /** Get a mem-comparable form of Primary Key from mem-comparable form of this key @@ -347,17 +346,16 @@ void Rdb_key_def::setup(const TABLE* const tbl, set of queries for which we would check the checksum twice. 
*/ -uint Rdb_key_def::get_primary_key_tuple(const TABLE* const table, - const Rdb_key_def& pk_descr, - const rocksdb::Slice* const key, - uchar* const pk_buffer) const -{ +uint Rdb_key_def::get_primary_key_tuple(const TABLE *const table, + const Rdb_key_def &pk_descr, + const rocksdb::Slice *const key, + uchar *const pk_buffer) const { DBUG_ASSERT(table != nullptr); DBUG_ASSERT(key != nullptr); DBUG_ASSERT(pk_buffer); - uint size= 0; - uchar *buf= pk_buffer; + uint size = 0; + uchar *buf = pk_buffer; DBUG_ASSERT(m_pk_key_parts); /* Put the PK number */ @@ -365,8 +363,8 @@ uint Rdb_key_def::get_primary_key_tuple(const TABLE* const table, buf += INDEX_NUMBER_SIZE; size += INDEX_NUMBER_SIZE; - const char* start_offs[MAX_REF_PARTS]; - const char* end_offs[MAX_REF_PARTS]; + const char *start_offs[MAX_REF_PARTS]; + const char *end_offs[MAX_REF_PARTS]; int pk_key_part; uint i; Rdb_string_reader reader(key); @@ -375,56 +373,22 @@ uint Rdb_key_def::get_primary_key_tuple(const TABLE* const table, if ((!reader.read(INDEX_NUMBER_SIZE))) return RDB_INVALID_KEY_LEN; - for (i= 0; i < m_key_parts; i++) - { - if ((pk_key_part= m_pk_part_no[i]) != -1) - { - start_offs[pk_key_part]= reader.get_current_ptr(); + for (i = 0; i < m_key_parts; i++) { + if ((pk_key_part = m_pk_part_no[i]) != -1) { + start_offs[pk_key_part] = reader.get_current_ptr(); } - bool have_value= true; - /* It is impossible to unpack the column. Skip it. 
*/ - if (m_pack_info[i].m_maybe_null) - { - const char* nullp; - if (!(nullp= reader.read(1))) - return RDB_INVALID_KEY_LEN; - if (*nullp == 0) - { - /* This is a NULL value */ - have_value= false; - } - else - { - /* If NULL marker is not '0', it can be only '1' */ - if (*nullp != 1) - return RDB_INVALID_KEY_LEN; - } - } - - if (have_value) - { - Rdb_field_packing* const fpi= &m_pack_info[i]; - - DBUG_ASSERT(table->s != nullptr); - const bool is_hidden_pk_part= (i + 1 == m_key_parts) && - (table->s->primary_key == MAX_INDEXES); - Field *field= nullptr; - if (!is_hidden_pk_part) - field= fpi->get_field_in_table(table); - if (fpi->m_skip_func(fpi, field, &reader)) - return RDB_INVALID_KEY_LEN; + if (read_memcmp_key_part(table, &reader, i) > 0) { + return RDB_INVALID_KEY_LEN; } - if (pk_key_part != -1) - { - end_offs[pk_key_part]= reader.get_current_ptr(); + if (pk_key_part != -1) { + end_offs[pk_key_part] = reader.get_current_ptr(); } } - for (i= 0; i < m_pk_key_parts; i++) - { - const uint part_size= end_offs[i] - start_offs[i]; + for (i = 0; i < m_pk_key_parts; i++) { + const uint part_size = end_offs[i] - start_offs[i]; memcpy(buf, start_offs[i], end_offs[i] - start_offs[i]); buf += part_size; size += part_size; @@ -433,6 +397,46 @@ uint Rdb_key_def::get_primary_key_tuple(const TABLE* const table, return size; } +/** + Get a mem-comparable form of Secondary Key from mem-comparable form of this + key, without the extended primary key tail. + + @param + key Index tuple from this key in mem-comparable form + sk_buffer OUT Put here mem-comparable form of the Secondary Key. 
+ n_null_fields OUT Put number of null fields contained within sk entry +*/ +uint Rdb_key_def::get_memcmp_sk_parts(const TABLE *table, + const rocksdb::Slice &key, + uchar *sk_buffer, + uint *n_null_fields) const { + DBUG_ASSERT(table != nullptr); + DBUG_ASSERT(sk_buffer != nullptr); + DBUG_ASSERT(n_null_fields != nullptr); + DBUG_ASSERT(m_keyno != table->s->primary_key && !table_has_hidden_pk(table)); + + uchar *buf = sk_buffer; + + int res; + Rdb_string_reader reader(&key); + const char *start = reader.get_current_ptr(); + + // Skip the index number + if ((!reader.read(INDEX_NUMBER_SIZE))) + return RDB_INVALID_KEY_LEN; + + for (uint i = 0; i < table->key_info[m_keyno].user_defined_key_parts; i++) { + if ((res = read_memcmp_key_part(table, &reader, i)) > 0) { + return RDB_INVALID_KEY_LEN; + } else if (res == -1) { + (*n_null_fields)++; + } + } + + uint sk_memcmp_len = reader.get_current_ptr() - start; + memcpy(buf, start, sk_memcmp_len); + return sk_memcmp_len; +} /** Convert index tuple into storage (i.e. mem-comparable) format @@ -445,30 +449,28 @@ uint Rdb_key_def::get_primary_key_tuple(const TABLE* const table, size is at least max_storage_fmt_length() bytes. */ -uint Rdb_key_def::pack_index_tuple(TABLE* const tbl, uchar* const pack_buffer, - uchar* const packed_tuple, - const uchar* const key_tuple, - const key_part_map &keypart_map) const -{ +uint Rdb_key_def::pack_index_tuple(TABLE *const tbl, uchar *const pack_buffer, + uchar *const packed_tuple, + const uchar *const key_tuple, + const key_part_map &keypart_map) const { DBUG_ASSERT(tbl != nullptr); DBUG_ASSERT(pack_buffer != nullptr); DBUG_ASSERT(packed_tuple != nullptr); DBUG_ASSERT(key_tuple != nullptr); /* We were given a record in KeyTupleFormat. 
First, save it to record */ - const uint key_len= calculate_key_len(tbl, m_keyno, key_tuple, keypart_map); + const uint key_len = calculate_key_len(tbl, m_keyno, key_tuple, keypart_map); key_restore(tbl->record[0], key_tuple, &tbl->key_info[m_keyno], key_len); - uint n_used_parts= my_count_bits(keypart_map); + uint n_used_parts = my_count_bits(keypart_map); if (keypart_map == HA_WHOLE_KEY) - n_used_parts= 0; // Full key is used + n_used_parts = 0; // Full key is used /* Then, convert the record into a mem-comparable form */ return pack_record(tbl, pack_buffer, tbl->record[0], packed_tuple, nullptr, false, 0, n_used_parts); } - /** @brief Check if "unpack info" data includes checksum. @@ -478,16 +480,13 @@ uint Rdb_key_def::pack_index_tuple(TABLE* const tbl, uchar* const pack_buffer, checksums. */ -bool Rdb_key_def::unpack_info_has_checksum(const rocksdb::Slice &unpack_info) -{ - const uchar* ptr= (const uchar*)unpack_info.data(); - size_t size= unpack_info.size(); +bool Rdb_key_def::unpack_info_has_checksum(const rocksdb::Slice &unpack_info) { + const uchar *ptr = (const uchar *)unpack_info.data(); + size_t size = unpack_info.size(); // Skip unpack info if present. 
- if (size >= RDB_UNPACK_HEADER_SIZE && - ptr[0] == RDB_UNPACK_DATA_TAG) - { - const uint16 skip_len= rdb_netbuf_to_uint16(ptr + 1); + if (size >= RDB_UNPACK_HEADER_SIZE && ptr[0] == RDB_UNPACK_DATA_TAG) { + const uint16 skip_len = rdb_netbuf_to_uint16(ptr + 1); SHIP_ASSERT(size >= skip_len); size -= skip_len; @@ -500,26 +499,22 @@ bool Rdb_key_def::unpack_info_has_checksum(const rocksdb::Slice &unpack_info) /* @return Number of bytes that were changed */ -int Rdb_key_def::successor(uchar* const packed_tuple, const uint &len) -{ +int Rdb_key_def::successor(uchar *const packed_tuple, const uint &len) { DBUG_ASSERT(packed_tuple != nullptr); - int changed= 0; - uchar *p= packed_tuple + len - 1; - for (; p > packed_tuple; p--) - { + int changed = 0; + uchar *p = packed_tuple + len - 1; + for (; p > packed_tuple; p--) { changed++; - if (*p != uchar(0xFF)) - { - *p= *p + 1; + if (*p != uchar(0xFF)) { + *p = *p + 1; break; } - *p='\0'; + *p = '\0'; } return changed; } - /** Get index columns from the record and pack them into mem-comparable form. 
@@ -542,15 +537,13 @@ int Rdb_key_def::successor(uchar* const packed_tuple, const uint &len) Length of the packed tuple */ -uint Rdb_key_def::pack_record(const TABLE* const tbl, uchar* const pack_buffer, - const uchar* const record, - uchar* const packed_tuple, - Rdb_string_writer* const unpack_info, +uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer, + const uchar *const record, + uchar *const packed_tuple, + Rdb_string_writer *const unpack_info, const bool &should_store_row_debug_checksums, - const longlong &hidden_pk_id, - uint n_key_parts, - uint* const n_null_fields) const -{ + const longlong &hidden_pk_id, uint n_key_parts, + uint *const n_null_fields) const { DBUG_ASSERT(tbl != nullptr); DBUG_ASSERT(pack_buffer != nullptr); DBUG_ASSERT(record != nullptr); @@ -560,9 +553,9 @@ uint Rdb_key_def::pack_record(const TABLE* const tbl, uchar* const pack_buffer, DBUG_ASSERT_IMP(should_store_row_debug_checksums, (m_index_type == INDEX_TYPE_SECONDARY)); - uchar *tuple= packed_tuple; - size_t unpack_len_pos= size_t(-1); - const bool hidden_pk_exists= table_has_hidden_pk(tbl); + uchar *tuple = packed_tuple; + size_t unpack_len_pos = size_t(-1); + const bool hidden_pk_exists = table_has_hidden_pk(tbl); rdb_netbuf_store_index(tuple, m_index_number); tuple += INDEX_NUMBER_SIZE; @@ -576,62 +569,55 @@ uint Rdb_key_def::pack_record(const TABLE* const tbl, uchar* const pack_buffer, // If hidden pk exists, but hidden pk wasnt passed in, we can't pack the // hidden key part. So we skip it (its always 1 part). 
if (hidden_pk_exists && !hidden_pk_id && use_all_columns) - n_key_parts= m_key_parts - 1; + n_key_parts = m_key_parts - 1; else if (use_all_columns) - n_key_parts= m_key_parts; + n_key_parts = m_key_parts; if (n_null_fields) *n_null_fields = 0; - if (unpack_info) - { + if (unpack_info) { unpack_info->clear(); unpack_info->write_uint8(RDB_UNPACK_DATA_TAG); - unpack_len_pos= unpack_info->get_current_pos(); + unpack_len_pos = unpack_info->get_current_pos(); // we don't know the total length yet, so write a zero unpack_info->write_uint16(0); } - for (uint i=0; i < n_key_parts; i++) - { + for (uint i = 0; i < n_key_parts; i++) { // Fill hidden pk id into the last key part for secondary keys for tables // with no pk - if (hidden_pk_exists && hidden_pk_id && i + 1 == n_key_parts) - { + if (hidden_pk_exists && hidden_pk_id && i + 1 == n_key_parts) { m_pack_info[i].fill_hidden_pk_val(&tuple, hidden_pk_id); break; } - Field* const field= m_pack_info[i].get_field_in_table(tbl); + Field *const field = m_pack_info[i].get_field_in_table(tbl); DBUG_ASSERT(field != nullptr); // Old Field methods expected the record pointer to be at tbl->record[0]. // The quick and easy way to fix this was to pass along the offset // for the pointer. - const my_ptrdiff_t ptr_diff= record - tbl->record[0]; + const my_ptrdiff_t ptr_diff = record - tbl->record[0]; - if (field->real_maybe_null()) - { + if (field->real_maybe_null()) { DBUG_ASSERT(is_storage_available(tuple - packed_tuple, 1)); - if (field->is_real_null(ptr_diff)) - { + if (field->is_real_null(ptr_diff)) { /* NULL value. store '\0' so that it sorts before non-NULL values */ *tuple++ = 0; /* That's it, don't store anything else */ if (n_null_fields) (*n_null_fields)++; continue; - } - else - { + } else { /* Not a NULL value. 
Store '1' */ *tuple++ = 1; } } - const bool create_unpack_info= - (unpack_info && // we were requested to generate unpack_info - m_pack_info[i].uses_unpack_info()); // and this keypart uses it + const bool create_unpack_info = + (unpack_info && // we were requested to generate unpack_info + m_pack_info[i].uses_unpack_info()); // and this keypart uses it Rdb_pack_field_context pack_ctx(unpack_info); // Set the offset for methods which do not take an offset as an argument @@ -639,21 +625,19 @@ uint Rdb_key_def::pack_record(const TABLE* const tbl, uchar* const pack_buffer, m_pack_info[i].m_max_image_len)); field->move_field_offset(ptr_diff); - m_pack_info[i].m_pack_func(&m_pack_info[i], field, - pack_buffer, &tuple, &pack_ctx); + m_pack_info[i].m_pack_func(&m_pack_info[i], field, pack_buffer, &tuple, + &pack_ctx); /* Make "unpack info" to be stored in the value */ - if (create_unpack_info) - { + if (create_unpack_info) { m_pack_info[i].m_make_unpack_info_func(m_pack_info[i].m_charset_codec, field, &pack_ctx); } field->move_field_offset(-ptr_diff); } - if (unpack_info) - { - const size_t len= unpack_info->get_current_pos(); + if (unpack_info) { + const size_t len = unpack_info->get_current_pos(); DBUG_ASSERT(len <= std::numeric_limits<uint16_t>::max()); // Don't store the unpack_info if it has only the header (that is, there's @@ -662,12 +646,9 @@ uint Rdb_key_def::pack_record(const TABLE* const tbl, uchar* const pack_buffer, // empty (provided m_maybe_unpack_info==true, see // ha_rocksdb::convert_record_to_storage_format) if (len == RDB_UNPACK_HEADER_SIZE && - m_index_type != Rdb_key_def::INDEX_TYPE_PRIMARY) - { + m_index_type != Rdb_key_def::INDEX_TYPE_PRIMARY) { unpack_info->clear(); - } - else - { + } else { unpack_info->write_uint16_at(unpack_len_pos, len); } @@ -677,11 +658,10 @@ uint Rdb_key_def::pack_record(const TABLE* const tbl, uchar* const pack_buffer, // so the checksums are computed and stored by // ha_rocksdb::convert_record_to_storage_format // - if 
(should_store_row_debug_checksums) - { - const uint32_t key_crc32= crc32(0, packed_tuple, tuple - packed_tuple); - const uint32_t val_crc32= crc32(0, unpack_info->ptr(), - unpack_info->get_current_pos()); + if (should_store_row_debug_checksums) { + const uint32_t key_crc32 = crc32(0, packed_tuple, tuple - packed_tuple); + const uint32_t val_crc32 = + crc32(0, unpack_info->ptr(), unpack_info->get_current_pos()); unpack_info->write_uint8(RDB_CHECKSUM_DATA_TAG); unpack_info->write_uint32(key_crc32); @@ -707,11 +687,10 @@ uint Rdb_key_def::pack_record(const TABLE* const tbl, uchar* const pack_buffer, */ uint Rdb_key_def::pack_hidden_pk(const longlong &hidden_pk_id, - uchar* const packed_tuple) const -{ + uchar *const packed_tuple) const { DBUG_ASSERT(packed_tuple != nullptr); - uchar *tuple= packed_tuple; + uchar *tuple = packed_tuple; rdb_netbuf_store_index(tuple, m_index_number); tuple += INDEX_NUMBER_SIZE; DBUG_ASSERT(m_key_parts == 1); @@ -724,24 +703,20 @@ uint Rdb_key_def::pack_hidden_pk(const longlong &hidden_pk_id, return tuple - packed_tuple; } - /* Function of type rdb_index_field_pack_t */ -void rdb_pack_with_make_sort_key(Rdb_field_packing* const fpi, - Field* const field, - uchar* const buf __attribute__((__unused__)), - uchar **dst, - Rdb_pack_field_context* const pack_ctx - __attribute__((__unused__))) -{ +void rdb_pack_with_make_sort_key( + Rdb_field_packing *const fpi, Field *const field, + uchar *const buf MY_ATTRIBUTE((__unused__)), uchar **dst, + Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) { DBUG_ASSERT(fpi != nullptr); DBUG_ASSERT(field != nullptr); DBUG_ASSERT(dst != nullptr); DBUG_ASSERT(*dst != nullptr); - const int max_len= fpi->m_max_image_len; + const int max_len = fpi->m_max_image_len; field->make_sort_key(*dst, max_len); *dst += max_len; } @@ -755,12 +730,9 @@ void rdb_pack_with_make_sort_key(Rdb_field_packing* const fpi, -1 if two kes are equal 1 - Data format error. 
*/ -int Rdb_key_def::compare_keys( - const rocksdb::Slice *key1, - const rocksdb::Slice *key2, - std::size_t* const column_index -) const -{ +int Rdb_key_def::compare_keys(const rocksdb::Slice *key1, + const rocksdb::Slice *key2, + std::size_t *const column_index) const { DBUG_ASSERT(key1 != nullptr); DBUG_ASSERT(key2 != nullptr); DBUG_ASSERT(column_index != nullptr); @@ -774,29 +746,27 @@ int Rdb_key_def::compare_keys( // Skip the index number if ((!reader1.read(INDEX_NUMBER_SIZE))) - return 1; + return HA_EXIT_FAILURE; if ((!reader2.read(INDEX_NUMBER_SIZE))) - return 1; + return HA_EXIT_FAILURE; - for (uint i= 0; i < m_key_parts ; i++) - { - const Rdb_field_packing* const fpi= &m_pack_info[i]; - if (fpi->m_maybe_null) - { - const auto nullp1= reader1.read(1); - const auto nullp2= reader2.read(1); - if (nullp1 == nullptr || nullp2 == nullptr) - return 1; //error + for (uint i = 0; i < m_key_parts; i++) { + const Rdb_field_packing *const fpi = &m_pack_info[i]; + if (fpi->m_maybe_null) { + const auto nullp1 = reader1.read(1); + const auto nullp2 = reader2.read(1); + + if (nullp1 == nullptr || nullp2 == nullptr) { + return HA_EXIT_FAILURE; + } - if (*nullp1 != *nullp2) - { + if (*nullp1 != *nullp2) { *column_index = i; - return 0; + return HA_EXIT_SUCCESS; } - if (*nullp1 == 0) - { + if (*nullp1 == 0) { /* This is a NULL value */ continue; } @@ -806,29 +776,26 @@ int Rdb_key_def::compare_keys( const auto before_skip2 = reader2.get_current_ptr(); DBUG_ASSERT(fpi->m_skip_func); if (fpi->m_skip_func(fpi, nullptr, &reader1)) - return 1; + return HA_EXIT_FAILURE; if (fpi->m_skip_func(fpi, nullptr, &reader2)) - return 1; + return HA_EXIT_FAILURE; const auto size1 = reader1.get_current_ptr() - before_skip1; const auto size2 = reader2.get_current_ptr() - before_skip2; - if (size1 != size2) - { + if (size1 != size2) { *column_index = i; - return 0; + return HA_EXIT_SUCCESS; } if (memcmp(before_skip1, before_skip2, size1) != 0) { *column_index = i; - return 0; + return 
HA_EXIT_SUCCESS; } } *column_index = m_key_parts; - return 0; - + return HA_EXIT_SUCCESS; } - /* @brief Given a zero-padded key, determine its real key length @@ -837,9 +804,8 @@ int Rdb_key_def::compare_keys( Fixed-size skip functions just read. */ -size_t Rdb_key_def::key_length(const TABLE* const table, - const rocksdb::Slice &key) const -{ +size_t Rdb_key_def::key_length(const TABLE *const table, + const rocksdb::Slice &key) const { DBUG_ASSERT(table != nullptr); Rdb_string_reader reader(&key); @@ -847,19 +813,17 @@ size_t Rdb_key_def::key_length(const TABLE* const table, if ((!reader.read(INDEX_NUMBER_SIZE))) return size_t(-1); - for (uint i= 0; i < m_key_parts ; i++) - { - const Rdb_field_packing *fpi= &m_pack_info[i]; - const Field *field= nullptr; + for (uint i = 0; i < m_key_parts; i++) { + const Rdb_field_packing *fpi = &m_pack_info[i]; + const Field *field = nullptr; if (m_index_type != INDEX_TYPE_HIDDEN_PRIMARY) - field= fpi->get_field_in_table(table); + field = fpi->get_field_in_table(table); if (fpi->m_skip_func(fpi, field, &reader)) return size_t(-1); } return key.size() - reader.remaining_bytes(); } - /* Take mem-comparable form and unpack_info and unpack it to Table->record @@ -869,21 +833,18 @@ size_t Rdb_key_def::key_length(const TABLE* const table, @return UNPACK_SUCCESS - Ok UNPACK_FAILURE - Data format error. - UNPACK_INFO_MISSING - Unpack info was unavailable and was required for - unpacking. 
*/ -int Rdb_key_def::unpack_record(TABLE* const table, uchar* const buf, - const rocksdb::Slice* const packed_key, - const rocksdb::Slice* const unpack_info, - const bool &verify_row_debug_checksums) const -{ +int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf, + const rocksdb::Slice *const packed_key, + const rocksdb::Slice *const unpack_info, + const bool &verify_row_debug_checksums) const { Rdb_string_reader reader(packed_key); - Rdb_string_reader unp_reader= Rdb_string_reader::read_or_empty(unpack_info); + Rdb_string_reader unp_reader = Rdb_string_reader::read_or_empty(unpack_info); - const bool is_hidden_pk= (m_index_type == INDEX_TYPE_HIDDEN_PRIMARY); - const bool hidden_pk_exists= table_has_hidden_pk(table); - const bool secondary_key= (m_index_type == INDEX_TYPE_SECONDARY); + const bool is_hidden_pk = (m_index_type == INDEX_TYPE_HIDDEN_PRIMARY); + const bool hidden_pk_exists = table_has_hidden_pk(table); + const bool secondary_key = (m_index_type == INDEX_TYPE_SECONDARY); // There is no checksuming data after unpack_info for primary keys, because // the layout there is different. The checksum is verified in // ha_rocksdb::convert_record_from_storage_format instead. @@ -892,166 +853,143 @@ int Rdb_key_def::unpack_record(TABLE* const table, uchar* const buf, // Old Field methods expected the record pointer to be at tbl->record[0]. // The quick and easy way to fix this was to pass along the offset // for the pointer. - const my_ptrdiff_t ptr_diff= buf - table->record[0]; + const my_ptrdiff_t ptr_diff = buf - table->record[0]; // Skip the index number - if ((!reader.read(INDEX_NUMBER_SIZE))) - { - return 1; + if ((!reader.read(INDEX_NUMBER_SIZE))) { + return HA_EXIT_FAILURE; } // For secondary keys, we expect the value field to contain unpack data and // checksum data in that order. One or both can be missing, but they cannot // be reordered. 
- const bool has_unpack_info= unp_reader.remaining_bytes() && - *unp_reader.get_current_ptr() == RDB_UNPACK_DATA_TAG; - if (has_unpack_info && !unp_reader.read(RDB_UNPACK_HEADER_SIZE)) - { - return 1; + const bool has_unpack_info = + unp_reader.remaining_bytes() && + *unp_reader.get_current_ptr() == RDB_UNPACK_DATA_TAG; + if (has_unpack_info && !unp_reader.read(RDB_UNPACK_HEADER_SIZE)) { + return HA_EXIT_FAILURE; } - for (uint i= 0; i < m_key_parts ; i++) - { - Rdb_field_packing* const fpi= &m_pack_info[i]; + for (uint i = 0; i < m_key_parts; i++) { + Rdb_field_packing *const fpi = &m_pack_info[i]; /* Hidden pk field is packed at the end of the secondary keys, but the SQL layer does not know about it. Skip retrieving field if hidden pk. */ if ((secondary_key && hidden_pk_exists && i + 1 == m_key_parts) || - is_hidden_pk) - { + is_hidden_pk) { DBUG_ASSERT(fpi->m_unpack_func); - if (fpi->m_skip_func(fpi, nullptr, &reader)) - { - return 1; + if (fpi->m_skip_func(fpi, nullptr, &reader)) { + return HA_EXIT_FAILURE; } continue; } - Field* const field= fpi->get_field_in_table(table); + Field *const field = fpi->get_field_in_table(table); - if (fpi->m_unpack_func) - { + if (fpi->m_unpack_func) { /* It is possible to unpack this column. Do it. 
*/ - if (fpi->m_maybe_null) - { - const char* nullp; - if (!(nullp= reader.read(1))) - return 1; - if (*nullp == 0) - { + if (fpi->m_maybe_null) { + const char *nullp; + if (!(nullp = reader.read(1))) + return HA_EXIT_FAILURE; + if (*nullp == 0) { /* Set the NULL-bit of this field */ field->set_null(ptr_diff); /* Also set the field to its default value */ - uint field_offset= field->ptr - table->record[0]; - memcpy(buf + field_offset, - table->s->default_values + field_offset, + uint field_offset = field->ptr - table->record[0]; + memcpy(buf + field_offset, table->s->default_values + field_offset, field->pack_length()); continue; - } - else if (*nullp == 1) + } else if (*nullp == 1) field->set_notnull(ptr_diff); else - return 1; + return HA_EXIT_FAILURE; } // If we need unpack info, but there is none, tell the unpack function // this by passing unp_reader as nullptr. If we never read unpack_info // during unpacking anyway, then there won't an error. - const bool maybe_missing_unpack= - !has_unpack_info && fpi->uses_unpack_info(); - const int res= fpi->m_unpack_func(fpi, field, field->ptr + ptr_diff, - &reader, - maybe_missing_unpack ? nullptr : &unp_reader); + const bool maybe_missing_unpack = + !has_unpack_info && fpi->uses_unpack_info(); + const int res = + fpi->m_unpack_func(fpi, field, field->ptr + ptr_diff, &reader, + maybe_missing_unpack ? nullptr : &unp_reader); if (res) return res; - } - else - { + } else { /* It is impossible to unpack the column. Skip it. 
*/ - if (fpi->m_maybe_null) - { - const char* nullp; - if (!(nullp= reader.read(1))) - return 1; - if (*nullp == 0) - { + if (fpi->m_maybe_null) { + const char *nullp; + if (!(nullp = reader.read(1))) + return HA_EXIT_FAILURE; + if (*nullp == 0) { /* This is a NULL value */ continue; } /* If NULL marker is not '0', it can be only '1' */ if (*nullp != 1) - return 1; + return HA_EXIT_FAILURE; } if (fpi->m_skip_func(fpi, field, &reader)) - return 1; + return HA_EXIT_FAILURE; } } /* Check checksum values if present */ - const char* ptr; - if ((ptr= unp_reader.read(1)) && *ptr == RDB_CHECKSUM_DATA_TAG) - { - if (verify_row_debug_checksums) - { - uint32_t stored_key_chksum= rdb_netbuf_to_uint32( - (const uchar*)unp_reader.read(RDB_CHECKSUM_SIZE)); - const uint32_t stored_val_chksum= rdb_netbuf_to_uint32( - (const uchar*)unp_reader.read(RDB_CHECKSUM_SIZE)); - - const uint32_t computed_key_chksum= - crc32(0, (const uchar*)packed_key->data(), packed_key->size()); - const uint32_t computed_val_chksum= - crc32(0, (const uchar*) unpack_info->data(), - unpack_info->size() - RDB_CHECKSUM_CHUNK_SIZE); + const char *ptr; + if ((ptr = unp_reader.read(1)) && *ptr == RDB_CHECKSUM_DATA_TAG) { + if (verify_row_debug_checksums) { + uint32_t stored_key_chksum = rdb_netbuf_to_uint32( + (const uchar *)unp_reader.read(RDB_CHECKSUM_SIZE)); + const uint32_t stored_val_chksum = rdb_netbuf_to_uint32( + (const uchar *)unp_reader.read(RDB_CHECKSUM_SIZE)); + + const uint32_t computed_key_chksum = + crc32(0, (const uchar *)packed_key->data(), packed_key->size()); + const uint32_t computed_val_chksum = + crc32(0, (const uchar *)unpack_info->data(), + unpack_info->size() - RDB_CHECKSUM_CHUNK_SIZE); DBUG_EXECUTE_IF("myrocks_simulate_bad_key_checksum1", stored_key_chksum++;); - if (stored_key_chksum != computed_key_chksum) - { - report_checksum_mismatch(true, packed_key->data(), - packed_key->size()); - return 1; + if (stored_key_chksum != computed_key_chksum) { + report_checksum_mismatch(true, 
packed_key->data(), packed_key->size()); + return HA_EXIT_FAILURE; } - if (stored_val_chksum != computed_val_chksum) - { - report_checksum_mismatch( - false, unpack_info->data(), - unpack_info->size() - RDB_CHECKSUM_CHUNK_SIZE); - return 1; + if (stored_val_chksum != computed_val_chksum) { + report_checksum_mismatch(false, unpack_info->data(), + unpack_info->size() - RDB_CHECKSUM_CHUNK_SIZE); + return HA_EXIT_FAILURE; } - } - else - { + } else { /* The checksums are present but we are not checking checksums */ } } if (reader.remaining_bytes()) - return 1; + return HA_EXIT_FAILURE; - return 0; + return HA_EXIT_SUCCESS; } -bool Rdb_key_def::table_has_hidden_pk(const TABLE* const table) -{ +bool Rdb_key_def::table_has_hidden_pk(const TABLE *const table) { return table->s->primary_key == MAX_INDEXES; } void Rdb_key_def::report_checksum_mismatch(const bool &is_key, - const char* const data, - const size_t data_size) const -{ + const char *const data, + const size_t data_size) const { // NO_LINT_DEBUG sql_print_error("Checksum mismatch in %s of key-value pair for index 0x%x", - is_key? "key" : "value", get_index_number()); + is_key ? 
"key" : "value", get_index_number()); const std::string buf = rdb_hexdump(data, data_size, RDB_MAX_HEXDUMP_LEN); // NO_LINT_DEBUG @@ -1062,18 +1000,16 @@ void Rdb_key_def::report_checksum_mismatch(const bool &is_key, } bool Rdb_key_def::index_format_min_check(const int &pk_min, - const int &sk_min) const -{ - switch (m_index_type) - { - case INDEX_TYPE_PRIMARY: - case INDEX_TYPE_HIDDEN_PRIMARY: - return (m_kv_format_version >= pk_min); - case INDEX_TYPE_SECONDARY: - return (m_kv_format_version >= sk_min); - default: - DBUG_ASSERT(0); - return false; + const int &sk_min) const { + switch (m_index_type) { + case INDEX_TYPE_PRIMARY: + case INDEX_TYPE_HIDDEN_PRIMARY: + return (m_kv_format_version >= pk_min); + case INDEX_TYPE_SECONDARY: + return (m_kv_format_version >= sk_min); + default: + DBUG_ASSERT(0); + return false; } } @@ -1085,13 +1021,12 @@ bool Rdb_key_def::index_format_min_check(const int &pk_min, Function of type rdb_index_field_skip_t */ -int rdb_skip_max_length(const Rdb_field_packing* const fpi, - const Field* const field __attribute__((__unused__)), - Rdb_string_reader* const reader) -{ +int rdb_skip_max_length(const Rdb_field_packing *const fpi, + const Field *const field MY_ATTRIBUTE((__unused__)), + Rdb_string_reader *const reader) { if (!reader->read(fpi->m_max_image_len)) - return 1; - return 0; + return HA_EXIT_FAILURE; + return HA_EXIT_SUCCESS; } /* @@ -1100,7 +1035,7 @@ int rdb_skip_max_length(const Rdb_field_packing* const fpi, rdb_unpack_binary_or_utf8_varchar. 
*/ -const uint RDB_ESCAPE_LENGTH= 9; +const uint RDB_ESCAPE_LENGTH = 9; static_assert((RDB_ESCAPE_LENGTH - 1) % 2 == 0, "RDB_ESCAPE_LENGTH-1 must be even."); @@ -1109,50 +1044,43 @@ static_assert((RDB_ESCAPE_LENGTH - 1) % 2 == 0, */ static int rdb_skip_variable_length( - const Rdb_field_packing* const fpi __attribute__((__unused__)), - const Field* const field, Rdb_string_reader* const reader) -{ + const Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)), + const Field *const field, Rdb_string_reader *const reader) { const uchar *ptr; - bool finished= false; + bool finished = false; size_t dst_len; /* How much data can be there */ - if (field) - { - const Field_varstring* const field_var= - static_cast<const Field_varstring*>(field); - dst_len= field_var->pack_length() - field_var->length_bytes; - } - else - { - dst_len= UINT_MAX; + if (field) { + const Field_varstring *const field_var = + static_cast<const Field_varstring *>(field); + dst_len = field_var->pack_length() - field_var->length_bytes; + } else { + dst_len = UINT_MAX; } /* Decode the length-emitted encoding here */ - while ((ptr= (const uchar*)reader->read(RDB_ESCAPE_LENGTH))) - { + while ((ptr = (const uchar *)reader->read(RDB_ESCAPE_LENGTH))) { /* See rdb_pack_with_varchar_encoding. 
*/ - const uchar pad= 255 - ptr[RDB_ESCAPE_LENGTH - 1]; // number of padding bytes - const uchar used_bytes= RDB_ESCAPE_LENGTH - 1 - pad; + const uchar pad = + 255 - ptr[RDB_ESCAPE_LENGTH - 1]; // number of padding bytes + const uchar used_bytes = RDB_ESCAPE_LENGTH - 1 - pad; - if (used_bytes > RDB_ESCAPE_LENGTH - 1 || used_bytes > dst_len) - { - return 1; /* cannot store that much, invalid data */ + if (used_bytes > RDB_ESCAPE_LENGTH - 1 || used_bytes > dst_len) { + return HA_EXIT_FAILURE; /* cannot store that much, invalid data */ } - if (used_bytes < RDB_ESCAPE_LENGTH - 1) - { - finished= true; + if (used_bytes < RDB_ESCAPE_LENGTH - 1) { + finished = true; break; } dst_len -= used_bytes; } - if (!finished) - { - return 1; + if (!finished) { + return HA_EXIT_FAILURE; } - return 0; + return HA_EXIT_SUCCESS; } const int VARCHAR_CMP_LESS_THAN_SPACES = 1; @@ -1163,136 +1091,135 @@ const int VARCHAR_CMP_GREATER_THAN_SPACES = 3; Skip a keypart that uses Variable-Length Space-Padded encoding */ -static int rdb_skip_variable_space_pad( - const Rdb_field_packing* const fpi, - const Field* const field, Rdb_string_reader* const reader) -{ +static int rdb_skip_variable_space_pad(const Rdb_field_packing *const fpi, + const Field *const field, + Rdb_string_reader *const reader) { const uchar *ptr; - bool finished= false; + bool finished = false; - size_t dst_len= UINT_MAX; /* How much data can be there */ + size_t dst_len = UINT_MAX; /* How much data can be there */ - if (field) - { - const Field_varstring* const field_var= - static_cast<const Field_varstring*>(field); - dst_len= field_var->pack_length() - field_var->length_bytes; + if (field) { + const Field_varstring *const field_var = + static_cast<const Field_varstring *>(field); + dst_len = field_var->pack_length() - field_var->length_bytes; } /* Decode the length-emitted encoding here */ - while ((ptr= (const uchar*)reader->read(fpi->m_segment_size))) - { + while ((ptr = (const uchar *)reader->read(fpi->m_segment_size))) 
{ // See rdb_pack_with_varchar_space_pad - const uchar c= ptr[fpi->m_segment_size-1]; - if (c == VARCHAR_CMP_EQUAL_TO_SPACES) - { + const uchar c = ptr[fpi->m_segment_size - 1]; + if (c == VARCHAR_CMP_EQUAL_TO_SPACES) { // This is the last segment - finished= true; + finished = true; break; - } - else if (c == VARCHAR_CMP_LESS_THAN_SPACES || - c == VARCHAR_CMP_GREATER_THAN_SPACES) - { + } else if (c == VARCHAR_CMP_LESS_THAN_SPACES || + c == VARCHAR_CMP_GREATER_THAN_SPACES) { // This is not the last segment - if ((fpi->m_segment_size-1) > dst_len) - { + if ((fpi->m_segment_size - 1) > dst_len) { // The segment is full of data but the table field can't hold that // much! This must be data corruption. - return 1; + return HA_EXIT_FAILURE; } - dst_len -= (fpi->m_segment_size-1); - } - else - { + dst_len -= (fpi->m_segment_size - 1); + } else { // Encountered a value that's none of the VARCHAR_CMP* constants // It's data corruption. - return 1; + return HA_EXIT_FAILURE; } } - return finished? 0: 1; + return finished ? 
HA_EXIT_SUCCESS : HA_EXIT_FAILURE; } - /* Function of type rdb_index_field_unpack_t */ -int rdb_unpack_integer( - Rdb_field_packing* const fpi, Field* const field, uchar* const to, - Rdb_string_reader* const reader, - Rdb_string_reader* const unp_reader __attribute__((__unused__))) -{ - const int length= fpi->m_max_image_len; +int rdb_unpack_integer(Rdb_field_packing *const fpi, Field *const field, + uchar *const to, Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader + MY_ATTRIBUTE((__unused__))) { + const int length = fpi->m_max_image_len; const uchar *from; - if (!(from= (const uchar*)reader->read(length))) + if (!(from = (const uchar *)reader->read(length))) return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */ #ifdef WORDS_BIGENDIAN { - if (((Field_num*)field)->unsigned_flag) - to[0]= from[0]; + if (((Field_num *)field)->unsigned_flag) + to[0] = from[0]; else - to[0]= (char)(from[0] ^ 128); // Reverse the sign bit. + to[0] = (char)(from[0] ^ 128); // Reverse the sign bit. memcpy(to + 1, from + 1, length - 1); } #else { - const int sign_byte= from[0]; - if (((Field_num*)field)->unsigned_flag) - to[length - 1]= sign_byte; + const int sign_byte = from[0]; + if (((Field_num *)field)->unsigned_flag) + to[length - 1] = sign_byte; else - to[length - 1]= static_cast<char>(sign_byte ^ 128); // Reverse the sign bit. - for (int i= 0, j= length - 1; i < length-1; ++i, --j) - to[i]= from[j]; + to[length - 1] = + static_cast<char>(sign_byte ^ 128); // Reverse the sign bit. 
+ for (int i = 0, j = length - 1; i < length - 1; ++i, --j) + to[i] = from[j]; } #endif return UNPACK_SUCCESS; } #if !defined(WORDS_BIGENDIAN) -static void rdb_swap_double_bytes(uchar* const dst, const uchar* const src) -{ +static void rdb_swap_double_bytes(uchar *const dst, const uchar *const src) { #if defined(__FLOAT_WORD_ORDER) && (__FLOAT_WORD_ORDER == __BIG_ENDIAN) // A few systems store the most-significant _word_ first on little-endian - dst[0] = src[3]; dst[1] = src[2]; dst[2] = src[1]; dst[3] = src[0]; - dst[4] = src[7]; dst[5] = src[6]; dst[6] = src[5]; dst[7] = src[4]; + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; + dst[4] = src[7]; + dst[5] = src[6]; + dst[6] = src[5]; + dst[7] = src[4]; #else - dst[0] = src[7]; dst[1] = src[6]; dst[2] = src[5]; dst[3] = src[4]; - dst[4] = src[3]; dst[5] = src[2]; dst[6] = src[1]; dst[7] = src[0]; + dst[0] = src[7]; + dst[1] = src[6]; + dst[2] = src[5]; + dst[3] = src[4]; + dst[4] = src[3]; + dst[5] = src[2]; + dst[6] = src[1]; + dst[7] = src[0]; #endif } -static void rdb_swap_float_bytes(uchar* const dst, const uchar* const src) -{ - dst[0] = src[3]; dst[1] = src[2]; dst[2] = src[1]; dst[3] = src[0]; +static void rdb_swap_float_bytes(uchar *const dst, const uchar *const src) { + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; } #else #define rdb_swap_double_bytes nullptr -#define rdb_swap_float_bytes nullptr +#define rdb_swap_float_bytes nullptr #endif static int rdb_unpack_floating_point( - uchar* const dst, Rdb_string_reader* const reader, const size_t &size, - const int &exp_digit, - const uchar* const zero_pattern, - const uchar* const zero_val, - void (*swap_func)(uchar *, const uchar *)) -{ - const uchar* const from = (const uchar*) reader->read(size); + uchar *const dst, Rdb_string_reader *const reader, const size_t &size, + const int &exp_digit, const uchar *const zero_pattern, + const uchar *const zero_val, void (*swap_func)(uchar *, const uchar *)) { + 
const uchar *const from = (const uchar *)reader->read(size); if (from == nullptr) return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */ /* Check to see if the value is zero */ - if (memcmp(from, zero_pattern, size) == 0) - { + if (memcmp(from, zero_pattern, size) == 0) { memcpy(dst, zero_val, size); return UNPACK_SUCCESS; } #if defined(WORDS_BIGENDIAN) // On big-endian, output can go directly into result - uchar* const tmp = dst; + uchar *const tmp = dst; #else // Otherwise use a temporary buffer to make byte-swapping easier later uchar tmp[8]; @@ -1300,18 +1227,15 @@ static int rdb_unpack_floating_point( memcpy(tmp, from, size); - if (tmp[0] & 0x80) - { + if (tmp[0] & 0x80) { // If the high bit is set the original value was positive so // remove the high bit and subtract one from the exponent. - ushort exp_part= ((ushort) tmp[0] << 8) | (ushort) tmp[1]; - exp_part &= 0x7FFF; // clear high bit; - exp_part -= (ushort) 1 << (16 - 1 - exp_digit); // subtract from exponent - tmp[0] = (uchar) (exp_part >> 8); - tmp[1] = (uchar) exp_part; - } - else - { + ushort exp_part = ((ushort)tmp[0] << 8) | (ushort)tmp[1]; + exp_part &= 0x7FFF; // clear high bit; + exp_part -= (ushort)1 << (16 - 1 - exp_digit); // subtract from exponent + tmp[0] = (uchar)(exp_part >> 8); + tmp[1] = (uchar)exp_part; + } else { // Otherwise the original value was negative and all bytes have been // negated. for (size_t ii = 0; ii < size; ii++) @@ -1332,7 +1256,6 @@ static int rdb_unpack_floating_point( #define DBL_EXP_DIG (sizeof(double) * 8 - DBL_MANT_DIG) #endif - /* Function of type rdb_index_field_unpack_t @@ -1342,19 +1265,16 @@ static int rdb_unpack_floating_point( allowed in the database. 
*/ static int rdb_unpack_double( - Rdb_field_packing* const fpi __attribute__((__unused__)), - Field* const field __attribute__((__unused__)), - uchar* const field_ptr, - Rdb_string_reader* const reader, - Rdb_string_reader* const unp_reader __attribute__((__unused__))) -{ - static double zero_val = 0.0; - static const uchar zero_pattern[8] = { 128, 0, 0, 0, 0, 0, 0, 0 }; - - return rdb_unpack_floating_point(field_ptr, reader, - sizeof(double), - DBL_EXP_DIG, zero_pattern, (const uchar *) &zero_val, - rdb_swap_double_bytes); + Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)), + Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr, + Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) { + static double zero_val = 0.0; + static const uchar zero_pattern[8] = {128, 0, 0, 0, 0, 0, 0, 0}; + + return rdb_unpack_floating_point( + field_ptr, reader, sizeof(double), DBL_EXP_DIG, zero_pattern, + (const uchar *)&zero_val, rdb_swap_double_bytes); } #if !defined(FLT_EXP_DIG) @@ -1370,18 +1290,15 @@ static int rdb_unpack_double( allowed in the database. 
*/ static int rdb_unpack_float( - Rdb_field_packing* const, Field* const field __attribute__((__unused__)), - uchar* const field_ptr, - Rdb_string_reader* const reader, - Rdb_string_reader* const unp_reader __attribute__((__unused__))) -{ - static float zero_val = 0.0; - static const uchar zero_pattern[4] = { 128, 0, 0, 0 }; - - return rdb_unpack_floating_point(field_ptr, reader, - sizeof(float), - FLT_EXP_DIG, zero_pattern, (const uchar *) &zero_val, - rdb_swap_float_bytes); + Rdb_field_packing *const, Field *const field MY_ATTRIBUTE((__unused__)), + uchar *const field_ptr, Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) { + static float zero_val = 0.0; + static const uchar zero_pattern[4] = {128, 0, 0, 0}; + + return rdb_unpack_floating_point( + field_ptr, reader, sizeof(float), FLT_EXP_DIG, zero_pattern, + (const uchar *)&zero_val, rdb_swap_float_bytes); } /* @@ -1389,25 +1306,22 @@ static int rdb_unpack_float( Unpack by doing the reverse action to Field_newdate::make_sort_key. */ -int rdb_unpack_newdate( - Rdb_field_packing* const fpi, Field* constfield, - uchar* const field_ptr, - Rdb_string_reader* const reader, - Rdb_string_reader* const unp_reader __attribute__((__unused__))) -{ - const char* from; +int rdb_unpack_newdate(Rdb_field_packing *const fpi, Field *constfield, + uchar *const field_ptr, Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader + MY_ATTRIBUTE((__unused__))) { + const char *from; DBUG_ASSERT(fpi->m_max_image_len == 3); - if (!(from= reader->read(3))) + if (!(from = reader->read(3))) return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */ - field_ptr[0]= from[2]; - field_ptr[1]= from[1]; - field_ptr[2]= from[0]; + field_ptr[0] = from[2]; + field_ptr[1] = from[1]; + field_ptr[2] = from[0]; return UNPACK_SUCCESS; } - /* Function of type rdb_index_field_unpack_t, used to Unpack the string by copying it over. 
@@ -1415,64 +1329,57 @@ int rdb_unpack_newdate( */ static int rdb_unpack_binary_str( - Rdb_field_packing* const fpi, Field* const field, uchar* const to, - Rdb_string_reader* const reader, - Rdb_string_reader* const unp_reader __attribute__((__unused__))) -{ - const char* from; - if (!(from= reader->read(fpi->m_max_image_len))) + Rdb_field_packing *const fpi, Field *const field, uchar *const to, + Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) { + const char *from; + if (!(from = reader->read(fpi->m_max_image_len))) return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */ memcpy(to, from, fpi->m_max_image_len); return UNPACK_SUCCESS; } - /* Function of type rdb_index_field_unpack_t. For UTF-8, we need to convert 2-byte wide-character entities back into UTF8 sequences. */ -static int rdb_unpack_utf8_str( - Rdb_field_packing* const fpi, Field* const field, - uchar *dst, - Rdb_string_reader* const reader, - Rdb_string_reader* const unp_reader __attribute__((__unused__))) -{ - my_core::CHARSET_INFO* const cset= (my_core::CHARSET_INFO*)field->charset(); +static int rdb_unpack_utf8_str(Rdb_field_packing *const fpi, Field *const field, + uchar *dst, Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader + MY_ATTRIBUTE((__unused__))) { + my_core::CHARSET_INFO *const cset = (my_core::CHARSET_INFO *)field->charset(); const uchar *src; - if (!(src= (const uchar*)reader->read(fpi->m_max_image_len))) + if (!(src = (const uchar *)reader->read(fpi->m_max_image_len))) return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */ - const uchar* const src_end= src + fpi->m_max_image_len; - uchar* const dst_end= dst + field->pack_length(); + const uchar *const src_end = src + fpi->m_max_image_len; + uchar *const dst_end = dst + field->pack_length(); - while (src < src_end) - { - my_wc_t wc= (src[0] <<8) | src[1]; + while (src < src_end) { + my_wc_t wc = (src[0] << 8) | src[1]; src += 
2; - int res= cset->cset->wc_mb(cset, wc, dst, dst_end); - DBUG_ASSERT(res > 0 && res <=3); + int res = cset->cset->wc_mb(cset, wc, dst, dst_end); + DBUG_ASSERT(res > 0 && res <= 3); if (res < 0) return UNPACK_FAILURE; dst += res; } - cset->cset->fill(cset, reinterpret_cast<char *>(dst), - dst_end - dst, cset->pad_char); + cset->cset->fill(cset, reinterpret_cast<char *>(dst), dst_end - dst, + cset->pad_char); return UNPACK_SUCCESS; } - /* Function of type rdb_index_field_pack_t */ static void rdb_pack_with_varchar_encoding( - Rdb_field_packing* const fpi, Field* const field, uchar *buf, uchar **dst, - Rdb_pack_field_context* const pack_ctx __attribute__((__unused__))) -{ + Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst, + Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) { /* Use a flag byte every Nth byte. Set it to (255 - #pad) where #pad is 0 when the var length field filled all N-1 previous bytes and #pad is @@ -1483,66 +1390,58 @@ static void rdb_pack_with_varchar_encoding( * 4 bytes (1, 2, 3, 0) this is encoded as: 1, 2, 3, 0, 0, 0, 0, 252 And the 4 byte string compares as greater than the 3 byte string */ - const CHARSET_INFO* const charset= field->charset(); - Field_varstring* const field_var= (Field_varstring*)field; - - const size_t value_length= (field_var->length_bytes == 1) ? - (uint) *field->ptr : - uint2korr(field->ptr); - size_t xfrm_len= charset->coll->strnxfrm( - charset, - buf, fpi->m_max_image_len, - field_var->char_length(), - field_var->ptr + field_var->length_bytes, - value_length, - 0); + const CHARSET_INFO *const charset = field->charset(); + Field_varstring *const field_var = (Field_varstring *)field; + + const size_t value_length = (field_var->length_bytes == 1) + ? 
(uint)*field->ptr + : uint2korr(field->ptr); + size_t xfrm_len = charset->coll->strnxfrm( + charset, buf, fpi->m_max_image_len, field_var->char_length(), + field_var->ptr + field_var->length_bytes, value_length, 0); /* Got a mem-comparable image in 'buf'. Now, produce varlength encoding */ - size_t encoded_size= 0; - uchar* ptr= *dst; - while (1) - { - const size_t copy_len= std::min((size_t)RDB_ESCAPE_LENGTH-1, xfrm_len); - const size_t padding_bytes= RDB_ESCAPE_LENGTH - 1 - copy_len; + size_t encoded_size = 0; + uchar *ptr = *dst; + while (1) { + const size_t copy_len = std::min((size_t)RDB_ESCAPE_LENGTH - 1, xfrm_len); + const size_t padding_bytes = RDB_ESCAPE_LENGTH - 1 - copy_len; memcpy(ptr, buf, copy_len); ptr += copy_len; buf += copy_len; // pad with zeros if necessary; - for (size_t idx= 0; idx < padding_bytes; idx++) - *(ptr++)= 0; + for (size_t idx = 0; idx < padding_bytes; idx++) + *(ptr++) = 0; *(ptr++) = 255 - padding_bytes; - xfrm_len -= copy_len; + xfrm_len -= copy_len; encoded_size += RDB_ESCAPE_LENGTH; - if (padding_bytes !=0) + if (padding_bytes != 0) break; } *dst += encoded_size; } - /* Compare the string in [buf..buf_end) with a string that is an infinite sequence of strings in space_xfrm */ -static -int rdb_compare_string_with_spaces(const uchar *buf, const uchar* const buf_end, - const std::vector<uchar>* const space_xfrm) -{ - int cmp= 0; - while (buf < buf_end) - { - size_t bytes = std::min((size_t) (buf_end - buf), space_xfrm->size()); - if ((cmp= memcmp(buf, space_xfrm->data(), bytes)) != 0) +static int +rdb_compare_string_with_spaces(const uchar *buf, const uchar *const buf_end, + const std::vector<uchar> *const space_xfrm) { + int cmp = 0; + while (buf < buf_end) { + size_t bytes = std::min((size_t)(buf_end - buf), space_xfrm->size()); + if ((cmp = memcmp(buf, space_xfrm->data(), bytes)) != 0) break; buf += bytes; } return cmp; } -static const int RDB_TRIMMED_CHARS_OFFSET= 8; +static const int RDB_TRIMMED_CHARS_OFFSET = 8; /* Pack the 
data with Variable-Length Space-Padded Encoding. @@ -1614,68 +1513,57 @@ static const int RDB_TRIMMED_CHARS_OFFSET= 8; rdb_skip_variable_space_pad */ -static void rdb_pack_with_varchar_space_pad( - Rdb_field_packing* const fpi, Field* const field, uchar* buf, - uchar **dst, Rdb_pack_field_context* const pack_ctx) -{ - Rdb_string_writer* const unpack_info= pack_ctx->writer; - const CHARSET_INFO* const charset= field->charset(); - const auto field_var= static_cast<Field_varstring *>(field); - - const size_t value_length= (field_var->length_bytes == 1) ? - (uint) *field->ptr : - uint2korr(field->ptr); - - const size_t trimmed_len= - charset->cset->lengthsp(charset, - (const char*)field_var->ptr + - field_var->length_bytes, - value_length); +static void +rdb_pack_with_varchar_space_pad(Rdb_field_packing *const fpi, + Field *const field, uchar *buf, uchar **dst, + Rdb_pack_field_context *const pack_ctx) { + Rdb_string_writer *const unpack_info = pack_ctx->writer; + const CHARSET_INFO *const charset = field->charset(); + const auto field_var = static_cast<Field_varstring *>(field); + + const size_t value_length = (field_var->length_bytes == 1) + ? (uint)*field->ptr + : uint2korr(field->ptr); + + const size_t trimmed_len = charset->cset->lengthsp( + charset, (const char *)field_var->ptr + field_var->length_bytes, + value_length); const size_t xfrm_len = charset->coll->strnxfrm( - charset, - buf, fpi->m_max_image_len, - field_var->char_length(), - field_var->ptr + field_var->length_bytes, - trimmed_len, - 0); + charset, buf, fpi->m_max_image_len, field_var->char_length(), + field_var->ptr + field_var->length_bytes, trimmed_len, 0); /* Got a mem-comparable image in 'buf'. 
Now, produce varlength encoding */ - uchar* const buf_end= buf + xfrm_len; + uchar *const buf_end = buf + xfrm_len; - size_t encoded_size= 0; - uchar *ptr= *dst; + size_t encoded_size = 0; + uchar *ptr = *dst; size_t padding_bytes; - while (true) - { - const size_t copy_len= - std::min<size_t>(fpi->m_segment_size-1, buf_end - buf); - padding_bytes= fpi->m_segment_size - 1 - copy_len; + while (true) { + const size_t copy_len = + std::min<size_t>(fpi->m_segment_size - 1, buf_end - buf); + padding_bytes = fpi->m_segment_size - 1 - copy_len; memcpy(ptr, buf, copy_len); ptr += copy_len; buf += copy_len; - if (padding_bytes) - { + if (padding_bytes) { memcpy(ptr, fpi->space_xfrm->data(), padding_bytes); - ptr+= padding_bytes; - *ptr= VARCHAR_CMP_EQUAL_TO_SPACES; // last segment - } - else - { + ptr += padding_bytes; + *ptr = VARCHAR_CMP_EQUAL_TO_SPACES; // last segment + } else { // Compare the string suffix with a hypothetical infinite string of // spaces. It could be that the first difference is beyond the end of // current chunk. - const int cmp= - rdb_compare_string_with_spaces(buf, buf_end, fpi->space_xfrm); + const int cmp = + rdb_compare_string_with_spaces(buf, buf_end, fpi->space_xfrm); if (cmp < 0) - *ptr= VARCHAR_CMP_LESS_THAN_SPACES; + *ptr = VARCHAR_CMP_LESS_THAN_SPACES; else if (cmp > 0) - *ptr= VARCHAR_CMP_GREATER_THAN_SPACES; - else - { + *ptr = VARCHAR_CMP_GREATER_THAN_SPACES; + else { // It turns out all the rest are spaces. - *ptr= VARCHAR_CMP_EQUAL_TO_SPACES; + *ptr = VARCHAR_CMP_EQUAL_TO_SPACES; } } encoded_size += fpi->m_segment_size; @@ -1687,24 +1575,20 @@ static void rdb_pack_with_varchar_space_pad( // m_unpack_info_stores_value means unpack_info stores the whole original // value. There is no need to store the number of trimmed/padded endspaces // in that case. 
- if (unpack_info && !fpi->m_unpack_info_stores_value) - { + if (unpack_info && !fpi->m_unpack_info_stores_value) { // (value_length - trimmed_len) is the number of trimmed space *characters* // then, padding_bytes is the number of *bytes* added as padding // then, we add 8, because we don't store negative values. DBUG_ASSERT(padding_bytes % fpi->space_xfrm_len == 0); - DBUG_ASSERT((value_length - trimmed_len)% fpi->space_mb_len == 0); - const size_t removed_chars= - RDB_TRIMMED_CHARS_OFFSET + - (value_length - trimmed_len) / fpi->space_mb_len - - padding_bytes/fpi->space_xfrm_len; + DBUG_ASSERT((value_length - trimmed_len) % fpi->space_mb_len == 0); + const size_t removed_chars = + RDB_TRIMMED_CHARS_OFFSET + + (value_length - trimmed_len) / fpi->space_mb_len - + padding_bytes / fpi->space_xfrm_len; - if (fpi->m_unpack_info_uses_two_bytes) - { + if (fpi->m_unpack_info_uses_two_bytes) { unpack_info->write_uint16(removed_chars); - } - else - { + } else { DBUG_ASSERT(removed_chars < 0x100); unpack_info->write_uint8(removed_chars); } @@ -1718,35 +1602,30 @@ static void rdb_pack_with_varchar_space_pad( */ static int rdb_unpack_binary_or_utf8_varchar( - Rdb_field_packing* const fpi, Field* const field, - uchar* dst, - Rdb_string_reader* const reader, - Rdb_string_reader* const unp_reader __attribute__((__unused__))) -{ + Rdb_field_packing *const fpi, Field *const field, uchar *dst, + Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) { const uchar *ptr; - size_t len= 0; - bool finished= false; - uchar *d0= dst; - Field_varstring* const field_var= (Field_varstring*)field; + size_t len = 0; + bool finished = false; + uchar *d0 = dst; + Field_varstring *const field_var = (Field_varstring *)field; dst += field_var->length_bytes; // How much we can unpack - size_t dst_len= field_var->pack_length() - field_var->length_bytes; - uchar* const dst_end= dst + dst_len; + size_t dst_len = field_var->pack_length() - 
field_var->length_bytes; + uchar *const dst_end = dst + dst_len; /* Decode the length-emitted encoding here */ - while ((ptr= (const uchar*)reader->read(RDB_ESCAPE_LENGTH))) - { + while ((ptr = (const uchar *)reader->read(RDB_ESCAPE_LENGTH))) { /* See rdb_pack_with_varchar_encoding. */ - uchar pad= 255 - ptr[RDB_ESCAPE_LENGTH - 1]; // number of padding bytes - uchar used_bytes= RDB_ESCAPE_LENGTH - 1 - pad; + uchar pad = 255 - ptr[RDB_ESCAPE_LENGTH - 1]; // number of padding bytes + uchar used_bytes = RDB_ESCAPE_LENGTH - 1 - pad; - if (used_bytes > RDB_ESCAPE_LENGTH - 1) - { + if (used_bytes > RDB_ESCAPE_LENGTH - 1) { return UNPACK_FAILURE; /* cannot store that much, invalid data */ } - if (dst_len < used_bytes) - { + if (dst_len < used_bytes) { /* Encoded index tuple is longer than the size in the record buffer? */ return UNPACK_FAILURE; } @@ -1754,10 +1633,8 @@ static int rdb_unpack_binary_or_utf8_varchar( /* Now, we need to decode used_bytes of data and append them to the value. */ - if (fpi->m_varchar_charset == &my_charset_utf8_bin) - { - if (used_bytes & 1) - { + if (fpi->m_varchar_charset == &my_charset_utf8_bin) { + if (used_bytes & 1) { /* UTF-8 characters are encoded into two-byte entities. There is no way we can have an odd number of bytes after encoding. 
@@ -1765,33 +1642,29 @@ static int rdb_unpack_binary_or_utf8_varchar( return UNPACK_FAILURE; } - const uchar *src= ptr; - const uchar *src_end= ptr + used_bytes; - while (src < src_end) - { - my_wc_t wc= (src[0] <<8) | src[1]; + const uchar *src = ptr; + const uchar *src_end = ptr + used_bytes; + while (src < src_end) { + my_wc_t wc = (src[0] << 8) | src[1]; src += 2; - const CHARSET_INFO *cset= fpi->m_varchar_charset; - int res= cset->cset->wc_mb(cset, wc, dst, dst_end); - DBUG_ASSERT(res > 0 && res <=3); + const CHARSET_INFO *cset = fpi->m_varchar_charset; + int res = cset->cset->wc_mb(cset, wc, dst, dst_end); + DBUG_ASSERT(res > 0 && res <= 3); if (res < 0) return UNPACK_FAILURE; dst += res; len += res; dst_len -= res; } - } - else - { + } else { memcpy(dst, ptr, used_bytes); dst += used_bytes; dst_len -= used_bytes; len += used_bytes; } - if (used_bytes < RDB_ESCAPE_LENGTH - 1) - { - finished= true; + if (used_bytes < RDB_ESCAPE_LENGTH - 1) { + finished = true; break; } } @@ -1800,12 +1673,9 @@ static int rdb_unpack_binary_or_utf8_varchar( return UNPACK_FAILURE; /* Save the length */ - if (field_var->length_bytes == 1) - { - d0[0]= len; - } - else - { + if (field_var->length_bytes == 1) { + d0[0] = len; + } else { DBUG_ASSERT(field_var->length_bytes == 2); int2store(d0, len); } @@ -1820,66 +1690,54 @@ static int rdb_unpack_binary_or_utf8_varchar( rdb_skip_variable_space_pad - skip function */ static int rdb_unpack_binary_or_utf8_varchar_space_pad( - Rdb_field_packing* const fpi, Field* const field, - uchar* dst, - Rdb_string_reader* const reader, - Rdb_string_reader* const unp_reader) -{ + Rdb_field_packing *const fpi, Field *const field, uchar *dst, + Rdb_string_reader *const reader, Rdb_string_reader *const unp_reader) { const uchar *ptr; - size_t len= 0; - bool finished= false; - Field_varstring* const field_var= static_cast<Field_varstring *>(field); - uchar *d0= dst; - uchar *dst_end= dst + field_var->pack_length(); + size_t len = 0; + bool finished = 
false; + Field_varstring *const field_var = static_cast<Field_varstring *>(field); + uchar *d0 = dst; + uchar *dst_end = dst + field_var->pack_length(); dst += field_var->length_bytes; - uint space_padding_bytes= 0; + uint space_padding_bytes = 0; uint extra_spaces; - if ((fpi->m_unpack_info_uses_two_bytes? - unp_reader->read_uint16(&extra_spaces): - unp_reader->read_uint8(&extra_spaces))) - { + if ((fpi->m_unpack_info_uses_two_bytes + ? unp_reader->read_uint16(&extra_spaces) + : unp_reader->read_uint8(&extra_spaces))) { return UNPACK_FAILURE; } - if (extra_spaces <= RDB_TRIMMED_CHARS_OFFSET) - { - space_padding_bytes= -(static_cast<int>(extra_spaces) - - RDB_TRIMMED_CHARS_OFFSET); - extra_spaces= 0; - } - else + if (extra_spaces <= RDB_TRIMMED_CHARS_OFFSET) { + space_padding_bytes = + -(static_cast<int>(extra_spaces) - RDB_TRIMMED_CHARS_OFFSET); + extra_spaces = 0; + } else extra_spaces -= RDB_TRIMMED_CHARS_OFFSET; space_padding_bytes *= fpi->space_xfrm_len; /* Decode the length-emitted encoding here */ - while ((ptr= (const uchar*)reader->read(fpi->m_segment_size))) - { - const char last_byte= ptr[fpi->m_segment_size - 1]; + while ((ptr = (const uchar *)reader->read(fpi->m_segment_size))) { + const char last_byte = ptr[fpi->m_segment_size - 1]; size_t used_bytes; - if (last_byte == VARCHAR_CMP_EQUAL_TO_SPACES) // this is the last segment - { - if (space_padding_bytes > (fpi->m_segment_size-1)) - return UNPACK_FAILURE; // Cannot happen, corrupted data - used_bytes= (fpi->m_segment_size-1) - space_padding_bytes; - finished= true; - } - else + if (last_byte == VARCHAR_CMP_EQUAL_TO_SPACES) // this is the last segment { + if (space_padding_bytes > (fpi->m_segment_size - 1)) + return UNPACK_FAILURE; // Cannot happen, corrupted data + used_bytes = (fpi->m_segment_size - 1) - space_padding_bytes; + finished = true; + } else { if (last_byte != VARCHAR_CMP_LESS_THAN_SPACES && - last_byte != VARCHAR_CMP_GREATER_THAN_SPACES) - { - return UNPACK_FAILURE; // Invalid value + 
last_byte != VARCHAR_CMP_GREATER_THAN_SPACES) { + return UNPACK_FAILURE; // Invalid value } - used_bytes= fpi->m_segment_size-1; + used_bytes = fpi->m_segment_size - 1; } // Now, need to decode used_bytes of data and append them to the value. - if (fpi->m_varchar_charset == &my_charset_utf8_bin) - { - if (used_bytes & 1) - { + if (fpi->m_varchar_charset == &my_charset_utf8_bin) { + if (used_bytes & 1) { /* UTF-8 characters are encoded into two-byte entities. There is no way we can have an odd number of bytes after encoding. @@ -1887,23 +1745,20 @@ static int rdb_unpack_binary_or_utf8_varchar_space_pad( return UNPACK_FAILURE; } - const uchar *src= ptr; - const uchar* const src_end= ptr + used_bytes; - while (src < src_end) - { - my_wc_t wc= (src[0] <<8) | src[1]; + const uchar *src = ptr; + const uchar *const src_end = ptr + used_bytes; + while (src < src_end) { + my_wc_t wc = (src[0] << 8) | src[1]; src += 2; - const CHARSET_INFO *cset= fpi->m_varchar_charset; - int res= cset->cset->wc_mb(cset, wc, dst, dst_end); - DBUG_ASSERT(res <=3); + const CHARSET_INFO *cset = fpi->m_varchar_charset; + int res = cset->cset->wc_mb(cset, wc, dst, dst_end); + DBUG_ASSERT(res <= 3); if (res <= 0) return UNPACK_FAILURE; dst += res; len += res; } - } - else - { + } else { if (dst + used_bytes > dst_end) return UNPACK_FAILURE; memcpy(dst, ptr, used_bytes); @@ -1911,10 +1766,8 @@ static int rdb_unpack_binary_or_utf8_varchar_space_pad( len += used_bytes; } - if (finished) - { - if (extra_spaces) - { + if (finished) { + if (extra_spaces) { // Both binary and UTF-8 charset store space as ' ', // so the following is ok: if (dst + extra_spaces > dst_end) @@ -1930,12 +1783,9 @@ static int rdb_unpack_binary_or_utf8_varchar_space_pad( return UNPACK_FAILURE; /* Save the length */ - if (field_var->length_bytes == 1) - { - d0[0]= len; - } - else - { + if (field_var->length_bytes == 1) { + d0[0] = len; + } else { DBUG_ASSERT(field_var->length_bytes == 2); int2store(d0, len); } @@ -1949,13 
+1799,11 @@ static int rdb_unpack_binary_or_utf8_varchar_space_pad( */ static void rdb_make_unpack_unknown( - const Rdb_collation_codec *codec __attribute__((__unused__)), - const Field* const field, Rdb_pack_field_context* const pack_ctx) -{ + const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)), + const Field *const field, Rdb_pack_field_context *const pack_ctx) { pack_ctx->writer->write(field->ptr, field->pack_length()); } - /* This point of this function is only to indicate that unpack_info is available. @@ -1965,35 +1813,27 @@ static void rdb_make_unpack_unknown( */ static void rdb_dummy_make_unpack_info( - const Rdb_collation_codec *codec __attribute__((__unused__)), - const Field *field __attribute__((__unused__)), - Rdb_pack_field_context *pack_ctx __attribute__((__unused__))) -{ -} + const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)), + const Field *field MY_ATTRIBUTE((__unused__)), + Rdb_pack_field_context *pack_ctx MY_ATTRIBUTE((__unused__))) {} /* Function of type rdb_index_field_unpack_t */ -static int rdb_unpack_unknown(Rdb_field_packing* const fpi, Field* const field, - uchar* const dst, - Rdb_string_reader* const reader, - Rdb_string_reader* const unp_reader) -{ +static int rdb_unpack_unknown(Rdb_field_packing *const fpi, Field *const field, + uchar *const dst, Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader) { const uchar *ptr; const uint len = fpi->m_unpack_data_len; // We don't use anything from the key, so skip over it. - if (rdb_skip_max_length(fpi, field, reader)) - { + if (rdb_skip_max_length(fpi, field, reader)) { return UNPACK_FAILURE; } - // Unpack info is needed but none available. 
- if (len > 0 && unp_reader == nullptr) - { - return UNPACK_INFO_MISSING; - } - if ((ptr= (const uchar*)unp_reader->read(len))) - { + + DBUG_ASSERT_IMP(len > 0, unp_reader != nullptr); + + if ((ptr = (const uchar *)unp_reader->read(len))) { memcpy(dst, ptr, len); return UNPACK_SUCCESS; } @@ -2005,16 +1845,14 @@ static int rdb_unpack_unknown(Rdb_field_packing* const fpi, Field* const field, */ static void rdb_make_unpack_unknown_varchar( - const Rdb_collation_codec* const codec __attribute__((__unused__)), - const Field* const field, Rdb_pack_field_context* const pack_ctx) -{ - const auto f= static_cast<const Field_varstring *>(field); - uint len= f->length_bytes == 1 ? (uint) *f->ptr : uint2korr(f->ptr); - len+= f->length_bytes; + const Rdb_collation_codec *const codec MY_ATTRIBUTE((__unused__)), + const Field *const field, Rdb_pack_field_context *const pack_ctx) { + const auto f = static_cast<const Field_varstring *>(field); + uint len = f->length_bytes == 1 ? (uint)*f->ptr : uint2korr(f->ptr); + len += f->length_bytes; pack_ctx->writer->write(field->ptr, len); } - /* Function of type rdb_index_field_unpack_t @@ -2029,34 +1867,27 @@ static void rdb_make_unpack_unknown_varchar( rdb_make_unpack_unknown, rdb_unpack_unknown */ -static int rdb_unpack_unknown_varchar(Rdb_field_packing* const fpi, - Field* const field, - uchar *dst, - Rdb_string_reader* const reader, - Rdb_string_reader* const unp_reader) -{ +static int rdb_unpack_unknown_varchar(Rdb_field_packing *const fpi, + Field *const field, uchar *dst, + Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader) { const uchar *ptr; - uchar* const d0= dst; - const auto f= static_cast<Field_varstring *>(field); + uchar *const d0 = dst; + const auto f = static_cast<Field_varstring *>(field); dst += f->length_bytes; - const uint len_bytes= f->length_bytes; + const uint len_bytes = f->length_bytes; // We don't use anything from the key, so skip over it. 
- if (fpi->m_skip_func(fpi, field, reader)) - { + if (fpi->m_skip_func(fpi, field, reader)) { return UNPACK_FAILURE; } - // Unpack info is needed but none available. + DBUG_ASSERT(len_bytes > 0); - if (unp_reader == nullptr) - { - return UNPACK_INFO_MISSING; - } - if ((ptr= (const uchar*)unp_reader->read(len_bytes))) - { + DBUG_ASSERT(unp_reader != nullptr); + + if ((ptr = (const uchar *)unp_reader->read(len_bytes))) { memcpy(d0, ptr, len_bytes); - const uint len= len_bytes == 1 ? (uint) *ptr : uint2korr(ptr); - if ((ptr= (const uchar*)unp_reader->read(len))) - { + const uint len = len_bytes == 1 ? (uint)*ptr : uint2korr(ptr); + if ((ptr = (const uchar *)unp_reader->read(len))) { memcpy(dst, ptr, len); return UNPACK_SUCCESS; } @@ -2064,47 +1895,33 @@ static int rdb_unpack_unknown_varchar(Rdb_field_packing* const fpi, return UNPACK_FAILURE; } - /* Write unpack_data for a "simple" collation */ -static void rdb_write_unpack_simple(Rdb_bit_writer* const writer, - const Rdb_collation_codec* const codec, - const uchar* const src, - const size_t src_len) -{ - for (uint i= 0; i < src_len; i++) - { +static void rdb_write_unpack_simple(Rdb_bit_writer *const writer, + const Rdb_collation_codec *const codec, + const uchar *const src, + const size_t src_len) { + for (uint i = 0; i < src_len; i++) { writer->write(codec->m_enc_size[src[i]], codec->m_enc_idx[src[i]]); } } - -static uint rdb_read_unpack_simple(Rdb_bit_reader* const reader, - const Rdb_collation_codec* const codec, - const uchar* const src, - const size_t &src_len, uchar* const dst) -{ - for (uint i= 0; i < src_len; i++) - { - if (codec->m_dec_size[src[i]] > 0) - { +static uint rdb_read_unpack_simple(Rdb_bit_reader *const reader, + const Rdb_collation_codec *const codec, + const uchar *const src, + const size_t &src_len, uchar *const dst) { + for (uint i = 0; i < src_len; i++) { + if (codec->m_dec_size[src[i]] > 0) { uint *ret; - // Unpack info is needed but none available. 
- if (reader == nullptr) - { - return UNPACK_INFO_MISSING; - } + DBUG_ASSERT(reader != nullptr); - if ((ret= reader->read(codec->m_dec_size[src[i]])) == nullptr) - { + if ((ret = reader->read(codec->m_dec_size[src[i]])) == nullptr) { return UNPACK_FAILURE; } - dst[i]= codec->m_dec_idx[*ret][src[i]]; - } - else - { - dst[i]= codec->m_dec_idx[0][src[i]]; + dst[i] = codec->m_dec_idx[*ret][src[i]]; + } else { + dst[i] = codec->m_dec_idx[0][src[i]]; } } @@ -2119,14 +1936,13 @@ static uint rdb_read_unpack_simple(Rdb_bit_reader* const reader, */ static void -rdb_make_unpack_simple_varchar(const Rdb_collation_codec* const codec, - const Field* const field, - Rdb_pack_field_context* const pack_ctx) -{ - const auto f= static_cast<const Field_varstring *>(field); - uchar* const src= f->ptr + f->length_bytes; - const size_t src_len= - f->length_bytes == 1 ? (uint) *f->ptr : uint2korr(f->ptr); +rdb_make_unpack_simple_varchar(const Rdb_collation_codec *const codec, + const Field *const field, + Rdb_pack_field_context *const pack_ctx) { + const auto f = static_cast<const Field_varstring *>(field); + uchar *const src = f->ptr + f->length_bytes; + const size_t src_len = + f->length_bytes == 1 ? (uint)*f->ptr : uint2korr(f->ptr); Rdb_bit_writer bit_writer(pack_ctx->writer); // The std::min compares characters with bytes, but for simple collations, // mbmaxlen = 1. 
@@ -2142,92 +1958,75 @@ rdb_make_unpack_simple_varchar(const Rdb_collation_codec* const codec, rdb_unpack_binary_or_utf8_varchar_space_pad - a similar unpacking function */ -int -rdb_unpack_simple_varchar_space_pad(Rdb_field_packing* const fpi, - Field* const field, - uchar* dst, - Rdb_string_reader* const reader, - Rdb_string_reader * const unp_reader) -{ +int rdb_unpack_simple_varchar_space_pad(Rdb_field_packing *const fpi, + Field *const field, uchar *dst, + Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader) { const uchar *ptr; - size_t len= 0; - bool finished= false; - uchar *d0= dst; - const Field_varstring* const field_var= static_cast<Field_varstring*>(field); + size_t len = 0; + bool finished = false; + uchar *d0 = dst; + const Field_varstring *const field_var = + static_cast<Field_varstring *>(field); // For simple collations, char_length is also number of bytes. DBUG_ASSERT((size_t)fpi->m_max_image_len >= field_var->char_length()); - uchar *dst_end= dst + field_var->pack_length(); + uchar *dst_end = dst + field_var->pack_length(); dst += field_var->length_bytes; Rdb_bit_reader bit_reader(unp_reader); - uint space_padding_bytes= 0; + uint space_padding_bytes = 0; uint extra_spaces; - if (!unp_reader) - { - return UNPACK_INFO_MISSING; - } + DBUG_ASSERT(unp_reader != nullptr); - if ((fpi->m_unpack_info_uses_two_bytes? - unp_reader->read_uint16(&extra_spaces): - unp_reader->read_uint8(&extra_spaces))) - { + if ((fpi->m_unpack_info_uses_two_bytes + ? 
unp_reader->read_uint16(&extra_spaces) + : unp_reader->read_uint8(&extra_spaces))) { return UNPACK_FAILURE; } - if (extra_spaces <= 8) - { - space_padding_bytes= -(static_cast<int>(extra_spaces) - 8); - extra_spaces= 0; - } - else + if (extra_spaces <= 8) { + space_padding_bytes = -(static_cast<int>(extra_spaces) - 8); + extra_spaces = 0; + } else extra_spaces -= 8; space_padding_bytes *= fpi->space_xfrm_len; /* Decode the length-emitted encoding here */ - while ((ptr= (const uchar*)reader->read(fpi->m_segment_size))) - { - const char last_byte= ptr[fpi->m_segment_size - 1]; // number of padding bytes + while ((ptr = (const uchar *)reader->read(fpi->m_segment_size))) { + const char last_byte = + ptr[fpi->m_segment_size - 1]; // number of padding bytes size_t used_bytes; - if (last_byte == VARCHAR_CMP_EQUAL_TO_SPACES) - { + if (last_byte == VARCHAR_CMP_EQUAL_TO_SPACES) { // this is the last one - if (space_padding_bytes > (fpi->m_segment_size-1)) - return UNPACK_FAILURE; // Cannot happen, corrupted data - used_bytes= (fpi->m_segment_size-1) - space_padding_bytes; - finished= true; - } - else - { + if (space_padding_bytes > (fpi->m_segment_size - 1)) + return UNPACK_FAILURE; // Cannot happen, corrupted data + used_bytes = (fpi->m_segment_size - 1) - space_padding_bytes; + finished = true; + } else { if (last_byte != VARCHAR_CMP_LESS_THAN_SPACES && - last_byte != VARCHAR_CMP_GREATER_THAN_SPACES) - { + last_byte != VARCHAR_CMP_GREATER_THAN_SPACES) { return UNPACK_FAILURE; } - used_bytes= fpi->m_segment_size-1; + used_bytes = fpi->m_segment_size - 1; } - if (dst + used_bytes > dst_end) - { + if (dst + used_bytes > dst_end) { // The value on disk is longer than the field definition allows? 
return UNPACK_FAILURE; } uint ret; - if ((ret= rdb_read_unpack_simple(&bit_reader, - fpi->m_charset_codec, ptr, used_bytes, - dst)) != UNPACK_SUCCESS) - { + if ((ret = rdb_read_unpack_simple(&bit_reader, fpi->m_charset_codec, ptr, + used_bytes, dst)) != UNPACK_SUCCESS) { return ret; } dst += used_bytes; len += used_bytes; - if (finished) - { - if (extra_spaces) - { + if (finished) { + if (extra_spaces) { if (dst + extra_spaces > dst_end) return UNPACK_FAILURE; // pad_char has a 1-byte form in all charsets that @@ -2243,19 +2042,15 @@ rdb_unpack_simple_varchar_space_pad(Rdb_field_packing* const fpi, return UNPACK_FAILURE; /* Save the length */ - if (field_var->length_bytes == 1) - { - d0[0]= len; - } - else - { + if (field_var->length_bytes == 1) { + d0[0] = len; + } else { DBUG_ASSERT(field_var->length_bytes == 2); int2store(d0, len); } return UNPACK_SUCCESS; } - /* Function of type rdb_make_unpack_info_t @@ -2267,11 +2062,10 @@ rdb_unpack_simple_varchar_space_pad(Rdb_field_packing* const fpi, The VARCHAR variant is in rdb_make_unpack_simple_varchar */ -static void rdb_make_unpack_simple(const Rdb_collation_codec* const codec, - const Field* const field, - Rdb_pack_field_context* const pack_ctx) -{ - const uchar* const src= field->ptr; +static void rdb_make_unpack_simple(const Rdb_collation_codec *const codec, + const Field *const field, + Rdb_pack_field_context *const pack_ctx) { + const uchar *const src = field->ptr; Rdb_bit_writer bit_writer(pack_ctx->writer); rdb_write_unpack_simple(&bit_writer, codec, src, field->pack_length()); } @@ -2280,18 +2074,15 @@ static void rdb_make_unpack_simple(const Rdb_collation_codec* const codec, Function of type rdb_index_field_unpack_t */ -static int rdb_unpack_simple(Rdb_field_packing* const fpi, - Field* const field __attribute__((__unused__)), - uchar* const dst, - Rdb_string_reader* const reader, - Rdb_string_reader* const unp_reader) -{ +static int rdb_unpack_simple(Rdb_field_packing *const fpi, + Field *const field 
MY_ATTRIBUTE((__unused__)), + uchar *const dst, Rdb_string_reader *const reader, + Rdb_string_reader *const unp_reader) { const uchar *ptr; const uint len = fpi->m_max_image_len; Rdb_bit_reader bit_reader(unp_reader); - if (!(ptr= (const uchar*)reader->read(len))) - { + if (!(ptr = (const uchar *)reader->read(len))) { return UNPACK_FAILURE; } @@ -2299,17 +2090,15 @@ static int rdb_unpack_simple(Rdb_field_packing* const fpi, fpi->m_charset_codec, ptr, len, dst); } - // See Rdb_charset_space_info::spaces_xfrm -const int RDB_SPACE_XFRM_SIZE= 32; +const int RDB_SPACE_XFRM_SIZE = 32; // A class holding information about how space character is represented in a // charset. -class Rdb_charset_space_info -{ - public: - Rdb_charset_space_info(const Rdb_charset_space_info&) = delete; - Rdb_charset_space_info& operator=(const Rdb_charset_space_info&) = delete; +class Rdb_charset_space_info { +public: + Rdb_charset_space_info(const Rdb_charset_space_info &) = delete; + Rdb_charset_space_info &operator=(const Rdb_charset_space_info &) = delete; Rdb_charset_space_info() = default; // A few strxfrm'ed space characters, at least RDB_SPACE_XFRM_SIZE bytes @@ -2325,8 +2114,7 @@ class Rdb_charset_space_info }; static std::array<std::unique_ptr<Rdb_charset_space_info>, MY_ALL_CHARSETS_SIZE> -rdb_mem_comparable_space; - + rdb_mem_comparable_space; /* @brief @@ -2347,39 +2135,33 @@ rdb_mem_comparable_space; uses the charset). 
*/ -static -void rdb_get_mem_comparable_space(const CHARSET_INFO* const cs, - const std::vector<uchar> **xfrm, - size_t* const xfrm_len, - size_t* const mb_len) -{ +static void rdb_get_mem_comparable_space(const CHARSET_INFO *const cs, + const std::vector<uchar> **xfrm, + size_t *const xfrm_len, + size_t *const mb_len) { DBUG_ASSERT(cs->number < MY_ALL_CHARSETS_SIZE); - if (!rdb_mem_comparable_space[cs->number].get()) - { + if (!rdb_mem_comparable_space[cs->number].get()) { mysql_mutex_lock(&rdb_mem_cmp_space_mutex); - if (!rdb_mem_comparable_space[cs->number].get()) - { + if (!rdb_mem_comparable_space[cs->number].get()) { // Upper bound of how many bytes can be occupied by multi-byte form of a // character in any charset. - const int MAX_MULTI_BYTE_CHAR_SIZE= 4; + const int MAX_MULTI_BYTE_CHAR_SIZE = 4; DBUG_ASSERT(cs->mbmaxlen <= MAX_MULTI_BYTE_CHAR_SIZE); // multi-byte form of the ' ' (space) character uchar space_mb[MAX_MULTI_BYTE_CHAR_SIZE]; - const size_t space_mb_len= cs->cset->wc_mb(cs, (my_wc_t) cs->pad_char, - space_mb, - space_mb + sizeof(space_mb)); + const size_t space_mb_len = cs->cset->wc_mb( + cs, (my_wc_t)cs->pad_char, space_mb, space_mb + sizeof(space_mb)); - uchar space[20]; // mem-comparable image of the space character + uchar space[20]; // mem-comparable image of the space character - const size_t space_len= cs->coll->strnxfrm(cs, space, sizeof(space), 1, - space_mb, space_mb_len, 0); - Rdb_charset_space_info* const info= new Rdb_charset_space_info; - info->space_xfrm_len= space_len; - info->space_mb_len= space_mb_len; - while (info->spaces_xfrm.size() < RDB_SPACE_XFRM_SIZE) - { + const size_t space_len = cs->coll->strnxfrm(cs, space, sizeof(space), 1, + space_mb, space_mb_len, 0); + Rdb_charset_space_info *const info = new Rdb_charset_space_info; + info->space_xfrm_len = space_len; + info->space_mb_len = space_mb_len; + while (info->spaces_xfrm.size() < RDB_SPACE_XFRM_SIZE) { info->spaces_xfrm.insert(info->spaces_xfrm.end(), space, space + 
space_len); } @@ -2388,79 +2170,69 @@ void rdb_get_mem_comparable_space(const CHARSET_INFO* const cs, mysql_mutex_unlock(&rdb_mem_cmp_space_mutex); } - *xfrm= &rdb_mem_comparable_space[cs->number]->spaces_xfrm; - *xfrm_len= rdb_mem_comparable_space[cs->number]->space_xfrm_len; - *mb_len= rdb_mem_comparable_space[cs->number]->space_mb_len; + *xfrm = &rdb_mem_comparable_space[cs->number]->spaces_xfrm; + *xfrm_len = rdb_mem_comparable_space[cs->number]->space_xfrm_len; + *mb_len = rdb_mem_comparable_space[cs->number]->space_mb_len; } mysql_mutex_t rdb_mem_cmp_space_mutex; -std::array<const Rdb_collation_codec*, MY_ALL_CHARSETS_SIZE> - rdb_collation_data; +std::array<const Rdb_collation_codec *, MY_ALL_CHARSETS_SIZE> + rdb_collation_data; mysql_mutex_t rdb_collation_data_mutex; -static bool rdb_is_collation_supported(const my_core::CHARSET_INFO* const cs) -{ +static bool rdb_is_collation_supported(const my_core::CHARSET_INFO *const cs) { return (cs->coll == &my_collation_8bit_simple_ci_handler); } -static const Rdb_collation_codec *rdb_init_collation_mapping( - const my_core::CHARSET_INFO* const cs) -{ +static const Rdb_collation_codec * +rdb_init_collation_mapping(const my_core::CHARSET_INFO *const cs) { DBUG_ASSERT(cs && cs->state & MY_CS_AVAILABLE); - const Rdb_collation_codec* codec= rdb_collation_data[cs->number]; + const Rdb_collation_codec *codec = rdb_collation_data[cs->number]; - if (codec == nullptr && rdb_is_collation_supported(cs)) - { + if (codec == nullptr && rdb_is_collation_supported(cs)) { mysql_mutex_lock(&rdb_collation_data_mutex); - codec= rdb_collation_data[cs->number]; - if (codec == nullptr) - { - Rdb_collation_codec *cur= nullptr; + codec = rdb_collation_data[cs->number]; + if (codec == nullptr) { + Rdb_collation_codec *cur = nullptr; // Compute reverse mapping for simple collations. 
- if (cs->coll == &my_collation_8bit_simple_ci_handler) - { - cur= new Rdb_collation_codec; + if (cs->coll == &my_collation_8bit_simple_ci_handler) { + cur = new Rdb_collation_codec; std::map<uchar, std::vector<uchar>> rev_map; - size_t max_conflict_size= 0; - for (int src = 0; src < 256; src++) - { - uchar dst= cs->sort_order[src]; + size_t max_conflict_size = 0; + for (int src = 0; src < 256; src++) { + uchar dst = cs->sort_order[src]; rev_map[dst].push_back(src); - max_conflict_size= std::max(max_conflict_size, rev_map[dst].size()); + max_conflict_size = std::max(max_conflict_size, rev_map[dst].size()); } cur->m_dec_idx.resize(max_conflict_size); - for (auto const &p : rev_map) - { - uchar dst= p.first; - for (uint idx = 0; idx < p.second.size(); idx++) - { - uchar src= p.second[idx]; - uchar bits= my_bit_log2(my_round_up_to_next_power(p.second.size())); - cur->m_enc_idx[src]= idx; - cur->m_enc_size[src]= bits; - cur->m_dec_size[dst]= bits; - cur->m_dec_idx[idx][dst]= src; + for (auto const &p : rev_map) { + uchar dst = p.first; + for (uint idx = 0; idx < p.second.size(); idx++) { + uchar src = p.second[idx]; + uchar bits = + my_bit_log2(my_round_up_to_next_power(p.second.size())); + cur->m_enc_idx[src] = idx; + cur->m_enc_size[src] = bits; + cur->m_dec_size[dst] = bits; + cur->m_dec_idx[idx][dst] = src; } } - cur->m_make_unpack_info_func= - {{ rdb_make_unpack_simple_varchar, rdb_make_unpack_simple }}; - cur->m_unpack_func= - {{ rdb_unpack_simple_varchar_space_pad, rdb_unpack_simple }}; - } - else - { + cur->m_make_unpack_info_func = { + {rdb_make_unpack_simple_varchar, rdb_make_unpack_simple}}; + cur->m_unpack_func = { + {rdb_unpack_simple_varchar_space_pad, rdb_unpack_simple}}; + } else { // Out of luck for now. 
} - if (cur != nullptr) - { - codec= cur; - cur->m_cs= cs; - rdb_collation_data[cs->number]= cur; + if (cur != nullptr) { + codec = cur; + cur->m_cs = cs; + rdb_collation_data[cs->number] = cur; } } mysql_mutex_unlock(&rdb_collation_data_mutex); @@ -2469,24 +2241,17 @@ static const Rdb_collation_codec *rdb_init_collation_mapping( return codec; } - -static int get_segment_size_from_collation(const CHARSET_INFO* const cs) -{ +static int get_segment_size_from_collation(const CHARSET_INFO *const cs) { int ret; - if (cs == &my_charset_utf8mb4_bin || - cs == &my_charset_utf16_bin || - cs == &my_charset_utf16le_bin || - cs == &my_charset_utf32_bin) - { + if (cs == &my_charset_utf8mb4_bin || cs == &my_charset_utf16_bin || + cs == &my_charset_utf16le_bin || cs == &my_charset_utf32_bin) { /* In these collations, a character produces one weight, which is 3 bytes. Segment has 3 characters, add one byte for VARCHAR_CMP_* marker, and we get 3*3+1=10 */ - ret= 10; - } - else - { + ret = 10; + } else { /* All other collations. There are two classes: - Unicode-based, except for collations mentioned in the if-condition. @@ -2500,13 +2265,12 @@ static int get_segment_size_from_collation(const CHARSET_INFO* const cs) In both cases, take 8 bytes payload + 1 byte for VARCHAR_CMP* marker. */ - ret= 9; + ret = 9; } DBUG_ASSERT(ret < RDB_SPACE_XFRM_SIZE); return ret; } - /* @brief Setup packing of index field into its mem-comparable form @@ -2527,187 +2291,173 @@ static int get_segment_size_from_collation(const CHARSET_INFO* const cs) FALSE - Otherwise */ -bool Rdb_field_packing::setup(const Rdb_key_def* const key_descr, - const Field* const field, - const uint &keynr_arg, const uint &key_part_arg, - const uint16 &key_length) -{ - int res= false; - enum_field_types type= field ? 
field->real_type() : MYSQL_TYPE_LONGLONG; +bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr, + const Field *const field, const uint &keynr_arg, + const uint &key_part_arg, + const uint16 &key_length) { + int res = false; + enum_field_types type = field ? field->real_type() : MYSQL_TYPE_LONGLONG; - m_keynr= keynr_arg; - m_key_part= key_part_arg; + m_keynr = keynr_arg; + m_key_part = key_part_arg; - m_maybe_null= field ? field->real_maybe_null() : false; - m_unpack_func= nullptr; - m_make_unpack_info_func= nullptr; - m_unpack_data_len= 0; - space_xfrm= nullptr; // safety + m_maybe_null = field ? field->real_maybe_null() : false; + m_unpack_func = nullptr; + m_make_unpack_info_func = nullptr; + m_unpack_data_len = 0; + space_xfrm = nullptr; // safety /* Calculate image length. By default, is is pack_length() */ - m_max_image_len= field ? field->pack_length() : - ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN; - m_skip_func= rdb_skip_max_length; - m_pack_func= rdb_pack_with_make_sort_key; + m_max_image_len = + field ? field->pack_length() : ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN; + m_skip_func = rdb_skip_max_length; + m_pack_func = rdb_pack_with_make_sort_key; switch (type) { - case MYSQL_TYPE_LONGLONG: - case MYSQL_TYPE_LONG: - case MYSQL_TYPE_INT24: - case MYSQL_TYPE_SHORT: - case MYSQL_TYPE_TINY: - m_unpack_func= rdb_unpack_integer; - return true; + case MYSQL_TYPE_LONGLONG: + case MYSQL_TYPE_LONG: + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_TINY: + m_unpack_func = rdb_unpack_integer; + return true; - case MYSQL_TYPE_DOUBLE: - m_unpack_func= rdb_unpack_double; - return true; + case MYSQL_TYPE_DOUBLE: + m_unpack_func = rdb_unpack_double; + return true; - case MYSQL_TYPE_FLOAT: - m_unpack_func= rdb_unpack_float; - return true; + case MYSQL_TYPE_FLOAT: + m_unpack_func = rdb_unpack_float; + return true; - case MYSQL_TYPE_NEWDECIMAL: - /* - Decimal is packed with Field_new_decimal::make_sort_key, which just - does memcpy. 
- Unpacking decimal values was supported only after fix for issue#253, - because of that ha_rocksdb::get_storage_type() handles decimal values - in a special way. - */ - case MYSQL_TYPE_DATETIME2: - case MYSQL_TYPE_TIMESTAMP2: - /* These are packed with Field_temporal_with_date_and_timef::make_sort_key */ - case MYSQL_TYPE_TIME2: /* TIME is packed with Field_timef::make_sort_key */ - case MYSQL_TYPE_YEAR: /* YEAR is packed with Field_tiny::make_sort_key */ - /* Everything that comes here is packed with just a memcpy(). */ - m_unpack_func= rdb_unpack_binary_str; - return true; + case MYSQL_TYPE_NEWDECIMAL: + /* + Decimal is packed with Field_new_decimal::make_sort_key, which just + does memcpy. + Unpacking decimal values was supported only after fix for issue#253, + because of that ha_rocksdb::get_storage_type() handles decimal values + in a special way. + */ + case MYSQL_TYPE_DATETIME2: + case MYSQL_TYPE_TIMESTAMP2: + /* These are packed with Field_temporal_with_date_and_timef::make_sort_key */ + case MYSQL_TYPE_TIME2: /* TIME is packed with Field_timef::make_sort_key */ + case MYSQL_TYPE_YEAR: /* YEAR is packed with Field_tiny::make_sort_key */ + /* Everything that comes here is packed with just a memcpy(). */ + m_unpack_func = rdb_unpack_binary_str; + return true; - case MYSQL_TYPE_NEWDATE: - /* - This is packed by Field_newdate::make_sort_key. It assumes the data is - 3 bytes, and packing is done by swapping the byte order (for both big- - and little-endian) - */ - m_unpack_func= rdb_unpack_newdate; - return true; - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_LONG_BLOB: - case MYSQL_TYPE_BLOB: - { - if (key_descr) - { - // The my_charset_bin collation is special in that it will consider - // shorter strings sorting as less than longer strings. - // - // See Field_blob::make_sort_key for details. - m_max_image_len= key_length + - (field->charset() == &my_charset_bin - ? 
reinterpret_cast<const Field_blob*>(field)->pack_length_no_ptr() - : 0); - // Return false because indexes on text/blob will always require - // a prefix. With a prefix, the optimizer will not be able to do an - // index-only scan since there may be content occuring after the prefix - // length. - return false; - } + case MYSQL_TYPE_NEWDATE: + /* + This is packed by Field_newdate::make_sort_key. It assumes the data is + 3 bytes, and packing is done by swapping the byte order (for both big- + and little-endian) + */ + m_unpack_func = rdb_unpack_newdate; + return true; + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_BLOB: { + if (key_descr) { + // The my_charset_bin collation is special in that it will consider + // shorter strings sorting as less than longer strings. + // + // See Field_blob::make_sort_key for details. + m_max_image_len = + key_length + (field->charset() == &my_charset_bin + ? reinterpret_cast<const Field_blob *>(field) + ->pack_length_no_ptr() + : 0); + // Return false because indexes on text/blob will always require + // a prefix. With a prefix, the optimizer will not be able to do an + // index-only scan since there may be content occuring after the prefix + // length. + return false; } - default: - break; + } + default: + break; } - m_unpack_info_stores_value= false; + m_unpack_info_stores_value = false; /* Handle [VAR](CHAR|BINARY) */ - if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING) - { + if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING) { /* For CHAR-based columns, check how strxfrm image will take. field->field_length = field->char_length() * cs->mbmaxlen. 
*/ - const CHARSET_INFO *cs= field->charset(); - m_max_image_len= cs->coll->strnxfrmlen(cs, field->field_length); + const CHARSET_INFO *cs = field->charset(); + m_max_image_len = cs->coll->strnxfrmlen(cs, field->field_length); } - const bool is_varchar= (type == MYSQL_TYPE_VARCHAR); - const CHARSET_INFO *cs= field->charset(); + const bool is_varchar = (type == MYSQL_TYPE_VARCHAR); + const CHARSET_INFO *cs = field->charset(); // max_image_len before chunking is taken into account - const int max_image_len_before_chunks= m_max_image_len; + const int max_image_len_before_chunks = m_max_image_len; - if (is_varchar) - { + if (is_varchar) { // The default for varchar is variable-length, without space-padding for // comparisons - m_varchar_charset= cs; - m_skip_func= rdb_skip_variable_length; - m_pack_func= rdb_pack_with_varchar_encoding; - m_max_image_len= - (m_max_image_len/(RDB_ESCAPE_LENGTH-1) + 1) * RDB_ESCAPE_LENGTH; - - const auto field_var= static_cast<const Field_varstring*>(field); - m_unpack_info_uses_two_bytes= (field_var->field_length + 8 >= 0x100); + m_varchar_charset = cs; + m_skip_func = rdb_skip_variable_length; + m_pack_func = rdb_pack_with_varchar_encoding; + m_max_image_len = + (m_max_image_len / (RDB_ESCAPE_LENGTH - 1) + 1) * RDB_ESCAPE_LENGTH; + + const auto field_var = static_cast<const Field_varstring *>(field); + m_unpack_info_uses_two_bytes = (field_var->field_length + 8 >= 0x100); } - if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING) - { + if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING) { // See http://dev.mysql.com/doc/refman/5.7/en/string-types.html for // information about character-based datatypes are compared. 
- bool use_unknown_collation= false; + bool use_unknown_collation = false; DBUG_EXECUTE_IF("myrocks_enable_unknown_collation_index_only_scans", - use_unknown_collation= true;); + use_unknown_collation = true;); - if (cs == &my_charset_bin) - { + if (cs == &my_charset_bin) { // - SQL layer pads BINARY(N) so that it always is N bytes long. // - For VARBINARY(N), values may have different lengths, so we're using // variable-length encoding. This is also the only charset where the // values are not space-padded for comparison. - m_unpack_func= is_varchar? rdb_unpack_binary_or_utf8_varchar : - rdb_unpack_binary_str; - res= true; - } - else if (cs == &my_charset_latin1_bin || cs == &my_charset_utf8_bin) - { + m_unpack_func = is_varchar ? rdb_unpack_binary_or_utf8_varchar + : rdb_unpack_binary_str; + res = true; + } else if (cs == &my_charset_latin1_bin || cs == &my_charset_utf8_bin) { // For _bin collations, mem-comparable form of the string is the string // itself. - if (is_varchar) - { + if (is_varchar) { // VARCHARs - are compared as if they were space-padded - but are // not actually space-padded (reading the value back produces the // original value, without the padding) - m_unpack_func= rdb_unpack_binary_or_utf8_varchar_space_pad; - m_skip_func= rdb_skip_variable_space_pad; - m_pack_func= rdb_pack_with_varchar_space_pad; - m_make_unpack_info_func= rdb_dummy_make_unpack_info; - m_segment_size= get_segment_size_from_collation(cs); - m_max_image_len= - (max_image_len_before_chunks/(m_segment_size-1) + 1) * - m_segment_size; + m_unpack_func = rdb_unpack_binary_or_utf8_varchar_space_pad; + m_skip_func = rdb_skip_variable_space_pad; + m_pack_func = rdb_pack_with_varchar_space_pad; + m_make_unpack_info_func = rdb_dummy_make_unpack_info; + m_segment_size = get_segment_size_from_collation(cs); + m_max_image_len = + (max_image_len_before_chunks / (m_segment_size - 1) + 1) * + m_segment_size; rdb_get_mem_comparable_space(cs, &space_xfrm, &space_xfrm_len, &space_mb_len); - } - 
else - { + } else { // SQL layer pads CHAR(N) values to their maximum length. // We just store that and restore it back. - m_unpack_func= (cs == &my_charset_latin1_bin)? rdb_unpack_binary_str: - rdb_unpack_utf8_str; + m_unpack_func = (cs == &my_charset_latin1_bin) ? rdb_unpack_binary_str + : rdb_unpack_utf8_str; } - res= true; - } - else - { + res = true; + } else { // This is [VAR]CHAR(n) and the collation is not $(charset_name)_bin - res= true; // index-only scans are possible - m_unpack_data_len= is_varchar ? 0 : field->field_length; - const uint idx= is_varchar ? 0 : 1; - const Rdb_collation_codec *codec= nullptr; + res = true; // index-only scans are possible + m_unpack_data_len = is_varchar ? 0 : field->field_length; + const uint idx = is_varchar ? 0 : 1; + const Rdb_collation_codec *codec = nullptr; - if (is_varchar) - { + if (is_varchar) { // VARCHAR requires space-padding for doing comparisons // // The check for cs->levels_for_order is to catch @@ -2717,100 +2467,84 @@ bool Rdb_field_packing::setup(const Rdb_key_def* const key_descr, // either. // Currently we handle these collations as NO_PAD, even if they have // PAD_SPACE attribute. 
- if (cs->levels_for_order == 1) - { - m_pack_func= rdb_pack_with_varchar_space_pad; - m_skip_func= rdb_skip_variable_space_pad; - m_segment_size= get_segment_size_from_collation(cs); - m_max_image_len= - (max_image_len_before_chunks/(m_segment_size-1) + 1) * + if (cs->levels_for_order == 1) { + m_pack_func = rdb_pack_with_varchar_space_pad; + m_skip_func = rdb_skip_variable_space_pad; + m_segment_size = get_segment_size_from_collation(cs); + m_max_image_len = + (max_image_len_before_chunks / (m_segment_size - 1) + 1) * m_segment_size; rdb_get_mem_comparable_space(cs, &space_xfrm, &space_xfrm_len, &space_mb_len); - } - else - { + } else { // NO_LINT_DEBUG sql_print_warning("RocksDB: you're trying to create an index " - "with a multi-level collation %s", cs->name); + "with a multi-level collation %s", + cs->name); // NO_LINT_DEBUG sql_print_warning("MyRocks will handle this collation internally " " as if it had a NO_PAD attribute."); - m_pack_func= rdb_pack_with_varchar_encoding; - m_skip_func= rdb_skip_variable_length; + m_pack_func = rdb_pack_with_varchar_encoding; + m_skip_func = rdb_skip_variable_length; } } - if ((codec= rdb_init_collation_mapping(cs)) != nullptr) - { + if ((codec = rdb_init_collation_mapping(cs)) != nullptr) { // The collation allows to store extra information in the unpack_info // which can be used to restore the original value from the // mem-comparable form. - m_make_unpack_info_func= codec->m_make_unpack_info_func[idx]; - m_unpack_func= codec->m_unpack_func[idx]; - m_charset_codec= codec; - } - else if (use_unknown_collation) - { + m_make_unpack_info_func = codec->m_make_unpack_info_func[idx]; + m_unpack_func = codec->m_unpack_func[idx]; + m_charset_codec = codec; + } else if (use_unknown_collation) { // We have no clue about how this collation produces mem-comparable // form. Our way of restoring the original value is to keep a copy of // the original value in unpack_info. 
- m_unpack_info_stores_value= true; - m_make_unpack_info_func= is_varchar ? rdb_make_unpack_unknown_varchar - : rdb_make_unpack_unknown; - m_unpack_func= is_varchar ? rdb_unpack_unknown_varchar - : rdb_unpack_unknown; - } - else - { + m_unpack_info_stores_value = true; + m_make_unpack_info_func = is_varchar ? rdb_make_unpack_unknown_varchar + : rdb_make_unpack_unknown; + m_unpack_func = + is_varchar ? rdb_unpack_unknown_varchar : rdb_unpack_unknown; + } else { // Same as above: we don't know how to restore the value from its // mem-comparable form. // Here, we just indicate to the SQL layer we can't do it. DBUG_ASSERT(m_unpack_func == nullptr); - m_unpack_info_stores_value= false; - res= false; // Indicate that index-only reads are not possible + m_unpack_info_stores_value = false; + res = false; // Indicate that index-only reads are not possible } } // Make an adjustment: unpacking partially covered columns is not // possible. field->table is populated when called through // Rdb_key_def::setup, but not during ha_rocksdb::index_flags. - if (field->table) - { + if (field->table) { // Get the original Field object and compare lengths. If this key part is // a prefix of a column, then we can't do index-only scans. 
- if (field->table->field[field->field_index]->field_length != key_length) - { - m_unpack_func= nullptr; - m_make_unpack_info_func= nullptr; - m_unpack_info_stores_value= true; - res= false; + if (field->table->field[field->field_index]->field_length != key_length) { + m_unpack_func = nullptr; + m_make_unpack_info_func = nullptr; + m_unpack_info_stores_value = true; + res = false; } - } - else - { - if (field->field_length != key_length) - { - m_unpack_func= nullptr; - m_make_unpack_info_func= nullptr; - m_unpack_info_stores_value= true; - res= false; + } else { + if (field->field_length != key_length) { + m_unpack_func = nullptr; + m_make_unpack_info_func = nullptr; + m_unpack_info_stores_value = true; + res = false; } } } return res; } - -Field *Rdb_field_packing::get_field_in_table(const TABLE* const tbl) const -{ +Field *Rdb_field_packing::get_field_in_table(const TABLE *const tbl) const { return tbl->key_info[m_keynr].key_part[m_key_part].field; } - void Rdb_field_packing::fill_hidden_pk_val(uchar **dst, - const longlong &hidden_pk_id) const -{ + const longlong &hidden_pk_id) const { DBUG_ASSERT(m_max_image_len == 8); String to; @@ -2820,27 +2554,24 @@ void Rdb_field_packing::fill_hidden_pk_val(uchar **dst, *dst += m_max_image_len; } - /////////////////////////////////////////////////////////////////////////////////////////// // Rdb_ddl_manager /////////////////////////////////////////////////////////////////////////////////////////// -Rdb_tbl_def::~Rdb_tbl_def() -{ - auto ddl_manager= rdb_get_ddl_manager(); +Rdb_tbl_def::~Rdb_tbl_def() { + auto ddl_manager = rdb_get_ddl_manager(); /* Don't free key definitions */ - if (m_key_descr_arr) - { - for (uint i= 0; i < m_key_count; i++) { + if (m_key_descr_arr) { + for (uint i = 0; i < m_key_count; i++) { if (ddl_manager && m_key_descr_arr[i]) { ddl_manager->erase_index_num(m_key_descr_arr[i]->get_gl_index_id()); } - m_key_descr_arr[i]= nullptr; + m_key_descr_arr[i] = nullptr; } delete[] m_key_descr_arr; - 
m_key_descr_arr= nullptr; + m_key_descr_arr = nullptr; } } @@ -2855,24 +2586,22 @@ Rdb_tbl_def::~Rdb_tbl_def() ( cf_id, index_nr ) */ -bool Rdb_tbl_def::put_dict(Rdb_dict_manager* const dict, - rocksdb::WriteBatch* const batch, - uchar* const key, const size_t &keylen) -{ +bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict, + rocksdb::WriteBatch *const batch, uchar *const key, + const size_t &keylen) { StringBuffer<8 * Rdb_key_def::PACKED_SIZE> indexes; indexes.alloc(Rdb_key_def::VERSION_SIZE + m_key_count * Rdb_key_def::PACKED_SIZE * 2); rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION); - for (uint i = 0; i < m_key_count; i++) - { - const Rdb_key_def& kd= *m_key_descr_arr[i]; + for (uint i = 0; i < m_key_count; i++) { + const Rdb_key_def &kd = *m_key_descr_arr[i]; const uchar flags = - (kd.m_is_reverse_cf ? Rdb_key_def::REVERSE_CF_FLAG : 0) | - (kd.m_is_auto_cf ? Rdb_key_def::AUTO_CF_FLAG : 0); + (kd.m_is_reverse_cf ? Rdb_key_def::REVERSE_CF_FLAG : 0) | + (kd.m_is_auto_cf ? Rdb_key_def::AUTO_CF_FLAG : 0); - const uint cf_id= kd.get_cf()->GetID(); + const uint cf_id = kd.get_cf()->GetID(); /* If cf_id already exists, cf_flags must be the same. To prevent race condition, reading/modifying/committing CF flags @@ -2881,19 +2610,16 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager* const dict, control, we can switch to use it and removing mutex. */ uint existing_cf_flags; - if (dict->get_cf_flags(cf_id, &existing_cf_flags)) - { - if (existing_cf_flags != flags) - { + if (dict->get_cf_flags(cf_id, &existing_cf_flags)) { + if (existing_cf_flags != flags) { my_printf_error(ER_UNKNOWN_ERROR, "Column Family Flag is different from existing flag. 
" "Assign a new CF flag, or do not change existing " - "CF flag.", MYF(0)); + "CF flag.", + MYF(0)); return true; } - } - else - { + } else { dict->add_cf_flags(batch, cf_id, flags); } @@ -2904,37 +2630,33 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager* const dict, kd.m_index_number, cf_id); } - const rocksdb::Slice skey((char*)key, keylen); + const rocksdb::Slice skey((char *)key, keylen); const rocksdb::Slice svalue(indexes.c_ptr(), indexes.length()); dict->put_key(batch, skey, svalue); return false; } -void Rdb_tbl_def::check_if_is_mysql_system_table() -{ +void Rdb_tbl_def::check_if_is_mysql_system_table() { static const char *const system_dbs[] = { - "mysql", - "performance_schema", - "information_schema", + "mysql", "performance_schema", "information_schema", }; - m_is_mysql_system_table= false; + m_is_mysql_system_table = false; for (uint ii = 0; ii < array_elements(system_dbs); ii++) { if (strcmp(m_dbname.c_str(), system_dbs[ii]) == 0) { - m_is_mysql_system_table= true; + m_is_mysql_system_table = true; break; } } } -void Rdb_tbl_def::set_name(const std::string& name) -{ - int err __attribute__((__unused__)); +void Rdb_tbl_def::set_name(const std::string &name) { + int err MY_ATTRIBUTE((__unused__)); - m_dbname_tablename= name; - err= rdb_split_normalized_tablename(name, &m_dbname, &m_tablename, - &m_partition); + m_dbname_tablename = name; + err = rdb_split_normalized_tablename(name, &m_dbname, &m_tablename, + &m_partition); DBUG_ASSERT(err == 0); check_if_is_mysql_system_table(); @@ -2946,68 +2668,53 @@ void Rdb_tbl_def::set_name(const std::string& name) It manufactures a key (db+table name in our case) from a record (Rdb_tbl_def in our case). 
*/ -const uchar* Rdb_ddl_manager::get_hash_key( - Rdb_tbl_def* const rec, size_t* const length, - my_bool not_used __attribute__((__unused__))) -{ - const std::string& dbname_tablename= rec->full_tablename(); - *length= dbname_tablename.size(); - return reinterpret_cast<const uchar*>(dbname_tablename.c_str()); +const uchar * +Rdb_ddl_manager::get_hash_key(Rdb_tbl_def *const rec, size_t *const length, + my_bool not_used MY_ATTRIBUTE((__unused__))) { + const std::string &dbname_tablename = rec->full_tablename(); + *length = dbname_tablename.size(); + return reinterpret_cast<const uchar *>(dbname_tablename.c_str()); } - /* Static function of type void (*my_hash_free_element_func_t)(void*) that gets invoked by the m_ddl_hash object of type my_core::HASH. It deletes a record (Rdb_tbl_def in our case). */ -void Rdb_ddl_manager::free_hash_elem(void* const data) -{ - Rdb_tbl_def* elem= reinterpret_cast<Rdb_tbl_def*>(data); +void Rdb_ddl_manager::free_hash_elem(void *const data) { + Rdb_tbl_def *elem = reinterpret_cast<Rdb_tbl_def *>(data); delete elem; } -void Rdb_ddl_manager::erase_index_num(const GL_INDEX_ID &gl_index_id) -{ +void Rdb_ddl_manager::erase_index_num(const GL_INDEX_ID &gl_index_id) { m_index_num_to_keydef.erase(gl_index_id); } - -namespace // anonymous namespace = not visible outside this source file +namespace // anonymous namespace = not visible outside this source file { -struct Rdb_validate_tbls : public Rdb_tables_scanner -{ - using tbl_info_t= std::pair<std::string, bool>; - using tbl_list_t= std::map<std::string, std::set<tbl_info_t>>; +struct Rdb_validate_tbls : public Rdb_tables_scanner { + using tbl_info_t = std::pair<std::string, bool>; + using tbl_list_t = std::map<std::string, std::set<tbl_info_t>>; tbl_list_t m_list; - int add_table(Rdb_tbl_def* tdef) override; + int add_table(Rdb_tbl_def *tdef) override; - bool compare_to_actual_tables( - const std::string& datadir, - bool* has_errors); + bool compare_to_actual_tables(const std::string 
&datadir, bool *has_errors); - bool scan_for_frms( - const std::string& datadir, - const std::string& dbname, - bool* has_errors); + bool scan_for_frms(const std::string &datadir, const std::string &dbname, + bool *has_errors); - bool check_frm_file( - const std::string& fullpath, - const std::string& dbname, - const std::string& tablename, - bool* has_errors); + bool check_frm_file(const std::string &fullpath, const std::string &dbname, + const std::string &tablename, bool *has_errors); }; -} // anonymous namespace - +} // anonymous namespace /* Get a list of tables that we expect to have .frm files for. This will use the information just read from the RocksDB data dictionary. */ -int Rdb_validate_tbls::add_table(Rdb_tbl_def* tdef) -{ +int Rdb_validate_tbls::add_table(Rdb_tbl_def *tdef) { DBUG_ASSERT(tdef != nullptr); /* Add the database/table into the list */ @@ -3015,19 +2722,17 @@ int Rdb_validate_tbls::add_table(Rdb_tbl_def* tdef) m_list[tdef->base_dbname()].insert( tbl_info_t(tdef->base_tablename(), is_partition)); - return 0; + return HA_EXIT_SUCCESS; } /* Access the .frm file for this dbname/tablename and see if it is a RocksDB table (or partition table). 
*/ -bool Rdb_validate_tbls::check_frm_file( - const std::string& fullpath, - const std::string& dbname, - const std::string& tablename, - bool* has_errors) -{ +bool Rdb_validate_tbls::check_frm_file(const std::string &fullpath, + const std::string &dbname, + const std::string &tablename, + bool *has_errors) { /* Check this .frm file to see what engine it uses */ String fullfilename(fullpath.c_str(), &my_charset_bin); fullfilename.append(FN_DIRSEP); @@ -3042,42 +2747,34 @@ bool Rdb_validate_tbls::check_frm_file( */ enum legacy_db_type eng_type; frm_type_enum type = dd_frm_type(nullptr, fullfilename.c_ptr(), &eng_type); - if (type == FRMTYPE_ERROR) - { + if (type == FRMTYPE_ERROR) { sql_print_warning("RocksDB: Failed to open/read .from file: %s", - fullfilename.ptr()); + fullfilename.ptr()); return false; } - if (type == FRMTYPE_TABLE) - { + if (type == FRMTYPE_TABLE) { /* For a RocksDB table do we have a reference in the data dictionary? */ - if (eng_type == DB_TYPE_ROCKSDB) - { + if (eng_type == DB_TYPE_ROCKSDB) { /* Attempt to remove the table entry from the list of tables. If this fails then we know we had a .frm file that wasn't registered in RocksDB. */ tbl_info_t element(tablename, false); - if (m_list.count(dbname) == 0 || - m_list[dbname].erase(element) == 0) - { + if (m_list.count(dbname) == 0 || m_list[dbname].erase(element) == 0) { sql_print_warning("RocksDB: Schema mismatch - " "A .frm file exists for table %s.%s, " "but that table is not registered in RocksDB", dbname.c_str(), tablename.c_str()); *has_errors = true; } - } - else if (eng_type == DB_TYPE_PARTITION_DB) - { + } else if (eng_type == DB_TYPE_PARTITION_DB) { /* For partition tables, see if it is in the m_list as a partition, but don't generate an error if it isn't there - we don't know that the .frm is for RocksDB. 
*/ - if (m_list.count(dbname) > 0) - { + if (m_list.count(dbname) > 0) { m_list[dbname].erase(tbl_info_t(tablename, true)); } } @@ -3087,38 +2784,32 @@ bool Rdb_validate_tbls::check_frm_file( } /* Scan the database subdirectory for .frm files */ -bool Rdb_validate_tbls::scan_for_frms( - const std::string& datadir, - const std::string& dbname, - bool* has_errors) -{ - bool result = true; - std::string fullpath = datadir + dbname; - struct st_my_dir* dir_info = my_dir(fullpath.c_str(), MYF(MY_DONT_SORT)); +bool Rdb_validate_tbls::scan_for_frms(const std::string &datadir, + const std::string &dbname, + bool *has_errors) { + bool result = true; + std::string fullpath = datadir + dbname; + struct st_my_dir *dir_info = my_dir(fullpath.c_str(), MYF(MY_DONT_SORT)); /* Access the directory */ - if (dir_info == nullptr) - { + if (dir_info == nullptr) { sql_print_warning("RocksDB: Could not open database directory: %s", - fullpath.c_str()); + fullpath.c_str()); return false; } /* Scan through the files in the directory */ - struct fileinfo* file_info = dir_info->dir_entry; - for (uint ii = 0; ii < dir_info->number_off_files; ii++, file_info++) - { + struct fileinfo *file_info = dir_info->dir_entry; + for (uint ii = 0; ii < dir_info->number_off_files; ii++, file_info++) { /* Find .frm files that are not temp files (those that start with '#') */ - const char* ext = strrchr(file_info->name, '.'); + const char *ext = strrchr(file_info->name, '.'); if (ext != nullptr && !is_prefix(file_info->name, tmp_file_prefix) && - strcmp(ext, ".frm") == 0) - { - std::string tablename = std::string(file_info->name, - ext - file_info->name); + strcmp(ext, ".frm") == 0) { + std::string tablename = + std::string(file_info->name, ext - file_info->name); /* Check to see if the .frm file is from RocksDB */ - if (!check_frm_file(fullpath, dbname, tablename, has_errors)) - { + if (!check_frm_file(fullpath, dbname, tablename, has_errors)) { result = false; break; } @@ -3126,8 +2817,7 @@ bool 
Rdb_validate_tbls::scan_for_frms( } /* Remove any databases who have no more tables listed */ - if (m_list.count(dbname) == 1 && m_list[dbname].size() == 0) - { + if (m_list.count(dbname) == 1 && m_list[dbname].size() == 0) { m_list.erase(dbname); } @@ -3141,24 +2831,20 @@ bool Rdb_validate_tbls::scan_for_frms( Scan the datadir for all databases (subdirectories) and get a list of .frm files they contain */ -bool Rdb_validate_tbls::compare_to_actual_tables( - const std::string& datadir, - bool* has_errors) -{ - bool result = true; - struct st_my_dir* dir_info; - struct fileinfo* file_info; +bool Rdb_validate_tbls::compare_to_actual_tables(const std::string &datadir, + bool *has_errors) { + bool result = true; + struct st_my_dir *dir_info; + struct fileinfo *file_info; dir_info = my_dir(datadir.c_str(), MYF(MY_DONT_SORT | MY_WANT_STAT)); - if (dir_info == nullptr) - { + if (dir_info == nullptr) { sql_print_warning("RocksDB: could not open datadir: %s", datadir.c_str()); return false; } file_info = dir_info->dir_entry; - for (uint ii = 0; ii < dir_info->number_off_files; ii++, file_info++) - { + for (uint ii = 0; ii < dir_info->number_off_files; ii++, file_info++) { /* Ignore files/dirs starting with '.' 
*/ if (file_info->name[0] == '.') continue; @@ -3168,8 +2854,7 @@ bool Rdb_validate_tbls::compare_to_actual_tables( continue; /* Scan all the .frm files in the directory */ - if (!scan_for_frms(datadir, file_info->name, has_errors)) - { + if (!scan_for_frms(datadir, file_info->name, has_errors)) { result = false; break; } @@ -3185,21 +2870,18 @@ bool Rdb_validate_tbls::compare_to_actual_tables( Validate that all the tables in the RocksDB database dictionary match the .frm files in the datdir */ -bool Rdb_ddl_manager::validate_schemas(void) -{ - bool has_errors= false; - const std::string datadir= std::string(mysql_real_data_home); +bool Rdb_ddl_manager::validate_schemas(void) { + bool has_errors = false; + const std::string datadir = std::string(mysql_real_data_home); Rdb_validate_tbls table_list; /* Get the list of tables from the database dictionary */ - if (scan_for_tables(&table_list) != 0) - { + if (scan_for_tables(&table_list) != 0) { return false; } /* Compare that to the list of actual .frm files */ - if (!table_list.compare_to_actual_tables(datadir, &has_errors)) - { + if (!table_list.compare_to_actual_tables(datadir, &has_errors)) { return false; } @@ -3207,14 +2889,12 @@ bool Rdb_ddl_manager::validate_schemas(void) Any tables left in the tables list are ones that are registered in RocksDB but don't have .frm files. 
*/ - for (const auto& db : table_list.m_list) - { - for (const auto& table : db.second) - { + for (const auto &db : table_list.m_list) { + for (const auto &table : db.second) { sql_print_warning("RocksDB: Schema mismatch - " "Table %s.%s is registered in RocksDB " - "but does not have a .frm file", db.first.c_str(), - table.first.c_str()); + "but does not have a .frm file", + db.first.c_str(), table.first.c_str()); has_errors = true; } } @@ -3222,111 +2902,99 @@ bool Rdb_ddl_manager::validate_schemas(void) return !has_errors; } -bool Rdb_ddl_manager::init(Rdb_dict_manager* const dict_arg, - Rdb_cf_manager* const cf_manager, - const uint32_t &validate_tables) -{ - const ulong TABLE_HASH_SIZE= 32; - m_dict= dict_arg; +bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg, + Rdb_cf_manager *const cf_manager, + const uint32_t &validate_tables) { + const ulong TABLE_HASH_SIZE = 32; + m_dict = dict_arg; mysql_rwlock_init(0, &m_rwlock); - (void) my_hash_init(&m_ddl_hash, - /*system_charset_info*/ &my_charset_bin, - TABLE_HASH_SIZE, 0, 0, - (my_hash_get_key) Rdb_ddl_manager::get_hash_key, - Rdb_ddl_manager::free_hash_elem, - 0); + (void)my_hash_init(&m_ddl_hash, + /*system_charset_info*/ &my_charset_bin, TABLE_HASH_SIZE, + 0, 0, (my_hash_get_key)Rdb_ddl_manager::get_hash_key, + Rdb_ddl_manager::free_hash_elem, 0); /* Read the data dictionary and populate the hash */ uchar ddl_entry[Rdb_key_def::INDEX_NUMBER_SIZE]; rdb_netbuf_store_index(ddl_entry, Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER); - const rocksdb::Slice ddl_entry_slice((char*)ddl_entry, - Rdb_key_def::INDEX_NUMBER_SIZE); + const rocksdb::Slice ddl_entry_slice((char *)ddl_entry, + Rdb_key_def::INDEX_NUMBER_SIZE); /* Reading data dictionary should always skip bloom filter */ - rocksdb::Iterator* it= m_dict->new_iterator(); - int i= 0; + rocksdb::Iterator *it = m_dict->new_iterator(); + int i = 0; - uint max_index_id_in_dict= 0; + uint max_index_id_in_dict = 0; m_dict->get_max_index_id(&max_index_id_in_dict); 
- for (it->Seek(ddl_entry_slice); it->Valid(); it->Next()) - { + for (it->Seek(ddl_entry_slice); it->Valid(); it->Next()) { const uchar *ptr; const uchar *ptr_end; - const rocksdb::Slice key= it->key(); - const rocksdb::Slice val= it->value(); + const rocksdb::Slice key = it->key(); + const rocksdb::Slice val = it->value(); if (key.size() >= Rdb_key_def::INDEX_NUMBER_SIZE && memcmp(key.data(), ddl_entry, Rdb_key_def::INDEX_NUMBER_SIZE)) break; - if (key.size() <= Rdb_key_def::INDEX_NUMBER_SIZE) - { + if (key.size() <= Rdb_key_def::INDEX_NUMBER_SIZE) { sql_print_error("RocksDB: Table_store: key has length %d (corruption?)", (int)key.size()); return true; } - Rdb_tbl_def* const tdef= - new Rdb_tbl_def(key, Rdb_key_def::INDEX_NUMBER_SIZE); + Rdb_tbl_def *const tdef = + new Rdb_tbl_def(key, Rdb_key_def::INDEX_NUMBER_SIZE); // Now, read the DDLs. - const int real_val_size= val.size() - Rdb_key_def::VERSION_SIZE; - if (real_val_size % Rdb_key_def::PACKED_SIZE*2) - { + const int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE; + if (real_val_size % Rdb_key_def::PACKED_SIZE * 2) { sql_print_error("RocksDB: Table_store: invalid keylist for table %s", tdef->full_tablename().c_str()); return true; } - tdef->m_key_count= real_val_size / (Rdb_key_def::PACKED_SIZE*2); - tdef->m_key_descr_arr= new std::shared_ptr<Rdb_key_def>[tdef->m_key_count]; + tdef->m_key_count = real_val_size / (Rdb_key_def::PACKED_SIZE * 2); + tdef->m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[tdef->m_key_count]; - ptr= reinterpret_cast<const uchar*>(val.data()); - const int version= rdb_netbuf_read_uint16(&ptr); - if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION) - { + ptr = reinterpret_cast<const uchar *>(val.data()); + const int version = rdb_netbuf_read_uint16(&ptr); + if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION) { sql_print_error("RocksDB: DDL ENTRY Version was not expected." 
"Expected: %d, Actual: %d", Rdb_key_def::DDL_ENTRY_INDEX_VERSION, version); return true; } - ptr_end= ptr + real_val_size; - for (uint keyno= 0; ptr < ptr_end; keyno++) - { + ptr_end = ptr + real_val_size; + for (uint keyno = 0; ptr < ptr_end; keyno++) { GL_INDEX_ID gl_index_id; rdb_netbuf_read_gl_index(&ptr, &gl_index_id); - uint16 m_index_dict_version= 0; - uchar m_index_type= 0; - uint16 kv_version= 0; - uint flags= 0; + uint16 m_index_dict_version = 0; + uchar m_index_type = 0; + uint16 kv_version = 0; + uint flags = 0; if (!m_dict->get_index_info(gl_index_id, &m_index_dict_version, - &m_index_type, &kv_version)) - { + &m_index_type, &kv_version)) { sql_print_error("RocksDB: Could not get index information " "for Index Number (%u,%u), table %s", gl_index_id.cf_id, gl_index_id.index_id, tdef->full_tablename().c_str()); return true; } - if (max_index_id_in_dict < gl_index_id.index_id) - { + if (max_index_id_in_dict < gl_index_id.index_id) { sql_print_error("RocksDB: Found max index id %u from data dictionary " "but also found larger index id %u from dictionary. 
" "This should never happen and possibly a bug.", max_index_id_in_dict, gl_index_id.index_id); return true; } - if (!m_dict->get_cf_flags(gl_index_id.cf_id, &flags)) - { + if (!m_dict->get_cf_flags(gl_index_id.cf_id, &flags)) { sql_print_error("RocksDB: Could not get Column Family Flags " "for CF Number %d, table %s", - gl_index_id.cf_id, - tdef->full_tablename().c_str()); + gl_index_id.cf_id, tdef->full_tablename().c_str()); return true; } - rocksdb::ColumnFamilyHandle* const cfh = - cf_manager->get_cf(gl_index_id.cf_id); + rocksdb::ColumnFamilyHandle *const cfh = + cf_manager->get_cf(gl_index_id.cf_id); DBUG_ASSERT(cfh != nullptr); /* @@ -3334,13 +3002,11 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager* const dict_arg, initialization requires that there is an open TABLE* where we could look at Field* objects and set max_length and other attributes */ - tdef->m_key_descr_arr[keyno]= - std::make_shared<Rdb_key_def>(gl_index_id.index_id, keyno, cfh, - m_index_dict_version, - m_index_type, kv_version, - flags & Rdb_key_def::REVERSE_CF_FLAG, - flags & Rdb_key_def::AUTO_CF_FLAG, "", - m_dict->get_stats(gl_index_id)); + tdef->m_key_descr_arr[keyno] = std::make_shared<Rdb_key_def>( + gl_index_id.index_id, keyno, cfh, m_index_dict_version, m_index_type, + kv_version, flags & Rdb_key_def::REVERSE_CF_FLAG, + flags & Rdb_key_def::AUTO_CF_FLAG, "", + m_dict->get_stats(gl_index_id)); } put(tdef); i++; @@ -3360,40 +3026,34 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager* const dict_arg, // index ids used by applications should not conflict with // data dictionary index ids - if (max_index_id_in_dict < Rdb_key_def::END_DICT_INDEX_ID) - { - max_index_id_in_dict= Rdb_key_def::END_DICT_INDEX_ID; + if (max_index_id_in_dict < Rdb_key_def::END_DICT_INDEX_ID) { + max_index_id_in_dict = Rdb_key_def::END_DICT_INDEX_ID; } - m_sequence.init(max_index_id_in_dict+1); + m_sequence.init(max_index_id_in_dict + 1); - if (!it->status().ok()) - { - const std::string s= it->status().ToString(); + if 
(!it->status().ok()) { + const std::string s = it->status().ToString(); sql_print_error("RocksDB: Table_store: load error: %s", s.c_str()); return true; } delete it; - sql_print_information("RocksDB: Table_store: loaded DDL data for %d tables", i); + sql_print_information("RocksDB: Table_store: loaded DDL data for %d tables", + i); return false; } - -Rdb_tbl_def* Rdb_ddl_manager::find(const std::string& table_name, - const bool &lock) -{ - if (lock) - { +Rdb_tbl_def *Rdb_ddl_manager::find(const std::string &table_name, + const bool &lock) { + if (lock) { mysql_rwlock_rdlock(&m_rwlock); } - Rdb_tbl_def* const rec= reinterpret_cast<Rdb_tbl_def*>( - my_hash_search(&m_ddl_hash, - reinterpret_cast<const uchar*>(table_name.c_str()), - table_name.size())); + Rdb_tbl_def *const rec = reinterpret_cast<Rdb_tbl_def *>(my_hash_search( + &m_ddl_hash, reinterpret_cast<const uchar *>(table_name.c_str()), + table_name.size())); - if (lock) - { + if (lock) { mysql_rwlock_unlock(&m_rwlock); } @@ -3404,22 +3064,18 @@ Rdb_tbl_def* Rdb_ddl_manager::find(const std::string& table_name, // lock on m_rwlock to make sure the Rdb_key_def is not discarded while we // are finding it. Copying it into 'ret' increments the count making sure // that the object will not be discarded until we are finished with it. 
-std::shared_ptr<const Rdb_key_def> Rdb_ddl_manager::safe_find( - GL_INDEX_ID gl_index_id) -{ +std::shared_ptr<const Rdb_key_def> +Rdb_ddl_manager::safe_find(GL_INDEX_ID gl_index_id) { std::shared_ptr<const Rdb_key_def> ret(nullptr); mysql_rwlock_rdlock(&m_rwlock); - auto it= m_index_num_to_keydef.find(gl_index_id); - if (it != m_index_num_to_keydef.end()) - { + auto it = m_index_num_to_keydef.find(gl_index_id); + if (it != m_index_num_to_keydef.end()) { const auto table_def = find(it->second.first, false); - if (table_def && it->second.second < table_def->m_key_count) - { - const auto &kd= table_def->m_key_descr_arr[it->second.second]; - if (kd->max_storage_fmt_length() != 0) - { + if (table_def && it->second.second < table_def->m_key_count) { + const auto &kd = table_def->m_key_descr_arr[it->second.second]; + if (kd->max_storage_fmt_length() != 0) { ret = kd; } } @@ -3431,10 +3087,9 @@ std::shared_ptr<const Rdb_key_def> Rdb_ddl_manager::safe_find( } // this method assumes at least read-only lock on m_rwlock -const std::shared_ptr<Rdb_key_def>& Rdb_ddl_manager::find( - GL_INDEX_ID gl_index_id) -{ - auto it= m_index_num_to_keydef.find(gl_index_id); +const std::shared_ptr<Rdb_key_def> & +Rdb_ddl_manager::find(GL_INDEX_ID gl_index_id) { + auto it = m_index_num_to_keydef.find(gl_index_id); if (it != m_index_num_to_keydef.end()) { auto table_def = find(it->second.first, false); if (table_def) { @@ -3450,11 +3105,10 @@ const std::shared_ptr<Rdb_key_def>& Rdb_ddl_manager::find( } void Rdb_ddl_manager::set_stats( - const std::unordered_map<GL_INDEX_ID, Rdb_index_stats>& stats) -{ + const std::unordered_map<GL_INDEX_ID, Rdb_index_stats> &stats) { mysql_rwlock_wrlock(&m_rwlock); for (auto src : stats) { - const auto& keydef = find(src.second.m_gl_index_id); + const auto &keydef = find(src.second.m_gl_index_id); if (keydef) { keydef->m_stats = src.second; m_stats2store[keydef->m_stats.m_gl_index_id] = keydef->m_stats; @@ -3464,35 +3118,29 @@ void Rdb_ddl_manager::set_stats( 
} void Rdb_ddl_manager::adjust_stats( - const std::vector<Rdb_index_stats>& new_data, - const std::vector<Rdb_index_stats>& deleted_data) -{ + const std::vector<Rdb_index_stats> &new_data, + const std::vector<Rdb_index_stats> &deleted_data) { mysql_rwlock_wrlock(&m_rwlock); int i = 0; - for (const auto& data : {new_data, deleted_data}) - { - for (const auto& src : data) - { - const auto& keydef= find(src.m_gl_index_id); - if (keydef) - { + for (const auto &data : {new_data, deleted_data}) { + for (const auto &src : data) { + const auto &keydef = find(src.m_gl_index_id); + if (keydef) { keydef->m_stats.merge(src, i == 0, keydef->max_storage_fmt_length()); m_stats2store[keydef->m_stats.m_gl_index_id] = keydef->m_stats; } } i++; } - const bool should_save_stats= !m_stats2store.empty(); + const bool should_save_stats = !m_stats2store.empty(); mysql_rwlock_unlock(&m_rwlock); - if (should_save_stats) - { + if (should_save_stats) { // Queue an async persist_stats(false) call to the background thread. 
rdb_queue_save_stats_request(); } } -void Rdb_ddl_manager::persist_stats(const bool &sync) -{ +void Rdb_ddl_manager::persist_stats(const bool &sync) { mysql_rwlock_wrlock(&m_rwlock); const auto local_stats2store = std::move(m_stats2store); m_stats2store.clear(); @@ -3501,12 +3149,11 @@ void Rdb_ddl_manager::persist_stats(const bool &sync) // Persist stats const std::unique_ptr<rocksdb::WriteBatch> wb = m_dict->begin(); std::vector<Rdb_index_stats> stats; - std::transform( - local_stats2store.begin(), local_stats2store.end(), - std::back_inserter(stats), - []( - const std::pair<GL_INDEX_ID, Rdb_index_stats>& s - ) {return s.second;}); + std::transform(local_stats2store.begin(), local_stats2store.end(), + std::back_inserter(stats), + [](const std::pair<GL_INDEX_ID, Rdb_index_stats> &s) { + return s.second; + }); m_dict->add_stats(wb.get(), stats); m_dict->commit(wb.get(), sync); } @@ -3516,32 +3163,28 @@ void Rdb_ddl_manager::persist_stats(const bool &sync) on-disk data dictionary. */ -int Rdb_ddl_manager::put_and_write(Rdb_tbl_def* const tbl, - rocksdb::WriteBatch* const batch) -{ +int Rdb_ddl_manager::put_and_write(Rdb_tbl_def *const tbl, + rocksdb::WriteBatch *const batch) { uchar buf[FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE]; - uint pos= 0; + uint pos = 0; rdb_netbuf_store_index(buf, Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER); - pos+= Rdb_key_def::INDEX_NUMBER_SIZE; + pos += Rdb_key_def::INDEX_NUMBER_SIZE; - const std::string& dbname_tablename= tbl->full_tablename(); + const std::string &dbname_tablename = tbl->full_tablename(); memcpy(buf + pos, dbname_tablename.c_str(), dbname_tablename.size()); pos += dbname_tablename.size(); int res; - if ((res= tbl->put_dict(m_dict, batch, buf, pos))) - { + if ((res = tbl->put_dict(m_dict, batch, buf, pos))) { return res; } - if ((res= put(tbl))) - { + if ((res = put(tbl))) { return res; } - return 0; + return HA_EXIT_SUCCESS; } - /* Return 0 - ok, other value - error */ /* TODO: This function modifies m_ddl_hash and 
m_index_num_to_keydef. @@ -3549,28 +3192,26 @@ int Rdb_ddl_manager::put_and_write(Rdb_tbl_def* const tbl, See the discussion here: https://reviews.facebook.net/D35925#inline-259167 Tracked by https://github.com/facebook/mysql-5.6/issues/33 */ -int Rdb_ddl_manager::put(Rdb_tbl_def* const tbl, const bool &lock) -{ +int Rdb_ddl_manager::put(Rdb_tbl_def *const tbl, const bool &lock) { Rdb_tbl_def *rec; my_bool result; - const std::string& dbname_tablename= tbl->full_tablename(); + const std::string &dbname_tablename = tbl->full_tablename(); if (lock) mysql_rwlock_wrlock(&m_rwlock); // We have to do this find because 'tbl' is not yet in the list. We need // to find the one we are replacing ('rec') - rec= find(dbname_tablename, false); - if (rec) - { + rec = find(dbname_tablename, false); + if (rec) { // this will free the old record. - my_hash_delete(&m_ddl_hash, reinterpret_cast<uchar*>(rec)); + my_hash_delete(&m_ddl_hash, reinterpret_cast<uchar *>(rec)); } - result= my_hash_insert(&m_ddl_hash, reinterpret_cast<uchar*>(tbl)); + result = my_hash_insert(&m_ddl_hash, reinterpret_cast<uchar *>(tbl)); - for (uint keyno= 0; keyno < tbl->m_key_count; keyno++) { - m_index_num_to_keydef[tbl->m_key_descr_arr[keyno]->get_gl_index_id()]= - std::make_pair(dbname_tablename, keyno); + for (uint keyno = 0; keyno < tbl->m_key_count; keyno++) { + m_index_num_to_keydef[tbl->m_key_descr_arr[keyno]->get_gl_index_id()] = + std::make_pair(dbname_tablename, keyno); } if (lock) @@ -3578,91 +3219,81 @@ int Rdb_ddl_manager::put(Rdb_tbl_def* const tbl, const bool &lock) return result; } - -void Rdb_ddl_manager::remove(Rdb_tbl_def* const tbl, - rocksdb::WriteBatch * const batch, - const bool &lock) -{ +void Rdb_ddl_manager::remove(Rdb_tbl_def *const tbl, + rocksdb::WriteBatch *const batch, + const bool &lock) { if (lock) mysql_rwlock_wrlock(&m_rwlock); uchar buf[FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE]; - uint pos= 0; + uint pos = 0; rdb_netbuf_store_index(buf, 
Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER); - pos+= Rdb_key_def::INDEX_NUMBER_SIZE; + pos += Rdb_key_def::INDEX_NUMBER_SIZE; - const std::string& dbname_tablename= tbl->full_tablename(); + const std::string &dbname_tablename = tbl->full_tablename(); memcpy(buf + pos, dbname_tablename.c_str(), dbname_tablename.size()); pos += dbname_tablename.size(); - const rocksdb::Slice tkey((char*)buf, pos); + const rocksdb::Slice tkey((char *)buf, pos); m_dict->delete_key(batch, tkey); /* The following will also delete the object: */ - my_hash_delete(&m_ddl_hash, reinterpret_cast<uchar*>(tbl)); + my_hash_delete(&m_ddl_hash, reinterpret_cast<uchar *>(tbl)); if (lock) mysql_rwlock_unlock(&m_rwlock); } - -bool Rdb_ddl_manager::rename(const std::string& from, const std::string& to, - rocksdb::WriteBatch* const batch) -{ +bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to, + rocksdb::WriteBatch *const batch) { Rdb_tbl_def *rec; Rdb_tbl_def *new_rec; - bool res= true; + bool res = true; uchar new_buf[FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE]; - uint new_pos= 0; + uint new_pos = 0; mysql_rwlock_wrlock(&m_rwlock); - if (!(rec= find(from, false))) - { + if (!(rec = find(from, false))) { mysql_rwlock_unlock(&m_rwlock); return true; } - new_rec= new Rdb_tbl_def(to); + new_rec = new Rdb_tbl_def(to); - new_rec->m_key_count= rec->m_key_count; - new_rec->m_auto_incr_val= - rec->m_auto_incr_val.load(std::memory_order_relaxed); - new_rec->m_key_descr_arr= rec->m_key_descr_arr; + new_rec->m_key_count = rec->m_key_count; + new_rec->m_auto_incr_val = + rec->m_auto_incr_val.load(std::memory_order_relaxed); + new_rec->m_key_descr_arr = rec->m_key_descr_arr; // so that it's not free'd when deleting the old rec - rec->m_key_descr_arr= nullptr; + rec->m_key_descr_arr = nullptr; // Create a new key rdb_netbuf_store_index(new_buf, Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER); - new_pos+= Rdb_key_def::INDEX_NUMBER_SIZE; + new_pos += Rdb_key_def::INDEX_NUMBER_SIZE; - const 
std::string& dbname_tablename= new_rec->full_tablename(); + const std::string &dbname_tablename = new_rec->full_tablename(); memcpy(new_buf + new_pos, dbname_tablename.c_str(), dbname_tablename.size()); new_pos += dbname_tablename.size(); // Create a key to add - if (!new_rec->put_dict(m_dict, batch, new_buf, new_pos)) - { + if (!new_rec->put_dict(m_dict, batch, new_buf, new_pos)) { remove(rec, batch, false); put(new_rec, false); - res= false; // ok + res = false; // ok } mysql_rwlock_unlock(&m_rwlock); return res; } - -void Rdb_ddl_manager::cleanup() -{ +void Rdb_ddl_manager::cleanup() { my_hash_free(&m_ddl_hash); mysql_rwlock_destroy(&m_rwlock); m_sequence.cleanup(); } - -int Rdb_ddl_manager::scan_for_tables(Rdb_tables_scanner* const tables_scanner) -{ +int Rdb_ddl_manager::scan_for_tables(Rdb_tables_scanner *const tables_scanner) { int i, ret; Rdb_tbl_def *rec; @@ -3670,12 +3301,11 @@ int Rdb_ddl_manager::scan_for_tables(Rdb_tables_scanner* const tables_scanner) mysql_rwlock_rdlock(&m_rwlock); - ret= 0; - i= 0; + ret = 0; + i = 0; - while ((rec = reinterpret_cast<Rdb_tbl_def*>(my_hash_element(&m_ddl_hash, - i)))) - { + while (( + rec = reinterpret_cast<Rdb_tbl_def *>(my_hash_element(&m_ddl_hash, i)))) { ret = tables_scanner->add_table(rec); if (ret) break; @@ -3686,25 +3316,21 @@ int Rdb_ddl_manager::scan_for_tables(Rdb_tables_scanner* const tables_scanner) return ret; } - /* Rdb_binlog_manager class implementation */ -bool Rdb_binlog_manager::init(Rdb_dict_manager* const dict_arg) -{ +bool Rdb_binlog_manager::init(Rdb_dict_manager *const dict_arg) { DBUG_ASSERT(dict_arg != nullptr); - m_dict= dict_arg; + m_dict = dict_arg; rdb_netbuf_store_index(m_key_buf, Rdb_key_def::BINLOG_INFO_INDEX_NUMBER); - m_key_slice = rocksdb::Slice(reinterpret_cast<char*>(m_key_buf), + m_key_slice = rocksdb::Slice(reinterpret_cast<char *>(m_key_buf), Rdb_key_def::INDEX_NUMBER_SIZE); return false; } -void Rdb_binlog_manager::cleanup() -{ -} +void Rdb_binlog_manager::cleanup() {} /** 
Set binlog name, pos and optionally gtid into WriteBatch. @@ -3717,19 +3343,17 @@ void Rdb_binlog_manager::cleanup() @param binlog_gtid Binlog max GTID @param batch WriteBatch */ -void Rdb_binlog_manager::update(const char* const binlog_name, +void Rdb_binlog_manager::update(const char *const binlog_name, const my_off_t binlog_pos, - const char* const binlog_max_gtid, - rocksdb::WriteBatchBase* const batch) -{ - if (binlog_name && binlog_pos) - { + const char *const binlog_max_gtid, + rocksdb::WriteBatchBase *const batch) { + if (binlog_name && binlog_pos) { // max binlog length (512) + binlog pos (4) + binlog gtid (57) < 1024 - const size_t RDB_MAX_BINLOG_INFO_LEN= 1024; - uchar value_buf[RDB_MAX_BINLOG_INFO_LEN]; - m_dict->put_key(batch, m_key_slice, - pack_value(value_buf, binlog_name, - binlog_pos, binlog_max_gtid)); + const size_t RDB_MAX_BINLOG_INFO_LEN = 1024; + uchar value_buf[RDB_MAX_BINLOG_INFO_LEN]; + m_dict->put_key( + batch, m_key_slice, + pack_value(value_buf, binlog_name, binlog_pos, binlog_max_gtid)); } } @@ -3742,20 +3366,17 @@ void Rdb_binlog_manager::update(const char* const binlog_name, true is binlog info was found (valid behavior) false otherwise */ -bool Rdb_binlog_manager::read(char* const binlog_name, - my_off_t* const binlog_pos, - char* const binlog_gtid) const -{ - bool ret= false; - if (binlog_name) - { +bool Rdb_binlog_manager::read(char *const binlog_name, + my_off_t *const binlog_pos, + char *const binlog_gtid) const { + bool ret = false; + if (binlog_name) { std::string value; - rocksdb::Status status= m_dict->get_value(m_key_slice, &value); - if(status.ok()) - { - if (!unpack_value((const uchar*)value.c_str(), - binlog_name, binlog_pos, binlog_gtid)) - ret= true; + rocksdb::Status status = m_dict->get_value(m_key_slice, &value); + if (status.ok()) { + if (!unpack_value((const uchar *)value.c_str(), binlog_name, binlog_pos, + binlog_gtid)) + ret = true; } } return ret; @@ -3770,13 +3391,11 @@ bool Rdb_binlog_manager::read(char* 
const binlog_name, @param binlog_gtid Binlog GTID @return rocksdb::Slice converted from buf and its length */ -rocksdb::Slice Rdb_binlog_manager::pack_value(uchar* const buf, - const char* const binlog_name, - const my_off_t &binlog_pos, - const char* const binlog_gtid - ) const -{ - uint pack_len= 0; +rocksdb::Slice +Rdb_binlog_manager::pack_value(uchar *const buf, const char *const binlog_name, + const my_off_t &binlog_pos, + const char *const binlog_gtid) const { + uint pack_len = 0; // store version rdb_netbuf_store_uint16(buf, Rdb_key_def::BINLOG_INFO_INDEX_NUMBER_VERSION); @@ -3785,31 +3404,30 @@ rocksdb::Slice Rdb_binlog_manager::pack_value(uchar* const buf, // store binlog file name length DBUG_ASSERT(strlen(binlog_name) <= FN_REFLEN); const uint16_t binlog_name_len = strlen(binlog_name); - rdb_netbuf_store_uint16(buf+pack_len, binlog_name_len); + rdb_netbuf_store_uint16(buf + pack_len, binlog_name_len); pack_len += sizeof(uint16); // store binlog file name - memcpy(buf+pack_len, binlog_name, binlog_name_len); + memcpy(buf + pack_len, binlog_name, binlog_name_len); pack_len += binlog_name_len; // store binlog pos - rdb_netbuf_store_uint32(buf+pack_len, binlog_pos); + rdb_netbuf_store_uint32(buf + pack_len, binlog_pos); pack_len += sizeof(uint32); // store binlog gtid length. // If gtid was not set, store 0 instead - const uint16_t binlog_gtid_len = binlog_gtid? strlen(binlog_gtid) : 0; - rdb_netbuf_store_uint16(buf+pack_len, binlog_gtid_len); + const uint16_t binlog_gtid_len = binlog_gtid ? 
strlen(binlog_gtid) : 0; + rdb_netbuf_store_uint16(buf + pack_len, binlog_gtid_len); pack_len += sizeof(uint16); - if (binlog_gtid_len > 0) - { + if (binlog_gtid_len > 0) { // store binlog gtid - memcpy(buf+pack_len, binlog_gtid, binlog_gtid_len); + memcpy(buf + pack_len, binlog_gtid, binlog_gtid_len); pack_len += binlog_gtid_len; } - return rocksdb::Slice((char*)buf, pack_len); + return rocksdb::Slice((char *)buf, pack_len); } /** @@ -3820,43 +3438,40 @@ rocksdb::Slice Rdb_binlog_manager::pack_value(uchar* const buf, @param[OUT] binlog_gtid Binlog GTID @return true on error */ -bool Rdb_binlog_manager::unpack_value(const uchar* const value, - char* const binlog_name, - my_off_t* const binlog_pos, - char* const binlog_gtid) const -{ - uint pack_len= 0; +bool Rdb_binlog_manager::unpack_value(const uchar *const value, + char *const binlog_name, + my_off_t *const binlog_pos, + char *const binlog_gtid) const { + uint pack_len = 0; DBUG_ASSERT(binlog_pos != nullptr); // read version - const uint16_t version= rdb_netbuf_to_uint16(value); + const uint16_t version = rdb_netbuf_to_uint16(value); pack_len += Rdb_key_def::VERSION_SIZE; if (version != Rdb_key_def::BINLOG_INFO_INDEX_NUMBER_VERSION) return true; // read binlog file name length - const uint16_t binlog_name_len= rdb_netbuf_to_uint16(value+pack_len); + const uint16_t binlog_name_len = rdb_netbuf_to_uint16(value + pack_len); pack_len += sizeof(uint16); - if (binlog_name_len) - { + if (binlog_name_len) { // read and set binlog name - memcpy(binlog_name, value+pack_len, binlog_name_len); - binlog_name[binlog_name_len]= '\0'; + memcpy(binlog_name, value + pack_len, binlog_name_len); + binlog_name[binlog_name_len] = '\0'; pack_len += binlog_name_len; // read and set binlog pos - *binlog_pos= rdb_netbuf_to_uint32(value+pack_len); + *binlog_pos = rdb_netbuf_to_uint32(value + pack_len); pack_len += sizeof(uint32); // read gtid length - const uint16_t binlog_gtid_len= rdb_netbuf_to_uint16(value+pack_len); + const uint16_t 
binlog_gtid_len = rdb_netbuf_to_uint16(value + pack_len); pack_len += sizeof(uint16); - if (binlog_gtid && binlog_gtid_len > 0) - { + if (binlog_gtid && binlog_gtid_len > 0) { // read and set gtid - memcpy(binlog_gtid, value+pack_len, binlog_gtid_len); - binlog_gtid[binlog_gtid_len]= '\0'; + memcpy(binlog_gtid, value + pack_len, binlog_gtid_len); + binlog_gtid[binlog_gtid_len] = '\0'; pack_len += binlog_gtid_len; } } @@ -3873,15 +3488,14 @@ bool Rdb_binlog_manager::unpack_value(const uchar* const value, @param[IN] write_batch Handle to storage engine writer. */ void Rdb_binlog_manager::update_slave_gtid_info( - const uint &id, const char* const db, const char* const gtid, - rocksdb::WriteBatchBase* const write_batch) -{ + const uint &id, const char *const db, const char *const gtid, + rocksdb::WriteBatchBase *const write_batch) { if (id && db && gtid) { // Make sure that if the slave_gtid_info table exists we have a // pointer to it via m_slave_gtid_info_tbl. if (!m_slave_gtid_info_tbl.load()) { m_slave_gtid_info_tbl.store( - rdb_get_ddl_manager()->find("mysql.slave_gtid_info")); + rdb_get_ddl_manager()->find("mysql.slave_gtid_info")); } if (!m_slave_gtid_info_tbl.load()) { // slave_gtid_info table is not present. Simply return. 
@@ -3889,26 +3503,26 @@ void Rdb_binlog_manager::update_slave_gtid_info( } DBUG_ASSERT(m_slave_gtid_info_tbl.load()->m_key_count == 1); - const std::shared_ptr<const Rdb_key_def>& kd= + const std::shared_ptr<const Rdb_key_def> &kd = m_slave_gtid_info_tbl.load()->m_key_descr_arr[0]; String value; // Build key - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE + 4]= {0}; - uchar* buf= key_buf; + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE + 4] = {0}; + uchar *buf = key_buf; rdb_netbuf_store_index(buf, kd->get_index_number()); buf += Rdb_key_def::INDEX_NUMBER_SIZE; rdb_netbuf_store_uint32(buf, id); buf += 4; const rocksdb::Slice key_slice = - rocksdb::Slice((const char*)key_buf, buf-key_buf); + rocksdb::Slice((const char *)key_buf, buf - key_buf); // Build value - uchar value_buf[128]= {0}; + uchar value_buf[128] = {0}; DBUG_ASSERT(gtid); - const uint db_len= strlen(db); - const uint gtid_len= strlen(gtid); - buf= value_buf; + const uint db_len = strlen(db); + const uint gtid_len = strlen(gtid); + buf = value_buf; // 1 byte used for flags. Empty here. 
buf++; @@ -3926,87 +3540,77 @@ void Rdb_binlog_manager::update_slave_gtid_info( memcpy(buf, gtid, gtid_len); buf += gtid_len; const rocksdb::Slice value_slice = - rocksdb::Slice((const char*)value_buf, buf-value_buf); + rocksdb::Slice((const char *)value_buf, buf - value_buf); write_batch->Put(kd->get_cf(), key_slice, value_slice); } } -bool Rdb_dict_manager::init(rocksdb::DB* const rdb_dict, - Rdb_cf_manager* const cf_manager) -{ +bool Rdb_dict_manager::init(rocksdb::DB *const rdb_dict, + Rdb_cf_manager *const cf_manager) { mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST); - m_db= rdb_dict; + m_db = rdb_dict; bool is_automatic; - m_system_cfh= cf_manager->get_or_create_cf(m_db, DEFAULT_SYSTEM_CF_NAME, - "", nullptr, &is_automatic); - rdb_netbuf_store_index(m_key_buf_max_index_id, - Rdb_key_def::MAX_INDEX_ID); - m_key_slice_max_index_id= rocksdb::Slice( - reinterpret_cast<char*>(m_key_buf_max_index_id), - Rdb_key_def::INDEX_NUMBER_SIZE); + m_system_cfh = cf_manager->get_or_create_cf(m_db, DEFAULT_SYSTEM_CF_NAME, "", + nullptr, &is_automatic); + rdb_netbuf_store_index(m_key_buf_max_index_id, Rdb_key_def::MAX_INDEX_ID); + m_key_slice_max_index_id = + rocksdb::Slice(reinterpret_cast<char *>(m_key_buf_max_index_id), + Rdb_key_def::INDEX_NUMBER_SIZE); resume_drop_indexes(); rollback_ongoing_index_creation(); return (m_system_cfh == nullptr); } -std::unique_ptr<rocksdb::WriteBatch> Rdb_dict_manager::begin() const -{ +std::unique_ptr<rocksdb::WriteBatch> Rdb_dict_manager::begin() const { return std::unique_ptr<rocksdb::WriteBatch>(new rocksdb::WriteBatch); } -void Rdb_dict_manager::put_key(rocksdb::WriteBatchBase* const batch, +void Rdb_dict_manager::put_key(rocksdb::WriteBatchBase *const batch, const rocksdb::Slice &key, - const rocksdb::Slice &value) const -{ + const rocksdb::Slice &value) const { batch->Put(m_system_cfh, key, value); } rocksdb::Status Rdb_dict_manager::get_value(const rocksdb::Slice &key, - std::string* const value) const -{ + std::string *const 
value) const { rocksdb::ReadOptions options; - options.total_order_seek= true; + options.total_order_seek = true; return m_db->Get(options, m_system_cfh, key, value); } void Rdb_dict_manager::delete_key(rocksdb::WriteBatchBase *batch, - const rocksdb::Slice &key) const -{ + const rocksdb::Slice &key) const { batch->Delete(m_system_cfh, key); } -rocksdb::Iterator* Rdb_dict_manager::new_iterator() const -{ +rocksdb::Iterator *Rdb_dict_manager::new_iterator() const { /* Reading data dictionary should always skip bloom filter */ rocksdb::ReadOptions read_options; - read_options.total_order_seek= true; + read_options.total_order_seek = true; return m_db->NewIterator(read_options, m_system_cfh); } -int Rdb_dict_manager::commit(rocksdb::WriteBatch* const batch, const bool &sync) -const -{ +int Rdb_dict_manager::commit(rocksdb::WriteBatch *const batch, + const bool &sync) const { if (!batch) - return 1; - int res= 0; + return HA_EXIT_FAILURE; + int res = 0; rocksdb::WriteOptions options; - options.sync= sync; - rocksdb::Status s= m_db->Write(options, batch); - res= !s.ok(); // we return true when something failed - if (res) - { + options.sync = sync; + rocksdb::Status s = m_db->Write(options, batch); + res = !s.ok(); // we return true when something failed + if (res) { rdb_handle_io_error(s, RDB_IO_ERROR_DICT_COMMIT); } batch->Clear(); return res; } -void Rdb_dict_manager::dump_index_id(uchar* const netbuf, +void Rdb_dict_manager::dump_index_id(uchar *const netbuf, Rdb_key_def::DATA_DICT_TYPE dict_type, - const GL_INDEX_ID &gl_index_id) -{ + const GL_INDEX_ID &gl_index_id) { rdb_netbuf_store_uint32(netbuf, dict_type); rdb_netbuf_store_uint32(netbuf + Rdb_key_def::INDEX_NUMBER_SIZE, gl_index_id.cf_id); @@ -4014,127 +3618,116 @@ void Rdb_dict_manager::dump_index_id(uchar* const netbuf, gl_index_id.index_id); } -void Rdb_dict_manager::delete_with_prefix(rocksdb::WriteBatch* const batch, - Rdb_key_def::DATA_DICT_TYPE dict_type, - const GL_INDEX_ID &gl_index_id) const -{ - 
uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE*3]= {0}; +void Rdb_dict_manager::delete_with_prefix( + rocksdb::WriteBatch *const batch, Rdb_key_def::DATA_DICT_TYPE dict_type, + const GL_INDEX_ID &gl_index_id) const { + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; dump_index_id(key_buf, dict_type, gl_index_id); - rocksdb::Slice key= rocksdb::Slice((char*)key_buf, sizeof(key_buf)); + rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf)); delete_key(batch, key); } void Rdb_dict_manager::add_or_update_index_cf_mapping( - rocksdb::WriteBatch* batch, - const uchar m_index_type, - const uint16_t kv_version, - const uint32_t index_id, - const uint32_t cf_id) const -{ - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE*3]= {0}; - uchar value_buf[256]= {0}; - GL_INDEX_ID gl_index_id= {cf_id, index_id}; + rocksdb::WriteBatch *batch, const uchar m_index_type, + const uint16_t kv_version, const uint32_t index_id, + const uint32_t cf_id) const { + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; + uchar value_buf[256] = {0}; + GL_INDEX_ID gl_index_id = {cf_id, index_id}; dump_index_id(key_buf, Rdb_key_def::INDEX_INFO, gl_index_id); - const rocksdb::Slice key= rocksdb::Slice((char*)key_buf, sizeof(key_buf)); + const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf)); - uchar* ptr= value_buf; + uchar *ptr = value_buf; rdb_netbuf_store_uint16(ptr, Rdb_key_def::INDEX_INFO_VERSION_LATEST); - ptr+= 2; + ptr += 2; rdb_netbuf_store_byte(ptr, m_index_type); - ptr+= 1; + ptr += 1; rdb_netbuf_store_uint16(ptr, kv_version); - ptr+= 2; + ptr += 2; - const rocksdb::Slice value= rocksdb::Slice((char*)value_buf, ptr-value_buf); + const rocksdb::Slice value = + rocksdb::Slice((char *)value_buf, ptr - value_buf); batch->Put(m_system_cfh, key, value); } -void Rdb_dict_manager::add_cf_flags(rocksdb::WriteBatch* const batch, +void Rdb_dict_manager::add_cf_flags(rocksdb::WriteBatch *const batch, const uint32_t &cf_id, - const uint32_t &cf_flags) const 
-{ - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE*2]= {0}; - uchar value_buf[Rdb_key_def::VERSION_SIZE+ - Rdb_key_def::INDEX_NUMBER_SIZE]= {0}; + const uint32_t &cf_flags) const { + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2] = {0}; + uchar value_buf[Rdb_key_def::VERSION_SIZE + Rdb_key_def::INDEX_NUMBER_SIZE] = + {0}; rdb_netbuf_store_uint32(key_buf, Rdb_key_def::CF_DEFINITION); rdb_netbuf_store_uint32(key_buf + Rdb_key_def::INDEX_NUMBER_SIZE, cf_id); - const rocksdb::Slice key= rocksdb::Slice((char*)key_buf, sizeof(key_buf)); + const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf)); rdb_netbuf_store_uint16(value_buf, Rdb_key_def::CF_DEFINITION_VERSION); rdb_netbuf_store_uint32(value_buf + Rdb_key_def::VERSION_SIZE, cf_flags); - const rocksdb::Slice value= - rocksdb::Slice((char*)value_buf, sizeof(value_buf)); + const rocksdb::Slice value = + rocksdb::Slice((char *)value_buf, sizeof(value_buf)); batch->Put(m_system_cfh, key, value); } -void Rdb_dict_manager::delete_index_info(rocksdb::WriteBatch* batch, - const GL_INDEX_ID &gl_index_id) const -{ +void Rdb_dict_manager::delete_index_info(rocksdb::WriteBatch *batch, + const GL_INDEX_ID &gl_index_id) const { delete_with_prefix(batch, Rdb_key_def::INDEX_INFO, gl_index_id); } - bool Rdb_dict_manager::get_index_info(const GL_INDEX_ID &gl_index_id, uint16_t *m_index_dict_version, uchar *m_index_type, - uint16_t *kv_version) const -{ - bool found= false; - bool error= false; + uint16_t *kv_version) const { + bool found = false; + bool error = false; std::string value; - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE*3]= {0}; + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; dump_index_id(key_buf, Rdb_key_def::INDEX_INFO, gl_index_id); - const rocksdb::Slice &key= rocksdb::Slice((char*)key_buf, sizeof(key_buf)); - - const rocksdb::Status &status= get_value(key, &value); - if (status.ok()) - { - const uchar* const val= (const uchar*)value.c_str(); - const uchar* ptr= val; - 
*m_index_dict_version= rdb_netbuf_to_uint16(val); - *kv_version= 0; - *m_index_type= 0; - ptr+= 2; + const rocksdb::Slice &key = rocksdb::Slice((char *)key_buf, sizeof(key_buf)); + + const rocksdb::Status &status = get_value(key, &value); + if (status.ok()) { + const uchar *const val = (const uchar *)value.c_str(); + const uchar *ptr = val; + *m_index_dict_version = rdb_netbuf_to_uint16(val); + *kv_version = 0; + *m_index_type = 0; + ptr += 2; switch (*m_index_dict_version) { case Rdb_key_def::INDEX_INFO_VERSION_VERIFY_KV_FORMAT: case Rdb_key_def::INDEX_INFO_VERSION_GLOBAL_ID: - *m_index_type= rdb_netbuf_to_byte(ptr); - ptr+= 1; - *kv_version= rdb_netbuf_to_uint16(ptr); - found= true; + *m_index_type = rdb_netbuf_to_byte(ptr); + ptr += 1; + *kv_version = rdb_netbuf_to_uint16(ptr); + found = true; break; default: - error= true; + error = true; break; } - switch (*m_index_type) - { + switch (*m_index_type) { case Rdb_key_def::INDEX_TYPE_PRIMARY: - case Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY: - { - error= *kv_version > Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST; + case Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY: { + error = *kv_version > Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST; break; } case Rdb_key_def::INDEX_TYPE_SECONDARY: - error= *kv_version > Rdb_key_def::SECONDARY_FORMAT_VERSION_LATEST; + error = *kv_version > Rdb_key_def::SECONDARY_FORMAT_VERSION_LATEST; break; default: - error= true; + error = true; break; } } - if (error) - { + if (error) { // NO_LINT_DEBUG sql_print_error("RocksDB: Found invalid key version number (%u, %u, %u) " "from data dictionary. 
This should never happen " - "and it may be a bug.", *m_index_dict_version, - *m_index_type, *kv_version); + "and it may be a bug.", + *m_index_dict_version, *m_index_type, *kv_version); abort_with_stack_traces(); } @@ -4142,24 +3735,21 @@ bool Rdb_dict_manager::get_index_info(const GL_INDEX_ID &gl_index_id, } bool Rdb_dict_manager::get_cf_flags(const uint32_t &cf_id, - uint32_t* const cf_flags) const -{ - bool found= false; + uint32_t *const cf_flags) const { + bool found = false; std::string value; - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE*2]= {0}; + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2] = {0}; rdb_netbuf_store_uint32(key_buf, Rdb_key_def::CF_DEFINITION); rdb_netbuf_store_uint32(key_buf + Rdb_key_def::INDEX_NUMBER_SIZE, cf_id); - const rocksdb::Slice key= rocksdb::Slice((char*)key_buf, sizeof(key_buf)); - - const rocksdb::Status status= get_value(key, &value); - if (status.ok()) - { - const uchar* val= (const uchar*)value.c_str(); - uint16_t version= rdb_netbuf_to_uint16(val); - if (version == Rdb_key_def::CF_DEFINITION_VERSION) - { - *cf_flags= rdb_netbuf_to_uint32(val+Rdb_key_def::VERSION_SIZE); - found= true; + const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf)); + + const rocksdb::Status status = get_value(key, &value); + if (status.ok()) { + const uchar *val = (const uchar *)value.c_str(); + uint16_t version = rdb_netbuf_to_uint16(val); + if (version == Rdb_key_def::CF_DEFINITION_VERSION) { + *cf_flags = rdb_netbuf_to_uint32(val + Rdb_key_def::VERSION_SIZE); + found = true; } } return found; @@ -4171,22 +3761,20 @@ bool Rdb_dict_manager::get_cf_flags(const uint32_t &cf_id, ongoing creation. 
*/ void Rdb_dict_manager::get_ongoing_index_operation( - std::vector<GL_INDEX_ID>* const gl_index_ids, - Rdb_key_def::DATA_DICT_TYPE dd_type) const -{ + std::unordered_set<GL_INDEX_ID> *gl_index_ids, + Rdb_key_def::DATA_DICT_TYPE dd_type) const { DBUG_ASSERT(dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING || dd_type == Rdb_key_def::DDL_CREATE_INDEX_ONGOING); uchar index_buf[Rdb_key_def::INDEX_NUMBER_SIZE]; rdb_netbuf_store_uint32(index_buf, dd_type); - const rocksdb::Slice index_slice(reinterpret_cast<char*>(index_buf), - Rdb_key_def::INDEX_NUMBER_SIZE); + const rocksdb::Slice index_slice(reinterpret_cast<char *>(index_buf), + Rdb_key_def::INDEX_NUMBER_SIZE); - rocksdb::Iterator* it= new_iterator(); - for (it->Seek(index_slice); it->Valid(); it->Next()) - { - rocksdb::Slice key= it->key(); - const uchar* const ptr= (const uchar*)key.data(); + rocksdb::Iterator *it = new_iterator(); + for (it->Seek(index_slice); it->Valid(); it->Next()) { + rocksdb::Slice key = it->key(); + const uchar *const ptr = (const uchar *)key.data(); /* Ongoing drop/create index operations require key to be of the form: @@ -4196,8 +3784,7 @@ void Rdb_dict_manager::get_ongoing_index_operation( ddl_type with different format. */ if (key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3 || - rdb_netbuf_to_uint32(ptr) != dd_type) - { + rdb_netbuf_to_uint32(ptr) != dd_type) { break; } @@ -4205,10 +3792,11 @@ void Rdb_dict_manager::get_ongoing_index_operation( // Rdb_key_def::DDL_DROP_INDEX_ONGOING_VERSION = 1 as a value. // If increasing version number, we need to add version check logic here. 
GL_INDEX_ID gl_index_id; - gl_index_id.cf_id= rdb_netbuf_to_uint32(ptr+Rdb_key_def::INDEX_NUMBER_SIZE); - gl_index_id.index_id= rdb_netbuf_to_uint32( - ptr + 2 * Rdb_key_def::INDEX_NUMBER_SIZE); - gl_index_ids->push_back(gl_index_id); + gl_index_id.cf_id = + rdb_netbuf_to_uint32(ptr + Rdb_key_def::INDEX_NUMBER_SIZE); + gl_index_id.index_id = + rdb_netbuf_to_uint32(ptr + 2 * Rdb_key_def::INDEX_NUMBER_SIZE); + gl_index_ids->insert(gl_index_id); } delete it; } @@ -4219,22 +3807,19 @@ void Rdb_dict_manager::get_ongoing_index_operation( or not. */ bool Rdb_dict_manager::is_index_operation_ongoing( - const GL_INDEX_ID& gl_index_id, - Rdb_key_def::DATA_DICT_TYPE dd_type) const -{ + const GL_INDEX_ID &gl_index_id, Rdb_key_def::DATA_DICT_TYPE dd_type) const { DBUG_ASSERT(dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING || dd_type == Rdb_key_def::DDL_CREATE_INDEX_ONGOING); - bool found= false; + bool found = false; std::string value; - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE*3]= {0}; + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; dump_index_id(key_buf, dd_type, gl_index_id); - const rocksdb::Slice key= rocksdb::Slice((char*)key_buf, sizeof(key_buf)); + const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf)); - const rocksdb::Status status= get_value(key, &value); - if (status.ok()) - { - found= true; + const rocksdb::Status status = get_value(key, &value); + if (status.ok()) { + found = true; } return found; } @@ -4244,32 +3829,27 @@ bool Rdb_dict_manager::is_index_operation_ongoing( by drop_index_thread, or to track online index creation. 
*/ void Rdb_dict_manager::start_ongoing_index_operation( - rocksdb::WriteBatch* const batch, - const GL_INDEX_ID& gl_index_id, - Rdb_key_def::DATA_DICT_TYPE dd_type) const -{ + rocksdb::WriteBatch *const batch, const GL_INDEX_ID &gl_index_id, + Rdb_key_def::DATA_DICT_TYPE dd_type) const { DBUG_ASSERT(dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING || dd_type == Rdb_key_def::DDL_CREATE_INDEX_ONGOING); - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE*3]= {0}; - uchar value_buf[Rdb_key_def::VERSION_SIZE]= {0}; + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; + uchar value_buf[Rdb_key_def::VERSION_SIZE] = {0}; dump_index_id(key_buf, dd_type, gl_index_id); // version as needed - if (dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING) - { + if (dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING) { rdb_netbuf_store_uint16(value_buf, Rdb_key_def::DDL_DROP_INDEX_ONGOING_VERSION); - } - else - { + } else { rdb_netbuf_store_uint16(value_buf, Rdb_key_def::DDL_CREATE_INDEX_ONGOING_VERSION); } - const rocksdb::Slice key= rocksdb::Slice((char*)key_buf, sizeof(key_buf)); - const rocksdb::Slice value= - rocksdb::Slice((char*)value_buf, sizeof(value_buf)); + const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf)); + const rocksdb::Slice value = + rocksdb::Slice((char *)value_buf, sizeof(value_buf)); batch->Put(m_system_cfh, key, value); } @@ -4278,10 +3858,8 @@ void Rdb_dict_manager::start_ongoing_index_operation( completed dropping entire key/values of the index_id */ void Rdb_dict_manager::end_ongoing_index_operation( - rocksdb::WriteBatch* const batch, - const GL_INDEX_ID& gl_index_id, - Rdb_key_def::DATA_DICT_TYPE dd_type) const -{ + rocksdb::WriteBatch *const batch, const GL_INDEX_ID &gl_index_id, + Rdb_key_def::DATA_DICT_TYPE dd_type) const { DBUG_ASSERT(dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING || dd_type == Rdb_key_def::DDL_CREATE_INDEX_ONGOING); @@ -4292,9 +3870,8 @@ void Rdb_dict_manager::end_ongoing_index_operation( Returning true if there is no 
target index ids to be removed by drop_index_thread */ -bool Rdb_dict_manager::is_drop_index_empty() const -{ - std::vector<GL_INDEX_ID> gl_index_ids; +bool Rdb_dict_manager::is_drop_index_empty() const { + std::unordered_set<GL_INDEX_ID> gl_index_ids; get_ongoing_drop_indexes(&gl_index_ids); return gl_index_ids.empty(); } @@ -4304,13 +3881,11 @@ bool Rdb_dict_manager::is_drop_index_empty() const that dropping indexes started, and adding data dictionary so that all associated indexes to be removed */ -void Rdb_dict_manager::add_drop_table(std::shared_ptr<Rdb_key_def>* const key_descr, - const uint32 &n_keys, - rocksdb::WriteBatch* const batch) const -{ +void Rdb_dict_manager::add_drop_table( + std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 &n_keys, + rocksdb::WriteBatch *const batch) const { std::unordered_set<GL_INDEX_ID> dropped_index_ids; - for (uint32 i = 0; i < n_keys; i++) - { + for (uint32 i = 0; i < n_keys; i++) { dropped_index_ids.insert(key_descr[i]->get_gl_index_id()); } @@ -4323,11 +3898,9 @@ void Rdb_dict_manager::add_drop_table(std::shared_ptr<Rdb_key_def>* const key_de all associated indexes to be removed */ void Rdb_dict_manager::add_drop_index( - const std::unordered_set<GL_INDEX_ID>& gl_index_ids, - rocksdb::WriteBatch* const batch) const -{ - for (const auto& gl_index_id : gl_index_ids) - { + const std::unordered_set<GL_INDEX_ID> &gl_index_ids, + rocksdb::WriteBatch *const batch) const { + for (const auto &gl_index_id : gl_index_ids) { log_start_drop_index(gl_index_id, "Begin"); start_drop_index(batch, gl_index_id); } @@ -4339,14 +3912,12 @@ void Rdb_dict_manager::add_drop_index( indexes to be added. 
*/ void Rdb_dict_manager::add_create_index( - const std::unordered_set<GL_INDEX_ID>& gl_index_ids, - rocksdb::WriteBatch* const batch) const -{ - for (const auto& gl_index_id : gl_index_ids) - { + const std::unordered_set<GL_INDEX_ID> &gl_index_ids, + rocksdb::WriteBatch *const batch) const { + for (const auto &gl_index_id : gl_index_ids) { // NO_LINT_DEBUG sql_print_information("RocksDB: Begin index creation (%u,%u)", - gl_index_id.cf_id, gl_index_id.index_id); + gl_index_id.cf_id, gl_index_id.index_id); start_create_index(batch, gl_index_id); } } @@ -4356,30 +3927,41 @@ void Rdb_dict_manager::add_create_index( finished dropping any index, or at the completion of online index creation. */ void Rdb_dict_manager::finish_indexes_operation( - const std::unordered_set<GL_INDEX_ID>& gl_index_ids, - Rdb_key_def::DATA_DICT_TYPE dd_type) const -{ + const std::unordered_set<GL_INDEX_ID> &gl_index_ids, + Rdb_key_def::DATA_DICT_TYPE dd_type) const { DBUG_ASSERT(dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING || dd_type == Rdb_key_def::DDL_CREATE_INDEX_ONGOING); - const std::unique_ptr<rocksdb::WriteBatch> wb= begin(); - rocksdb::WriteBatch* const batch= wb.get(); + const std::unique_ptr<rocksdb::WriteBatch> wb = begin(); + rocksdb::WriteBatch *const batch = wb.get(); - for (const auto& gl_index_id : gl_index_ids) - { - if (is_index_operation_ongoing(gl_index_id, dd_type)) - { + std::unordered_set<GL_INDEX_ID> incomplete_create_indexes; + get_ongoing_create_indexes(&incomplete_create_indexes); + + for (const auto &gl_index_id : gl_index_ids) { + if (is_index_operation_ongoing(gl_index_id, dd_type)) { // NO_LINT_DEBUG sql_print_information("RocksDB: Finished %s (%u,%u)", - dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING ? - "filtering dropped index" : "index creation", - gl_index_id.cf_id, gl_index_id.index_id); + dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING + ? 
"filtering dropped index" + : "index creation", + gl_index_id.cf_id, gl_index_id.index_id); end_ongoing_index_operation(batch, gl_index_id, dd_type); + + /* + Remove the corresponding incomplete create indexes from data + dictionary as well + */ + if (dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING) { + if (incomplete_create_indexes.count(gl_index_id)) { + end_ongoing_index_operation(batch, gl_index_id, + Rdb_key_def::DDL_CREATE_INDEX_ONGOING); + } + } } - if (dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING) - { + if (dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING) { delete_index_info(batch, gl_index_id); } } @@ -4391,163 +3973,154 @@ void Rdb_dict_manager::finish_indexes_operation( Rdb_dict_manager (at startup). If there is any index ids that are drop ongoing, printing out messages for diagnostics purposes. */ -void Rdb_dict_manager::resume_drop_indexes() const -{ - std::vector<GL_INDEX_ID> gl_index_ids; +void Rdb_dict_manager::resume_drop_indexes() const { + std::unordered_set<GL_INDEX_ID> gl_index_ids; get_ongoing_drop_indexes(&gl_index_ids); - uint max_index_id_in_dict= 0; + uint max_index_id_in_dict = 0; get_max_index_id(&max_index_id_in_dict); - for (const auto& gl_index_id : gl_index_ids) - { + for (const auto &gl_index_id : gl_index_ids) { log_start_drop_index(gl_index_id, "Resume"); - if (max_index_id_in_dict < gl_index_id.index_id) - { + if (max_index_id_in_dict < gl_index_id.index_id) { sql_print_error("RocksDB: Found max index id %u from data dictionary " "but also found dropped index id (%u,%u) from drop_index " "dictionary. 
This should never happen and is possibly a " - "bug.", max_index_id_in_dict, gl_index_id.cf_id, + "bug.", + max_index_id_in_dict, gl_index_id.cf_id, gl_index_id.index_id); abort_with_stack_traces(); } } } -void Rdb_dict_manager::rollback_ongoing_index_creation() const -{ - const std::unique_ptr<rocksdb::WriteBatch> wb= begin(); - rocksdb::WriteBatch* const batch= wb.get(); +void Rdb_dict_manager::rollback_ongoing_index_creation() const { + const std::unique_ptr<rocksdb::WriteBatch> wb = begin(); + rocksdb::WriteBatch *const batch = wb.get(); - std::vector<GL_INDEX_ID> gl_index_ids; + std::unordered_set<GL_INDEX_ID> gl_index_ids; get_ongoing_create_indexes(&gl_index_ids); - for (const auto& gl_index_id : gl_index_ids) - { + for (const auto &gl_index_id : gl_index_ids) { // NO_LINT_DEBUG sql_print_information("RocksDB: Removing incomplete create index (%u,%u)", - gl_index_id.cf_id, gl_index_id.index_id); + gl_index_id.cf_id, gl_index_id.index_id); start_drop_index(batch, gl_index_id); - end_ongoing_index_operation(batch, gl_index_id, - Rdb_key_def::DDL_CREATE_INDEX_ONGOING); } commit(batch); } void Rdb_dict_manager::log_start_drop_table( - const std::shared_ptr<Rdb_key_def>* const key_descr, - const uint32 &n_keys, - const char* const log_action) const -{ + const std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 &n_keys, + const char *const log_action) const { for (uint32 i = 0; i < n_keys; i++) { log_start_drop_index(key_descr[i]->get_gl_index_id(), log_action); } } void Rdb_dict_manager::log_start_drop_index(GL_INDEX_ID gl_index_id, - const char* log_action) const -{ - uint16 m_index_dict_version= 0; - uchar m_index_type= 0; - uint16 kv_version= 0; - if (!get_index_info(gl_index_id, &m_index_dict_version, - &m_index_type, &kv_version)) - { - sql_print_error("RocksDB: Failed to get column family info " - "from index id (%u,%u). 
MyRocks data dictionary may " - "get corrupted.", gl_index_id.cf_id, gl_index_id.index_id); - abort_with_stack_traces(); + const char *log_action) const { + uint16 m_index_dict_version = 0; + uchar m_index_type = 0; + uint16 kv_version = 0; + + if (!get_index_info(gl_index_id, &m_index_dict_version, &m_index_type, + &kv_version)) { + /* + If we don't find the index info, it could be that it's because it was a + partially created index that isn't in the data dictionary yet that needs + to be rolled back. + */ + std::unordered_set<GL_INDEX_ID> incomplete_create_indexes; + get_ongoing_create_indexes(&incomplete_create_indexes); + + if (!incomplete_create_indexes.count(gl_index_id)) { + /* If it's not a partially created index, something is very wrong. */ + sql_print_error("RocksDB: Failed to get column family info " + "from index id (%u,%u). MyRocks data dictionary may " + "get corrupted.", + gl_index_id.cf_id, gl_index_id.index_id); + abort_with_stack_traces(); + } } sql_print_information("RocksDB: %s filtering dropped index (%u,%u)", log_action, gl_index_id.cf_id, gl_index_id.index_id); } -bool Rdb_dict_manager::get_max_index_id(uint32_t* const index_id) const -{ - bool found= false; +bool Rdb_dict_manager::get_max_index_id(uint32_t *const index_id) const { + bool found = false; std::string value; - const rocksdb::Status status= get_value(m_key_slice_max_index_id, &value); - if (status.ok()) - { - const uchar* const val= (const uchar*)value.c_str(); - const uint16_t &version= rdb_netbuf_to_uint16(val); - if (version == Rdb_key_def::MAX_INDEX_ID_VERSION) - { - *index_id= rdb_netbuf_to_uint32(val+Rdb_key_def::VERSION_SIZE); - found= true; + const rocksdb::Status status = get_value(m_key_slice_max_index_id, &value); + if (status.ok()) { + const uchar *const val = (const uchar *)value.c_str(); + const uint16_t &version = rdb_netbuf_to_uint16(val); + if (version == Rdb_key_def::MAX_INDEX_ID_VERSION) { + *index_id = rdb_netbuf_to_uint32(val + Rdb_key_def::VERSION_SIZE); + 
found = true; } } return found; } -bool Rdb_dict_manager::update_max_index_id(rocksdb::WriteBatch* const batch, - const uint32_t &index_id) const -{ +bool Rdb_dict_manager::update_max_index_id(rocksdb::WriteBatch *const batch, + const uint32_t &index_id) const { DBUG_ASSERT(batch != nullptr); - uint32_t old_index_id= -1; - if (get_max_index_id(&old_index_id)) - { - if (old_index_id > index_id) - { + uint32_t old_index_id = -1; + if (get_max_index_id(&old_index_id)) { + if (old_index_id > index_id) { sql_print_error("RocksDB: Found max index id %u from data dictionary " "but trying to update to older value %u. This should " - "never happen and possibly a bug.", old_index_id, - index_id); + "never happen and possibly a bug.", + old_index_id, index_id); return true; } } - uchar value_buf[Rdb_key_def::VERSION_SIZE + Rdb_key_def::INDEX_NUMBER_SIZE]= - {0}; + uchar value_buf[Rdb_key_def::VERSION_SIZE + Rdb_key_def::INDEX_NUMBER_SIZE] = + {0}; rdb_netbuf_store_uint16(value_buf, Rdb_key_def::MAX_INDEX_ID_VERSION); rdb_netbuf_store_uint32(value_buf + Rdb_key_def::VERSION_SIZE, index_id); - const rocksdb::Slice value= - rocksdb::Slice((char*)value_buf, sizeof(value_buf)); + const rocksdb::Slice value = + rocksdb::Slice((char *)value_buf, sizeof(value_buf)); batch->Put(m_system_cfh, m_key_slice_max_index_id, value); return false; } -void Rdb_dict_manager::add_stats(rocksdb::WriteBatch* const batch, - const std::vector<Rdb_index_stats>& stats) const -{ +void Rdb_dict_manager::add_stats( + rocksdb::WriteBatch *const batch, + const std::vector<Rdb_index_stats> &stats) const { DBUG_ASSERT(batch != nullptr); - for (const auto& it : stats) { - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE*3]= {0}; + for (const auto &it : stats) { + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; dump_index_id(key_buf, Rdb_key_def::INDEX_STATISTICS, it.m_gl_index_id); // IndexStats::materialize takes complete care of serialization including // storing the version - const auto value = 
Rdb_index_stats::materialize( - std::vector<Rdb_index_stats>{it}, 1.); - - batch->Put( - m_system_cfh, - rocksdb::Slice((char*)key_buf, sizeof(key_buf)), - value - ); + const auto value = + Rdb_index_stats::materialize(std::vector<Rdb_index_stats>{it}, 1.); + + batch->Put(m_system_cfh, rocksdb::Slice((char *)key_buf, sizeof(key_buf)), + value); } } -Rdb_index_stats Rdb_dict_manager::get_stats(GL_INDEX_ID gl_index_id) const -{ - uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE*3]= {0}; +Rdb_index_stats Rdb_dict_manager::get_stats(GL_INDEX_ID gl_index_id) const { + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; dump_index_id(key_buf, Rdb_key_def::INDEX_STATISTICS, gl_index_id); std::string value; - const rocksdb::Status status= get_value( - rocksdb::Slice(reinterpret_cast<char*>(key_buf), sizeof(key_buf)), - &value); - if (status.ok()) - { + const rocksdb::Status status = get_value( + rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf)), + &value); + if (status.ok()) { std::vector<Rdb_index_stats> v; // unmaterialize checks if the version matches - if (Rdb_index_stats::unmaterialize(value, &v)== 0 && v.size() == 1) - { + if (Rdb_index_stats::unmaterialize(value, &v) == 0 && v.size() == 1) { return v[0]; } } @@ -4555,18 +4128,17 @@ Rdb_index_stats Rdb_dict_manager::get_stats(GL_INDEX_ID gl_index_id) const return Rdb_index_stats(); } -uint -Rdb_seq_generator::get_and_update_next_number(Rdb_dict_manager* const dict) -{ +uint Rdb_seq_generator::get_and_update_next_number( + Rdb_dict_manager *const dict) { DBUG_ASSERT(dict != nullptr); uint res; mysql_mutex_lock(&m_mutex); - res= m_next_number++; + res = m_next_number++; - const std::unique_ptr<rocksdb::WriteBatch> wb= dict->begin(); - rocksdb::WriteBatch* const batch= wb.get(); + const std::unique_ptr<rocksdb::WriteBatch> wb = dict->begin(); + rocksdb::WriteBatch *const batch = wb.get(); DBUG_ASSERT(batch != nullptr); dict->update_max_index_id(batch, res); @@ -4577,4 +4149,4 @@ 
Rdb_seq_generator::get_and_update_next_number(Rdb_dict_manager* const dict) return res; } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h index 91cb9b8d563..b084752f601 100644 --- a/storage/rocksdb/rdb_datadic.h +++ b/storage/rocksdb/rdb_datadic.h @@ -56,15 +56,13 @@ class Rdb_ddl_manager; In order to accommodate both cases, we require both calls to be made and unpack_info is passed as context data between the two. */ -class Rdb_pack_field_context -{ - public: - Rdb_pack_field_context(const Rdb_pack_field_context&) = delete; - Rdb_pack_field_context& operator=(const Rdb_pack_field_context&) = delete; +class Rdb_pack_field_context { +public: + Rdb_pack_field_context(const Rdb_pack_field_context &) = delete; + Rdb_pack_field_context &operator=(const Rdb_pack_field_context &) = delete; - explicit Rdb_pack_field_context(Rdb_string_writer* const writer_arg) : - writer(writer_arg) - {} + explicit Rdb_pack_field_context(Rdb_string_writer *const writer_arg) + : writer(writer_arg) {} // NULL means we're not producing unpack_info. Rdb_string_writer *writer; @@ -76,51 +74,50 @@ struct Rdb_collation_codec; C-style "virtual table" allowing different handling of packing logic based on the field type. See Rdb_field_packing::setup() implementation. 
*/ -using rdb_make_unpack_info_t= void (*)(const Rdb_collation_codec *codec, - const Field *field, - Rdb_pack_field_context *pack_ctx); -using rdb_index_field_unpack_t= int (*)(Rdb_field_packing *fpi, Field *field, - uchar *field_ptr, - Rdb_string_reader *reader, - Rdb_string_reader *unpack_reader); -using rdb_index_field_skip_t= int (*)(const Rdb_field_packing *fpi, - const Field *field, - Rdb_string_reader *reader); -using rdb_index_field_pack_t= void (*)(Rdb_field_packing *fpi, Field *field, - uchar* buf, uchar **dst, - Rdb_pack_field_context *pack_ctx); - -const uint RDB_INVALID_KEY_LEN= uint(-1); +using rdb_make_unpack_info_t = void (*)(const Rdb_collation_codec *codec, + const Field *field, + Rdb_pack_field_context *pack_ctx); +using rdb_index_field_unpack_t = int (*)(Rdb_field_packing *fpi, Field *field, + uchar *field_ptr, + Rdb_string_reader *reader, + Rdb_string_reader *unpack_reader); +using rdb_index_field_skip_t = int (*)(const Rdb_field_packing *fpi, + const Field *field, + Rdb_string_reader *reader); +using rdb_index_field_pack_t = void (*)(Rdb_field_packing *fpi, Field *field, + uchar *buf, uchar **dst, + Rdb_pack_field_context *pack_ctx); + +const uint RDB_INVALID_KEY_LEN = uint(-1); /* How much one checksum occupies when stored in the record */ -const size_t RDB_CHECKSUM_SIZE= sizeof(uint32_t); +const size_t RDB_CHECKSUM_SIZE = sizeof(uint32_t); /* How much the checksum data occupies in record, in total. It is storing two checksums plus 1 tag-byte. */ -const size_t RDB_CHECKSUM_CHUNK_SIZE= 2 * RDB_CHECKSUM_SIZE + 1; +const size_t RDB_CHECKSUM_CHUNK_SIZE = 2 * RDB_CHECKSUM_SIZE + 1; /* Checksum data starts from CHECKSUM_DATA_TAG which is followed by two CRC32 checksums. */ -const char RDB_CHECKSUM_DATA_TAG= 0x01; +const char RDB_CHECKSUM_DATA_TAG = 0x01; /* Unpack data is variable length. It is a 1 tag-byte plus a two byte length field. The length field includes the header as well. 
*/ -const char RDB_UNPACK_DATA_TAG= 0x02; -const size_t RDB_UNPACK_DATA_LEN_SIZE= sizeof(uint16_t); -const size_t RDB_UNPACK_HEADER_SIZE= sizeof(RDB_UNPACK_DATA_TAG) + - RDB_UNPACK_DATA_LEN_SIZE; +const char RDB_UNPACK_DATA_TAG = 0x02; +const size_t RDB_UNPACK_DATA_LEN_SIZE = sizeof(uint16_t); +const size_t RDB_UNPACK_HEADER_SIZE = + sizeof(RDB_UNPACK_DATA_TAG) + RDB_UNPACK_DATA_LEN_SIZE; // Possible return values for rdb_index_field_unpack_t functions. enum { - UNPACK_SUCCESS= 0, - UNPACK_FAILURE= 1, - UNPACK_INFO_MISSING= 2, + UNPACK_SUCCESS = 0, + UNPACK_FAILURE = 1, }; /* @@ -163,62 +160,55 @@ enum { reads. */ -class Rdb_key_def -{ +class Rdb_key_def { public: /* Convert a key from KeyTupleFormat to mem-comparable form */ - uint pack_index_tuple(TABLE* const tbl, uchar* const pack_buffer, - uchar* const packed_tuple, - const uchar* const key_tuple, + uint pack_index_tuple(TABLE *const tbl, uchar *const pack_buffer, + uchar *const packed_tuple, const uchar *const key_tuple, const key_part_map &keypart_map) const; /* Convert a key from Table->record format to mem-comparable form */ - uint pack_record(const TABLE* const tbl, uchar* const pack_buffer, - const uchar* const record, - uchar* const packed_tuple, - Rdb_string_writer* const unpack_info, + uint pack_record(const TABLE *const tbl, uchar *const pack_buffer, + const uchar *const record, uchar *const packed_tuple, + Rdb_string_writer *const unpack_info, const bool &should_store_row_debug_checksums, - const longlong &hidden_pk_id= 0, uint n_key_parts= 0, - uint* const n_null_fields= nullptr) const; + const longlong &hidden_pk_id = 0, uint n_key_parts = 0, + uint *const n_null_fields = nullptr) const; /* Pack the hidden primary key into mem-comparable form. 
*/ uint pack_hidden_pk(const longlong &hidden_pk_id, - uchar* const packed_tuple) const; - int unpack_record(TABLE* const table, uchar* const buf, - const rocksdb::Slice* const packed_key, - const rocksdb::Slice* const unpack_info, - const bool &verify_row_debug_checksums) - const; - - static bool unpack_info_has_checksum(const rocksdb::Slice& unpack_info); + uchar *const packed_tuple) const; + int unpack_record(TABLE *const table, uchar *const buf, + const rocksdb::Slice *const packed_key, + const rocksdb::Slice *const unpack_info, + const bool &verify_row_debug_checksums) const; + + static bool unpack_info_has_checksum(const rocksdb::Slice &unpack_info); int compare_keys(const rocksdb::Slice *key1, const rocksdb::Slice *key2, - std::size_t* const column_index) const; + std::size_t *const column_index) const; - size_t key_length(const TABLE* const table, const rocksdb::Slice &key) const; + size_t key_length(const TABLE *const table, const rocksdb::Slice &key) const; /* Get the key that is the "infimum" for this index */ - inline void get_infimum_key(uchar* const key, uint* const size) const - { + inline void get_infimum_key(uchar *const key, uint *const size) const { rdb_netbuf_store_index(key, m_index_number); - *size= INDEX_NUMBER_SIZE; + *size = INDEX_NUMBER_SIZE; } /* Get the key that is a "supremum" for this index */ - inline void get_supremum_key(uchar* const key, uint* const size) const - { - rdb_netbuf_store_index(key, m_index_number+1); - *size= INDEX_NUMBER_SIZE; + inline void get_supremum_key(uchar *const key, uint *const size) const { + rdb_netbuf_store_index(key, m_index_number + 1); + *size = INDEX_NUMBER_SIZE; } /* Make a key that is right after the given key. */ - static int successor(uchar* const packed_tuple, const uint &len); + static int successor(uchar *const packed_tuple, const uint &len); /* This can be used to compare prefixes. if X is a prefix of Y, then we consider that X = Y. */ // b describes the lookup key, which can be a prefix of a. 
- int cmp_full_keys(const rocksdb::Slice& a, const rocksdb::Slice& b) const - { + int cmp_full_keys(const rocksdb::Slice &a, const rocksdb::Slice &b) const { DBUG_ASSERT(covers_key(a)); DBUG_ASSERT(covers_key(b)); @@ -226,8 +216,7 @@ public: } /* Check if given mem-comparable key belongs to this index */ - bool covers_key(const rocksdb::Slice &slice) const - { + bool covers_key(const rocksdb::Slice &slice) const { if (slice.size() < INDEX_NUMBER_SIZE) return false; @@ -244,43 +233,35 @@ public: form) */ bool value_matches_prefix(const rocksdb::Slice &value, - const rocksdb::Slice &prefix) const - { + const rocksdb::Slice &prefix) const { return covers_key(value) && !cmp_full_keys(value, prefix); } - uint32 get_keyno() const - { - return m_keyno; - } + uint32 get_keyno() const { return m_keyno; } - uint32 get_index_number() const - { - return m_index_number; - } + uint32 get_index_number() const { return m_index_number; } - GL_INDEX_ID get_gl_index_id() const - { - const GL_INDEX_ID gl_index_id = { m_cf_handle->GetID(), m_index_number }; + GL_INDEX_ID get_gl_index_id() const { + const GL_INDEX_ID gl_index_id = {m_cf_handle->GetID(), m_index_number}; return gl_index_id; } + int read_memcmp_key_part(const TABLE *table_arg, Rdb_string_reader *reader, + const uint part_num) const; + /* Must only be called for secondary keys: */ - uint get_primary_key_tuple(const TABLE* const tbl, - const Rdb_key_def& pk_descr, - const rocksdb::Slice* const key, - uchar* const pk_buffer) const; + uint get_primary_key_tuple(const TABLE *const tbl, + const Rdb_key_def &pk_descr, + const rocksdb::Slice *const key, + uchar *const pk_buffer) const; + + uint get_memcmp_sk_parts(const TABLE *table, const rocksdb::Slice &key, + uchar *sk_buffer, uint *n_null_fields) const; /* Return max length of mem-comparable form */ - uint max_storage_fmt_length() const - { - return m_maxlength; - } + uint max_storage_fmt_length() const { return m_maxlength; } - uint get_key_parts() const - { - return 
m_key_parts; - } + uint get_key_parts() const { return m_key_parts; } /* Get a field object for key part #part_no @@ -292,60 +273,60 @@ public: Internally, we always extend all indexes with PK columns. This function uses our definition of how the index is Extended. */ - inline Field* get_table_field_for_part_no(TABLE *table, uint part_no) const; + inline Field *get_table_field_for_part_no(TABLE *table, uint part_no) const; + + const std::string &get_name() const { return m_name; } - const std::string& get_name() const { - return m_name; + const rocksdb::SliceTransform *get_extractor() const { + return m_prefix_extractor.get(); } - Rdb_key_def& operator=(const Rdb_key_def&) = delete; - Rdb_key_def(const Rdb_key_def& k); + Rdb_key_def &operator=(const Rdb_key_def &) = delete; + Rdb_key_def(const Rdb_key_def &k); Rdb_key_def(uint indexnr_arg, uint keyno_arg, - rocksdb::ColumnFamilyHandle* cf_handle_arg, - uint16_t index_dict_version_arg, - uchar index_type_arg, - uint16_t kv_format_version_arg, - bool is_reverse_cf_arg, bool is_auto_cf_arg, - const char* name, - Rdb_index_stats stats= Rdb_index_stats()); + rocksdb::ColumnFamilyHandle *cf_handle_arg, + uint16_t index_dict_version_arg, uchar index_type_arg, + uint16_t kv_format_version_arg, bool is_reverse_cf_arg, + bool is_auto_cf_arg, const char *name, + Rdb_index_stats stats = Rdb_index_stats()); ~Rdb_key_def(); enum { - INDEX_NUMBER_SIZE= 4, - VERSION_SIZE= 2, - CF_NUMBER_SIZE= 4, - CF_FLAG_SIZE= 4, - PACKED_SIZE= 4, // one int + INDEX_NUMBER_SIZE = 4, + VERSION_SIZE = 2, + CF_NUMBER_SIZE = 4, + CF_FLAG_SIZE = 4, + PACKED_SIZE = 4, // one int }; // bit flags for combining bools when writing to disk enum { - REVERSE_CF_FLAG= 1, - AUTO_CF_FLAG= 2, + REVERSE_CF_FLAG = 1, + AUTO_CF_FLAG = 2, }; // Data dictionary types enum DATA_DICT_TYPE { - DDL_ENTRY_INDEX_START_NUMBER= 1, - INDEX_INFO= 2, - CF_DEFINITION= 3, - BINLOG_INFO_INDEX_NUMBER= 4, - DDL_DROP_INDEX_ONGOING= 5, - INDEX_STATISTICS= 6, - MAX_INDEX_ID= 7, - 
DDL_CREATE_INDEX_ONGOING= 8, - END_DICT_INDEX_ID= 255 + DDL_ENTRY_INDEX_START_NUMBER = 1, + INDEX_INFO = 2, + CF_DEFINITION = 3, + BINLOG_INFO_INDEX_NUMBER = 4, + DDL_DROP_INDEX_ONGOING = 5, + INDEX_STATISTICS = 6, + MAX_INDEX_ID = 7, + DDL_CREATE_INDEX_ONGOING = 8, + END_DICT_INDEX_ID = 255 }; // Data dictionary schema version. Introduce newer versions // if changing schema layout enum { - DDL_ENTRY_INDEX_VERSION= 1, - CF_DEFINITION_VERSION= 1, - BINLOG_INFO_INDEX_NUMBER_VERSION= 1, - DDL_DROP_INDEX_ONGOING_VERSION= 1, - MAX_INDEX_ID_VERSION= 1, - DDL_CREATE_INDEX_ONGOING_VERSION= 1, + DDL_ENTRY_INDEX_VERSION = 1, + CF_DEFINITION_VERSION = 1, + BINLOG_INFO_INDEX_NUMBER_VERSION = 1, + DDL_DROP_INDEX_ONGOING_VERSION = 1, + MAX_INDEX_ID_VERSION = 1, + DDL_CREATE_INDEX_ONGOING_VERSION = 1, // Version for index stats is stored in IndexStats struct }; @@ -353,7 +334,7 @@ public: // INDEX_INFO layout. Update INDEX_INFO_VERSION_LATEST to point to the // latest version number. enum { - INDEX_INFO_VERSION_INITIAL= 1, // Obsolete + INDEX_INFO_VERSION_INITIAL = 1, // Obsolete INDEX_INFO_VERSION_KV_FORMAT, INDEX_INFO_VERSION_GLOBAL_ID, // There is no change to data format in this version, but this version @@ -362,35 +343,35 @@ public: // check inadvertently. INDEX_INFO_VERSION_VERIFY_KV_FORMAT, // This normally point to the latest (currently it does). - INDEX_INFO_VERSION_LATEST= INDEX_INFO_VERSION_VERIFY_KV_FORMAT, + INDEX_INFO_VERSION_LATEST = INDEX_INFO_VERSION_VERIFY_KV_FORMAT, }; // MyRocks index types enum { - INDEX_TYPE_PRIMARY= 1, - INDEX_TYPE_SECONDARY= 2, - INDEX_TYPE_HIDDEN_PRIMARY= 3, + INDEX_TYPE_PRIMARY = 1, + INDEX_TYPE_SECONDARY = 2, + INDEX_TYPE_HIDDEN_PRIMARY = 3, }; // Key/Value format version for each index type enum { - PRIMARY_FORMAT_VERSION_INITIAL= 10, + PRIMARY_FORMAT_VERSION_INITIAL = 10, // This change includes: // - For columns that can be unpacked with unpack_info, PK // stores the unpack_info. 
// - DECIMAL datatype is no longer stored in the row (because // it can be decoded from its mem-comparable form) // - VARCHAR-columns use endspace-padding. - PRIMARY_FORMAT_VERSION_UPDATE1= 11, - PRIMARY_FORMAT_VERSION_LATEST= PRIMARY_FORMAT_VERSION_UPDATE1, + PRIMARY_FORMAT_VERSION_UPDATE1 = 11, + PRIMARY_FORMAT_VERSION_LATEST = PRIMARY_FORMAT_VERSION_UPDATE1, - SECONDARY_FORMAT_VERSION_INITIAL= 10, + SECONDARY_FORMAT_VERSION_INITIAL = 10, // This change the SK format to include unpack_info. - SECONDARY_FORMAT_VERSION_UPDATE1= 11, - SECONDARY_FORMAT_VERSION_LATEST= SECONDARY_FORMAT_VERSION_UPDATE1, + SECONDARY_FORMAT_VERSION_UPDATE1 = 11, + SECONDARY_FORMAT_VERSION_LATEST = SECONDARY_FORMAT_VERSION_UPDATE1, }; - void setup(const TABLE* const table, const Rdb_tbl_def* const tbl_def); + void setup(const TABLE *const table, const Rdb_tbl_def *const tbl_def); rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf_handle; } @@ -400,9 +381,9 @@ public: inline bool has_unpack_info(const uint &kp) const; /* Check if given table has a primary key */ - static bool table_has_hidden_pk(const TABLE* const table); + static bool table_has_hidden_pk(const TABLE *const table); - void report_checksum_mismatch(const bool &is_key, const char* const data, + void report_checksum_mismatch(const bool &is_key, const char *const data, const size_t data_size) const; /* Check if index is at least pk_min if it is a PK, @@ -410,21 +391,19 @@ public: bool index_format_min_check(const int &pk_min, const int &sk_min) const; private: - #ifndef DBUG_OFF - inline bool is_storage_available(const int &offset, const int &needed) const - { - const int storage_length= static_cast<int>(max_storage_fmt_length()); + inline bool is_storage_available(const int &offset, const int &needed) const { + const int storage_length = static_cast<int>(max_storage_fmt_length()); return (storage_length - offset) >= needed; } -#endif // DBUG_OFF +#endif // DBUG_OFF /* Global number of this index (used as prefix in 
StorageFormat) */ const uint32 m_index_number; uchar m_index_number_storage_form[INDEX_NUMBER_SIZE]; - rocksdb::ColumnFamilyHandle* m_cf_handle; + rocksdb::ColumnFamilyHandle *m_cf_handle; public: uint16_t m_index_dict_version; @@ -437,9 +416,9 @@ public: bool m_is_auto_cf; std::string m_name; mutable Rdb_index_stats m_stats; -private: - friend class Rdb_tbl_def; // for m_index_number above +private: + friend class Rdb_tbl_def; // for m_index_number above /* Number of key parts in the primary key*/ uint m_pk_key_parts; @@ -461,6 +440,9 @@ private: */ uint m_key_parts; + /* Prefix extractor for the column family of the key definiton */ + std::shared_ptr<const rocksdb::SliceTransform> m_prefix_extractor; + /* Maximum length of the mem-comparable form. */ uint m_maxlength; @@ -485,8 +467,7 @@ private: // // We have m_dec_idx[idx][dst] = src to get our original character back. // -struct Rdb_collation_codec -{ +struct Rdb_collation_codec { const my_core::CHARSET_INFO *m_cs; // The first element unpacks VARCHAR(n), the second one - CHAR(n). 
std::array<rdb_make_unpack_info_t, 2> m_make_unpack_info_func; @@ -501,15 +482,13 @@ struct Rdb_collation_codec extern mysql_mutex_t rdb_collation_data_mutex; extern mysql_mutex_t rdb_mem_cmp_space_mutex; -extern std::array<const Rdb_collation_codec*, MY_ALL_CHARSETS_SIZE> - rdb_collation_data; - +extern std::array<const Rdb_collation_codec *, MY_ALL_CHARSETS_SIZE> + rdb_collation_data; -class Rdb_field_packing -{ +class Rdb_field_packing { public: - Rdb_field_packing(const Rdb_field_packing&) = delete; - Rdb_field_packing& operator=(const Rdb_field_packing&) = delete; + Rdb_field_packing(const Rdb_field_packing &) = delete; + Rdb_field_packing &operator=(const Rdb_field_packing &) = delete; Rdb_field_packing() = default; /* Length of mem-comparable image of the field, in bytes */ @@ -527,25 +506,22 @@ public: const CHARSET_INFO *m_varchar_charset; // (Valid when Variable Length Space Padded Encoding is used): - uint m_segment_size; // size of segment used + uint m_segment_size; // size of segment used // number of bytes used to store number of trimmed (or added) // spaces in the upack_info bool m_unpack_info_uses_two_bytes; - const std::vector<uchar>* space_xfrm; + const std::vector<uchar> *space_xfrm; size_t space_xfrm_len; size_t space_mb_len; - const Rdb_collation_codec* m_charset_codec; + const Rdb_collation_codec *m_charset_codec; /* @return TRUE: this field makes use of unpack_info. 
*/ - bool uses_unpack_info() const - { - return (m_make_unpack_info_func != nullptr); - } + bool uses_unpack_info() const { return (m_make_unpack_info_func != nullptr); } /* TRUE means unpack_info stores the original field value */ bool m_unpack_info_stores_value; @@ -591,11 +567,12 @@ private: */ uint m_keynr; uint m_key_part; + public: - bool setup(const Rdb_key_def* const key_descr, const Field* const field, + bool setup(const Rdb_key_def *const key_descr, const Field *const field, const uint &keynr_arg, const uint &key_part_arg, const uint16 &key_length); - Field *get_field_in_table(const TABLE* const tbl) const; + Field *get_field_in_table(const TABLE *const tbl) const; void fill_hidden_pk_val(uchar **dst, const longlong &hidden_pk_id) const; }; @@ -606,11 +583,10 @@ public: For encoding/decoding of index tuples, see Rdb_key_def. */ -class Rdb_field_encoder -{ - public: - Rdb_field_encoder(const Rdb_field_encoder&) = delete; - Rdb_field_encoder& operator=(const Rdb_field_encoder&) = delete; +class Rdb_field_encoder { +public: + Rdb_field_encoder(const Rdb_field_encoder &) = delete; + Rdb_field_encoder &operator=(const Rdb_field_encoder &) = delete; /* STORE_NONE is set when a column can be decoded solely from their mem-comparable form. 
@@ -629,7 +605,7 @@ class Rdb_field_encoder uint m_null_offset; uint16 m_field_index; - uchar m_null_mask; // 0 means the field cannot be null + uchar m_null_mask; // 0 means the field cannot be null my_core::enum_field_types m_field_type; @@ -637,33 +613,28 @@ class Rdb_field_encoder bool maybe_null() const { return m_null_mask != 0; } - bool uses_variable_len_encoding() const - { + bool uses_variable_len_encoding() const { return (m_field_type == MYSQL_TYPE_BLOB || m_field_type == MYSQL_TYPE_VARCHAR); } }; -inline Field* Rdb_key_def::get_table_field_for_part_no(TABLE *table, - uint part_no) const -{ +inline Field *Rdb_key_def::get_table_field_for_part_no(TABLE *table, + uint part_no) const { DBUG_ASSERT(part_no < get_key_parts()); return m_pack_info[part_no].get_field_in_table(table); } -inline bool Rdb_key_def::can_unpack(const uint &kp) const -{ +inline bool Rdb_key_def::can_unpack(const uint &kp) const { DBUG_ASSERT(kp < m_key_parts); return (m_pack_info[kp].m_unpack_func != nullptr); } -inline bool Rdb_key_def::has_unpack_info(const uint &kp) const -{ +inline bool Rdb_key_def::has_unpack_info(const uint &kp) const { DBUG_ASSERT(kp < m_key_parts); return m_pack_info[kp].uses_unpack_info(); } - /* A table definition. This is an entry in the mapping @@ -673,9 +644,8 @@ inline bool Rdb_key_def::has_unpack_info(const uint &kp) const That's why we keep auto_increment value here, too. 
*/ -class Rdb_tbl_def -{ - private: +class Rdb_tbl_def { +private: void check_if_is_mysql_system_table(); /* Stores 'dbname.tablename' */ @@ -686,27 +656,24 @@ class Rdb_tbl_def std::string m_tablename; std::string m_partition; - void set_name(const std::string& name); + void set_name(const std::string &name); - public: - Rdb_tbl_def(const Rdb_tbl_def&) = delete; - Rdb_tbl_def& operator=(const Rdb_tbl_def&) = delete; +public: + Rdb_tbl_def(const Rdb_tbl_def &) = delete; + Rdb_tbl_def &operator=(const Rdb_tbl_def &) = delete; - explicit Rdb_tbl_def(const std::string& name) : - m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) - { + explicit Rdb_tbl_def(const std::string &name) + : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) { set_name(name); } - Rdb_tbl_def(const char* const name, const size_t &len) : - m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) - { + Rdb_tbl_def(const char *const name, const size_t &len) + : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) { set_name(std::string(name, len)); } - explicit Rdb_tbl_def(const rocksdb::Slice& slice, const size_t &pos= 0) : - m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) - { + explicit Rdb_tbl_def(const rocksdb::Slice &slice, const size_t &pos = 0) + : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) { set_name(std::string(slice.data() + pos, slice.size() - pos)); } @@ -716,7 +683,7 @@ class Rdb_tbl_def uint m_key_count; /* Array of index descriptors */ - std::shared_ptr<Rdb_key_def>* m_key_descr_arr; + std::shared_ptr<Rdb_key_def> *m_key_descr_arr; std::atomic<longlong> m_hidden_pk_val; std::atomic<longlong> m_auto_incr_val; @@ -724,52 +691,44 @@ class Rdb_tbl_def /* Is this a system table */ bool m_is_mysql_system_table; - bool put_dict(Rdb_dict_manager* const dict, rocksdb::WriteBatch* const batch, - uchar* const key, const size_t &keylen); + bool put_dict(Rdb_dict_manager *const dict, rocksdb::WriteBatch 
*const batch, + uchar *const key, const size_t &keylen); - const std::string& full_tablename() const { return m_dbname_tablename; } - const std::string& base_dbname() const { return m_dbname; } - const std::string& base_tablename() const { return m_tablename; } - const std::string& base_partition() const { return m_partition; } + const std::string &full_tablename() const { return m_dbname_tablename; } + const std::string &base_dbname() const { return m_dbname; } + const std::string &base_tablename() const { return m_tablename; } + const std::string &base_partition() const { return m_partition; } }; - /* A thread-safe sequential number generator. Its performance is not a concern hence it is ok to protect it by a mutex. */ -class Rdb_seq_generator -{ - uint m_next_number= 0; +class Rdb_seq_generator { + uint m_next_number = 0; mysql_mutex_t m_mutex; + public: - Rdb_seq_generator(const Rdb_seq_generator&) = delete; - Rdb_seq_generator& operator=(const Rdb_seq_generator&) = delete; + Rdb_seq_generator(const Rdb_seq_generator &) = delete; + Rdb_seq_generator &operator=(const Rdb_seq_generator &) = delete; Rdb_seq_generator() = default; - void init(const uint &initial_number) - { - mysql_mutex_init(0 , &m_mutex, MY_MUTEX_INIT_FAST); - m_next_number= initial_number; + void init(const uint &initial_number) { + mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST); + m_next_number = initial_number; } - uint get_and_update_next_number(Rdb_dict_manager* const dict); + uint get_and_update_next_number(Rdb_dict_manager *const dict); - void cleanup() - { - mysql_mutex_destroy(&m_mutex); - } + void cleanup() { mysql_mutex_destroy(&m_mutex); } }; - -interface Rdb_tables_scanner -{ - virtual int add_table(Rdb_tbl_def* tdef) =0; +interface Rdb_tables_scanner { + virtual int add_table(Rdb_tbl_def * tdef) = 0; }; - /* This contains a mapping of @@ -778,10 +737,9 @@ interface Rdb_tables_scanner objects are shared among all threads. 
*/ -class Rdb_ddl_manager -{ - Rdb_dict_manager *m_dict= nullptr; - my_core::HASH m_ddl_hash; // Contains Rdb_tbl_def elements +class Rdb_ddl_manager { + Rdb_dict_manager *m_dict = nullptr; + my_core::HASH m_ddl_hash; // Contains Rdb_tbl_def elements // maps index id to <table_name, index number> std::map<GL_INDEX_ID, std::pair<std::string, uint>> m_index_num_to_keydef; mysql_rwlock_t m_rwlock; @@ -792,58 +750,56 @@ class Rdb_ddl_manager // and consumed by the rocksdb background thread std::map<GL_INDEX_ID, Rdb_index_stats> m_stats2store; - const std::shared_ptr<Rdb_key_def>& find( - GL_INDEX_ID gl_index_id); + const std::shared_ptr<Rdb_key_def> &find(GL_INDEX_ID gl_index_id); + public: - Rdb_ddl_manager(const Rdb_ddl_manager&) = delete; - Rdb_ddl_manager& operator=(const Rdb_ddl_manager&) = delete; + Rdb_ddl_manager(const Rdb_ddl_manager &) = delete; + Rdb_ddl_manager &operator=(const Rdb_ddl_manager &) = delete; Rdb_ddl_manager() {} /* Load the data dictionary from on-disk storage */ - bool init(Rdb_dict_manager* const dict_arg, Rdb_cf_manager* const cf_manager, + bool init(Rdb_dict_manager *const dict_arg, Rdb_cf_manager *const cf_manager, const uint32_t &validate_tables); void cleanup(); - Rdb_tbl_def* find(const std::string& table_name, const bool &lock= true); + Rdb_tbl_def *find(const std::string &table_name, const bool &lock = true); std::shared_ptr<const Rdb_key_def> safe_find(GL_INDEX_ID gl_index_id); - void set_stats( - const std::unordered_map<GL_INDEX_ID, Rdb_index_stats>& stats); - void adjust_stats( - const std::vector<Rdb_index_stats>& new_data, - const std::vector<Rdb_index_stats>& deleted_data - =std::vector<Rdb_index_stats>()); + void set_stats(const std::unordered_map<GL_INDEX_ID, Rdb_index_stats> &stats); + void adjust_stats(const std::vector<Rdb_index_stats> &new_data, + const std::vector<Rdb_index_stats> &deleted_data = + std::vector<Rdb_index_stats>()); void persist_stats(const bool &sync = false); /* Modify the mapping and write it to 
on-disk storage */ - int put_and_write(Rdb_tbl_def* const key_descr, - rocksdb::WriteBatch* const batch); - void remove(Rdb_tbl_def* const rec, rocksdb::WriteBatch* const batch, - const bool &lock= true); - bool rename(const std::string& from, const std::string& to, - rocksdb::WriteBatch* const batch); - - uint get_and_update_next_number(Rdb_dict_manager* const dict) - { return m_sequence.get_and_update_next_number(dict); } + int put_and_write(Rdb_tbl_def *const key_descr, + rocksdb::WriteBatch *const batch); + void remove(Rdb_tbl_def *const rec, rocksdb::WriteBatch *const batch, + const bool &lock = true); + bool rename(const std::string &from, const std::string &to, + rocksdb::WriteBatch *const batch); + + uint get_and_update_next_number(Rdb_dict_manager *const dict) { + return m_sequence.get_and_update_next_number(dict); + } /* Walk the data dictionary */ - int scan_for_tables(Rdb_tables_scanner* tables_scanner); + int scan_for_tables(Rdb_tables_scanner *tables_scanner); void erase_index_num(const GL_INDEX_ID &gl_index_id); private: /* Put the data into in-memory table (only) */ - int put(Rdb_tbl_def* const key_descr, const bool &lock= true); + int put(Rdb_tbl_def *const key_descr, const bool &lock = true); /* Helper functions to be passed to my_core::HASH object */ - static const uchar* get_hash_key(Rdb_tbl_def* const rec, size_t* const length, - my_bool not_used __attribute__((unused))); - static void free_hash_elem(void* const data); + static const uchar *get_hash_key(Rdb_tbl_def *const rec, size_t *const length, + my_bool not_used MY_ATTRIBUTE((unused))); + static void free_hash_elem(void *const data); bool validate_schemas(); }; - /* Writing binlog information into RocksDB at commit(), and retrieving binlog information at crash recovery. 
@@ -859,40 +815,37 @@ private: binlog_gtid_length (2 byte form) binlog_gtid */ -class Rdb_binlog_manager -{ +class Rdb_binlog_manager { public: - Rdb_binlog_manager(const Rdb_binlog_manager&) = delete; - Rdb_binlog_manager& operator=(const Rdb_binlog_manager&) = delete; + Rdb_binlog_manager(const Rdb_binlog_manager &) = delete; + Rdb_binlog_manager &operator=(const Rdb_binlog_manager &) = delete; Rdb_binlog_manager() = default; - bool init(Rdb_dict_manager* const dict); + bool init(Rdb_dict_manager *const dict); void cleanup(); - void update(const char* const binlog_name, const my_off_t binlog_pos, - const char* const binlog_max_gtid, - rocksdb::WriteBatchBase* const batch); - bool read(char* const binlog_name, my_off_t* const binlog_pos, - char* const binlog_gtid) const; - void update_slave_gtid_info(const uint &id, const char* const db, - const char* const gtid, - rocksdb::WriteBatchBase* const write_batch); + void update(const char *const binlog_name, const my_off_t binlog_pos, + const char *const binlog_max_gtid, + rocksdb::WriteBatchBase *const batch); + bool read(char *const binlog_name, my_off_t *const binlog_pos, + char *const binlog_gtid) const; + void update_slave_gtid_info(const uint &id, const char *const db, + const char *const gtid, + rocksdb::WriteBatchBase *const write_batch); private: - Rdb_dict_manager *m_dict= nullptr; - uchar m_key_buf[Rdb_key_def::INDEX_NUMBER_SIZE]= {0}; + Rdb_dict_manager *m_dict = nullptr; + uchar m_key_buf[Rdb_key_def::INDEX_NUMBER_SIZE] = {0}; rocksdb::Slice m_key_slice; - rocksdb::Slice pack_value(uchar* const buf, - const char* const binlog_name, + rocksdb::Slice pack_value(uchar *const buf, const char *const binlog_name, const my_off_t &binlog_pos, - const char* const binlog_gtid) const; - bool unpack_value(const uchar* const value, char* const binlog_name, - my_off_t* const binlog_pos, char* const binlog_gtid) const; + const char *const binlog_gtid) const; + bool unpack_value(const uchar *const value, char *const 
binlog_name, + my_off_t *const binlog_pos, char *const binlog_gtid) const; - std::atomic<Rdb_tbl_def*> m_slave_gtid_info_tbl; + std::atomic<Rdb_tbl_def *> m_slave_gtid_info_tbl; }; - /* Rdb_dict_manager manages how MySQL on RocksDB (MyRocks) stores its internal data dictionary. @@ -944,58 +897,49 @@ private: begin() and commit() to make it easier to do atomic operations. */ -class Rdb_dict_manager -{ +class Rdb_dict_manager { private: mysql_mutex_t m_mutex; - rocksdb::DB *m_db= nullptr; - rocksdb::ColumnFamilyHandle *m_system_cfh= nullptr; + rocksdb::DB *m_db = nullptr; + rocksdb::ColumnFamilyHandle *m_system_cfh = nullptr; /* Utility to put INDEX_INFO and CF_DEFINITION */ - uchar m_key_buf_max_index_id[Rdb_key_def::INDEX_NUMBER_SIZE]= {0}; + uchar m_key_buf_max_index_id[Rdb_key_def::INDEX_NUMBER_SIZE] = {0}; rocksdb::Slice m_key_slice_max_index_id; - static void dump_index_id(uchar* const netbuf, + static void dump_index_id(uchar *const netbuf, Rdb_key_def::DATA_DICT_TYPE dict_type, const GL_INDEX_ID &gl_index_id); - void delete_with_prefix(rocksdb::WriteBatch* const batch, + void delete_with_prefix(rocksdb::WriteBatch *const batch, Rdb_key_def::DATA_DICT_TYPE dict_type, const GL_INDEX_ID &gl_index_id) const; /* Functions for fast DROP TABLE/INDEX */ void resume_drop_indexes() const; - void log_start_drop_table(const std::shared_ptr<Rdb_key_def>* const key_descr, + void log_start_drop_table(const std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 &n_keys, - const char* const log_action) const; + const char *const log_action) const; void log_start_drop_index(GL_INDEX_ID gl_index_id, - const char* log_action) const; + const char *log_action) const; + public: - Rdb_dict_manager(const Rdb_dict_manager&) = delete; - Rdb_dict_manager& operator=(const Rdb_dict_manager&) = delete; + Rdb_dict_manager(const Rdb_dict_manager &) = delete; + Rdb_dict_manager &operator=(const Rdb_dict_manager &) = delete; Rdb_dict_manager() = default; - bool init(rocksdb::DB* const 
rdb_dict, Rdb_cf_manager* const cf_manager); + bool init(rocksdb::DB *const rdb_dict, Rdb_cf_manager *const cf_manager); - inline void cleanup() - { - mysql_mutex_destroy(&m_mutex); - } + inline void cleanup() { mysql_mutex_destroy(&m_mutex); } - inline void lock() - { - mysql_mutex_lock(&m_mutex); - } + inline void lock() { mysql_mutex_lock(&m_mutex); } - inline void unlock() - { - mysql_mutex_unlock(&m_mutex); - } + inline void unlock() { mysql_mutex_unlock(&m_mutex); } /* Raw RocksDB operations */ std::unique_ptr<rocksdb::WriteBatch> begin() const; - int commit(rocksdb::WriteBatch* const batch, const bool &sync = true) const; - rocksdb::Status get_value(const rocksdb::Slice& key, - std::string* const value) const; - void put_key(rocksdb::WriteBatchBase* const batch, const rocksdb::Slice &key, + int commit(rocksdb::WriteBatch *const batch, const bool &sync = true) const; + rocksdb::Status get_value(const rocksdb::Slice &key, + std::string *const value) const; + void put_key(rocksdb::WriteBatchBase *const batch, const rocksdb::Slice &key, const rocksdb::Slice &value) const; void delete_key(rocksdb::WriteBatchBase *batch, const rocksdb::Slice &key) const; @@ -1007,95 +951,86 @@ public: const uint16_t kv_version, const uint index_id, const uint cf_id) const; - void delete_index_info(rocksdb::WriteBatch* batch, + void delete_index_info(rocksdb::WriteBatch *batch, const GL_INDEX_ID &index_id) const; bool get_index_info(const GL_INDEX_ID &gl_index_id, - uint16_t *index_dict_version, - uchar *index_type, uint16_t *kv_version) const; + uint16_t *index_dict_version, uchar *index_type, + uint16_t *kv_version) const; /* CF id => CF flags */ - void add_cf_flags(rocksdb::WriteBatch* const batch, - const uint &cf_id, + void add_cf_flags(rocksdb::WriteBatch *const batch, const uint &cf_id, const uint &cf_flags) const; - bool get_cf_flags(const uint &cf_id, uint* const cf_flags) const; + bool get_cf_flags(const uint &cf_id, uint *const cf_flags) const; /* Functions for fast 
CREATE/DROP TABLE/INDEX */ - void get_ongoing_index_operation(std::vector<GL_INDEX_ID>* gl_index_ids, - Rdb_key_def::DATA_DICT_TYPE dd_type) const; - bool is_index_operation_ongoing(const GL_INDEX_ID& gl_index_id, + void + get_ongoing_index_operation(std::unordered_set<GL_INDEX_ID> *gl_index_ids, + Rdb_key_def::DATA_DICT_TYPE dd_type) const; + bool is_index_operation_ongoing(const GL_INDEX_ID &gl_index_id, Rdb_key_def::DATA_DICT_TYPE dd_type) const; - void start_ongoing_index_operation(rocksdb::WriteBatch* batch, - const GL_INDEX_ID& gl_index_id, + void start_ongoing_index_operation(rocksdb::WriteBatch *batch, + const GL_INDEX_ID &gl_index_id, Rdb_key_def::DATA_DICT_TYPE dd_type) const; - void end_ongoing_index_operation(rocksdb::WriteBatch* const batch, - const GL_INDEX_ID& gl_index_id, + void end_ongoing_index_operation(rocksdb::WriteBatch *const batch, + const GL_INDEX_ID &gl_index_id, Rdb_key_def::DATA_DICT_TYPE dd_type) const; bool is_drop_index_empty() const; - void add_drop_table(std::shared_ptr<Rdb_key_def>* const key_descr, + void add_drop_table(std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 &n_keys, - rocksdb::WriteBatch* const batch) const; - void add_drop_index(const std::unordered_set<GL_INDEX_ID>& gl_index_ids, - rocksdb::WriteBatch* const batch) const; - void add_create_index(const std::unordered_set<GL_INDEX_ID>& gl_index_ids, - rocksdb::WriteBatch* const batch) const; - void finish_indexes_operation( - const std::unordered_set<GL_INDEX_ID>& gl_index_ids, - Rdb_key_def::DATA_DICT_TYPE dd_type) const; + rocksdb::WriteBatch *const batch) const; + void add_drop_index(const std::unordered_set<GL_INDEX_ID> &gl_index_ids, + rocksdb::WriteBatch *const batch) const; + void add_create_index(const std::unordered_set<GL_INDEX_ID> &gl_index_ids, + rocksdb::WriteBatch *const batch) const; + void + finish_indexes_operation(const std::unordered_set<GL_INDEX_ID> &gl_index_ids, + Rdb_key_def::DATA_DICT_TYPE dd_type) const; void 
rollback_ongoing_index_creation() const; - inline void - get_ongoing_drop_indexes(std::vector<GL_INDEX_ID>* gl_index_ids) const - { + inline void get_ongoing_drop_indexes( + std::unordered_set<GL_INDEX_ID> *gl_index_ids) const { get_ongoing_index_operation(gl_index_ids, Rdb_key_def::DDL_DROP_INDEX_ONGOING); } - inline void - get_ongoing_create_indexes(std::vector<GL_INDEX_ID>* gl_index_ids) const - { + inline void get_ongoing_create_indexes( + std::unordered_set<GL_INDEX_ID> *gl_index_ids) const { get_ongoing_index_operation(gl_index_ids, Rdb_key_def::DDL_CREATE_INDEX_ONGOING); } inline void start_drop_index(rocksdb::WriteBatch *wb, - const GL_INDEX_ID& gl_index_id) const - { + const GL_INDEX_ID &gl_index_id) const { start_ongoing_index_operation(wb, gl_index_id, Rdb_key_def::DDL_DROP_INDEX_ONGOING); } inline void start_create_index(rocksdb::WriteBatch *wb, - const GL_INDEX_ID& gl_index_id) const - { + const GL_INDEX_ID &gl_index_id) const { start_ongoing_index_operation(wb, gl_index_id, Rdb_key_def::DDL_CREATE_INDEX_ONGOING); } inline void finish_drop_indexes( - const std::unordered_set<GL_INDEX_ID>& gl_index_ids) const - { - finish_indexes_operation(gl_index_ids, - Rdb_key_def::DDL_DROP_INDEX_ONGOING); + const std::unordered_set<GL_INDEX_ID> &gl_index_ids) const { + finish_indexes_operation(gl_index_ids, Rdb_key_def::DDL_DROP_INDEX_ONGOING); } inline void finish_create_indexes( - const std::unordered_set<GL_INDEX_ID>& gl_index_ids) const - { + const std::unordered_set<GL_INDEX_ID> &gl_index_ids) const { finish_indexes_operation(gl_index_ids, Rdb_key_def::DDL_CREATE_INDEX_ONGOING); } - inline bool is_drop_index_ongoing(const GL_INDEX_ID& gl_index_id) const - { + inline bool is_drop_index_ongoing(const GL_INDEX_ID &gl_index_id) const { return is_index_operation_ongoing(gl_index_id, Rdb_key_def::DDL_DROP_INDEX_ONGOING); } - inline bool is_create_index_ongoing(const GL_INDEX_ID& gl_index_id) const - { + inline bool is_create_index_ongoing(const GL_INDEX_ID 
&gl_index_id) const { return is_index_operation_ongoing(gl_index_id, Rdb_key_def::DDL_CREATE_INDEX_ONGOING); } - bool get_max_index_id(uint32_t* const index_id) const; - bool update_max_index_id(rocksdb::WriteBatch* const batch, + bool get_max_index_id(uint32_t *const index_id) const; + bool update_max_index_id(rocksdb::WriteBatch *const batch, const uint32_t &index_id) const; - void add_stats(rocksdb::WriteBatch* const batch, - const std::vector<Rdb_index_stats>& stats) const; + void add_stats(rocksdb::WriteBatch *const batch, + const std::vector<Rdb_index_stats> &stats) const; Rdb_index_stats get_stats(GL_INDEX_ID gl_index_id) const; }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_i_s.cc b/storage/rocksdb/rdb_i_s.cc index c35116e10a9..35b0cf4899e 100644 --- a/storage/rocksdb/rdb_i_s.cc +++ b/storage/rocksdb/rdb_i_s.cc @@ -15,6 +15,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* C++ standard header files */ +#include <map> #include <string> #include <vector> @@ -22,8 +23,8 @@ #include <sql_show.h> /* RocksDB header files */ -#include "rocksdb/convenience.h" #include "rocksdb/compaction_filter.h" +#include "rocksdb/convenience.h" #include "rocksdb/filter_policy.h" #include "rocksdb/memtablerep.h" #include "rocksdb/merge_operator.h" @@ -40,98 +41,84 @@ namespace myrocks { /** - Define the INFORMATION_SCHEMA (I_S) structures needed by MyRocks storage engine. + Define the INFORMATION_SCHEMA (I_S) structures needed by MyRocks storage + engine. 
*/ -#define ROCKSDB_FIELD_INFO(_name_, _len_, _type_, _flag_) \ - { _name_, _len_, _type_, 0, _flag_, nullptr, 0 } +#define ROCKSDB_FIELD_INFO(_name_, _len_, _type_, _flag_) \ + { _name_, _len_, _type_, 0, _flag_, nullptr, 0 } -#define ROCKSDB_FIELD_INFO_END ROCKSDB_FIELD_INFO(nullptr, \ - 0, MYSQL_TYPE_NULL, 0) +#define ROCKSDB_FIELD_INFO_END \ + ROCKSDB_FIELD_INFO(nullptr, 0, MYSQL_TYPE_NULL, 0) /* Support for INFORMATION_SCHEMA.ROCKSDB_CFSTATS dynamic table */ -namespace RDB_CFSTATS_FIELD -{ - enum - { - CF_NAME= 0, - STAT_TYPE, - VALUE - }; -} // namespace RDB_CFSTATS_FIELD - -static ST_FIELD_INFO rdb_i_s_cfstats_fields_info[]= -{ - ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("STAT_TYPE", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("VALUE", sizeof(uint64_t), MYSQL_TYPE_LONGLONG, 0), - ROCKSDB_FIELD_INFO_END -}; +namespace RDB_CFSTATS_FIELD { +enum { CF_NAME = 0, STAT_TYPE, VALUE }; +} // namespace RDB_CFSTATS_FIELD + +static ST_FIELD_INFO rdb_i_s_cfstats_fields_info[] = { + ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("STAT_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("VALUE", sizeof(uint64_t), MYSQL_TYPE_LONGLONG, 0), + ROCKSDB_FIELD_INFO_END}; static int rdb_i_s_cfstats_fill_table( - my_core::THD* const thd, - my_core::TABLE_LIST* const tables, - my_core::Item* const cond __attribute__((__unused__))) -{ + my_core::THD *const thd, my_core::TABLE_LIST *const tables, + my_core::Item *const cond MY_ATTRIBUTE((__unused__))) { + DBUG_ENTER_FUNC(); + bool ret; uint64_t val; - DBUG_ENTER("rdb_i_s_cfstats_fill_table"); - const std::vector<std::pair<const std::string, std::string>> cf_properties = { - {rocksdb::DB::Properties::kNumImmutableMemTable, "NUM_IMMUTABLE_MEM_TABLE"}, - {rocksdb::DB::Properties::kMemTableFlushPending, - "MEM_TABLE_FLUSH_PENDING"}, - {rocksdb::DB::Properties::kCompactionPending, "COMPACTION_PENDING"}, - 
{rocksdb::DB::Properties::kCurSizeActiveMemTable, - "CUR_SIZE_ACTIVE_MEM_TABLE"}, - {rocksdb::DB::Properties::kCurSizeAllMemTables, "CUR_SIZE_ALL_MEM_TABLES"}, - {rocksdb::DB::Properties::kNumEntriesActiveMemTable, - "NUM_ENTRIES_ACTIVE_MEM_TABLE"}, - {rocksdb::DB::Properties::kNumEntriesImmMemTables, - "NUM_ENTRIES_IMM_MEM_TABLES"}, - {rocksdb::DB::Properties::kEstimateTableReadersMem, - "NON_BLOCK_CACHE_SST_MEM_USAGE"}, - {rocksdb::DB::Properties::kNumLiveVersions, "NUM_LIVE_VERSIONS"} - }; - - rocksdb::DB* const rdb= rdb_get_rocksdb_db(); - const Rdb_cf_manager& cf_manager= rdb_get_cf_manager(); + {rocksdb::DB::Properties::kNumImmutableMemTable, + "NUM_IMMUTABLE_MEM_TABLE"}, + {rocksdb::DB::Properties::kMemTableFlushPending, + "MEM_TABLE_FLUSH_PENDING"}, + {rocksdb::DB::Properties::kCompactionPending, "COMPACTION_PENDING"}, + {rocksdb::DB::Properties::kCurSizeActiveMemTable, + "CUR_SIZE_ACTIVE_MEM_TABLE"}, + {rocksdb::DB::Properties::kCurSizeAllMemTables, + "CUR_SIZE_ALL_MEM_TABLES"}, + {rocksdb::DB::Properties::kNumEntriesActiveMemTable, + "NUM_ENTRIES_ACTIVE_MEM_TABLE"}, + {rocksdb::DB::Properties::kNumEntriesImmMemTables, + "NUM_ENTRIES_IMM_MEM_TABLES"}, + {rocksdb::DB::Properties::kEstimateTableReadersMem, + "NON_BLOCK_CACHE_SST_MEM_USAGE"}, + {rocksdb::DB::Properties::kNumLiveVersions, "NUM_LIVE_VERSIONS"}}; + + rocksdb::DB *const rdb = rdb_get_rocksdb_db(); + const Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); DBUG_ASSERT(rdb != nullptr); - for (const auto &cf_name : cf_manager.get_cf_names()) - { - rocksdb::ColumnFamilyHandle* cfh; + for (const auto &cf_name : cf_manager.get_cf_names()) { + rocksdb::ColumnFamilyHandle *cfh; bool is_automatic; /* Only the cf name is important. Whether it was generated automatically does not matter, so is_automatic is ignored. 
*/ - cfh= cf_manager.get_cf(cf_name.c_str(), "", nullptr, &is_automatic); + cfh = cf_manager.get_cf(cf_name.c_str(), "", nullptr, &is_automatic); if (cfh == nullptr) continue; - for (const auto &property : cf_properties) - { + for (const auto &property : cf_properties) { if (!rdb->GetIntProperty(cfh, property.first, &val)) continue; DBUG_ASSERT(tables != nullptr); tables->table->field[RDB_CFSTATS_FIELD::CF_NAME]->store( - cf_name.c_str(), - cf_name.size(), - system_charset_info); + cf_name.c_str(), cf_name.size(), system_charset_info); tables->table->field[RDB_CFSTATS_FIELD::STAT_TYPE]->store( - property.second.c_str(), - property.second.size(), - system_charset_info); + property.second.c_str(), property.second.size(), system_charset_info); tables->table->field[RDB_CFSTATS_FIELD::VALUE]->store(val, true); - ret= my_core::schema_table_store_record(thd, tables->table); + ret = my_core::schema_table_store_record(thd, tables->table); if (ret) DBUG_RETURN(ret); @@ -140,17 +127,17 @@ static int rdb_i_s_cfstats_fill_table( DBUG_RETURN(0); } -static int rdb_i_s_cfstats_init(void *p) -{ - my_core::ST_SCHEMA_TABLE *schema; +static int rdb_i_s_cfstats_init(void *p) { + DBUG_ENTER_FUNC(); - DBUG_ENTER("rdb_i_s_cfstats_init"); DBUG_ASSERT(p != nullptr); - schema= (my_core::ST_SCHEMA_TABLE*) p; + my_core::ST_SCHEMA_TABLE *schema; + + schema = (my_core::ST_SCHEMA_TABLE *)p; - schema->fields_info= rdb_i_s_cfstats_fields_info; - schema->fill_table= rdb_i_s_cfstats_fill_table; + schema->fields_info = rdb_i_s_cfstats_fields_info; + schema->fill_table = rdb_i_s_cfstats_fill_table; DBUG_RETURN(0); } @@ -158,56 +145,44 @@ static int rdb_i_s_cfstats_init(void *p) /* Support for INFORMATION_SCHEMA.ROCKSDB_DBSTATS dynamic table */ -namespace RDB_DBSTATS_FIELD -{ - enum - { - STAT_TYPE= 0, - VALUE - }; -} // namespace RDB_DBSTATS_FIELD - -static ST_FIELD_INFO rdb_i_s_dbstats_fields_info[]= -{ - ROCKSDB_FIELD_INFO("STAT_TYPE", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("VALUE", 
sizeof(uint64_t), MYSQL_TYPE_LONGLONG, 0), - ROCKSDB_FIELD_INFO_END -}; +namespace RDB_DBSTATS_FIELD { +enum { STAT_TYPE = 0, VALUE }; +} // namespace RDB_DBSTATS_FIELD + +static ST_FIELD_INFO rdb_i_s_dbstats_fields_info[] = { + ROCKSDB_FIELD_INFO("STAT_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("VALUE", sizeof(uint64_t), MYSQL_TYPE_LONGLONG, 0), + ROCKSDB_FIELD_INFO_END}; static int rdb_i_s_dbstats_fill_table( - my_core::THD* const thd, - my_core::TABLE_LIST* const tables, - my_core::Item* const cond __attribute__((__unused__))) -{ + my_core::THD *const thd, my_core::TABLE_LIST *const tables, + my_core::Item *const cond MY_ATTRIBUTE((__unused__))) { + DBUG_ENTER_FUNC(); + bool ret; uint64_t val; - DBUG_ENTER("rdb_i_s_dbstats_fill_table"); - const std::vector<std::pair<std::string, std::string>> db_properties = { - {rocksdb::DB::Properties::kBackgroundErrors, "DB_BACKGROUND_ERRORS"}, - {rocksdb::DB::Properties::kNumSnapshots, "DB_NUM_SNAPSHOTS"}, - {rocksdb::DB::Properties::kOldestSnapshotTime, "DB_OLDEST_SNAPSHOT_TIME"} - }; + {rocksdb::DB::Properties::kBackgroundErrors, "DB_BACKGROUND_ERRORS"}, + {rocksdb::DB::Properties::kNumSnapshots, "DB_NUM_SNAPSHOTS"}, + {rocksdb::DB::Properties::kOldestSnapshotTime, + "DB_OLDEST_SNAPSHOT_TIME"}}; - rocksdb::DB* const rdb= rdb_get_rocksdb_db(); - const rocksdb::BlockBasedTableOptions& table_options= - rdb_get_table_options(); + rocksdb::DB *const rdb = rdb_get_rocksdb_db(); + const rocksdb::BlockBasedTableOptions &table_options = + rdb_get_table_options(); - for (const auto &property : db_properties) - { + for (const auto &property : db_properties) { if (!rdb->GetIntProperty(property.first, &val)) continue; DBUG_ASSERT(tables != nullptr); tables->table->field[RDB_DBSTATS_FIELD::STAT_TYPE]->store( - property.second.c_str(), - property.second.size(), - system_charset_info); + property.second.c_str(), property.second.size(), system_charset_info); tables->table->field[RDB_DBSTATS_FIELD::VALUE]->store(val, 
true); - ret= my_core::schema_table_store_record(thd, tables->table); + ret = my_core::schema_table_store_record(thd, tables->table); if (ret) DBUG_RETURN(ret); @@ -223,28 +198,27 @@ static int rdb_i_s_dbstats_fill_table( There is no interface to retrieve this block cache, nor fetch the usage information from the column family. */ - val= (table_options.block_cache ? table_options.block_cache->GetUsage() : 0); + val = (table_options.block_cache ? table_options.block_cache->GetUsage() : 0); tables->table->field[RDB_DBSTATS_FIELD::STAT_TYPE]->store( - STRING_WITH_LEN("DB_BLOCK_CACHE_USAGE"), system_charset_info); + STRING_WITH_LEN("DB_BLOCK_CACHE_USAGE"), system_charset_info); tables->table->field[RDB_DBSTATS_FIELD::VALUE]->store(val, true); - ret= my_core::schema_table_store_record(thd, tables->table); + ret = my_core::schema_table_store_record(thd, tables->table); DBUG_RETURN(ret); } -static int rdb_i_s_dbstats_init(void* const p) -{ +static int rdb_i_s_dbstats_init(void *const p) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(p != nullptr); my_core::ST_SCHEMA_TABLE *schema; - DBUG_ENTER("rdb_i_s_dbstats_init"); - - schema= (my_core::ST_SCHEMA_TABLE*) p; + schema = (my_core::ST_SCHEMA_TABLE *)p; - schema->fields_info= rdb_i_s_dbstats_fields_info; - schema->fill_table= rdb_i_s_dbstats_fill_table; + schema->fields_info = rdb_i_s_dbstats_fields_info; + schema->fill_table = rdb_i_s_dbstats_fill_table; DBUG_RETURN(0); } @@ -252,46 +226,32 @@ static int rdb_i_s_dbstats_init(void* const p) /* Support for INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT dynamic table */ -namespace RDB_PERF_CONTEXT_FIELD -{ - enum - { - TABLE_SCHEMA= 0, - TABLE_NAME, - PARTITION_NAME, - STAT_TYPE, - VALUE - }; -} // namespace RDB_PERF_CONTEXT_FIELD - -static ST_FIELD_INFO rdb_i_s_perf_context_fields_info[]= -{ - ROCKSDB_FIELD_INFO("TABLE_SCHEMA", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("TABLE_NAME", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("PARTITION_NAME", NAME_LEN+1, 
MYSQL_TYPE_STRING, - MY_I_S_MAYBE_NULL), - ROCKSDB_FIELD_INFO("STAT_TYPE", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("VALUE", sizeof(uint64_t), MYSQL_TYPE_LONGLONG, - 0), - ROCKSDB_FIELD_INFO_END -}; +namespace RDB_PERF_CONTEXT_FIELD { +enum { TABLE_SCHEMA = 0, TABLE_NAME, PARTITION_NAME, STAT_TYPE, VALUE }; +} // namespace RDB_PERF_CONTEXT_FIELD + +static ST_FIELD_INFO rdb_i_s_perf_context_fields_info[] = { + ROCKSDB_FIELD_INFO("TABLE_SCHEMA", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("TABLE_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("PARTITION_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, + MY_I_S_MAYBE_NULL), + ROCKSDB_FIELD_INFO("STAT_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("VALUE", sizeof(uint64_t), MYSQL_TYPE_LONGLONG, 0), + ROCKSDB_FIELD_INFO_END}; static int rdb_i_s_perf_context_fill_table( - my_core::THD* const thd, - my_core::TABLE_LIST* const tables, - my_core::Item* const cond __attribute__((__unused__))) -{ + my_core::THD *const thd, my_core::TABLE_LIST *const tables, + my_core::Item *const cond MY_ATTRIBUTE((__unused__))) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(thd != nullptr); DBUG_ASSERT(tables != nullptr); - int ret= 0; - Field** field= tables->table->field; - - DBUG_ENTER("rdb_i_s_perf_context_fill_table"); + int ret = 0; + Field **field = tables->table->field; - const std::vector<std::string> tablenames= rdb_get_open_table_names(); - for (const auto& it : tablenames) - { + const std::vector<std::string> tablenames = rdb_get_open_table_names(); + for (const auto &it : tablenames) { std::string str, dbname, tablename, partname; Rdb_perf_counters counters; @@ -299,42 +259,35 @@ static int rdb_i_s_perf_context_fill_table( return HA_ERR_INTERNAL_ERROR; } - if (rdb_split_normalized_tablename(str, &dbname, &tablename, &partname)) - { + if (rdb_split_normalized_tablename(str, &dbname, &tablename, &partname)) { continue; } - if (rdb_get_table_perf_counters(it.c_str(), &counters)) - { + if 
(rdb_get_table_perf_counters(it.c_str(), &counters)) { continue; } DBUG_ASSERT(field != nullptr); field[RDB_PERF_CONTEXT_FIELD::TABLE_SCHEMA]->store( - dbname.c_str(), dbname.size(), system_charset_info); + dbname.c_str(), dbname.size(), system_charset_info); field[RDB_PERF_CONTEXT_FIELD::TABLE_NAME]->store( - tablename.c_str(), tablename.size(), system_charset_info); - if (partname.size() == 0) - { + tablename.c_str(), tablename.size(), system_charset_info); + if (partname.size() == 0) { field[RDB_PERF_CONTEXT_FIELD::PARTITION_NAME]->set_null(); - } - else - { + } else { field[RDB_PERF_CONTEXT_FIELD::PARTITION_NAME]->set_notnull(); field[RDB_PERF_CONTEXT_FIELD::PARTITION_NAME]->store( - partname.c_str(), partname.size(), system_charset_info); + partname.c_str(), partname.size(), system_charset_info); } - for (int i= 0; i < PC_MAX_IDX; i++) - { + for (int i = 0; i < PC_MAX_IDX; i++) { field[RDB_PERF_CONTEXT_FIELD::STAT_TYPE]->store( - rdb_pc_stat_types[i].c_str(), - rdb_pc_stat_types[i].size(), - system_charset_info); + rdb_pc_stat_types[i].c_str(), rdb_pc_stat_types[i].size(), + system_charset_info); field[RDB_PERF_CONTEXT_FIELD::VALUE]->store(counters.m_value[i], true); - ret= my_core::schema_table_store_record(thd, tables->table); + ret = my_core::schema_table_store_record(thd, tables->table); if (ret) DBUG_RETURN(ret); } @@ -343,18 +296,17 @@ static int rdb_i_s_perf_context_fill_table( DBUG_RETURN(0); } -static int rdb_i_s_perf_context_init(void* const p) -{ +static int rdb_i_s_perf_context_init(void *const p) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(p != nullptr); my_core::ST_SCHEMA_TABLE *schema; - DBUG_ENTER("rdb_i_s_perf_context_init"); - - schema= (my_core::ST_SCHEMA_TABLE*) p; + schema = (my_core::ST_SCHEMA_TABLE *)p; - schema->fields_info= rdb_i_s_perf_context_fields_info; - schema->fill_table= rdb_i_s_perf_context_fill_table; + schema->fields_info = rdb_i_s_perf_context_fields_info; + schema->fill_table = rdb_i_s_perf_context_fill_table; DBUG_RETURN(0); } 
@@ -362,49 +314,40 @@ static int rdb_i_s_perf_context_init(void* const p) /* Support for INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT_GLOBAL dynamic table */ -namespace RDB_PERF_CONTEXT_GLOBAL_FIELD -{ - enum - { - STAT_TYPE= 0, - VALUE - }; -} // namespace RDB_PERF_CONTEXT_GLOBAL_FIELD - -static ST_FIELD_INFO rdb_i_s_perf_context_global_fields_info[]= -{ - ROCKSDB_FIELD_INFO("STAT_TYPE", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("VALUE", sizeof(uint64_t), MYSQL_TYPE_LONGLONG, 0), - ROCKSDB_FIELD_INFO_END -}; +namespace RDB_PERF_CONTEXT_GLOBAL_FIELD { +enum { STAT_TYPE = 0, VALUE }; +} // namespace RDB_PERF_CONTEXT_GLOBAL_FIELD + +static ST_FIELD_INFO rdb_i_s_perf_context_global_fields_info[] = { + ROCKSDB_FIELD_INFO("STAT_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("VALUE", sizeof(uint64_t), MYSQL_TYPE_LONGLONG, 0), + ROCKSDB_FIELD_INFO_END}; static int rdb_i_s_perf_context_global_fill_table( - my_core::THD* const thd, - my_core::TABLE_LIST* const tables, - my_core::Item* const cond __attribute__((__unused__))) -{ + my_core::THD *const thd, my_core::TABLE_LIST *const tables, + my_core::Item *const cond MY_ATTRIBUTE((__unused__))) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(thd != nullptr); DBUG_ASSERT(tables != nullptr); - int ret= 0; - DBUG_ENTER("rdb_i_s_perf_context_global_fill_table"); + int ret = 0; // Get a copy of the global perf counters. 
Rdb_perf_counters global_counters; rdb_get_global_perf_counters(&global_counters); - for (int i= 0; i < PC_MAX_IDX; i++) { + for (int i = 0; i < PC_MAX_IDX; i++) { DBUG_ASSERT(tables->table != nullptr); DBUG_ASSERT(tables->table->field != nullptr); tables->table->field[RDB_PERF_CONTEXT_GLOBAL_FIELD::STAT_TYPE]->store( - rdb_pc_stat_types[i].c_str(), - rdb_pc_stat_types[i].size(), - system_charset_info); + rdb_pc_stat_types[i].c_str(), rdb_pc_stat_types[i].size(), + system_charset_info); tables->table->field[RDB_PERF_CONTEXT_GLOBAL_FIELD::VALUE]->store( - global_counters.m_value[i], true); + global_counters.m_value[i], true); - ret= my_core::schema_table_store_record(thd, tables->table); + ret = my_core::schema_table_store_record(thd, tables->table); if (ret) DBUG_RETURN(ret); } @@ -412,18 +355,17 @@ static int rdb_i_s_perf_context_global_fill_table( DBUG_RETURN(0); } -static int rdb_i_s_perf_context_global_init(void* const p) -{ +static int rdb_i_s_perf_context_global_init(void *const p) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(p != nullptr); my_core::ST_SCHEMA_TABLE *schema; - DBUG_ENTER("rdb_i_s_perf_context_global_init"); - - schema= (my_core::ST_SCHEMA_TABLE*) p; + schema = (my_core::ST_SCHEMA_TABLE *)p; - schema->fields_info= rdb_i_s_perf_context_global_fields_info; - schema->fill_table= rdb_i_s_perf_context_global_fill_table; + schema->fields_info = rdb_i_s_perf_context_global_fields_info; + schema->fill_table = rdb_i_s_perf_context_global_fill_table; DBUG_RETURN(0); } @@ -431,131 +373,124 @@ static int rdb_i_s_perf_context_global_init(void* const p) /* Support for INFORMATION_SCHEMA.ROCKSDB_CFOPTIONS dynamic table */ -namespace RDB_CFOPTIONS_FIELD -{ - enum - { - CF_NAME= 0, - OPTION_TYPE, - VALUE - }; -} // namespace RDB_CFOPTIONS_FIELD - -static ST_FIELD_INFO rdb_i_s_cfoptions_fields_info[] = -{ - ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("OPTION_TYPE", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("VALUE", 
NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO_END -}; +namespace RDB_CFOPTIONS_FIELD { +enum { CF_NAME = 0, OPTION_TYPE, VALUE }; +} // namespace RDB_CFOPTIONS_FIELD + +static ST_FIELD_INFO rdb_i_s_cfoptions_fields_info[] = { + ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("OPTION_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("VALUE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO_END}; static int rdb_i_s_cfoptions_fill_table( - my_core::THD* const thd, - my_core::TABLE_LIST* const tables, - my_core::Item* const cond __attribute__((__unused__))) -{ + my_core::THD *const thd, my_core::TABLE_LIST *const tables, + my_core::Item *const cond MY_ATTRIBUTE((__unused__))) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(thd != nullptr); DBUG_ASSERT(tables != nullptr); bool ret; - DBUG_ENTER("rdb_i_s_cfoptions_fill_table"); - - Rdb_cf_manager& cf_manager= rdb_get_cf_manager(); + Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); - for (const auto &cf_name : cf_manager.get_cf_names()) - { + for (const auto &cf_name : cf_manager.get_cf_names()) { std::string val; rocksdb::ColumnFamilyOptions opts; cf_manager.get_cf_options(cf_name, &opts); std::vector<std::pair<std::string, std::string>> cf_option_types = { - {"COMPARATOR", opts.comparator == nullptr ? "NULL" : - std::string(opts.comparator->Name())}, - {"MERGE_OPERATOR", opts.merge_operator == nullptr ? "NULL" : - std::string(opts.merge_operator->Name())}, - {"COMPACTION_FILTER", opts.compaction_filter == nullptr ? "NULL" : - std::string(opts.compaction_filter->Name())}, - {"COMPACTION_FILTER_FACTORY", - opts.compaction_filter_factory == nullptr ? 
"NULL" : - std::string(opts.compaction_filter_factory->Name())}, - {"WRITE_BUFFER_SIZE", std::to_string(opts.write_buffer_size)}, - {"MAX_WRITE_BUFFER_NUMBER", std::to_string(opts.max_write_buffer_number)}, - {"MIN_WRITE_BUFFER_NUMBER_TO_MERGE", - std::to_string(opts.min_write_buffer_number_to_merge)}, - {"NUM_LEVELS", std::to_string(opts.num_levels)}, - {"LEVEL0_FILE_NUM_COMPACTION_TRIGGER", - std::to_string(opts.level0_file_num_compaction_trigger)}, - {"LEVEL0_SLOWDOWN_WRITES_TRIGGER", - std::to_string(opts.level0_slowdown_writes_trigger)}, - {"LEVEL0_STOP_WRITES_TRIGGER", - std::to_string(opts.level0_stop_writes_trigger)}, - {"MAX_MEM_COMPACTION_LEVEL", std::to_string(opts.max_mem_compaction_level)}, - {"TARGET_FILE_SIZE_BASE", std::to_string(opts.target_file_size_base)}, - {"TARGET_FILE_SIZE_MULTIPLIER", std::to_string(opts.target_file_size_multiplier)}, - {"MAX_BYTES_FOR_LEVEL_BASE", std::to_string(opts.max_bytes_for_level_base)}, - {"LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES", - opts.level_compaction_dynamic_level_bytes ? "ON" : "OFF"}, - {"MAX_BYTES_FOR_LEVEL_MULTIPLIER", - std::to_string(opts.max_bytes_for_level_multiplier)}, - {"SOFT_RATE_LIMIT", std::to_string(opts.soft_rate_limit)}, - {"HARD_RATE_LIMIT", std::to_string(opts.hard_rate_limit)}, - {"RATE_LIMIT_DELAY_MAX_MILLISECONDS", - std::to_string(opts.rate_limit_delay_max_milliseconds)}, - {"ARENA_BLOCK_SIZE", std::to_string(opts.arena_block_size)}, - {"DISABLE_AUTO_COMPACTIONS", - opts.disable_auto_compactions ? "ON" : "OFF"}, - {"PURGE_REDUNDANT_KVS_WHILE_FLUSH", - opts.purge_redundant_kvs_while_flush ? "ON" : "OFF"}, - {"VERIFY_CHECKSUM_IN_COMPACTION", - opts.verify_checksums_in_compaction ? "ON" : "OFF"}, - {"MAX_SEQUENTIAL_SKIP_IN_ITERATIONS", - std::to_string(opts.max_sequential_skip_in_iterations)}, - {"MEMTABLE_FACTORY", - opts.memtable_factory == nullptr ? "NULL" : - opts.memtable_factory->Name()}, - {"INPLACE_UPDATE_SUPPORT", - opts.inplace_update_support ? 
"ON" : "OFF"}, - {"INPLACE_UPDATE_NUM_LOCKS", - opts.inplace_update_num_locks ? "ON" : "OFF"}, - {"MEMTABLE_PREFIX_BLOOM_BITS_RATIO", - std::to_string(opts.memtable_prefix_bloom_size_ratio)}, - {"MEMTABLE_PREFIX_BLOOM_HUGE_PAGE_TLB_SIZE", - std::to_string(opts.memtable_huge_page_size)}, - {"BLOOM_LOCALITY", std::to_string(opts.bloom_locality)}, - {"MAX_SUCCESSIVE_MERGES", - std::to_string(opts.max_successive_merges)}, - {"MIN_PARTIAL_MERGE_OPERANDS", - std::to_string(opts.min_partial_merge_operands)}, - {"OPTIMIZE_FILTERS_FOR_HITS", - (opts.optimize_filters_for_hits ? "ON" : "OFF")}, + {"COMPARATOR", opts.comparator == nullptr + ? "NULL" + : std::string(opts.comparator->Name())}, + {"MERGE_OPERATOR", opts.merge_operator == nullptr + ? "NULL" + : std::string(opts.merge_operator->Name())}, + {"COMPACTION_FILTER", + opts.compaction_filter == nullptr + ? "NULL" + : std::string(opts.compaction_filter->Name())}, + {"COMPACTION_FILTER_FACTORY", + opts.compaction_filter_factory == nullptr + ? "NULL" + : std::string(opts.compaction_filter_factory->Name())}, + {"WRITE_BUFFER_SIZE", std::to_string(opts.write_buffer_size)}, + {"MAX_WRITE_BUFFER_NUMBER", + std::to_string(opts.max_write_buffer_number)}, + {"MIN_WRITE_BUFFER_NUMBER_TO_MERGE", + std::to_string(opts.min_write_buffer_number_to_merge)}, + {"NUM_LEVELS", std::to_string(opts.num_levels)}, + {"LEVEL0_FILE_NUM_COMPACTION_TRIGGER", + std::to_string(opts.level0_file_num_compaction_trigger)}, + {"LEVEL0_SLOWDOWN_WRITES_TRIGGER", + std::to_string(opts.level0_slowdown_writes_trigger)}, + {"LEVEL0_STOP_WRITES_TRIGGER", + std::to_string(opts.level0_stop_writes_trigger)}, + {"MAX_MEM_COMPACTION_LEVEL", + std::to_string(opts.max_mem_compaction_level)}, + {"TARGET_FILE_SIZE_BASE", std::to_string(opts.target_file_size_base)}, + {"TARGET_FILE_SIZE_MULTIPLIER", + std::to_string(opts.target_file_size_multiplier)}, + {"MAX_BYTES_FOR_LEVEL_BASE", + std::to_string(opts.max_bytes_for_level_base)}, + 
{"LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES", + opts.level_compaction_dynamic_level_bytes ? "ON" : "OFF"}, + {"MAX_BYTES_FOR_LEVEL_MULTIPLIER", + std::to_string(opts.max_bytes_for_level_multiplier)}, + {"SOFT_RATE_LIMIT", std::to_string(opts.soft_rate_limit)}, + {"HARD_RATE_LIMIT", std::to_string(opts.hard_rate_limit)}, + {"RATE_LIMIT_DELAY_MAX_MILLISECONDS", + std::to_string(opts.rate_limit_delay_max_milliseconds)}, + {"ARENA_BLOCK_SIZE", std::to_string(opts.arena_block_size)}, + {"DISABLE_AUTO_COMPACTIONS", + opts.disable_auto_compactions ? "ON" : "OFF"}, + {"PURGE_REDUNDANT_KVS_WHILE_FLUSH", + opts.purge_redundant_kvs_while_flush ? "ON" : "OFF"}, + {"VERIFY_CHECKSUM_IN_COMPACTION", + opts.verify_checksums_in_compaction ? "ON" : "OFF"}, + {"MAX_SEQUENTIAL_SKIP_IN_ITERATIONS", + std::to_string(opts.max_sequential_skip_in_iterations)}, + {"MEMTABLE_FACTORY", opts.memtable_factory == nullptr + ? "NULL" + : opts.memtable_factory->Name()}, + {"INPLACE_UPDATE_SUPPORT", opts.inplace_update_support ? "ON" : "OFF"}, + {"INPLACE_UPDATE_NUM_LOCKS", + opts.inplace_update_num_locks ? "ON" : "OFF"}, + {"MEMTABLE_PREFIX_BLOOM_BITS_RATIO", + std::to_string(opts.memtable_prefix_bloom_size_ratio)}, + {"MEMTABLE_PREFIX_BLOOM_HUGE_PAGE_TLB_SIZE", + std::to_string(opts.memtable_huge_page_size)}, + {"BLOOM_LOCALITY", std::to_string(opts.bloom_locality)}, + {"MAX_SUCCESSIVE_MERGES", std::to_string(opts.max_successive_merges)}, + {"MIN_PARTIAL_MERGE_OPERANDS", + std::to_string(opts.min_partial_merge_operands)}, + {"OPTIMIZE_FILTERS_FOR_HITS", + (opts.optimize_filters_for_hits ? "ON" : "OFF")}, }; // get MAX_BYTES_FOR_LEVEL_MULTIPLIER_ADDITIONAL option value val = opts.max_bytes_for_level_multiplier_additional.empty() ? 
"NULL" : ""; - for (const auto &level : opts.max_bytes_for_level_multiplier_additional) - { + for (const auto &level : opts.max_bytes_for_level_multiplier_additional) { val.append(std::to_string(level) + ":"); } val.pop_back(); - cf_option_types.push_back({"MAX_BYTES_FOR_LEVEL_MULTIPLIER_ADDITIONAL", val}); + cf_option_types.push_back( + {"MAX_BYTES_FOR_LEVEL_MULTIPLIER_ADDITIONAL", val}); // get COMPRESSION_TYPE option value GetStringFromCompressionType(&val, opts.compression); - if (val.empty()) - { + if (val.empty()) { val = "NULL"; } cf_option_types.push_back({"COMPRESSION_TYPE", val}); // get COMPRESSION_PER_LEVEL option value val = opts.compression_per_level.empty() ? "NULL" : ""; - for (const auto &compression_type : opts.compression_per_level) - { + for (const auto &compression_type : opts.compression_per_level) { std::string res; GetStringFromCompressionType(&res, compression_type); - if (!res.empty()) - { + if (!res.empty()) { val.append(res + ":"); } } @@ -569,35 +504,42 @@ static int rdb_i_s_cfoptions_fill_table( cf_option_types.push_back({"COMPRESSION_OPTS", val}); // bottommost_compression - if (opts.bottommost_compression) - { + if (opts.bottommost_compression) { std::string res; GetStringFromCompressionType(&res, opts.bottommost_compression); - if (!res.empty()) - { + if (!res.empty()) { cf_option_types.push_back({"BOTTOMMOST_COMPRESSION", res}); } } // get PREFIX_EXTRACTOR option - cf_option_types.push_back({"PREFIX_EXTRACTOR", - opts.prefix_extractor == nullptr ? "NULL" : - std::string(opts.prefix_extractor->Name())}); + cf_option_types.push_back( + {"PREFIX_EXTRACTOR", opts.prefix_extractor == nullptr + ? 
"NULL" + : std::string(opts.prefix_extractor->Name())}); // get COMPACTION_STYLE option - switch (opts.compaction_style) - { - case rocksdb::kCompactionStyleLevel: val = "kCompactionStyleLevel"; break; - case rocksdb::kCompactionStyleUniversal: val = "kCompactionStyleUniversal"; break; - case rocksdb:: kCompactionStyleFIFO: val = "kCompactionStyleFIFO"; break; - case rocksdb:: kCompactionStyleNone: val = "kCompactionStyleNone"; break; - default: val = "NULL"; + switch (opts.compaction_style) { + case rocksdb::kCompactionStyleLevel: + val = "kCompactionStyleLevel"; + break; + case rocksdb::kCompactionStyleUniversal: + val = "kCompactionStyleUniversal"; + break; + case rocksdb::kCompactionStyleFIFO: + val = "kCompactionStyleFIFO"; + break; + case rocksdb::kCompactionStyleNone: + val = "kCompactionStyleNone"; + break; + default: + val = "NULL"; } cf_option_types.push_back({"COMPACTION_STYLE", val}); // get COMPACTION_OPTIONS_UNIVERSAL related options const rocksdb::CompactionOptionsUniversal compac_opts = - opts.compaction_options_universal; + opts.compaction_options_universal; val = "{SIZE_RATIO="; val.append(std::to_string(compac_opts.size_ratio)); val.append("; MIN_MERGE_WIDTH="); @@ -609,105 +551,126 @@ static int rdb_i_s_cfoptions_fill_table( val.append("; COMPRESSION_SIZE_PERCENT="); val.append(std::to_string(compac_opts.compression_size_percent)); val.append("; STOP_STYLE="); - switch (compac_opts.stop_style) - { - case rocksdb::kCompactionStopStyleSimilarSize: - val.append("kCompactionStopStyleSimilarSize}"); break; - case rocksdb::kCompactionStopStyleTotalSize: - val.append("kCompactionStopStyleTotalSize}"); break; - default: val.append("}"); + switch (compac_opts.stop_style) { + case rocksdb::kCompactionStopStyleSimilarSize: + val.append("kCompactionStopStyleSimilarSize}"); + break; + case rocksdb::kCompactionStopStyleTotalSize: + val.append("kCompactionStopStyleTotalSize}"); + break; + default: + val.append("}"); } 
cf_option_types.push_back({"COMPACTION_OPTIONS_UNIVERSAL", val}); // get COMPACTION_OPTION_FIFO option - cf_option_types.push_back({"COMPACTION_OPTION_FIFO::MAX_TABLE_FILES_SIZE", - std::to_string(opts.compaction_options_fifo.max_table_files_size)}); + cf_option_types.push_back( + {"COMPACTION_OPTION_FIFO::MAX_TABLE_FILES_SIZE", + std::to_string(opts.compaction_options_fifo.max_table_files_size)}); // get block-based table related options - const rocksdb::BlockBasedTableOptions& table_options= rdb_get_table_options(); + const rocksdb::BlockBasedTableOptions &table_options = + rdb_get_table_options(); // get BLOCK_BASED_TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS option cf_option_types.push_back( {"BLOCK_BASED_TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS", - table_options.cache_index_and_filter_blocks ? "1" : "0"}); + table_options.cache_index_and_filter_blocks ? "1" : "0"}); // get BLOCK_BASED_TABLE_FACTORY::INDEX_TYPE option value - switch (table_options.index_type) - { - case rocksdb::BlockBasedTableOptions::kBinarySearch: val = "kBinarySearch"; break; - case rocksdb::BlockBasedTableOptions::kHashSearch: val = "kHashSearch"; break; - default: val = "NULL"; + switch (table_options.index_type) { + case rocksdb::BlockBasedTableOptions::kBinarySearch: + val = "kBinarySearch"; + break; + case rocksdb::BlockBasedTableOptions::kHashSearch: + val = "kHashSearch"; + break; + default: + val = "NULL"; } cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::INDEX_TYPE", val}); // get BLOCK_BASED_TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION option value - cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION", - table_options.hash_index_allow_collision ? "ON" : "OFF"}); + cf_option_types.push_back( + {"BLOCK_BASED_TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION", + table_options.hash_index_allow_collision ? 
"ON" : "OFF"}); // get BLOCK_BASED_TABLE_FACTORY::CHECKSUM option value - switch (table_options.checksum) - { - case rocksdb::kNoChecksum: val = "kNoChecksum"; break; - case rocksdb::kCRC32c: val = "kCRC32c"; break; - case rocksdb::kxxHash: val = "kxxHash"; break; - default: val = "NULL"; + switch (table_options.checksum) { + case rocksdb::kNoChecksum: + val = "kNoChecksum"; + break; + case rocksdb::kCRC32c: + val = "kCRC32c"; + break; + case rocksdb::kxxHash: + val = "kxxHash"; + break; + default: + val = "NULL"; } cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::CHECKSUM", val}); // get BLOCK_BASED_TABLE_FACTORY::NO_BLOCK_CACHE option value cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::NO_BLOCK_CACHE", - table_options.no_block_cache ? "ON" : "OFF"}); + table_options.no_block_cache ? "ON" : "OFF"}); // get BLOCK_BASED_TABLE_FACTORY::FILTER_POLICY option - cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::FILTER_POLICY", - table_options.filter_policy == nullptr ? "NULL" : - std::string(table_options.filter_policy->Name())}); + cf_option_types.push_back( + {"BLOCK_BASED_TABLE_FACTORY::FILTER_POLICY", + table_options.filter_policy == nullptr + ? "NULL" + : std::string(table_options.filter_policy->Name())}); // get BLOCK_BASED_TABLE_FACTORY::WHOLE_KEY_FILTERING option cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::WHOLE_KEY_FILTERING", - table_options.whole_key_filtering ? "1" : "0"}); + table_options.whole_key_filtering ? "1" : "0"}); // get BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE option - cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE", - table_options.block_cache == nullptr ? "NULL" : - std::to_string(table_options.block_cache->GetUsage())}); + cf_option_types.push_back( + {"BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE", + table_options.block_cache == nullptr + ? 
"NULL" + : std::to_string(table_options.block_cache->GetUsage())}); // get BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE_COMPRESSED option - cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE_COMPRESSED", - table_options.block_cache_compressed == nullptr ? "NULL" : - std::to_string(table_options.block_cache_compressed->GetUsage())}); + cf_option_types.push_back( + {"BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE_COMPRESSED", + table_options.block_cache_compressed == nullptr + ? "NULL" + : std::to_string( + table_options.block_cache_compressed->GetUsage())}); // get BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE option cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE", - std::to_string(table_options.block_size)}); + std::to_string(table_options.block_size)}); // get BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE_DEVIATION option - cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE_DEVIATION", - std::to_string(table_options.block_size_deviation)}); + cf_option_types.push_back( + {"BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE_DEVIATION", + std::to_string(table_options.block_size_deviation)}); // get BLOCK_BASED_TABLE_FACTORY::BLOCK_RESTART_INTERVAL option - cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::BLOCK_RESTART_INTERVAL", - std::to_string(table_options.block_restart_interval)}); + cf_option_types.push_back( + {"BLOCK_BASED_TABLE_FACTORY::BLOCK_RESTART_INTERVAL", + std::to_string(table_options.block_restart_interval)}); // get BLOCK_BASED_TABLE_FACTORY::FORMAT_VERSION option cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::FORMAT_VERSION", - std::to_string(table_options.format_version)}); + std::to_string(table_options.format_version)}); - for (const auto &cf_option_type : cf_option_types) - { + for (const auto &cf_option_type : cf_option_types) { DBUG_ASSERT(tables->table != nullptr); DBUG_ASSERT(tables->table->field != nullptr); tables->table->field[RDB_CFOPTIONS_FIELD::CF_NAME]->store( - cf_name.c_str(), cf_name.size(), 
system_charset_info); + cf_name.c_str(), cf_name.size(), system_charset_info); tables->table->field[RDB_CFOPTIONS_FIELD::OPTION_TYPE]->store( - cf_option_type.first.c_str(), - cf_option_type.first.size(), - system_charset_info); + cf_option_type.first.c_str(), cf_option_type.first.size(), + system_charset_info); tables->table->field[RDB_CFOPTIONS_FIELD::VALUE]->store( - cf_option_type.second.c_str(), - cf_option_type.second.size(), - system_charset_info); + cf_option_type.second.c_str(), cf_option_type.second.size(), + system_charset_info); ret = my_core::schema_table_store_record(thd, tables->table); @@ -721,35 +684,26 @@ static int rdb_i_s_cfoptions_fill_table( /* Support for INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO dynamic table */ -namespace RDB_GLOBAL_INFO_FIELD -{ - enum - { - TYPE= 0, - NAME, - VALUE - }; +namespace RDB_GLOBAL_INFO_FIELD { +enum { TYPE = 0, NAME, VALUE }; } -static ST_FIELD_INFO rdb_i_s_global_info_fields_info[] = -{ - ROCKSDB_FIELD_INFO("TYPE", FN_REFLEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("NAME", FN_REFLEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("VALUE", FN_REFLEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO_END -}; +static ST_FIELD_INFO rdb_i_s_global_info_fields_info[] = { + ROCKSDB_FIELD_INFO("TYPE", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("NAME", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("VALUE", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO_END}; /* * helper function for rdb_i_s_global_info_fill_table * to insert (TYPE, KEY, VALUE) rows into * information_schema.rocksdb_global_info */ -static int rdb_global_info_fill_row(my_core::THD* const thd, - my_core::TABLE_LIST* const tables, - const char* const type, - const char* const name, - const char* const value) -{ +static int rdb_global_info_fill_row(my_core::THD *const thd, + my_core::TABLE_LIST *const tables, + const char *const type, + const char *const name, + const char *const value) { DBUG_ASSERT(thd != nullptr); 
DBUG_ASSERT(tables != nullptr); DBUG_ASSERT(tables->table != nullptr); @@ -757,56 +711,55 @@ static int rdb_global_info_fill_row(my_core::THD* const thd, DBUG_ASSERT(name != nullptr); DBUG_ASSERT(value != nullptr); - Field **field= tables->table->field; + Field **field = tables->table->field; DBUG_ASSERT(field != nullptr); - field[RDB_GLOBAL_INFO_FIELD::TYPE]->store( - type, strlen(type), system_charset_info); - field[RDB_GLOBAL_INFO_FIELD::NAME]->store( - name, strlen(name), system_charset_info); - field[RDB_GLOBAL_INFO_FIELD::VALUE]->store( - value, strlen(value), system_charset_info); + field[RDB_GLOBAL_INFO_FIELD::TYPE]->store(type, strlen(type), + system_charset_info); + field[RDB_GLOBAL_INFO_FIELD::NAME]->store(name, strlen(name), + system_charset_info); + field[RDB_GLOBAL_INFO_FIELD::VALUE]->store(value, strlen(value), + system_charset_info); return my_core::schema_table_store_record(thd, tables->table); } static int rdb_i_s_global_info_fill_table( - my_core::THD* const thd, - my_core::TABLE_LIST* const tables, - my_core::Item* const cond __attribute__((__unused__))) -{ + my_core::THD *const thd, my_core::TABLE_LIST *const tables, + my_core::Item *const cond MY_ATTRIBUTE((__unused__))) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(thd != nullptr); DBUG_ASSERT(tables != nullptr); - DBUG_ENTER("rdb_i_s_global_info_fill_table"); static const uint32_t INT_BUF_LEN = 21; static const uint32_t GTID_BUF_LEN = 60; static const uint32_t CF_ID_INDEX_BUF_LEN = 60; - int ret= 0; + int ret = 0; /* binlog info */ - Rdb_binlog_manager* const blm= rdb_get_binlog_manager(); + Rdb_binlog_manager *const blm = rdb_get_binlog_manager(); DBUG_ASSERT(blm != nullptr); - char file_buf[FN_REFLEN+1]= {0}; + char file_buf[FN_REFLEN + 1] = {0}; my_off_t pos = 0; - char pos_buf[INT_BUF_LEN]= {0}; - char gtid_buf[GTID_BUF_LEN]= {0}; + char pos_buf[INT_BUF_LEN] = {0}; + char gtid_buf[GTID_BUF_LEN] = {0}; if (blm->read(file_buf, &pos, gtid_buf)) { - snprintf(pos_buf, INT_BUF_LEN, "%lu", (uint64_t) 
pos); + snprintf(pos_buf, INT_BUF_LEN, "%lu", (uint64_t)pos); ret |= rdb_global_info_fill_row(thd, tables, "BINLOG", "FILE", file_buf); ret |= rdb_global_info_fill_row(thd, tables, "BINLOG", "POS", pos_buf); ret |= rdb_global_info_fill_row(thd, tables, "BINLOG", "GTID", gtid_buf); } /* max index info */ - const Rdb_dict_manager* const dict_manager= rdb_get_dict_manager(); + const Rdb_dict_manager *const dict_manager = rdb_get_dict_manager(); DBUG_ASSERT(dict_manager != nullptr); uint32_t max_index_id; - char max_index_id_buf[INT_BUF_LEN]= {0}; + char max_index_id_buf[INT_BUF_LEN] = {0}; if (dict_manager->get_max_index_id(&max_index_id)) { snprintf(max_index_id_buf, INT_BUF_LEN, "%u", max_index_id); @@ -815,32 +768,32 @@ static int rdb_i_s_global_info_fill_table( } /* cf_id -> cf_flags */ - char cf_id_buf[INT_BUF_LEN]= {0}; - char cf_value_buf[FN_REFLEN+1] = {0}; - const Rdb_cf_manager& cf_manager= rdb_get_cf_manager(); + char cf_id_buf[INT_BUF_LEN] = {0}; + char cf_value_buf[FN_REFLEN + 1] = {0}; + const Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); for (const auto &cf_handle : cf_manager.get_all_cf()) { uint flags; dict_manager->get_cf_flags(cf_handle->GetID(), &flags); snprintf(cf_id_buf, INT_BUF_LEN, "%u", cf_handle->GetID()); snprintf(cf_value_buf, FN_REFLEN, "%s [%u]", cf_handle->GetName().c_str(), - flags); + flags); ret |= rdb_global_info_fill_row(thd, tables, "CF_FLAGS", cf_id_buf, - cf_value_buf); + cf_value_buf); if (ret) break; } /* DDL_DROP_INDEX_ONGOING */ - std::vector<GL_INDEX_ID> gl_index_ids; - dict_manager->get_ongoing_index_operation(&gl_index_ids, - Rdb_key_def::DDL_DROP_INDEX_ONGOING); - char cf_id_index_buf[CF_ID_INDEX_BUF_LEN]= {0}; + std::unordered_set<GL_INDEX_ID> gl_index_ids; + dict_manager->get_ongoing_index_operation( + &gl_index_ids, Rdb_key_def::DDL_DROP_INDEX_ONGOING); + char cf_id_index_buf[CF_ID_INDEX_BUF_LEN] = {0}; for (auto gl_index_id : gl_index_ids) { snprintf(cf_id_index_buf, CF_ID_INDEX_BUF_LEN, "cf_id:%u,index_id:%u", 
- gl_index_id.cf_id, gl_index_id.index_id); + gl_index_id.cf_id, gl_index_id.index_id); ret |= rdb_global_info_fill_row(thd, tables, "DDL_DROP_INDEX_ONGOING", - cf_id_index_buf, ""); + cf_id_index_buf, ""); if (ret) break; @@ -849,177 +802,242 @@ static int rdb_i_s_global_info_fill_table( DBUG_RETURN(ret); } +/* + Support for INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS dynamic table + */ +static int rdb_i_s_compact_stats_fill_table( + my_core::THD *thd, my_core::TABLE_LIST *tables, + my_core::Item *cond MY_ATTRIBUTE((__unused__))) { + DBUG_ASSERT(thd != nullptr); + DBUG_ASSERT(tables != nullptr); -namespace // anonymous namespace = not visible outside this source file -{ -struct Rdb_ddl_scanner : public Rdb_tables_scanner + DBUG_ENTER("rdb_i_s_global_compact_stats_table"); + + int ret = 0; + + rocksdb::DB *rdb = rdb_get_rocksdb_db(); + Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); + DBUG_ASSERT(rdb != nullptr); + + for (auto cf_name : cf_manager.get_cf_names()) { + rocksdb::ColumnFamilyHandle *cfh; + bool is_automatic; + /* + Only the cf name is important. Whether it was generated automatically + does not matter, so is_automatic is ignored. 
+ */ + cfh = cf_manager.get_cf(cf_name.c_str(), "", nullptr, &is_automatic); + if (cfh == nullptr) { + continue; + } + std::map<std::string, double> props; + bool bool_ret MY_ATTRIBUTE((__unused__)); + bool_ret = rdb->GetMapProperty(cfh, "rocksdb.cfstats", &props); + DBUG_ASSERT(bool_ret); + + for (auto const &prop_ent : props) { + std::string prop_name = prop_ent.first; + double value = prop_ent.second; + std::size_t del_pos = prop_name.find('.'); + DBUG_ASSERT(del_pos != std::string::npos); + std::string level_str = prop_name.substr(0, del_pos); + std::string type_str = prop_name.substr(del_pos + 1); + + Field **field = tables->table->field; + DBUG_ASSERT(field != nullptr); + field[0]->store(cf_name.c_str(), cf_name.size(), system_charset_info); + field[1]->store(level_str.c_str(), level_str.size(), system_charset_info); + field[2]->store(type_str.c_str(), type_str.size(), system_charset_info); + field[3]->store(value, true); + + ret |= my_core::schema_table_store_record(thd, tables->table); + if (ret != 0) { + DBUG_RETURN(ret); + } + } + } + + DBUG_RETURN(ret); +} + +static ST_FIELD_INFO rdb_i_s_compact_stats_fields_info[] = { + ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("LEVEL", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("TYPE", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("VALUE", sizeof(double), MYSQL_TYPE_DOUBLE, 0), + ROCKSDB_FIELD_INFO_END}; + +namespace // anonymous namespace = not visible outside this source file { - my_core::THD *m_thd; +struct Rdb_ddl_scanner : public Rdb_tables_scanner { + my_core::THD *m_thd; my_core::TABLE *m_table; - int add_table(Rdb_tbl_def* tdef) override; + int add_table(Rdb_tbl_def *tdef) override; }; -} // anonymous namespace +} // anonymous namespace /* Support for INFORMATION_SCHEMA.ROCKSDB_DDL dynamic table */ -namespace RDB_DDL_FIELD -{ - enum - { - TABLE_SCHEMA= 0, - TABLE_NAME, - PARTITION_NAME, - INDEX_NAME, - COLUMN_FAMILY, - INDEX_NUMBER, - 
INDEX_TYPE, - KV_FORMAT_VERSION, - CF - }; -} // namespace RDB_DDL_FIELD - -static ST_FIELD_INFO rdb_i_s_ddl_fields_info[] = -{ - ROCKSDB_FIELD_INFO("TABLE_SCHEMA", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("TABLE_NAME", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("PARTITION_NAME", NAME_LEN+1, MYSQL_TYPE_STRING, - MY_I_S_MAYBE_NULL), - ROCKSDB_FIELD_INFO("INDEX_NAME", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("COLUMN_FAMILY", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), - ROCKSDB_FIELD_INFO("INDEX_NUMBER", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), - ROCKSDB_FIELD_INFO("INDEX_TYPE", sizeof(uint16_t), MYSQL_TYPE_SHORT, 0), - ROCKSDB_FIELD_INFO("KV_FORMAT_VERSION", sizeof(uint16_t), - MYSQL_TYPE_SHORT, 0), - ROCKSDB_FIELD_INFO("CF", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO_END +namespace RDB_DDL_FIELD { +enum { + TABLE_SCHEMA = 0, + TABLE_NAME, + PARTITION_NAME, + INDEX_NAME, + COLUMN_FAMILY, + INDEX_NUMBER, + INDEX_TYPE, + KV_FORMAT_VERSION, + CF }; - -int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) -{ +} // namespace RDB_DDL_FIELD + +static ST_FIELD_INFO rdb_i_s_ddl_fields_info[] = { + ROCKSDB_FIELD_INFO("TABLE_SCHEMA", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("TABLE_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("PARTITION_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, + MY_I_S_MAYBE_NULL), + ROCKSDB_FIELD_INFO("INDEX_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("COLUMN_FAMILY", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), + ROCKSDB_FIELD_INFO("INDEX_NUMBER", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), + ROCKSDB_FIELD_INFO("INDEX_TYPE", sizeof(uint16_t), MYSQL_TYPE_SHORT, 0), + ROCKSDB_FIELD_INFO("KV_FORMAT_VERSION", sizeof(uint16_t), MYSQL_TYPE_SHORT, + 0), + ROCKSDB_FIELD_INFO("CF", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO_END}; + +int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) { DBUG_ASSERT(tdef != nullptr); - int ret= 0; + int ret = 0; DBUG_ASSERT(m_table != 
nullptr); - Field** field= m_table->field; + Field **field = m_table->field; DBUG_ASSERT(field != nullptr); - const std::string& dbname= tdef->base_dbname(); - field[RDB_DDL_FIELD::TABLE_SCHEMA]->store( - dbname.c_str(), dbname.size(), system_charset_info); + const std::string &dbname = tdef->base_dbname(); + field[RDB_DDL_FIELD::TABLE_SCHEMA]->store(dbname.c_str(), dbname.size(), + system_charset_info); - const std::string& tablename= tdef->base_tablename(); - field[RDB_DDL_FIELD::TABLE_NAME]->store( - tablename.c_str(), tablename.size(), system_charset_info); + const std::string &tablename = tdef->base_tablename(); + field[RDB_DDL_FIELD::TABLE_NAME]->store(tablename.c_str(), tablename.size(), + system_charset_info); - const std::string& partname= tdef->base_partition(); - if (partname.length() == 0) - { + const std::string &partname = tdef->base_partition(); + if (partname.length() == 0) { field[RDB_DDL_FIELD::PARTITION_NAME]->set_null(); - } - else - { + } else { field[RDB_DDL_FIELD::PARTITION_NAME]->set_notnull(); field[RDB_DDL_FIELD::PARTITION_NAME]->store( - partname.c_str(), partname.size(), system_charset_info); + partname.c_str(), partname.size(), system_charset_info); } - for (uint i= 0; i < tdef->m_key_count; i++) - { - const Rdb_key_def& kd= *tdef->m_key_descr_arr[i]; + for (uint i = 0; i < tdef->m_key_count; i++) { + const Rdb_key_def &kd = *tdef->m_key_descr_arr[i]; - field[RDB_DDL_FIELD::INDEX_NAME]->store( - kd.m_name.c_str(), kd.m_name.size(), system_charset_info); + field[RDB_DDL_FIELD::INDEX_NAME]->store(kd.m_name.c_str(), kd.m_name.size(), + system_charset_info); GL_INDEX_ID gl_index_id = kd.get_gl_index_id(); field[RDB_DDL_FIELD::COLUMN_FAMILY]->store(gl_index_id.cf_id, true); field[RDB_DDL_FIELD::INDEX_NUMBER]->store(gl_index_id.index_id, true); field[RDB_DDL_FIELD::INDEX_TYPE]->store(kd.m_index_type, true); - field[RDB_DDL_FIELD::KV_FORMAT_VERSION]->store( - kd.m_kv_format_version, true); + 
field[RDB_DDL_FIELD::KV_FORMAT_VERSION]->store(kd.m_kv_format_version, + true); - std::string cf_name= kd.get_cf()->GetName(); - field[RDB_DDL_FIELD::CF]->store( - cf_name.c_str(), cf_name.size(), system_charset_info); + std::string cf_name = kd.get_cf()->GetName(); + field[RDB_DDL_FIELD::CF]->store(cf_name.c_str(), cf_name.size(), + system_charset_info); - ret= my_core::schema_table_store_record(m_thd, m_table); + ret = my_core::schema_table_store_record(m_thd, m_table); if (ret) return ret; } - return 0; + return HA_EXIT_SUCCESS; } -static int rdb_i_s_ddl_fill_table(my_core::THD* const thd, - my_core::TABLE_LIST* const tables, - my_core::Item* const cond) -{ - DBUG_ENTER("rdb_i_s_ddl_fill_table"); +static int rdb_i_s_ddl_fill_table(my_core::THD *const thd, + my_core::TABLE_LIST *const tables, + my_core::Item *const cond) { + DBUG_ENTER_FUNC(); DBUG_ASSERT(thd != nullptr); DBUG_ASSERT(tables != nullptr); Rdb_ddl_scanner ddl_arg; - ddl_arg.m_thd= thd; - ddl_arg.m_table= tables->table; + ddl_arg.m_thd = thd; + ddl_arg.m_table = tables->table; - Rdb_ddl_manager *ddl_manager= rdb_get_ddl_manager(); + Rdb_ddl_manager *ddl_manager = rdb_get_ddl_manager(); DBUG_ASSERT(ddl_manager != nullptr); - int ret= ddl_manager->scan_for_tables(&ddl_arg); + int ret = ddl_manager->scan_for_tables(&ddl_arg); DBUG_RETURN(ret); } -static int rdb_i_s_ddl_init(void* const p) -{ +static int rdb_i_s_ddl_init(void *const p) { + DBUG_ENTER_FUNC(); + my_core::ST_SCHEMA_TABLE *schema; - DBUG_ENTER("rdb_i_s_ddl_init"); DBUG_ASSERT(p != nullptr); - schema= (my_core::ST_SCHEMA_TABLE*) p; + schema = (my_core::ST_SCHEMA_TABLE *)p; - schema->fields_info= rdb_i_s_ddl_fields_info; - schema->fill_table= rdb_i_s_ddl_fill_table; + schema->fields_info = rdb_i_s_ddl_fields_info; + schema->fill_table = rdb_i_s_ddl_fill_table; DBUG_RETURN(0); } -static int rdb_i_s_cfoptions_init(void* const p) -{ +static int rdb_i_s_cfoptions_init(void *const p) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(p != nullptr); + 
my_core::ST_SCHEMA_TABLE *schema; - DBUG_ENTER("rdb_i_s_cfoptions_init"); + schema = (my_core::ST_SCHEMA_TABLE *)p; + + schema->fields_info = rdb_i_s_cfoptions_fields_info; + schema->fill_table = rdb_i_s_cfoptions_fill_table; + + DBUG_RETURN(0); +} + +static int rdb_i_s_global_info_init(void *const p) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(p != nullptr); - schema= (my_core::ST_SCHEMA_TABLE*) p; + my_core::ST_SCHEMA_TABLE *schema; + + schema = reinterpret_cast<my_core::ST_SCHEMA_TABLE *>(p); - schema->fields_info= rdb_i_s_cfoptions_fields_info; - schema->fill_table= rdb_i_s_cfoptions_fill_table; + schema->fields_info = rdb_i_s_global_info_fields_info; + schema->fill_table = rdb_i_s_global_info_fill_table; DBUG_RETURN(0); } -static int rdb_i_s_global_info_init(void* const p) -{ +static int rdb_i_s_compact_stats_init(void *p) { my_core::ST_SCHEMA_TABLE *schema; - DBUG_ENTER("rdb_i_s_global_info_init"); + DBUG_ENTER("rdb_i_s_compact_stats_init"); DBUG_ASSERT(p != nullptr); - schema= reinterpret_cast<my_core::ST_SCHEMA_TABLE*>(p); + schema = reinterpret_cast<my_core::ST_SCHEMA_TABLE *>(p); - schema->fields_info= rdb_i_s_global_info_fields_info; - schema->fill_table= rdb_i_s_global_info_fill_table; + schema->fields_info = rdb_i_s_compact_stats_fields_info; + schema->fill_table = rdb_i_s_compact_stats_fill_table; DBUG_RETURN(0); } /* Given a path to a file return just the filename portion. 
*/ -static std::string rdb_filename_without_path( - const std::string& path) -{ +static std::string rdb_filename_without_path(const std::string &path) { /* Find last slash in path */ const size_t pos = path.rfind('/'); @@ -1035,70 +1053,68 @@ static std::string rdb_filename_without_path( /* Support for INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP dynamic table */ -namespace RDB_INDEX_FILE_MAP_FIELD -{ - enum - { - COLUMN_FAMILY= 0, - INDEX_NUMBER, - SST_NAME, - NUM_ROWS, - DATA_SIZE, - ENTRY_DELETES, - ENTRY_SINGLEDELETES, - ENTRY_MERGES, - ENTRY_OTHERS - }; -} // namespace RDB_INDEX_FILE_MAP_FIELD - -static ST_FIELD_INFO rdb_i_s_index_file_map_fields_info[] = -{ - /* The information_schema.rocksdb_index_file_map virtual table has four - * fields: - * COLUMN_FAMILY => the index's column family contained in the SST file - * INDEX_NUMBER => the index id contained in the SST file - * SST_NAME => the name of the SST file containing some indexes - * NUM_ROWS => the number of entries of this index id in this SST file - * DATA_SIZE => the data size stored in this SST file for this index id */ - ROCKSDB_FIELD_INFO("COLUMN_FAMILY", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), - ROCKSDB_FIELD_INFO("INDEX_NUMBER", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), - ROCKSDB_FIELD_INFO("SST_NAME", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("NUM_ROWS", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0), - ROCKSDB_FIELD_INFO("DATA_SIZE", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0), - ROCKSDB_FIELD_INFO("ENTRY_DELETES", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0), - ROCKSDB_FIELD_INFO("ENTRY_SINGLEDELETES", sizeof(int64_t), - MYSQL_TYPE_LONGLONG, 0), - ROCKSDB_FIELD_INFO("ENTRY_MERGES", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0), - ROCKSDB_FIELD_INFO("ENTRY_OTHERS", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0), - ROCKSDB_FIELD_INFO_END +namespace RDB_INDEX_FILE_MAP_FIELD { +enum { + COLUMN_FAMILY = 0, + INDEX_NUMBER, + SST_NAME, + NUM_ROWS, + DATA_SIZE, + ENTRY_DELETES, + ENTRY_SINGLEDELETES, + ENTRY_MERGES, 
+ ENTRY_OTHERS, + DISTINCT_KEYS_PREFIX }; +} // namespace RDB_INDEX_FILE_MAP_FIELD + +static ST_FIELD_INFO rdb_i_s_index_file_map_fields_info[] = { + /* The information_schema.rocksdb_index_file_map virtual table has four + * fields: + * COLUMN_FAMILY => the index's column family contained in the SST file + * INDEX_NUMBER => the index id contained in the SST file + * SST_NAME => the name of the SST file containing some indexes + * NUM_ROWS => the number of entries of this index id in this SST file + * DATA_SIZE => the data size stored in this SST file for this index id */ + ROCKSDB_FIELD_INFO("COLUMN_FAMILY", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), + ROCKSDB_FIELD_INFO("INDEX_NUMBER", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), + ROCKSDB_FIELD_INFO("SST_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("NUM_ROWS", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0), + ROCKSDB_FIELD_INFO("DATA_SIZE", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0), + ROCKSDB_FIELD_INFO("ENTRY_DELETES", sizeof(int64_t), MYSQL_TYPE_LONGLONG, + 0), + ROCKSDB_FIELD_INFO("ENTRY_SINGLEDELETES", sizeof(int64_t), + MYSQL_TYPE_LONGLONG, 0), + ROCKSDB_FIELD_INFO("ENTRY_MERGES", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0), + ROCKSDB_FIELD_INFO("ENTRY_OTHERS", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0), + ROCKSDB_FIELD_INFO("DISTINCT_KEYS_PREFIX", MAX_REF_PARTS * 25, + MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO_END}; /* Fill the information_schema.rocksdb_index_file_map virtual table */ static int rdb_i_s_index_file_map_fill_table( - my_core::THD* const thd, - my_core::TABLE_LIST* const tables, - my_core::Item* const cond __attribute__((__unused__))) -{ + my_core::THD *const thd, my_core::TABLE_LIST *const tables, + my_core::Item *const cond MY_ATTRIBUTE((__unused__))) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(thd != nullptr); DBUG_ASSERT(tables != nullptr); DBUG_ASSERT(tables->table != nullptr); - int ret = 0; + int ret = 0; Field **field = tables->table->field; DBUG_ASSERT(field != nullptr); - 
DBUG_ENTER("rdb_i_s_index_file_map_fill_table"); - /* Iterate over all the column families */ - rocksdb::DB* const rdb= rdb_get_rocksdb_db(); + rocksdb::DB *const rdb = rdb_get_rocksdb_db(); DBUG_ASSERT(rdb != nullptr); - const Rdb_cf_manager& cf_manager= rdb_get_cf_manager(); + const Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); for (const auto &cf_handle : cf_manager.get_all_cf()) { /* Grab the the properties of all the tables in the column family */ rocksdb::TablePropertiesCollection table_props_collection; - const rocksdb::Status s = rdb->GetPropertiesOfAllTables(cf_handle, - &table_props_collection); + const rocksdb::Status s = + rdb->GetPropertiesOfAllTables(cf_handle, &table_props_collection); if (!s.ok()) { continue; } @@ -1109,7 +1125,7 @@ static int rdb_i_s_index_file_map_fill_table( /* Add the SST name into the output */ const std::string sst_name = rdb_filename_without_path(props.first); field[RDB_INDEX_FILE_MAP_FIELD::SST_NAME]->store( - sst_name.data(), sst_name.size(), system_charset_info); + sst_name.data(), sst_name.size(), system_charset_info); /* Get the __indexstats__ data out of the table property */ std::vector<Rdb_index_stats> stats; @@ -1123,28 +1139,41 @@ static int rdb_i_s_index_file_map_fill_table( field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_SINGLEDELETES]->store(-1, true); field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_MERGES]->store(-1, true); field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_OTHERS]->store(-1, true); - } - else { + } else { for (auto it : stats) { - /* Add the index number, the number of rows, and data size to the output */ + /* Add the index number, the number of rows, and data size to the + * output */ field[RDB_INDEX_FILE_MAP_FIELD::COLUMN_FAMILY]->store( - it.m_gl_index_id.cf_id, true); + it.m_gl_index_id.cf_id, true); field[RDB_INDEX_FILE_MAP_FIELD::INDEX_NUMBER]->store( - it.m_gl_index_id.index_id, true); + it.m_gl_index_id.index_id, true); field[RDB_INDEX_FILE_MAP_FIELD::NUM_ROWS]->store(it.m_rows, true); - 
field[RDB_INDEX_FILE_MAP_FIELD::DATA_SIZE]->store( - it.m_data_size, true); + field[RDB_INDEX_FILE_MAP_FIELD::DATA_SIZE]->store(it.m_data_size, + true); field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_DELETES]->store( - it.m_entry_deletes, true); + it.m_entry_deletes, true); field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_SINGLEDELETES]->store( - it.m_entry_single_deletes, true); + it.m_entry_single_deletes, true); field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_MERGES]->store( - it.m_entry_merges, true); + it.m_entry_merges, true); field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_OTHERS]->store( - it.m_entry_others, true); + it.m_entry_others, true); + std::string distinct_keys_prefix; + + for (size_t i = 0; i < it.m_distinct_keys_per_prefix.size(); i++) { + if (i > 0) { + distinct_keys_prefix += ","; + } + distinct_keys_prefix += + std::to_string(it.m_distinct_keys_per_prefix[i]); + } + + field[RDB_INDEX_FILE_MAP_FIELD::DISTINCT_KEYS_PREFIX]->store( + distinct_keys_prefix.data(), distinct_keys_prefix.size(), + system_charset_info); /* Tell MySQL about this row in the virtual table */ - ret= my_core::schema_table_store_record(thd, tables->table); + ret = my_core::schema_table_store_record(thd, tables->table); if (ret != 0) { break; } @@ -1157,17 +1186,17 @@ static int rdb_i_s_index_file_map_fill_table( } /* Initialize the information_schema.rocksdb_index_file_map virtual table */ -static int rdb_i_s_index_file_map_init(void* const p) -{ - my_core::ST_SCHEMA_TABLE *schema; +static int rdb_i_s_index_file_map_init(void *const p) { + DBUG_ENTER_FUNC(); - DBUG_ENTER("rdb_i_s_index_file_map_init"); DBUG_ASSERT(p != nullptr); - schema= (my_core::ST_SCHEMA_TABLE*) p; + my_core::ST_SCHEMA_TABLE *schema; - schema->fields_info= rdb_i_s_index_file_map_fields_info; - schema->fill_table= rdb_i_s_index_file_map_fill_table; + schema = (my_core::ST_SCHEMA_TABLE *)p; + + schema->fields_info = rdb_i_s_index_file_map_fields_info; + schema->fill_table = rdb_i_s_index_file_map_fill_table; DBUG_RETURN(0); } @@ -1175,67 
+1204,55 @@ static int rdb_i_s_index_file_map_init(void* const p) /* Support for INFORMATION_SCHEMA.ROCKSDB_LOCKS dynamic table */ -namespace RDB_LOCKS_FIELD -{ - enum - { - COLUMN_FAMILY_ID= 0, - TRANSACTION_ID, - KEY, - MODE - }; -} // namespace RDB_LOCKS_FIELD - -static ST_FIELD_INFO rdb_i_s_lock_info_fields_info[] = -{ - ROCKSDB_FIELD_INFO("COLUMN_FAMILY_ID", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), - ROCKSDB_FIELD_INFO("TRANSACTION_ID", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), - ROCKSDB_FIELD_INFO("KEY", FN_REFLEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("MODE", 32, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO_END -}; +namespace RDB_LOCKS_FIELD { +enum { COLUMN_FAMILY_ID = 0, TRANSACTION_ID, KEY, MODE }; +} // namespace RDB_LOCKS_FIELD + +static ST_FIELD_INFO rdb_i_s_lock_info_fields_info[] = { + ROCKSDB_FIELD_INFO("COLUMN_FAMILY_ID", sizeof(uint32_t), MYSQL_TYPE_LONG, + 0), + ROCKSDB_FIELD_INFO("TRANSACTION_ID", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), + ROCKSDB_FIELD_INFO("KEY", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("MODE", 32, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO_END}; /* Fill the information_schema.rocksdb_locks virtual table */ static int rdb_i_s_lock_info_fill_table( - my_core::THD* const thd, - my_core::TABLE_LIST* const tables, - my_core::Item* const cond __attribute__((__unused__))) -{ + my_core::THD *const thd, my_core::TABLE_LIST *const tables, + my_core::Item *const cond MY_ATTRIBUTE((__unused__))) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(thd != nullptr); DBUG_ASSERT(tables != nullptr); DBUG_ASSERT(tables->table != nullptr); int ret = 0; - DBUG_ENTER("rdb_i_s_lock_info_fill_table"); - - rocksdb::TransactionDB* const rdb= rdb_get_rocksdb_db(); + rocksdb::TransactionDB *const rdb = rdb_get_rocksdb_db(); DBUG_ASSERT(rdb != nullptr); /* cf id -> rocksdb::KeyLockInfo */ std::unordered_multimap<uint32_t, rocksdb::KeyLockInfo> lock_info = - rdb->GetLockStatusData(); + rdb->GetLockStatusData(); - for (const auto& lock : lock_info) { 
+ for (const auto &lock : lock_info) { const uint32_t cf_id = lock.first; - const auto& key_lock_info = lock.second; + const auto &key_lock_info = lock.second; const auto key_hexstr = rdb_hexdump(key_lock_info.key.c_str(), - key_lock_info.key.length(), FN_REFLEN); + key_lock_info.key.length(), FN_REFLEN); for (const auto &id : key_lock_info.ids) { - tables->table->field[RDB_LOCKS_FIELD::COLUMN_FAMILY_ID]->store( - cf_id, true); + tables->table->field[RDB_LOCKS_FIELD::COLUMN_FAMILY_ID]->store(cf_id, + true); tables->table->field[RDB_LOCKS_FIELD::TRANSACTION_ID]->store(id, true); tables->table->field[RDB_LOCKS_FIELD::KEY]->store( - key_hexstr.c_str(), key_hexstr.size(), - system_charset_info); + key_hexstr.c_str(), key_hexstr.size(), system_charset_info); tables->table->field[RDB_LOCKS_FIELD::MODE]->store( - key_lock_info.exclusive ? "X" : "S", - 1, system_charset_info); + key_lock_info.exclusive ? "X" : "S", 1, system_charset_info); /* Tell MySQL about this row in the virtual table */ - ret= my_core::schema_table_store_record(thd, tables->table); + ret = my_core::schema_table_store_record(thd, tables->table); if (ret != 0) { break; } @@ -1245,17 +1262,17 @@ static int rdb_i_s_lock_info_fill_table( } /* Initialize the information_schema.rocksdb_lock_info virtual table */ -static int rdb_i_s_lock_info_init(void* const p) -{ - my_core::ST_SCHEMA_TABLE *schema; +static int rdb_i_s_lock_info_init(void *const p) { + DBUG_ENTER_FUNC(); - DBUG_ENTER("rdb_i_s_lock_info_init"); DBUG_ASSERT(p != nullptr); - schema= (my_core::ST_SCHEMA_TABLE*) p; + my_core::ST_SCHEMA_TABLE *schema; + + schema = (my_core::ST_SCHEMA_TABLE *)p; - schema->fields_info= rdb_i_s_lock_info_fields_info; - schema->fill_table= rdb_i_s_lock_info_fill_table; + schema->fields_info = rdb_i_s_lock_info_fields_info; + schema->fill_table = rdb_i_s_lock_info_fill_table; DBUG_RETURN(0); } @@ -1263,106 +1280,100 @@ static int rdb_i_s_lock_info_init(void* const p) /* Support for INFORMATION_SCHEMA.ROCKSDB_TRX 
dynamic table */ -namespace RDB_TRX_FIELD -{ - enum - { - TRANSACTION_ID= 0, - STATE, - NAME, - WRITE_COUNT, - LOCK_COUNT, - TIMEOUT_SEC, - WAITING_KEY, - WAITING_COLUMN_FAMILY_ID, - IS_REPLICATION, - SKIP_TRX_API, - READ_ONLY, - HAS_DEADLOCK_DETECTION, - NUM_ONGOING_BULKLOAD, - THREAD_ID, - QUERY - }; -} // namespace RDB_TRX_FIELD - -static ST_FIELD_INFO rdb_i_s_trx_info_fields_info[] = -{ - ROCKSDB_FIELD_INFO("TRANSACTION_ID", sizeof(ulonglong), - MYSQL_TYPE_LONGLONG, 0), - ROCKSDB_FIELD_INFO("STATE", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("NAME", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("WRITE_COUNT", sizeof(ulonglong), MYSQL_TYPE_LONGLONG, 0), - ROCKSDB_FIELD_INFO("LOCK_COUNT", sizeof(ulonglong), MYSQL_TYPE_LONGLONG, 0), - ROCKSDB_FIELD_INFO("TIMEOUT_SEC", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), - ROCKSDB_FIELD_INFO("WAITING_KEY", FN_REFLEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO("WAITING_COLUMN_FAMILY_ID", sizeof(uint32_t), - MYSQL_TYPE_LONG, 0), - ROCKSDB_FIELD_INFO("IS_REPLICATION", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), - ROCKSDB_FIELD_INFO("SKIP_TRX_API", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), - ROCKSDB_FIELD_INFO("READ_ONLY", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), - ROCKSDB_FIELD_INFO("HAS_DEADLOCK_DETECTION", sizeof(uint32_t), - MYSQL_TYPE_LONG, 0), - ROCKSDB_FIELD_INFO("NUM_ONGOING_BULKLOAD", sizeof(uint32_t), - MYSQL_TYPE_LONG, 0), - ROCKSDB_FIELD_INFO("THREAD_ID", sizeof(ulong), MYSQL_TYPE_LONG, 0), - ROCKSDB_FIELD_INFO("QUERY", NAME_LEN+1, MYSQL_TYPE_STRING, 0), - ROCKSDB_FIELD_INFO_END +namespace RDB_TRX_FIELD { +enum { + TRANSACTION_ID = 0, + STATE, + NAME, + WRITE_COUNT, + LOCK_COUNT, + TIMEOUT_SEC, + WAITING_KEY, + WAITING_COLUMN_FAMILY_ID, + IS_REPLICATION, + SKIP_TRX_API, + READ_ONLY, + HAS_DEADLOCK_DETECTION, + NUM_ONGOING_BULKLOAD, + THREAD_ID, + QUERY }; +} // namespace RDB_TRX_FIELD + +static ST_FIELD_INFO rdb_i_s_trx_info_fields_info[] = { + ROCKSDB_FIELD_INFO("TRANSACTION_ID", sizeof(ulonglong), 
MYSQL_TYPE_LONGLONG, + 0), + ROCKSDB_FIELD_INFO("STATE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("WRITE_COUNT", sizeof(ulonglong), MYSQL_TYPE_LONGLONG, + 0), + ROCKSDB_FIELD_INFO("LOCK_COUNT", sizeof(ulonglong), MYSQL_TYPE_LONGLONG, 0), + ROCKSDB_FIELD_INFO("TIMEOUT_SEC", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), + ROCKSDB_FIELD_INFO("WAITING_KEY", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("WAITING_COLUMN_FAMILY_ID", sizeof(uint32_t), + MYSQL_TYPE_LONG, 0), + ROCKSDB_FIELD_INFO("IS_REPLICATION", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), + ROCKSDB_FIELD_INFO("SKIP_TRX_API", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), + ROCKSDB_FIELD_INFO("READ_ONLY", sizeof(uint32_t), MYSQL_TYPE_LONG, 0), + ROCKSDB_FIELD_INFO("HAS_DEADLOCK_DETECTION", sizeof(uint32_t), + MYSQL_TYPE_LONG, 0), + ROCKSDB_FIELD_INFO("NUM_ONGOING_BULKLOAD", sizeof(uint32_t), + MYSQL_TYPE_LONG, 0), + ROCKSDB_FIELD_INFO("THREAD_ID", sizeof(ulong), MYSQL_TYPE_LONG, 0), + ROCKSDB_FIELD_INFO("QUERY", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO_END}; /* Fill the information_schema.rocksdb_trx virtual table */ static int rdb_i_s_trx_info_fill_table( - my_core::THD* const thd, - my_core::TABLE_LIST* const tables, - my_core::Item* const cond __attribute__((__unused__))) -{ + my_core::THD *const thd, my_core::TABLE_LIST *const tables, + my_core::Item *const cond MY_ATTRIBUTE((__unused__))) { + DBUG_ENTER_FUNC(); + DBUG_ASSERT(thd != nullptr); DBUG_ASSERT(tables != nullptr); DBUG_ASSERT(tables->table != nullptr); int ret = 0; - DBUG_ENTER("rdb_i_s_trx_info_fill_table"); - const std::vector<Rdb_trx_info> &all_trx_info = rdb_get_all_trx_info(); for (const auto &info : all_trx_info) { - auto name_hexstr = rdb_hexdump(info.name.c_str(), info.name.length(), - NAME_LEN); + auto name_hexstr = + rdb_hexdump(info.name.c_str(), info.name.length(), NAME_LEN); auto key_hexstr = rdb_hexdump(info.waiting_key.c_str(), 
info.waiting_key.length(), FN_REFLEN); - tables->table->field[RDB_TRX_FIELD::TRANSACTION_ID]->store( - info.trx_id, true); + tables->table->field[RDB_TRX_FIELD::TRANSACTION_ID]->store(info.trx_id, + true); tables->table->field[RDB_TRX_FIELD::STATE]->store( - info.state.c_str(), info.state.length(), system_charset_info); + info.state.c_str(), info.state.length(), system_charset_info); tables->table->field[RDB_TRX_FIELD::NAME]->store( - name_hexstr.c_str(), name_hexstr.length(), system_charset_info); - tables->table->field[RDB_TRX_FIELD::WRITE_COUNT]->store( - info.write_count, true); - tables->table->field[RDB_TRX_FIELD::LOCK_COUNT]->store( - info.lock_count, true); - tables->table->field[RDB_TRX_FIELD::TIMEOUT_SEC]->store( - info.timeout_sec, false); + name_hexstr.c_str(), name_hexstr.length(), system_charset_info); + tables->table->field[RDB_TRX_FIELD::WRITE_COUNT]->store(info.write_count, + true); + tables->table->field[RDB_TRX_FIELD::LOCK_COUNT]->store(info.lock_count, + true); + tables->table->field[RDB_TRX_FIELD::TIMEOUT_SEC]->store(info.timeout_sec, + false); tables->table->field[RDB_TRX_FIELD::WAITING_KEY]->store( - key_hexstr.c_str(), key_hexstr.length(), system_charset_info); + key_hexstr.c_str(), key_hexstr.length(), system_charset_info); tables->table->field[RDB_TRX_FIELD::WAITING_COLUMN_FAMILY_ID]->store( - info.waiting_cf_id, true); + info.waiting_cf_id, true); tables->table->field[RDB_TRX_FIELD::IS_REPLICATION]->store( - info.is_replication, false); - tables->table->field[RDB_TRX_FIELD::SKIP_TRX_API]->store( - info.skip_trx_api, false); - tables->table->field[RDB_TRX_FIELD::READ_ONLY]->store( - info.read_only, false); + info.is_replication, false); + tables->table->field[RDB_TRX_FIELD::SKIP_TRX_API]->store(info.skip_trx_api, + false); + tables->table->field[RDB_TRX_FIELD::READ_ONLY]->store(info.read_only, + false); tables->table->field[RDB_TRX_FIELD::HAS_DEADLOCK_DETECTION]->store( - info.deadlock_detect, false); + info.deadlock_detect, false); 
tables->table->field[RDB_TRX_FIELD::NUM_ONGOING_BULKLOAD]->store( - info.num_ongoing_bulk_load, false); - tables->table->field[RDB_TRX_FIELD::THREAD_ID]->store( - info.thread_id, true); + info.num_ongoing_bulk_load, false); + tables->table->field[RDB_TRX_FIELD::THREAD_ID]->store(info.thread_id, true); tables->table->field[RDB_TRX_FIELD::QUERY]->store( - info.query_str.c_str(), info.query_str.length(), system_charset_info); + info.query_str.c_str(), info.query_str.length(), system_charset_info); /* Tell MySQL about this row in the virtual table */ - ret= my_core::schema_table_store_record(thd, tables->table); + ret = my_core::schema_table_store_record(thd, tables->table); if (ret != 0) { break; } @@ -1372,197 +1383,202 @@ static int rdb_i_s_trx_info_fill_table( } /* Initialize the information_schema.rocksdb_trx_info virtual table */ -static int rdb_i_s_trx_info_init(void* const p) -{ - my_core::ST_SCHEMA_TABLE *schema; +static int rdb_i_s_trx_info_init(void *const p) { + DBUG_ENTER_FUNC(); - DBUG_ENTER("rdb_i_s_trx_info_init"); DBUG_ASSERT(p != nullptr); - schema= (my_core::ST_SCHEMA_TABLE*) p; + my_core::ST_SCHEMA_TABLE *schema; + + schema = (my_core::ST_SCHEMA_TABLE *)p; - schema->fields_info= rdb_i_s_trx_info_fields_info; - schema->fill_table= rdb_i_s_trx_info_fill_table; + schema->fields_info = rdb_i_s_trx_info_fields_info; + schema->fill_table = rdb_i_s_trx_info_fill_table; DBUG_RETURN(0); } -static int rdb_i_s_deinit(void *p __attribute__((__unused__))) -{ - DBUG_ENTER("rdb_i_s_deinit"); +static int rdb_i_s_deinit(void *p MY_ATTRIBUTE((__unused__))) { + DBUG_ENTER_FUNC(); DBUG_RETURN(0); } -static struct st_mysql_information_schema rdb_i_s_info= -{ MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION }; +static struct st_mysql_information_schema rdb_i_s_info = { + MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION}; + +struct st_mysql_plugin rdb_i_s_cfstats = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &rdb_i_s_info, + "ROCKSDB_CFSTATS", + "Facebook", + "RocksDB column family 
stats", + PLUGIN_LICENSE_GPL, + rdb_i_s_cfstats_init, + rdb_i_s_deinit, + 0x0001, /* version number (0.1) */ + nullptr, /* status variables */ + nullptr, /* system variables */ + nullptr, /* config options */ + 0, /* flags */ +}; -struct st_mysql_plugin rdb_i_s_cfstats= -{ - MYSQL_INFORMATION_SCHEMA_PLUGIN, - &rdb_i_s_info, - "ROCKSDB_CFSTATS", - "Facebook", - "RocksDB column family stats", - PLUGIN_LICENSE_GPL, - rdb_i_s_cfstats_init, - rdb_i_s_deinit, - 0x0001, /* version number (0.1) */ - nullptr, /* status variables */ - nullptr, /* system variables */ - nullptr, /* config options */ - 0, /* flags */ +struct st_mysql_plugin rdb_i_s_dbstats = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &rdb_i_s_info, + "ROCKSDB_DBSTATS", + "Facebook", + "RocksDB database stats", + PLUGIN_LICENSE_GPL, + rdb_i_s_dbstats_init, + rdb_i_s_deinit, + 0x0001, /* version number (0.1) */ + nullptr, /* status variables */ + nullptr, /* system variables */ + nullptr, /* config options */ + 0, /* flags */ }; -struct st_mysql_plugin rdb_i_s_dbstats= -{ - MYSQL_INFORMATION_SCHEMA_PLUGIN, - &rdb_i_s_info, - "ROCKSDB_DBSTATS", - "Facebook", - "RocksDB database stats", - PLUGIN_LICENSE_GPL, - rdb_i_s_dbstats_init, - rdb_i_s_deinit, - 0x0001, /* version number (0.1) */ - nullptr, /* status variables */ - nullptr, /* system variables */ - nullptr, /* config options */ - 0, /* flags */ +struct st_mysql_plugin rdb_i_s_perf_context = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &rdb_i_s_info, + "ROCKSDB_PERF_CONTEXT", + "Facebook", + "RocksDB perf context stats", + PLUGIN_LICENSE_GPL, + rdb_i_s_perf_context_init, + rdb_i_s_deinit, + 0x0001, /* version number (0.1) */ + nullptr, /* status variables */ + nullptr, /* system variables */ + nullptr, /* config options */ + 0, /* flags */ }; -struct st_mysql_plugin rdb_i_s_perf_context= -{ - MYSQL_INFORMATION_SCHEMA_PLUGIN, - &rdb_i_s_info, - "ROCKSDB_PERF_CONTEXT", - "Facebook", - "RocksDB perf context stats", - PLUGIN_LICENSE_GPL, - rdb_i_s_perf_context_init, - 
rdb_i_s_deinit, - 0x0001, /* version number (0.1) */ - nullptr, /* status variables */ - nullptr, /* system variables */ - nullptr, /* config options */ - 0, /* flags */ +struct st_mysql_plugin rdb_i_s_perf_context_global = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &rdb_i_s_info, + "ROCKSDB_PERF_CONTEXT_GLOBAL", + "Facebook", + "RocksDB perf context stats (all)", + PLUGIN_LICENSE_GPL, + rdb_i_s_perf_context_global_init, + rdb_i_s_deinit, + 0x0001, /* version number (0.1) */ + nullptr, /* status variables */ + nullptr, /* system variables */ + nullptr, /* config options */ + 0, /* flags */ }; -struct st_mysql_plugin rdb_i_s_perf_context_global= -{ - MYSQL_INFORMATION_SCHEMA_PLUGIN, - &rdb_i_s_info, - "ROCKSDB_PERF_CONTEXT_GLOBAL", - "Facebook", - "RocksDB perf context stats (all)", - PLUGIN_LICENSE_GPL, - rdb_i_s_perf_context_global_init, - rdb_i_s_deinit, - 0x0001, /* version number (0.1) */ - nullptr, /* status variables */ - nullptr, /* system variables */ - nullptr, /* config options */ - 0, /* flags */ +struct st_mysql_plugin rdb_i_s_cfoptions = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &rdb_i_s_info, + "ROCKSDB_CF_OPTIONS", + "Facebook", + "RocksDB column family options", + PLUGIN_LICENSE_GPL, + rdb_i_s_cfoptions_init, + rdb_i_s_deinit, + 0x0001, /* version number (0.1) */ + nullptr, /* status variables */ + nullptr, /* system variables */ + nullptr, /* config options */ + 0, /* flags */ }; -struct st_mysql_plugin rdb_i_s_cfoptions= -{ - MYSQL_INFORMATION_SCHEMA_PLUGIN, - &rdb_i_s_info, - "ROCKSDB_CF_OPTIONS", - "Facebook", - "RocksDB column family options", - PLUGIN_LICENSE_GPL, - rdb_i_s_cfoptions_init, - rdb_i_s_deinit, - 0x0001, /* version number (0.1) */ - nullptr, /* status variables */ - nullptr, /* system variables */ - nullptr, /* config options */ - 0, /* flags */ +struct st_mysql_plugin rdb_i_s_global_info = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &rdb_i_s_info, + "ROCKSDB_GLOBAL_INFO", + "Facebook", + "RocksDB global info", + PLUGIN_LICENSE_GPL, + 
rdb_i_s_global_info_init, + rdb_i_s_deinit, + 0x0001, /* version number (0.1) */ + nullptr, /* status variables */ + nullptr, /* system variables */ + nullptr, /* config options */ + 0, /* flags */ }; -struct st_mysql_plugin rdb_i_s_global_info= -{ - MYSQL_INFORMATION_SCHEMA_PLUGIN, - &rdb_i_s_info, - "ROCKSDB_GLOBAL_INFO", - "Facebook", - "RocksDB global info", - PLUGIN_LICENSE_GPL, - rdb_i_s_global_info_init, - rdb_i_s_deinit, - 0x0001, /* version number (0.1) */ - nullptr, /* status variables */ - nullptr, /* system variables */ - nullptr, /* config options */ - 0, /* flags */ +struct st_mysql_plugin rdb_i_s_compact_stats = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &rdb_i_s_info, + "ROCKSDB_COMPACTION_STATS", + "Facebook", + "RocksDB compaction stats", + PLUGIN_LICENSE_GPL, + rdb_i_s_compact_stats_init, + rdb_i_s_deinit, + 0x0001, /* version number (0.1) */ + nullptr, /* status variables */ + nullptr, /* system variables */ + nullptr, /* config options */ + 0, /* flags */ }; -struct st_mysql_plugin rdb_i_s_ddl= -{ - MYSQL_INFORMATION_SCHEMA_PLUGIN, - &rdb_i_s_info, - "ROCKSDB_DDL", - "Facebook", - "RocksDB Data Dictionary", - PLUGIN_LICENSE_GPL, - rdb_i_s_ddl_init, - rdb_i_s_deinit, - 0x0001, /* version number (0.1) */ - nullptr, /* status variables */ - nullptr, /* system variables */ - nullptr, /* config options */ - 0, /* flags */ +struct st_mysql_plugin rdb_i_s_ddl = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &rdb_i_s_info, + "ROCKSDB_DDL", + "Facebook", + "RocksDB Data Dictionary", + PLUGIN_LICENSE_GPL, + rdb_i_s_ddl_init, + rdb_i_s_deinit, + 0x0001, /* version number (0.1) */ + nullptr, /* status variables */ + nullptr, /* system variables */ + nullptr, /* config options */ + 0, /* flags */ }; -struct st_mysql_plugin rdb_i_s_index_file_map= -{ - MYSQL_INFORMATION_SCHEMA_PLUGIN, - &rdb_i_s_info, - "ROCKSDB_INDEX_FILE_MAP", - "Facebook", - "RocksDB index file map", - PLUGIN_LICENSE_GPL, - rdb_i_s_index_file_map_init, - rdb_i_s_deinit, - 0x0001, /* version number 
(0.1) */ - nullptr, /* status variables */ - nullptr, /* system variables */ - nullptr, /* config options */ - 0, /* flags */ +struct st_mysql_plugin rdb_i_s_index_file_map = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &rdb_i_s_info, + "ROCKSDB_INDEX_FILE_MAP", + "Facebook", + "RocksDB index file map", + PLUGIN_LICENSE_GPL, + rdb_i_s_index_file_map_init, + rdb_i_s_deinit, + 0x0001, /* version number (0.1) */ + nullptr, /* status variables */ + nullptr, /* system variables */ + nullptr, /* config options */ + 0, /* flags */ }; -struct st_mysql_plugin rdb_i_s_lock_info= -{ - MYSQL_INFORMATION_SCHEMA_PLUGIN, - &rdb_i_s_info, - "ROCKSDB_LOCKS", - "Facebook", - "RocksDB lock information", - PLUGIN_LICENSE_GPL, - rdb_i_s_lock_info_init, - nullptr, - 0x0001, /* version number (0.1) */ - nullptr, /* status variables */ - nullptr, /* system variables */ - nullptr, /* config options */ - 0, /* flags */ +struct st_mysql_plugin rdb_i_s_lock_info = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &rdb_i_s_info, + "ROCKSDB_LOCKS", + "Facebook", + "RocksDB lock information", + PLUGIN_LICENSE_GPL, + rdb_i_s_lock_info_init, + nullptr, + 0x0001, /* version number (0.1) */ + nullptr, /* status variables */ + nullptr, /* system variables */ + nullptr, /* config options */ + 0, /* flags */ }; -struct st_mysql_plugin rdb_i_s_trx_info= -{ - MYSQL_INFORMATION_SCHEMA_PLUGIN, - &rdb_i_s_info, - "ROCKSDB_TRX", - "Facebook", - "RocksDB transaction information", - PLUGIN_LICENSE_GPL, - rdb_i_s_trx_info_init, - nullptr, - 0x0001, /* version number (0.1) */ - nullptr, /* status variables */ - nullptr, /* system variables */ - nullptr, /* config options */ - 0, /* flags */ +struct st_mysql_plugin rdb_i_s_trx_info = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &rdb_i_s_info, + "ROCKSDB_TRX", + "Facebook", + "RocksDB transaction information", + PLUGIN_LICENSE_GPL, + rdb_i_s_trx_info_init, + nullptr, + 0x0001, /* version number (0.1) */ + nullptr, /* status variables */ + nullptr, /* system variables */ + nullptr, /* 
config options */ + 0, /* flags */ }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_i_s.h b/storage/rocksdb/rdb_i_s.h index 5384d237d3b..c684464c996 100644 --- a/storage/rocksdb/rdb_i_s.h +++ b/storage/rocksdb/rdb_i_s.h @@ -26,10 +26,10 @@ extern struct st_mysql_plugin rdb_i_s_dbstats; extern struct st_mysql_plugin rdb_i_s_perf_context; extern struct st_mysql_plugin rdb_i_s_perf_context_global; extern struct st_mysql_plugin rdb_i_s_cfoptions; +extern struct st_mysql_plugin rdb_i_s_compact_stats; extern struct st_mysql_plugin rdb_i_s_global_info; extern struct st_mysql_plugin rdb_i_s_ddl; extern struct st_mysql_plugin rdb_i_s_index_file_map; extern struct st_mysql_plugin rdb_i_s_lock_info; extern struct st_mysql_plugin rdb_i_s_trx_info; -} // namespace myrocks - +} // namespace myrocks diff --git a/storage/rocksdb/rdb_index_merge.cc b/storage/rocksdb/rdb_index_merge.cc index 8544bdf52fd..8f27616e4d9 100644 --- a/storage/rocksdb/rdb_index_merge.cc +++ b/storage/rocksdb/rdb_index_merge.cc @@ -17,47 +17,37 @@ /* This C++ file's header file */ #include "./rdb_index_merge.h" +/* MySQL header files */ +#include "../sql/sql_class.h" + /* MyRocks header files */ #include "./ha_rocksdb.h" #include "./rdb_datadic.h" namespace myrocks { -Rdb_index_merge::Rdb_index_merge(const char* const tmpfile_path, +Rdb_index_merge::Rdb_index_merge(const char *const tmpfile_path, const ulonglong &merge_buf_size, const ulonglong &merge_combine_read_size, - const rocksdb::Comparator* const comparator) : - m_tmpfile_path(tmpfile_path), - m_merge_buf_size(merge_buf_size), - m_merge_combine_read_size(merge_combine_read_size), - m_comparator(comparator), - m_rec_buf_unsorted(nullptr), - m_output_buf(nullptr) -{ -} + const rocksdb::Comparator *const comparator) + : m_tmpfile_path(tmpfile_path), m_merge_buf_size(merge_buf_size), + m_merge_combine_read_size(merge_combine_read_size), + m_comparator(comparator), m_rec_buf_unsorted(nullptr), + m_output_buf(nullptr) {} 
-Rdb_index_merge::~Rdb_index_merge() -{ +Rdb_index_merge::~Rdb_index_merge() { /* Close tmp file, we don't need to worry about deletion, mysql handles it. */ my_close(m_merge_file.fd, MYF(MY_WME)); - - /* There should be no records left in the offset tree */ - DBUG_ASSERT(m_offset_tree.empty()); - - /* There should be no pointers left on the merge heap */ - DBUG_ASSERT(m_merge_min_heap.empty()); } -int Rdb_index_merge::init() -{ +int Rdb_index_merge::init() { /* Create a temporary merge file on disk to store sorted chunks during inplace index creation. */ - if (merge_file_create()) - { + if (merge_file_create()) { return HA_ERR_INTERNAL_ERROR; } @@ -66,46 +56,41 @@ int Rdb_index_merge::init() to disk. They will be written to disk sorted. A sorted tree is used to keep track of the offset of each record within the unsorted buffer. */ - m_rec_buf_unsorted= std::shared_ptr<merge_buf_info>( - new merge_buf_info(m_merge_buf_size)); + m_rec_buf_unsorted = + std::shared_ptr<merge_buf_info>(new merge_buf_info(m_merge_buf_size)); /* Allocate output buffer that will contain sorted block that is written to disk. */ - m_output_buf= std::shared_ptr<merge_buf_info>( - new merge_buf_info(m_merge_buf_size)); + m_output_buf = + std::shared_ptr<merge_buf_info>(new merge_buf_info(m_merge_buf_size)); - return 0; + return HA_EXIT_SUCCESS; } /** Create a merge file in the given location. 
*/ -int Rdb_index_merge::merge_file_create() -{ +int Rdb_index_merge::merge_file_create() { DBUG_ASSERT(m_merge_file.fd == -1); int fd; /* If no path set for tmpfile, use mysql_tmpdir by default */ - if (m_tmpfile_path == nullptr) - { + if (m_tmpfile_path == nullptr) { fd = mysql_tmpfile("myrocks"); - } - else - { + } else { fd = mysql_tmpfile_path(m_tmpfile_path, "myrocks"); } - if (fd < 0) - { + if (fd < 0) { return HA_ERR_INTERNAL_ERROR; } m_merge_file.fd = fd; m_merge_file.num_sort_buffers = 0; - return 0; + return HA_EXIT_SUCCESS; } /** @@ -115,9 +100,7 @@ int Rdb_index_merge::merge_file_create() If buffer in memory is full, write the buffer out to disk sorted using the offset tree, and clear the tree. (Happens in merge_buf_write) */ -int Rdb_index_merge::add(const rocksdb::Slice& key, - const rocksdb::Slice& val) -{ +int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) { /* Adding a record after heap is already created results in error */ DBUG_ASSERT(m_merge_min_heap.empty()); @@ -125,33 +108,30 @@ int Rdb_index_merge::add(const rocksdb::Slice& key, Check if sort buffer is going to be out of space, if so write it out to disk in sorted order using offset tree. */ - const uint total_offset= - RDB_MERGE_CHUNK_LEN + m_rec_buf_unsorted->curr_offset + - RDB_MERGE_KEY_DELIMITER + RDB_MERGE_VAL_DELIMITER + - key.size() + val.size(); - if (total_offset >= m_rec_buf_unsorted->total_size) - { + const uint total_offset = RDB_MERGE_CHUNK_LEN + + m_rec_buf_unsorted->curr_offset + + RDB_MERGE_KEY_DELIMITER + RDB_MERGE_VAL_DELIMITER + + key.size() + val.size(); + if (total_offset >= m_rec_buf_unsorted->total_size) { /* If the offset tree is empty here, that means that the proposed key to add is too large for the buffer. */ - if (m_offset_tree.empty()) - { + if (m_offset_tree.empty()) { // NO_LINT_DEBUG sql_print_error("Sort buffer size is too small to process merge. 
" "Please set merge buffer size to a higher value."); return HA_ERR_INTERNAL_ERROR; } - if (merge_buf_write()) - { + if (merge_buf_write()) { // NO_LINT_DEBUG sql_print_error("Error writing sort buffer to disk."); return HA_ERR_INTERNAL_ERROR; } } - const ulonglong rec_offset= m_rec_buf_unsorted->curr_offset; + const ulonglong rec_offset = m_rec_buf_unsorted->curr_offset; /* Store key and value in temporary unsorted in memory buffer pointed to by @@ -163,14 +143,13 @@ int Rdb_index_merge::add(const rocksdb::Slice& key, m_offset_tree.emplace(m_rec_buf_unsorted->block.get() + rec_offset, m_comparator); - return 0; + return HA_EXIT_SUCCESS; } /** Sort + write merge buffer chunk out to disk. */ -int Rdb_index_merge::merge_buf_write() -{ +int Rdb_index_merge::merge_buf_write() { DBUG_ASSERT(m_merge_file.fd != -1); DBUG_ASSERT(m_rec_buf_unsorted != nullptr); DBUG_ASSERT(m_output_buf != nullptr); @@ -185,8 +164,7 @@ int Rdb_index_merge::merge_buf_write() Iterate through the offset tree. Should be ordered by the secondary key at this point. */ - for (const auto& rec : m_offset_tree) - { + for (const auto &rec : m_offset_tree) { DBUG_ASSERT(m_output_buf->curr_offset <= m_merge_buf_size); /* Read record from offset (should never fail) */ @@ -207,8 +185,7 @@ int Rdb_index_merge::merge_buf_write() then write into the respective merge buffer. */ if (my_seek(m_merge_file.fd, m_merge_file.num_sort_buffers * m_merge_buf_size, - SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) - { + SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) { // NO_LINT_DEBUG sql_print_error("Error seeking to location in merge file on disk."); return HA_ERR_INTERNAL_ERROR; @@ -220,9 +197,8 @@ int Rdb_index_merge::merge_buf_write() burst. 
*/ if (my_write(m_merge_file.fd, m_output_buf->block.get(), - m_output_buf->total_size, MYF(MY_WME | MY_NABP)) || - mysql_file_sync(m_merge_file.fd, MYF(MY_WME))) - { + m_output_buf->total_size, MYF(MY_WME | MY_NABP)) || + mysql_file_sync(m_merge_file.fd, MYF(MY_WME))) { // NO_LINT_DEBUG sql_print_error("Error writing sorted merge buffer to disk."); return HA_ERR_INTERNAL_ERROR; @@ -234,23 +210,21 @@ int Rdb_index_merge::merge_buf_write() /* Reset everything for next run */ merge_reset(); - return 0; + return HA_EXIT_SUCCESS; } /** Prepare n-way merge of n sorted buffers on disk, using a heap sorted by secondary key records. */ -int Rdb_index_merge::merge_heap_prepare() -{ +int Rdb_index_merge::merge_heap_prepare() { DBUG_ASSERT(m_merge_min_heap.empty()); /* If the offset tree is not empty, there are still some records that need to be written to disk. Write them out now. */ - if (!m_offset_tree.empty() && merge_buf_write()) - { + if (!m_offset_tree.empty() && merge_buf_write()) { return HA_ERR_INTERNAL_ERROR; } @@ -260,39 +234,34 @@ int Rdb_index_merge::merge_heap_prepare() For an n-way merge, we need to read chunks of each merge file simultaneously. */ - ulonglong chunk_size= m_merge_combine_read_size/ - m_merge_file.num_sort_buffers; - if (chunk_size >= m_merge_buf_size) - { - chunk_size= m_merge_buf_size; + ulonglong chunk_size = + m_merge_combine_read_size / m_merge_file.num_sort_buffers; + if (chunk_size >= m_merge_buf_size) { + chunk_size = m_merge_buf_size; } /* Allocate buffers for each chunk */ - for (ulonglong i = 0; i < m_merge_file.num_sort_buffers; i++) - { - const auto entry= std::make_shared<merge_heap_entry>(m_comparator); + for (ulonglong i = 0; i < m_merge_file.num_sort_buffers; i++) { + const auto entry = std::make_shared<merge_heap_entry>(m_comparator); /* Read chunk_size bytes from each chunk on disk, and place inside respective chunk buffer. 
*/ - const size_t total_size= - entry->prepare(m_merge_file.fd, i * m_merge_buf_size, chunk_size); + const size_t total_size = + entry->prepare(m_merge_file.fd, i * m_merge_buf_size, chunk_size); - if (total_size == (size_t) - 1) - { + if (total_size == (size_t)-1) { return HA_ERR_INTERNAL_ERROR; } /* Can reach this condition if an index was added on table w/ no rows */ - if (total_size - RDB_MERGE_CHUNK_LEN == 0) - { + if (total_size - RDB_MERGE_CHUNK_LEN == 0) { break; } /* Read the first record from each buffer to initially populate the heap */ - if (entry->read_rec(&entry->key, &entry->val)) - { + if (entry->read_rec(&entry->key, &entry->val)) { // NO_LINT_DEBUG sql_print_error("Chunk size is too small to process merge."); return HA_ERR_INTERNAL_ERROR; @@ -301,14 +270,14 @@ int Rdb_index_merge::merge_heap_prepare() m_merge_min_heap.push(std::move(entry)); } - return 0; + return HA_EXIT_SUCCESS; } /** Create and/or iterate through keys in the merge heap. */ -int Rdb_index_merge::next(rocksdb::Slice* const key, rocksdb::Slice* const val) -{ +int Rdb_index_merge::next(rocksdb::Slice *const key, + rocksdb::Slice *const val) { /* If table fits in one sort buffer, we can optimize by writing the sort buffer directly through to the sstfilewriter instead of @@ -317,20 +286,18 @@ int Rdb_index_merge::next(rocksdb::Slice* const key, rocksdb::Slice* const val) If there are no sort buffer records (alters on empty tables), also exit here. */ - if (m_merge_file.num_sort_buffers == 0) - { - if (m_offset_tree.empty()) - { + if (m_merge_file.num_sort_buffers == 0) { + if (m_offset_tree.empty()) { return -1; } - const auto rec= m_offset_tree.begin(); + const auto rec = m_offset_tree.begin(); /* Read record from offset */ merge_read_rec(rec->block, key, val); m_offset_tree.erase(rec); - return 0; + return HA_EXIT_SUCCESS; } int res; @@ -340,10 +307,8 @@ int Rdb_index_merge::next(rocksdb::Slice* const key, rocksdb::Slice* const val) of the external sort. 
Populate the heap with initial values from each disk chunk. */ - if (m_merge_min_heap.empty()) - { - if ((res= merge_heap_prepare())) - { + if (m_merge_min_heap.empty()) { + if ((res = merge_heap_prepare())) { // NO_LINT_DEBUG sql_print_error("Error during preparation of heap."); return res; @@ -354,7 +319,7 @@ int Rdb_index_merge::next(rocksdb::Slice* const key, rocksdb::Slice* const val) inside the SST file yet. */ merge_heap_top(key, val); - return 0; + return HA_EXIT_SUCCESS; } DBUG_ASSERT(!m_merge_min_heap.empty()); @@ -364,14 +329,13 @@ int Rdb_index_merge::next(rocksdb::Slice* const key, rocksdb::Slice* const val) /** Get current top record from the heap. */ -void Rdb_index_merge::merge_heap_top(rocksdb::Slice* const key, - rocksdb::Slice* const val) -{ +void Rdb_index_merge::merge_heap_top(rocksdb::Slice *const key, + rocksdb::Slice *const val) { DBUG_ASSERT(!m_merge_min_heap.empty()); - const std::shared_ptr<merge_heap_entry>& entry= m_merge_min_heap.top(); - *key= entry->key; - *val= entry->val; + const std::shared_ptr<merge_heap_entry> &entry = m_merge_min_heap.top(); + *key = entry->key; + *val = entry->val; } /** @@ -380,14 +344,13 @@ void Rdb_index_merge::merge_heap_top(rocksdb::Slice* const key, Returns -1 when there are no more records in the heap. */ -int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice* const key, - rocksdb::Slice* const val) -{ +int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice *const key, + rocksdb::Slice *const val) { /* Make a new reference to shared ptr so it doesn't get destroyed during pop(). We are going to push this entry back onto the heap. */ - const std::shared_ptr<merge_heap_entry> entry= m_merge_min_heap.top(); + const std::shared_ptr<merge_heap_entry> entry = m_merge_min_heap.top(); m_merge_min_heap.pop(); /* @@ -397,15 +360,13 @@ int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice* const key, Return without adding entry back onto heap. 
If heap is also empty, we must be finished with merge. */ - if (entry->chunk_info->is_chunk_finished()) - { - if (m_merge_min_heap.empty()) - { + if (entry->chunk_info->is_chunk_finished()) { + if (m_merge_min_heap.empty()) { return -1; } merge_heap_top(key, val); - return 0; + return HA_EXIT_SUCCESS; } /* @@ -417,16 +378,13 @@ int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice* const key, If merge_read_rec fails, it means the either the chunk was cut off or we've reached the end of the respective chunk. */ - if (entry->read_rec(&entry->key, &entry->val)) - { - if (entry->read_next_chunk_from_disk(m_merge_file.fd)) - { + if (entry->read_rec(&entry->key, &entry->val)) { + if (entry->read_next_chunk_from_disk(m_merge_file.fd)) { return HA_ERR_INTERNAL_ERROR; } /* Try reading record again, should never fail. */ - if (entry->read_rec(&entry->key, &entry->val)) - { + if (entry->read_rec(&entry->key, &entry->val)) { return HA_ERR_INTERNAL_ERROR; } } @@ -436,52 +394,46 @@ int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice* const key, /* Return the current top record on heap */ merge_heap_top(key, val); - return 0; + return HA_EXIT_SUCCESS; } -int Rdb_index_merge::merge_heap_entry::read_next_chunk_from_disk(File fd) -{ - if (chunk_info->read_next_chunk_from_disk(fd)) - { - return 1; +int Rdb_index_merge::merge_heap_entry::read_next_chunk_from_disk(File fd) { + if (chunk_info->read_next_chunk_from_disk(fd)) { + return HA_EXIT_FAILURE; } - block= chunk_info->block.get(); - return 0; + block = chunk_info->block.get(); + return HA_EXIT_SUCCESS; } -int Rdb_index_merge::merge_buf_info::read_next_chunk_from_disk(File fd) -{ +int Rdb_index_merge::merge_buf_info::read_next_chunk_from_disk(File fd) { disk_curr_offset += curr_offset; - if (my_seek(fd, disk_curr_offset, SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) - { + if (my_seek(fd, disk_curr_offset, SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) { // NO_LINT_DEBUG sql_print_error("Error seeking to location in merge 
file on disk."); - return 1; + return HA_EXIT_FAILURE; } /* Overwrite the old block */ - const size_t bytes_read= my_read(fd, block.get(), block_len, MYF(MY_WME)); - if (bytes_read == (size_t) -1) - { + const size_t bytes_read = my_read(fd, block.get(), block_len, MYF(MY_WME)); + if (bytes_read == (size_t)-1) { // NO_LINT_DEBUG sql_print_error("Error reading merge file from disk."); - return 1; + return HA_EXIT_FAILURE; } - curr_offset= 0; - return 0; + curr_offset = 0; + return HA_EXIT_SUCCESS; } /** Get records from offset within sort buffer and compare them. Sort by least to greatest. */ -int Rdb_index_merge::merge_record_compare(const uchar* const a_block, - const uchar* const b_block, - const rocksdb::Comparator* const comparator) -{ +int Rdb_index_merge::merge_record_compare( + const uchar *const a_block, const uchar *const b_block, + const rocksdb::Comparator *const comparator) { return comparator->Compare(as_slice(a_block), as_slice(b_block)); } @@ -489,114 +441,103 @@ int Rdb_index_merge::merge_record_compare(const uchar* const a_block, Given an offset in a merge sort buffer, read out the keys + values. After this, block will point to the next record in the buffer. 
**/ -void Rdb_index_merge::merge_read_rec(const uchar* const block, - rocksdb::Slice* const key, - rocksdb::Slice* const val) -{ +void Rdb_index_merge::merge_read_rec(const uchar *const block, + rocksdb::Slice *const key, + rocksdb::Slice *const val) { /* Read key at block offset into key slice and the value into value slice*/ read_slice(key, block); read_slice(val, block + RDB_MERGE_REC_DELIMITER + key->size()); } -void Rdb_index_merge::read_slice(rocksdb::Slice* slice, const uchar* block_ptr) -{ +void Rdb_index_merge::read_slice(rocksdb::Slice *slice, + const uchar *block_ptr) { uint64 slice_len; merge_read_uint64(&block_ptr, &slice_len); - *slice= rocksdb::Slice(reinterpret_cast<const char*>(block_ptr), slice_len); + *slice = rocksdb::Slice(reinterpret_cast<const char *>(block_ptr), slice_len); } -int Rdb_index_merge::merge_heap_entry::read_rec(rocksdb::Slice* const key, - rocksdb::Slice* const val) -{ - const uchar* block_ptr= block; +int Rdb_index_merge::merge_heap_entry::read_rec(rocksdb::Slice *const key, + rocksdb::Slice *const val) { + const uchar *block_ptr = block; const auto orig_offset = chunk_info->curr_offset; const auto orig_block = block; /* Read key at block offset into key slice and the value into value slice*/ - if (read_slice(key, &block_ptr) != 0) - { - return 1; + if (read_slice(key, &block_ptr) != 0) { + return HA_EXIT_FAILURE; } - chunk_info->curr_offset += (uintptr_t) block_ptr - (uintptr_t) block; - block += (uintptr_t) block_ptr - (uintptr_t) block; + chunk_info->curr_offset += (uintptr_t)block_ptr - (uintptr_t)block; + block += (uintptr_t)block_ptr - (uintptr_t)block; - if (read_slice(val, &block_ptr) != 0) - { - chunk_info->curr_offset= orig_offset; - block= orig_block; - return 1; + if (read_slice(val, &block_ptr) != 0) { + chunk_info->curr_offset = orig_offset; + block = orig_block; + return HA_EXIT_FAILURE; } - chunk_info->curr_offset += (uintptr_t) block_ptr - (uintptr_t) block; - block += (uintptr_t) block_ptr - (uintptr_t) block; 
+ chunk_info->curr_offset += (uintptr_t)block_ptr - (uintptr_t)block; + block += (uintptr_t)block_ptr - (uintptr_t)block; - return 0; + return HA_EXIT_SUCCESS; } -int Rdb_index_merge::merge_heap_entry::read_slice(rocksdb::Slice* const slice, - const uchar** block_ptr) -{ - if (!chunk_info->has_space(RDB_MERGE_REC_DELIMITER)) - { - return 1; +int Rdb_index_merge::merge_heap_entry::read_slice(rocksdb::Slice *const slice, + const uchar **block_ptr) { + if (!chunk_info->has_space(RDB_MERGE_REC_DELIMITER)) { + return HA_EXIT_FAILURE; } uint64 slice_len; merge_read_uint64(block_ptr, &slice_len); - if (!chunk_info->has_space(RDB_MERGE_REC_DELIMITER + slice_len)) - { - return 1; + if (!chunk_info->has_space(RDB_MERGE_REC_DELIMITER + slice_len)) { + return HA_EXIT_FAILURE; } - *slice= rocksdb::Slice(reinterpret_cast<const char*>(*block_ptr), slice_len); + *slice = + rocksdb::Slice(reinterpret_cast<const char *>(*block_ptr), slice_len); *block_ptr += slice_len; - return 0; + return HA_EXIT_SUCCESS; } size_t Rdb_index_merge::merge_heap_entry::prepare(File fd, ulonglong f_offset, - ulonglong chunk_size) -{ - chunk_info= std::make_shared<merge_buf_info>(chunk_size); + ulonglong chunk_size) { + chunk_info = std::make_shared<merge_buf_info>(chunk_size); const size_t res = chunk_info->prepare(fd, f_offset); - if (res != (size_t) - 1) - { - block= chunk_info->block.get() + RDB_MERGE_CHUNK_LEN; + if (res != (size_t)-1) { + block = chunk_info->block.get() + RDB_MERGE_CHUNK_LEN; } return res; } -size_t Rdb_index_merge::merge_buf_info::prepare(File fd, ulonglong f_offset) -{ - disk_start_offset= f_offset; - disk_curr_offset= f_offset; +size_t Rdb_index_merge::merge_buf_info::prepare(File fd, ulonglong f_offset) { + disk_start_offset = f_offset; + disk_curr_offset = f_offset; /* Need to position cursor to the chunk it needs to be at on filesystem then read 'chunk_size' bytes into the respective chunk buffer. 
*/ - if (my_seek(fd, f_offset, SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) - { + if (my_seek(fd, f_offset, SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) { // NO_LINT_DEBUG sql_print_error("Error seeking to location in merge file on disk."); - return (size_t) - 1; + return (size_t)-1; } - const size_t bytes_read= my_read(fd, block.get(), total_size, MYF(MY_WME)); - if (bytes_read == (size_t) - 1) - { + const size_t bytes_read = my_read(fd, block.get(), total_size, MYF(MY_WME)); + if (bytes_read == (size_t)-1) { // NO_LINT_DEBUG sql_print_error("Error reading merge file from disk."); - return (size_t) - 1; + return (size_t)-1; } /* Read the first 8 bytes of each chunk, this gives us the actual size of each chunk. */ - const uchar *block_ptr= block.get(); + const uchar *block_ptr = block.get(); merge_read_uint64(&block_ptr, &total_size); curr_offset += RDB_MERGE_CHUNK_LEN; return total_size; @@ -604,27 +545,23 @@ size_t Rdb_index_merge::merge_buf_info::prepare(File fd, ulonglong f_offset) /* Store key and value w/ their respective delimiters at the given offset */ void Rdb_index_merge::merge_buf_info::store_key_value( - const rocksdb::Slice& key, const rocksdb::Slice& val) -{ + const rocksdb::Slice &key, const rocksdb::Slice &val) { store_slice(key); store_slice(val); } -void Rdb_index_merge::merge_buf_info::store_slice(const rocksdb::Slice& slice) -{ +void Rdb_index_merge::merge_buf_info::store_slice(const rocksdb::Slice &slice) { /* Store length delimiter */ merge_store_uint64(&block[curr_offset], slice.size()); /* Store slice data */ memcpy(&block[curr_offset + RDB_MERGE_REC_DELIMITER], slice.data(), - slice.size()); + slice.size()); curr_offset += slice.size() + RDB_MERGE_REC_DELIMITER; } - -void Rdb_index_merge::merge_reset() -{ +void Rdb_index_merge::merge_reset() { /* Either error, or all values in the sort buffer have been written to disk, so we need to clear the offset tree. 
@@ -632,16 +569,14 @@ void Rdb_index_merge::merge_reset() m_offset_tree.clear(); /* Reset sort buffer block */ - if (m_rec_buf_unsorted && m_rec_buf_unsorted->block) - { - m_rec_buf_unsorted->curr_offset= 0; + if (m_rec_buf_unsorted && m_rec_buf_unsorted->block) { + m_rec_buf_unsorted->curr_offset = 0; } /* Reset output buf */ - if (m_output_buf && m_output_buf->block) - { - m_output_buf->curr_offset= 0; + if (m_output_buf && m_output_buf->block) { + m_output_buf->curr_offset = 0; } } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_index_merge.h b/storage/rocksdb/rdb_index_merge.h index 86893bf316c..9d1469fc34e 100644 --- a/storage/rocksdb/rdb_index_merge.h +++ b/storage/rocksdb/rdb_index_merge.h @@ -18,13 +18,13 @@ /* MySQL header files */ #include "../sql/log.h" -#include "./handler.h" /* handler */ -#include "./my_global.h" /* ulonglong */ +#include "./handler.h" /* handler */ +#include "./my_global.h" /* ulonglong */ /* C++ standard header files */ +#include <queue> #include <set> #include <vector> -#include <queue> /* RocksDB header files */ #include "rocksdb/db.h" @@ -46,13 +46,13 @@ class Rdb_key_def; class Rdb_tbl_def; class Rdb_index_merge { - Rdb_index_merge(const Rdb_index_merge& p)= delete; - Rdb_index_merge& operator=(const Rdb_index_merge& p)= delete; + Rdb_index_merge(const Rdb_index_merge &p) = delete; + Rdb_index_merge &operator=(const Rdb_index_merge &p) = delete; - public: +public: /* Information about temporary files used in external merge sort */ struct merge_file_info { - File fd= -1; /* file descriptor */ + File fd = -1; /* file descriptor */ ulong num_sort_buffers; /* number of sort buffers in temp file */ }; @@ -60,40 +60,37 @@ class Rdb_index_merge { struct merge_buf_info { /* heap memory allocated for main memory sort/merge */ std::unique_ptr<uchar[]> block; - const ulonglong block_len; /* amount of data bytes allocated for block above */ + const ulonglong + block_len; /* amount of data bytes allocated for 
block above */ ulonglong curr_offset; /* offset of the record pointer for the block */ ulonglong disk_start_offset; /* where the chunk starts on disk */ - ulonglong disk_curr_offset; /* current offset on disk */ - ulonglong total_size; /* total # of data bytes in chunk */ + ulonglong disk_curr_offset; /* current offset on disk */ + ulonglong total_size; /* total # of data bytes in chunk */ - void store_key_value(const rocksdb::Slice& key, const rocksdb::Slice& val) - __attribute__((__nonnull__)); + void store_key_value(const rocksdb::Slice &key, const rocksdb::Slice &val) + MY_ATTRIBUTE((__nonnull__)); - void store_slice(const rocksdb::Slice& slice) - __attribute__((__nonnull__)); + void store_slice(const rocksdb::Slice &slice) MY_ATTRIBUTE((__nonnull__)); - size_t prepare(File fd, ulonglong f_offset) - __attribute__((__nonnull__)); + size_t prepare(File fd, ulonglong f_offset) MY_ATTRIBUTE((__nonnull__)); int read_next_chunk_from_disk(File fd) - __attribute__((__nonnull__, __warn_unused_result__)); + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - inline bool is_chunk_finished() const - { + inline bool is_chunk_finished() const { return curr_offset + disk_curr_offset - disk_start_offset == total_size; } - inline bool has_space(uint64 needed) const - { + inline bool has_space(uint64 needed) const { return curr_offset + needed <= block_len; } - explicit merge_buf_info(const ulonglong merge_block_size) : - block(nullptr), block_len(merge_block_size), curr_offset(0), - disk_start_offset(0), disk_curr_offset(0), total_size(merge_block_size) - { + explicit merge_buf_info(const ulonglong merge_block_size) + : block(nullptr), block_len(merge_block_size), curr_offset(0), + disk_start_offset(0), disk_curr_offset(0), + total_size(merge_block_size) { /* Will throw an exception if it runs out of memory here */ - block= std::unique_ptr<uchar[]>(new uchar[merge_block_size]); + block = std::unique_ptr<uchar[]>(new uchar[merge_block_size]); /* Initialize entire buffer to 0 
to avoid valgrind errors */ memset(block.get(), 0, merge_block_size); @@ -101,132 +98,121 @@ class Rdb_index_merge { }; /* Represents an entry in the heap during merge phase of external sort */ - struct merge_heap_entry - { + struct merge_heap_entry { std::shared_ptr<merge_buf_info> chunk_info; /* pointer to buffer info */ - uchar* block; /* pointer to heap memory where record is stored */ - const rocksdb::Comparator* const comparator; + uchar *block; /* pointer to heap memory where record is stored */ + const rocksdb::Comparator *const comparator; rocksdb::Slice key; /* current key pointed to by block ptr */ rocksdb::Slice val; size_t prepare(File fd, ulonglong f_offset, ulonglong chunk_size) - __attribute__((__nonnull__)); + MY_ATTRIBUTE((__nonnull__)); int read_next_chunk_from_disk(File fd) - __attribute__((__nonnull__, __warn_unused_result__)); + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int read_rec(rocksdb::Slice* const key, rocksdb::Slice* const val) - __attribute__((__nonnull__, __warn_unused_result__)); + int read_rec(rocksdb::Slice *const key, rocksdb::Slice *const val) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int read_slice(rocksdb::Slice* const slice, const uchar** block_ptr) - __attribute__((__nonnull__, __warn_unused_result__)); + int read_slice(rocksdb::Slice *const slice, const uchar **block_ptr) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - explicit merge_heap_entry(const rocksdb::Comparator* const comparator) : - chunk_info(nullptr), block(nullptr), comparator(comparator) {} + explicit merge_heap_entry(const rocksdb::Comparator *const comparator) + : chunk_info(nullptr), block(nullptr), comparator(comparator) {} }; - struct merge_heap_comparator - { - bool operator() (const std::shared_ptr<merge_heap_entry>& lhs, - const std::shared_ptr<merge_heap_entry>& rhs) - { + struct merge_heap_comparator { + bool operator()(const std::shared_ptr<merge_heap_entry> &lhs, + const std::shared_ptr<merge_heap_entry> &rhs) { 
return lhs->comparator->Compare(rhs->key, lhs->key) < 0; } }; /* Represents a record in unsorted buffer */ - struct merge_record - { - uchar* block; /* points to offset of key in sort buffer */ - const rocksdb::Comparator* const comparator; + struct merge_record { + uchar *block; /* points to offset of key in sort buffer */ + const rocksdb::Comparator *const comparator; - bool operator< (const merge_record &record) const - { + bool operator<(const merge_record &record) const { return merge_record_compare(this->block, record.block, comparator) < 0; } - merge_record(uchar* const block, - const rocksdb::Comparator* const comparator) : - block(block), comparator(comparator) {} + merge_record(uchar *const block, + const rocksdb::Comparator *const comparator) + : block(block), comparator(comparator) {} }; - private: - const char* m_tmpfile_path; - const ulonglong m_merge_buf_size; - const ulonglong m_merge_combine_read_size; - const rocksdb::Comparator* m_comparator; - struct merge_file_info m_merge_file; - std::shared_ptr<merge_buf_info> m_rec_buf_unsorted; - std::shared_ptr<merge_buf_info> m_output_buf; - std::set<merge_record> m_offset_tree; +private: + const char *m_tmpfile_path; + const ulonglong m_merge_buf_size; + const ulonglong m_merge_combine_read_size; + const rocksdb::Comparator *m_comparator; + struct merge_file_info m_merge_file; + std::shared_ptr<merge_buf_info> m_rec_buf_unsorted; + std::shared_ptr<merge_buf_info> m_output_buf; + std::set<merge_record> m_offset_tree; std::priority_queue<std::shared_ptr<merge_heap_entry>, std::vector<std::shared_ptr<merge_heap_entry>>, - merge_heap_comparator> m_merge_min_heap; + merge_heap_comparator> + m_merge_min_heap; - static inline void merge_store_uint64(uchar* const dst, uint64 n) - { + static inline void merge_store_uint64(uchar *const dst, uint64 n) { memcpy(dst, &n, sizeof(n)); } - static inline void merge_read_uint64(const uchar **buf_ptr, uint64* const dst) - { + static inline void merge_read_uint64(const 
uchar **buf_ptr, + uint64 *const dst) { DBUG_ASSERT(buf_ptr != nullptr); memcpy(dst, *buf_ptr, sizeof(uint64)); *buf_ptr += sizeof(uint64); } - static inline rocksdb::Slice as_slice(const uchar* block) - { + static inline rocksdb::Slice as_slice(const uchar *block) { uint64 len; merge_read_uint64(&block, &len); - return rocksdb::Slice(reinterpret_cast<const char*>(block), len); + return rocksdb::Slice(reinterpret_cast<const char *>(block), len); } - static int merge_record_compare(const uchar* a_block, const uchar* b_block, - const rocksdb::Comparator* const comparator) - __attribute__((__nonnull__, __warn_unused_result__)); + static int merge_record_compare(const uchar *a_block, const uchar *b_block, + const rocksdb::Comparator *const comparator) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - void merge_read_rec(const uchar* const block, rocksdb::Slice* const key, - rocksdb::Slice* const val) - __attribute__((__nonnull__)); + void merge_read_rec(const uchar *const block, rocksdb::Slice *const key, + rocksdb::Slice *const val) MY_ATTRIBUTE((__nonnull__)); - void read_slice(rocksdb::Slice* slice, const uchar* block_ptr) - __attribute__((__nonnull__)); + void read_slice(rocksdb::Slice *slice, const uchar *block_ptr) + MY_ATTRIBUTE((__nonnull__)); - public: - Rdb_index_merge(const char* const tmpfile_path, +public: + Rdb_index_merge(const char *const tmpfile_path, const ulonglong &merge_buf_size, const ulonglong &merge_combine_read_size, - const rocksdb::Comparator* const comparator); + const rocksdb::Comparator *const comparator); ~Rdb_index_merge(); - int init() - __attribute__((__nonnull__, __warn_unused_result__)); + int init() MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int merge_file_create() - __attribute__((__nonnull__, __warn_unused_result__)); + int merge_file_create() MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int add(const rocksdb::Slice& key, const rocksdb::Slice& val) - __attribute__((__nonnull__, 
__warn_unused_result__)); + int add(const rocksdb::Slice &key, const rocksdb::Slice &val) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int merge_buf_write() - __attribute__((__nonnull__, __warn_unused_result__)); + int merge_buf_write() MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int next(rocksdb::Slice* const key, rocksdb::Slice* const val) - __attribute__((__nonnull__, __warn_unused_result__)); + int next(rocksdb::Slice *const key, rocksdb::Slice *const val) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int merge_heap_prepare() - __attribute__((__nonnull__, __warn_unused_result__)); + int merge_heap_prepare() MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - void merge_heap_top(rocksdb::Slice* key, rocksdb::Slice* val) - __attribute__((__nonnull__)); + void merge_heap_top(rocksdb::Slice *key, rocksdb::Slice *val) + MY_ATTRIBUTE((__nonnull__)); - int merge_heap_pop_and_get_next(rocksdb::Slice* const key, - rocksdb::Slice* const val) - __attribute__((__nonnull__, __warn_unused_result__)); + int merge_heap_pop_and_get_next(rocksdb::Slice *const key, + rocksdb::Slice *const val) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); void merge_reset(); }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_mutex_wrapper.cc b/storage/rocksdb/rdb_mutex_wrapper.cc index 5b1c9ba4c22..4a50af0c13d 100644 --- a/storage/rocksdb/rdb_mutex_wrapper.cc +++ b/storage/rocksdb/rdb_mutex_wrapper.cc @@ -17,43 +17,35 @@ /* This C++ file's header file */ #include "./rdb_mutex_wrapper.h" +/* MySQL header files */ +#include "../sql/sql_class.h" +#include "../sql/replication.h" + /* MyRocks header files */ #include "./ha_rocksdb.h" #include "./rdb_utils.h" -// Internal MySQL APIs not exposed in any header. 
-extern "C" -{ -void thd_enter_cond(MYSQL_THD thd, mysql_cond_t *cond, mysql_mutex_t *mutex, - const PSI_stage_info *stage, PSI_stage_info *old_stage); -void thd_exit_cond(MYSQL_THD thd, const PSI_stage_info *stage); -} - using namespace rocksdb; namespace myrocks { -static -PSI_stage_info stage_waiting_on_row_lock2= { 0, "Waiting for row lock", 0}; +static PSI_stage_info stage_waiting_on_row_lock2 = {0, "Waiting for row lock", + 0}; -static const int64_t MICROSECS= 1000*1000; +static const int64_t ONE_SECOND_IN_MICROSECS = 1000 * 1000; // A timeout as long as one full non-leap year worth of microseconds is as // good as infinite timeout. -static const int64_t BIG_TIMEOUT= MICROSECS * 60 * 60 * 24 * 365; +static const int64_t ONE_YEAR_IN_MICROSECS = + ONE_SECOND_IN_MICROSECS * 60 * 60 * 24 * 365; -Rdb_cond_var::Rdb_cond_var() { - mysql_cond_init(0, &m_cond, nullptr); -} +Rdb_cond_var::Rdb_cond_var() { mysql_cond_init(0, &m_cond, nullptr); } -Rdb_cond_var::~Rdb_cond_var() { - mysql_cond_destroy(&m_cond); -} +Rdb_cond_var::~Rdb_cond_var() { mysql_cond_destroy(&m_cond); } Status Rdb_cond_var::Wait(const std::shared_ptr<TransactionDBMutex> mutex_arg) { - return WaitFor(mutex_arg, BIG_TIMEOUT); + return WaitFor(mutex_arg, ONE_YEAR_IN_MICROSECS); } - /* @brief Wait on condition variable. 
The caller must make sure that we own @@ -70,32 +62,30 @@ Status Rdb_cond_var::Wait(const std::shared_ptr<TransactionDBMutex> mutex_arg) { Status Rdb_cond_var::WaitFor(const std::shared_ptr<TransactionDBMutex> mutex_arg, - int64_t timeout_micros) -{ - auto *mutex_obj= reinterpret_cast<Rdb_mutex*>(mutex_arg.get()); + int64_t timeout_micros) { + auto *mutex_obj = reinterpret_cast<Rdb_mutex *>(mutex_arg.get()); DBUG_ASSERT(mutex_obj != nullptr); - mysql_mutex_t * const mutex_ptr= &mutex_obj->m_mutex; + mysql_mutex_t *const mutex_ptr = &mutex_obj->m_mutex; - int res= 0; + int res = 0; struct timespec wait_timeout; if (timeout_micros < 0) - timeout_micros= BIG_TIMEOUT; - set_timespec_nsec(wait_timeout, timeout_micros*1000); + timeout_micros = ONE_YEAR_IN_MICROSECS; + set_timespec_nsec(wait_timeout, timeout_micros * 1000); #ifndef STANDALONE_UNITTEST PSI_stage_info old_stage; mysql_mutex_assert_owner(mutex_ptr); - if (current_thd && mutex_obj->m_old_stage_info.count(current_thd) == 0) - { - my_core::thd_enter_cond(current_thd, &m_cond, mutex_ptr, - &stage_waiting_on_row_lock2, &old_stage); + if (current_thd && mutex_obj->m_old_stage_info.count(current_thd) == 0) { + THD_ENTER_COND(current_thd, &m_cond, mutex_ptr, &stage_waiting_on_row_lock2, + &old_stage); /* After the mysql_cond_timedwait we need make this call - my_core::thd_exit_cond(thd, &old_stage); + THD_EXIT_COND(thd, &old_stage); to inform the SQL layer that KILLable wait has ended. However, that will cause mutex to be released. 
Defer the release until the mutex @@ -105,15 +95,14 @@ Rdb_cond_var::WaitFor(const std::shared_ptr<TransactionDBMutex> mutex_arg, } #endif - bool killed= false; + bool killed = false; - do - { - res= mysql_cond_timedwait(&m_cond, mutex_ptr, &wait_timeout); + do { + res = mysql_cond_timedwait(&m_cond, mutex_ptr, &wait_timeout); #ifndef STANDALONE_UNITTEST if (current_thd) - killed= my_core::thd_killed(current_thd); + killed = my_core::thd_killed(current_thd); #endif } while (!killed && res == EINTR); @@ -123,7 +112,6 @@ Rdb_cond_var::WaitFor(const std::shared_ptr<TransactionDBMutex> mutex_arg, return Status::OK(); } - /* @note @@ -154,32 +142,21 @@ Rdb_cond_var::WaitFor(const std::shared_ptr<TransactionDBMutex> mutex_arg, None of this looks like a problem for our use case. */ -void Rdb_cond_var::Notify() -{ - mysql_cond_signal(&m_cond); -} - +void Rdb_cond_var::Notify() { mysql_cond_signal(&m_cond); } /* @note This is called without holding the mutex that's used for waiting on the condition. See ::Notify(). */ -void Rdb_cond_var::NotifyAll() -{ - mysql_cond_broadcast(&m_cond); -} +void Rdb_cond_var::NotifyAll() { mysql_cond_broadcast(&m_cond); } - -Rdb_mutex::Rdb_mutex() -{ +Rdb_mutex::Rdb_mutex() { mysql_mutex_init(0 /* Don't register in P_S. */, &m_mutex, MY_MUTEX_INIT_FAST); } -Rdb_mutex::~Rdb_mutex() { - mysql_mutex_destroy(&m_mutex); -} +Rdb_mutex::~Rdb_mutex() { mysql_mutex_destroy(&m_mutex); } Status Rdb_mutex::Lock() { mysql_mutex_lock(&m_mutex); @@ -192,8 +169,7 @@ Status Rdb_mutex::Lock() { // If implementing a custom version of this class, the implementation may // choose to ignore the timeout. // Return OK on success, or other Status on failure. -Status Rdb_mutex::TryLockFor(int64_t timeout_time __attribute__((__unused__))) -{ +Status Rdb_mutex::TryLockFor(int64_t timeout_time MY_ATTRIBUTE((__unused__))) { /* Note: PThreads API has pthread_mutex_timedlock(), but mysql's mysql_mutex_* wrappers do not wrap that function. 
@@ -202,10 +178,8 @@ Status Rdb_mutex::TryLockFor(int64_t timeout_time __attribute__((__unused__))) return Status::OK(); } - #ifndef STANDALONE_UNITTEST -void Rdb_mutex::set_unlock_action(const PSI_stage_info* const old_stage_arg) -{ +void Rdb_mutex::set_unlock_action(const PSI_stage_info *const old_stage_arg) { DBUG_ASSERT(old_stage_arg != nullptr); mysql_mutex_assert_owner(&m_mutex); @@ -219,17 +193,16 @@ void Rdb_mutex::set_unlock_action(const PSI_stage_info* const old_stage_arg) // Unlock Mutex that was successfully locked by Lock() or TryLockUntil() void Rdb_mutex::UnLock() { #ifndef STANDALONE_UNITTEST - if (m_old_stage_info.count(current_thd) > 0) - { + if (m_old_stage_info.count(current_thd) > 0) { const std::shared_ptr<PSI_stage_info> old_stage = - m_old_stage_info[current_thd]; + m_old_stage_info[current_thd]; m_old_stage_info.erase(current_thd); /* The following will call mysql_mutex_unlock */ - my_core::thd_exit_cond(current_thd, old_stage.get()); + THD_EXIT_COND(current_thd, old_stage.get()); return; } #endif mysql_mutex_unlock(&m_mutex); } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_mutex_wrapper.h b/storage/rocksdb/rdb_mutex_wrapper.h index 6edd78a1167..96ebd77a723 100644 --- a/storage/rocksdb/rdb_mutex_wrapper.h +++ b/storage/rocksdb/rdb_mutex_wrapper.h @@ -32,10 +32,11 @@ namespace myrocks { -class Rdb_mutex: public rocksdb::TransactionDBMutex { - Rdb_mutex(const Rdb_mutex& p) = delete; - Rdb_mutex& operator = (const Rdb_mutex& p)=delete; - public: +class Rdb_mutex : public rocksdb::TransactionDBMutex { + Rdb_mutex(const Rdb_mutex &p) = delete; + Rdb_mutex &operator=(const Rdb_mutex &p) = delete; + +public: Rdb_mutex(); virtual ~Rdb_mutex(); @@ -53,27 +54,27 @@ class Rdb_mutex: public rocksdb::TransactionDBMutex { // TimedOut if timed out, // or other Status on failure. // If returned status is OK, TransactionDB will eventually call UnLock(). 
- virtual rocksdb::Status TryLockFor( - int64_t timeout_time __attribute__((__unused__))) override; + virtual rocksdb::Status + TryLockFor(int64_t timeout_time MY_ATTRIBUTE((__unused__))) override; // Unlock Mutex that was successfully locked by Lock() or TryLockUntil() virtual void UnLock() override; - private: +private: mysql_mutex_t m_mutex; friend class Rdb_cond_var; #ifndef STANDALONE_UNITTEST - void set_unlock_action(const PSI_stage_info* const old_stage_arg); - std::unordered_map<THD*, std::shared_ptr<PSI_stage_info>> m_old_stage_info; + void set_unlock_action(const PSI_stage_info *const old_stage_arg); + std::unordered_map<THD *, std::shared_ptr<PSI_stage_info>> m_old_stage_info; #endif }; +class Rdb_cond_var : public rocksdb::TransactionDBCondVar { + Rdb_cond_var(const Rdb_cond_var &) = delete; + Rdb_cond_var &operator=(const Rdb_cond_var &) = delete; -class Rdb_cond_var: public rocksdb::TransactionDBCondVar { - Rdb_cond_var(const Rdb_cond_var&) = delete; - Rdb_cond_var& operator=(const Rdb_cond_var&) = delete; - public: +public: Rdb_cond_var(); virtual ~Rdb_cond_var(); @@ -112,33 +113,30 @@ class Rdb_cond_var: public rocksdb::TransactionDBCondVar { // Unblocks all threads waiting on *this. virtual void NotifyAll() override; - private: +private: mysql_cond_t m_cond; }; - class Rdb_mutex_factory : public rocksdb::TransactionDBMutexFactory { - public: - Rdb_mutex_factory(const Rdb_mutex_factory&) = delete; - Rdb_mutex_factory& operator=(const Rdb_mutex_factory&) = delete; +public: + Rdb_mutex_factory(const Rdb_mutex_factory &) = delete; + Rdb_mutex_factory &operator=(const Rdb_mutex_factory &) = delete; Rdb_mutex_factory() {} - /* - Override parent class's virtual methods of interrest. - */ + /* + Override parent class's virtual methods of interrest. 
+ */ virtual std::shared_ptr<rocksdb::TransactionDBMutex> AllocateMutex() override { - return - std::make_shared<Rdb_mutex>(); + return std::make_shared<Rdb_mutex>(); } virtual std::shared_ptr<rocksdb::TransactionDBCondVar> AllocateCondVar() override { - return - std::make_shared<Rdb_cond_var>(); + return std::make_shared<Rdb_cond_var>(); } virtual ~Rdb_mutex_factory() {} }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_perf_context.cc b/storage/rocksdb/rdb_perf_context.cc index 88d84061789..5ebc6cdab5c 100644 --- a/storage/rocksdb/rdb_perf_context.cc +++ b/storage/rocksdb/rdb_perf_context.cc @@ -35,71 +35,68 @@ namespace myrocks { // 2. Update sections (A), (B), and (C) below // 3. Update perf_context.test and show_engine.test -std::string rdb_pc_stat_types[]= -{ - // (A) These should be in the same order as the PC enum - "USER_KEY_COMPARISON_COUNT", - "BLOCK_CACHE_HIT_COUNT", - "BLOCK_READ_COUNT", - "BLOCK_READ_BYTE", - "BLOCK_READ_TIME", - "BLOCK_CHECKSUM_TIME", - "BLOCK_DECOMPRESS_TIME", - "INTERNAL_KEY_SKIPPED_COUNT", - "INTERNAL_DELETE_SKIPPED_COUNT", - "GET_SNAPSHOT_TIME", - "GET_FROM_MEMTABLE_TIME", - "GET_FROM_MEMTABLE_COUNT", - "GET_POST_PROCESS_TIME", - "GET_FROM_OUTPUT_FILES_TIME", - "SEEK_ON_MEMTABLE_TIME", - "SEEK_ON_MEMTABLE_COUNT", - "SEEK_CHILD_SEEK_TIME", - "SEEK_CHILD_SEEK_COUNT", - "SEEK_IN_HEAP_TIME", - "SEEK_INTERNAL_SEEK_TIME", - "FIND_NEXT_USER_ENTRY_TIME", - "WRITE_WAL_TIME", - "WRITE_MEMTABLE_TIME", - "WRITE_DELAY_TIME", - "WRITE_PRE_AND_POST_PROCESS_TIME", - "DB_MUTEX_LOCK_NANOS", - "DB_CONDITION_WAIT_NANOS", - "MERGE_OPERATOR_TIME_NANOS", - "READ_INDEX_BLOCK_NANOS", - "READ_FILTER_BLOCK_NANOS", - "NEW_TABLE_BLOCK_ITER_NANOS", - "NEW_TABLE_ITERATOR_NANOS", - "BLOCK_SEEK_NANOS", - "FIND_TABLE_NANOS", - "IO_THREAD_POOL_ID", - "IO_BYTES_WRITTEN", - "IO_BYTES_READ", - "IO_OPEN_NANOS", - "IO_ALLOCATE_NANOS", - "IO_WRITE_NANOS", - "IO_READ_NANOS", - "IO_RANGE_SYNC_NANOS", - "IO_LOGGER_NANOS" -}; - -#define 
IO_PERF_RECORD(_field_) \ - do { \ - if (rocksdb::perf_context._field_ > 0) \ - counters->m_value[idx] += rocksdb::perf_context._field_; \ - idx++; \ +std::string rdb_pc_stat_types[] = { + // (A) These should be in the same order as the PC enum + "USER_KEY_COMPARISON_COUNT", + "BLOCK_CACHE_HIT_COUNT", + "BLOCK_READ_COUNT", + "BLOCK_READ_BYTE", + "BLOCK_READ_TIME", + "BLOCK_CHECKSUM_TIME", + "BLOCK_DECOMPRESS_TIME", + "INTERNAL_KEY_SKIPPED_COUNT", + "INTERNAL_DELETE_SKIPPED_COUNT", + "GET_SNAPSHOT_TIME", + "GET_FROM_MEMTABLE_TIME", + "GET_FROM_MEMTABLE_COUNT", + "GET_POST_PROCESS_TIME", + "GET_FROM_OUTPUT_FILES_TIME", + "SEEK_ON_MEMTABLE_TIME", + "SEEK_ON_MEMTABLE_COUNT", + "SEEK_CHILD_SEEK_TIME", + "SEEK_CHILD_SEEK_COUNT", + "SEEK_IN_HEAP_TIME", + "SEEK_INTERNAL_SEEK_TIME", + "FIND_NEXT_USER_ENTRY_TIME", + "WRITE_WAL_TIME", + "WRITE_MEMTABLE_TIME", + "WRITE_DELAY_TIME", + "WRITE_PRE_AND_POST_PROCESS_TIME", + "DB_MUTEX_LOCK_NANOS", + "DB_CONDITION_WAIT_NANOS", + "MERGE_OPERATOR_TIME_NANOS", + "READ_INDEX_BLOCK_NANOS", + "READ_FILTER_BLOCK_NANOS", + "NEW_TABLE_BLOCK_ITER_NANOS", + "NEW_TABLE_ITERATOR_NANOS", + "BLOCK_SEEK_NANOS", + "FIND_TABLE_NANOS", + "IO_THREAD_POOL_ID", + "IO_BYTES_WRITTEN", + "IO_BYTES_READ", + "IO_OPEN_NANOS", + "IO_ALLOCATE_NANOS", + "IO_WRITE_NANOS", + "IO_READ_NANOS", + "IO_RANGE_SYNC_NANOS", + "IO_LOGGER_NANOS"}; + +#define IO_PERF_RECORD(_field_) \ + do { \ + if (rocksdb::perf_context._field_ > 0) \ + counters->m_value[idx] += rocksdb::perf_context._field_; \ + idx++; \ } while (0) -#define IO_STAT_RECORD(_field_) \ - do { \ - if (rocksdb::iostats_context._field_ > 0) \ - counters->m_value[idx] += rocksdb::iostats_context._field_; \ - idx++; \ +#define IO_STAT_RECORD(_field_) \ + do { \ + if (rocksdb::iostats_context._field_ > 0) \ + counters->m_value[idx] += rocksdb::iostats_context._field_; \ + idx++; \ } while (0) -static void harvest_diffs(Rdb_atomic_perf_counters * const counters) -{ +static void harvest_diffs(Rdb_atomic_perf_counters 
*const counters) { // (C) These should be in the same order as the PC enum - size_t idx= 0; + size_t idx = 0; IO_PERF_RECORD(user_key_comparison_count); IO_PERF_RECORD(block_cache_hit_count); IO_PERF_RECORD(block_read_count); @@ -148,35 +145,29 @@ static void harvest_diffs(Rdb_atomic_perf_counters * const counters) #undef IO_PERF_DIFF #undef IO_STAT_DIFF - static Rdb_atomic_perf_counters rdb_global_perf_counters; -void rdb_get_global_perf_counters(Rdb_perf_counters* const counters) -{ +void rdb_get_global_perf_counters(Rdb_perf_counters *const counters) { DBUG_ASSERT(counters != nullptr); counters->load(rdb_global_perf_counters); } -void Rdb_perf_counters::load(const Rdb_atomic_perf_counters &atomic_counters) -{ - for (int i= 0; i < PC_MAX_IDX; i++) { - m_value[i]= atomic_counters.m_value[i].load(std::memory_order_relaxed); +void Rdb_perf_counters::load(const Rdb_atomic_perf_counters &atomic_counters) { + for (int i = 0; i < PC_MAX_IDX; i++) { + m_value[i] = atomic_counters.m_value[i].load(std::memory_order_relaxed); } } -bool Rdb_io_perf::start(const uint32_t perf_context_level) -{ - const rocksdb::PerfLevel perf_level= - static_cast<rocksdb::PerfLevel>(perf_context_level); +bool Rdb_io_perf::start(const uint32_t perf_context_level) { + const rocksdb::PerfLevel perf_level = + static_cast<rocksdb::PerfLevel>(perf_context_level); - if (rocksdb::GetPerfLevel() != perf_level) - { + if (rocksdb::GetPerfLevel() != perf_level) { rocksdb::SetPerfLevel(perf_level); } - if (perf_level == rocksdb::kDisable) - { + if (perf_level == rocksdb::kDisable) { return false; } @@ -185,38 +176,33 @@ bool Rdb_io_perf::start(const uint32_t perf_context_level) return true; } -void Rdb_io_perf::end_and_record(const uint32_t perf_context_level) -{ - const rocksdb::PerfLevel perf_level= - static_cast<rocksdb::PerfLevel>(perf_context_level); +void Rdb_io_perf::end_and_record(const uint32_t perf_context_level) { + const rocksdb::PerfLevel perf_level = + 
static_cast<rocksdb::PerfLevel>(perf_context_level); - if (perf_level == rocksdb::kDisable) - { + if (perf_level == rocksdb::kDisable) { return; } - if (m_atomic_counters) - { + if (m_atomic_counters) { harvest_diffs(m_atomic_counters); } harvest_diffs(&rdb_global_perf_counters); - if (m_shared_io_perf_read && - (rocksdb::perf_context.block_read_byte != 0 || - rocksdb::perf_context.block_read_count != 0 || - rocksdb::perf_context.block_read_time != 0)) - { + if (m_shared_io_perf_read && (rocksdb::perf_context.block_read_byte != 0 || + rocksdb::perf_context.block_read_count != 0 || + rocksdb::perf_context.block_read_time != 0)) { my_io_perf_t io_perf_read; io_perf_read.init(); - io_perf_read.bytes= rocksdb::perf_context.block_read_byte; - io_perf_read.requests= rocksdb::perf_context.block_read_count; + io_perf_read.bytes = rocksdb::perf_context.block_read_byte; + io_perf_read.requests = rocksdb::perf_context.block_read_count; /* Rocksdb does not distinguish between I/O service and wait time, so just use svc time. 
*/ - io_perf_read.svc_time_max= io_perf_read.svc_time= + io_perf_read.svc_time_max = io_perf_read.svc_time = rocksdb::perf_context.block_read_time; m_shared_io_perf_read->sum(io_perf_read); @@ -224,17 +210,15 @@ void Rdb_io_perf::end_and_record(const uint32_t perf_context_level) } if (m_stats) { - if (rocksdb::perf_context.internal_key_skipped_count != 0) - { + if (rocksdb::perf_context.internal_key_skipped_count != 0) { m_stats->key_skipped += rocksdb::perf_context.internal_key_skipped_count; } - if (rocksdb::perf_context.internal_delete_skipped_count != 0) - { + if (rocksdb::perf_context.internal_delete_skipped_count != 0) { m_stats->delete_skipped += rocksdb::perf_context.internal_delete_skipped_count; } } } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_perf_context.h b/storage/rocksdb/rdb_perf_context.h index e6439c2e613..87c6426342e 100644 --- a/storage/rocksdb/rdb_perf_context.h +++ b/storage/rocksdb/rdb_perf_context.h @@ -80,8 +80,7 @@ class Rdb_perf_counters; A collection of performance counters that can be safely incremented by multiple threads since it stores atomic datapoints. */ -struct Rdb_atomic_perf_counters -{ +struct Rdb_atomic_perf_counters { std::atomic_ullong m_value[PC_MAX_IDX]; }; @@ -89,11 +88,11 @@ struct Rdb_atomic_perf_counters A collection of performance counters that is meant to be incremented by a single thread. 
*/ -class Rdb_perf_counters -{ - Rdb_perf_counters(const Rdb_perf_counters&) = delete; - Rdb_perf_counters& operator=(const Rdb_perf_counters&) = delete; - public: +class Rdb_perf_counters { + Rdb_perf_counters(const Rdb_perf_counters &) = delete; + Rdb_perf_counters &operator=(const Rdb_perf_counters &) = delete; + +public: Rdb_perf_counters() = default; uint64_t m_value[PC_MAX_IDX]; @@ -105,36 +104,34 @@ extern std::string rdb_pc_stat_types[PC_MAX_IDX]; /* Perf timers for data reads */ -class Rdb_io_perf -{ +class Rdb_io_perf { // Context management - Rdb_atomic_perf_counters *m_atomic_counters= nullptr; - my_io_perf_atomic_t *m_shared_io_perf_read= nullptr; - ha_statistics *m_stats= nullptr; - - public: - Rdb_io_perf(const Rdb_io_perf&) = delete; - Rdb_io_perf& operator=(const Rdb_io_perf&) = delete; - - void init(Rdb_atomic_perf_counters* const atomic_counters, - my_io_perf_atomic_t* const shared_io_perf_read, - ha_statistics* const stats) - { + Rdb_atomic_perf_counters *m_atomic_counters = nullptr; + my_io_perf_atomic_t *m_shared_io_perf_read = nullptr; + ha_statistics *m_stats = nullptr; + +public: + Rdb_io_perf(const Rdb_io_perf &) = delete; + Rdb_io_perf &operator=(const Rdb_io_perf &) = delete; + + void init(Rdb_atomic_perf_counters *const atomic_counters, + my_io_perf_atomic_t *const shared_io_perf_read, + ha_statistics *const stats) { DBUG_ASSERT(atomic_counters != nullptr); DBUG_ASSERT(shared_io_perf_read != nullptr); DBUG_ASSERT(stats != nullptr); - m_atomic_counters= atomic_counters; - m_shared_io_perf_read= shared_io_perf_read; - m_stats= stats; + m_atomic_counters = atomic_counters; + m_shared_io_perf_read = shared_io_perf_read; + m_stats = stats; } bool start(const uint32_t perf_context_level); void end_and_record(const uint32_t perf_context_level); - explicit Rdb_io_perf() : m_atomic_counters(nullptr), - m_shared_io_perf_read(nullptr), - m_stats(nullptr) {} + explicit Rdb_io_perf() + : m_atomic_counters(nullptr), m_shared_io_perf_read(nullptr), + 
m_stats(nullptr) {} }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_sst_info.cc b/storage/rocksdb/rdb_sst_info.cc index ce457cc73a7..cfbefb2ce6d 100644 --- a/storage/rocksdb/rdb_sst_info.cc +++ b/storage/rocksdb/rdb_sst_info.cc @@ -37,26 +37,20 @@ namespace myrocks { -Rdb_sst_file::Rdb_sst_file(rocksdb::DB* const db, - rocksdb::ColumnFamilyHandle* const cf, - const rocksdb::DBOptions& db_options, - const std::string& name, const bool tracing) : - m_db(db), - m_cf(cf), - m_db_options(db_options), - m_sst_file_writer(nullptr), - m_name(name), - m_tracing(tracing) -{ +Rdb_sst_file::Rdb_sst_file(rocksdb::DB *const db, + rocksdb::ColumnFamilyHandle *const cf, + const rocksdb::DBOptions &db_options, + const std::string &name, const bool tracing) + : m_db(db), m_cf(cf), m_db_options(db_options), m_sst_file_writer(nullptr), + m_name(name), m_tracing(tracing) { DBUG_ASSERT(db != nullptr); DBUG_ASSERT(cf != nullptr); } -Rdb_sst_file::~Rdb_sst_file() -{ +Rdb_sst_file::~Rdb_sst_file() { // Make sure we clean up delete m_sst_file_writer; - m_sst_file_writer= nullptr; + m_sst_file_writer = nullptr; // In case something went wrong attempt to delete the temporary file. 
// If everything went fine that file will have been renamed and this @@ -64,98 +58,86 @@ Rdb_sst_file::~Rdb_sst_file() std::remove(m_name.c_str()); } -rocksdb::Status Rdb_sst_file::open() -{ +rocksdb::Status Rdb_sst_file::open() { DBUG_ASSERT(m_sst_file_writer == nullptr); rocksdb::ColumnFamilyDescriptor cf_descr; - rocksdb::Status s= m_cf->GetDescriptor(&cf_descr); - if (!s.ok()) - { + rocksdb::Status s = m_cf->GetDescriptor(&cf_descr); + if (!s.ok()) { return s; } // Create an sst file writer with the current options and comparator - const rocksdb::Comparator* comparator= m_cf->GetComparator(); + const rocksdb::Comparator *comparator = m_cf->GetComparator(); const rocksdb::EnvOptions env_options(m_db_options); const rocksdb::Options options(m_db_options, cf_descr.options); - m_sst_file_writer= + m_sst_file_writer = new rocksdb::SstFileWriter(env_options, options, comparator, m_cf); - s= m_sst_file_writer->Open(m_name); - if (m_tracing) - { + s = m_sst_file_writer->Open(m_name); + if (m_tracing) { // NO_LINT_DEBUG sql_print_information("SST Tracing: Open(%s) returned %s", m_name.c_str(), s.ok() ? 
"ok" : "not ok"); } - if (!s.ok()) - { + if (!s.ok()) { delete m_sst_file_writer; - m_sst_file_writer= nullptr; + m_sst_file_writer = nullptr; } return s; } -rocksdb::Status Rdb_sst_file::put(const rocksdb::Slice& key, - const rocksdb::Slice& value) -{ +rocksdb::Status Rdb_sst_file::put(const rocksdb::Slice &key, + const rocksdb::Slice &value) { DBUG_ASSERT(m_sst_file_writer != nullptr); // Add the specified key/value to the sst file writer return m_sst_file_writer->Add(key, value); } -std::string Rdb_sst_file::generateKey(const std::string& key) -{ - static char const hexdigit[]= { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' - }; +std::string Rdb_sst_file::generateKey(const std::string &key) { + static char const hexdigit[] = {'0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; std::string res; res.reserve(key.size() * 2); - for (auto ch : key) - { - res += hexdigit[((uint8_t) ch) >> 4]; - res += hexdigit[((uint8_t) ch) & 0x0F]; + for (auto ch : key) { + res += hexdigit[((uint8_t)ch) >> 4]; + res += hexdigit[((uint8_t)ch) & 0x0F]; } return res; } // This function is run by the background thread -rocksdb::Status Rdb_sst_file::commit() -{ +rocksdb::Status Rdb_sst_file::commit() { DBUG_ASSERT(m_sst_file_writer != nullptr); rocksdb::Status s; - rocksdb::ExternalSstFileInfo fileinfo; ///Finish may should be modified + rocksdb::ExternalSstFileInfo fileinfo; /// Finish may should be modified // Close out the sst file - s= m_sst_file_writer->Finish(&fileinfo); - if (m_tracing) - { + s = m_sst_file_writer->Finish(&fileinfo); + if (m_tracing) { // NO_LINT_DEBUG sql_print_information("SST Tracing: Finish returned %s", s.ok() ? 
"ok" : "not ok"); } - if (s.ok()) - { - if (m_tracing) - { + if (s.ok()) { + if (m_tracing) { // NO_LINT_DEBUG sql_print_information("SST Tracing: Adding file %s, smallest key: %s, " "largest key: %s, file size: %" PRIu64 ", " - "num_entries: %" PRIu64, fileinfo.file_path.c_str(), + "num_entries: %" PRIu64, + fileinfo.file_path.c_str(), generateKey(fileinfo.smallest_key).c_str(), generateKey(fileinfo.largest_key).c_str(), fileinfo.file_size, fileinfo.num_entries); @@ -169,10 +151,9 @@ rocksdb::Status Rdb_sst_file::commit() opts.snapshot_consistency = false; opts.allow_global_seqno = false; opts.allow_blocking_flush = false; - s= m_db->IngestExternalFile(m_cf, { m_name }, opts); + s = m_db->IngestExternalFile(m_cf, {m_name}, opts); - if (m_tracing) - { + if (m_tracing) { // NO_LINT_DEBUG sql_print_information("SST Tracing: AddFile(%s) returned %s", fileinfo.file_path.c_str(), @@ -181,106 +162,84 @@ rocksdb::Status Rdb_sst_file::commit() } delete m_sst_file_writer; - m_sst_file_writer= nullptr; + m_sst_file_writer = nullptr; return s; } -Rdb_sst_info::Rdb_sst_info(rocksdb::DB* const db, const std::string& tablename, - const std::string& indexname, - rocksdb::ColumnFamilyHandle* const cf, - const rocksdb::DBOptions& db_options, - const bool& tracing) : - m_db(db), - m_cf(cf), - m_db_options(db_options), - m_curr_size(0), - m_sst_count(0), - m_error_msg(""), +Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename, + const std::string &indexname, + rocksdb::ColumnFamilyHandle *const cf, + const rocksdb::DBOptions &db_options, + const bool &tracing) + : m_db(db), m_cf(cf), m_db_options(db_options), m_curr_size(0), + m_sst_count(0), m_error_msg(""), #if defined(RDB_SST_INFO_USE_THREAD) - m_queue(), - m_mutex(), - m_cond(), - m_thread(nullptr), - m_finished(false), + m_queue(), m_mutex(), m_cond(), m_thread(nullptr), m_finished(false), #endif - m_sst_file(nullptr), - m_tracing(tracing) -{ - m_prefix= db->GetName() + "/"; + m_sst_file(nullptr), 
m_tracing(tracing) { + m_prefix = db->GetName() + "/"; std::string normalized_table; - if (rdb_normalize_tablename(tablename.c_str(), &normalized_table)) - { + if (rdb_normalize_tablename(tablename.c_str(), &normalized_table)) { // We failed to get a normalized table name. This should never happen, // but handle it anyway. - m_prefix += "fallback_" + - std::to_string( - reinterpret_cast<intptr_t>(reinterpret_cast<void*>(this))) + "_" + - indexname + "_"; - } - else - { + m_prefix += "fallback_" + std::to_string(reinterpret_cast<intptr_t>( + reinterpret_cast<void *>(this))) + + "_" + indexname + "_"; + } else { m_prefix += normalized_table + "_" + indexname + "_"; } rocksdb::ColumnFamilyDescriptor cf_descr; - const rocksdb::Status s= m_cf->GetDescriptor(&cf_descr); - if (!s.ok()) - { + const rocksdb::Status s = m_cf->GetDescriptor(&cf_descr); + if (!s.ok()) { // Default size if we can't get the cf's target size - m_max_size= 64*1024*1024; - } - else - { + m_max_size = 64 * 1024 * 1024; + } else { // Set the maximum size to 3 times the cf's target size - m_max_size= cf_descr.options.target_file_size_base * 3; + m_max_size = cf_descr.options.target_file_size_base * 3; } } -Rdb_sst_info::~Rdb_sst_info() -{ +Rdb_sst_info::~Rdb_sst_info() { DBUG_ASSERT(m_sst_file == nullptr); #if defined(RDB_SST_INFO_USE_THREAD) DBUG_ASSERT(m_thread == nullptr); #endif } -int Rdb_sst_info::open_new_sst_file() -{ +int Rdb_sst_info::open_new_sst_file() { DBUG_ASSERT(m_sst_file == nullptr); // Create the new sst file's name - const std::string name= m_prefix + std::to_string(m_sst_count++) + m_suffix; + const std::string name = m_prefix + std::to_string(m_sst_count++) + m_suffix; // Create the new sst file object - m_sst_file= new Rdb_sst_file(m_db, m_cf, m_db_options, name, m_tracing); + m_sst_file = new Rdb_sst_file(m_db, m_cf, m_db_options, name, m_tracing); // Open the sst file - const rocksdb::Status s= m_sst_file->open(); - if (!s.ok()) - { + const rocksdb::Status s = 
m_sst_file->open(); + if (!s.ok()) { set_error_msg(s.ToString()); delete m_sst_file; - m_sst_file= nullptr; - return 1; + m_sst_file = nullptr; + return HA_EXIT_FAILURE; } - m_curr_size= 0; + m_curr_size = 0; - return 0; + return HA_EXIT_SUCCESS; } -void Rdb_sst_info::close_curr_sst_file() -{ +void Rdb_sst_info::close_curr_sst_file() { DBUG_ASSERT(m_sst_file != nullptr); DBUG_ASSERT(m_curr_size > 0); #if defined(RDB_SST_INFO_USE_THREAD) - if (m_thread == nullptr) - { + if (m_thread == nullptr) { // We haven't already started a background thread, so start one - m_thread= new std::thread(thread_fcn, this); + m_thread = new std::thread(thread_fcn, this); } DBUG_ASSERT(m_thread != nullptr); @@ -294,9 +253,8 @@ void Rdb_sst_info::close_curr_sst_file() // Notify the background thread that there is a new entry in the queue m_cond.notify_one(); #else - const rocksdb::Status s= m_sst_file->commit(); - if (!s.ok()) - { + const rocksdb::Status s = m_sst_file->commit(); + if (!s.ok()) { set_error_msg(s.ToString()); } @@ -304,34 +262,28 @@ void Rdb_sst_info::close_curr_sst_file() #endif // Reset for next sst file - m_sst_file= nullptr; - m_curr_size= 0; + m_sst_file = nullptr; + m_curr_size = 0; } -int Rdb_sst_info::put(const rocksdb::Slice& key, - const rocksdb::Slice& value) -{ +int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) { int rc; - if (m_curr_size >= m_max_size) - { + if (m_curr_size >= m_max_size) { // The current sst file has reached its maximum, close it out close_curr_sst_file(); // While we are here, check to see if we have had any errors from the // background thread - we don't want to wait for the end to report them - if (!m_error_msg.empty()) - { - return 1; + if (!m_error_msg.empty()) { + return HA_EXIT_FAILURE; } } - if (m_curr_size == 0) - { + if (m_curr_size == 0) { // We don't have an sst file open - open one - rc= open_new_sst_file(); - if (rc != 0) - { + rc = open_new_sst_file(); + if (rc != 0) { return rc; } } @@ -339,51 
+291,45 @@ int Rdb_sst_info::put(const rocksdb::Slice& key, DBUG_ASSERT(m_sst_file != nullptr); // Add the key/value to the current sst file - const rocksdb::Status s= m_sst_file->put(key, value); - if (!s.ok()) - { + const rocksdb::Status s = m_sst_file->put(key, value); + if (!s.ok()) { set_error_msg(s.ToString()); - return 1; + return HA_EXIT_FAILURE; } m_curr_size += key.size() + value.size(); - return 0; + return HA_EXIT_SUCCESS; } -int Rdb_sst_info::commit() -{ - if (m_curr_size > 0) - { +int Rdb_sst_info::commit() { + if (m_curr_size > 0) { // Close out any existing files close_curr_sst_file(); } #if defined(RDB_SST_INFO_USE_THREAD) - if (m_thread != nullptr) - { + if (m_thread != nullptr) { // Tell the background thread we are done - m_finished= true; + m_finished = true; m_cond.notify_one(); // Wait for the background thread to finish m_thread->join(); delete m_thread; - m_thread= nullptr; + m_thread = nullptr; } #endif // Did we get any errors? - if (!m_error_msg.empty()) - { - return 1; + if (!m_error_msg.empty()) { + return HA_EXIT_FAILURE; } - return 0; + return HA_EXIT_SUCCESS; } -void Rdb_sst_info::set_error_msg(const std::string& msg) -{ +void Rdb_sst_info::set_error_msg(const std::string &msg) { #if defined(RDB_SST_INFO_USE_THREAD) // Both the foreground and background threads can set the error message // so lock the mutex to protect it. 
We only want the first error that @@ -391,41 +337,35 @@ void Rdb_sst_info::set_error_msg(const std::string& msg) const std::lock_guard<std::mutex> guard(m_mutex); #endif my_printf_error(ER_UNKNOWN_ERROR, "bulk load error: %s", MYF(0), msg.c_str()); - if (m_error_msg.empty()) - { - m_error_msg= msg; + if (m_error_msg.empty()) { + m_error_msg = msg; } } #if defined(RDB_SST_INFO_USE_THREAD) // Static thread function - the Rdb_sst_info object is in 'object' -void Rdb_sst_info::thread_fcn(void* object) -{ - reinterpret_cast<Rdb_sst_info*>(object)->run_thread(); +void Rdb_sst_info::thread_fcn(void *object) { + reinterpret_cast<Rdb_sst_info *>(object)->run_thread(); } -void Rdb_sst_info::run_thread() -{ +void Rdb_sst_info::run_thread() { const std::unique_lock<std::mutex> lk(m_mutex); - do - { + do { // Wait for notification or 1 second to pass m_cond.wait_for(lk, std::chrono::seconds(1)); // Inner loop pulls off all Rdb_sst_file entries and processes them - while (!m_queue.empty()) - { - const Rdb_sst_file* const sst_file= m_queue.front(); + while (!m_queue.empty()) { + const Rdb_sst_file *const sst_file = m_queue.front(); m_queue.pop(); // Release the lock - we don't want to hold it while committing the file lk.unlock(); // Close out the sst file and add it to the database - const rocksdb::Status s= sst_file->commit(); - if (!s.ok()) - { + const rocksdb::Status s = sst_file->commit(); + if (!s.ok()) { set_error_msg(s.ToString()); } @@ -443,14 +383,12 @@ void Rdb_sst_info::run_thread() } #endif -void Rdb_sst_info::init(const rocksdb::DB* const db) -{ - const std::string path= db->GetName() + FN_DIRSEP; - struct st_my_dir* const dir_info= my_dir(path.c_str(), MYF(MY_DONT_SORT)); +void Rdb_sst_info::init(const rocksdb::DB *const db) { + const std::string path = db->GetName() + FN_DIRSEP; + struct st_my_dir *const dir_info = my_dir(path.c_str(), MYF(MY_DONT_SORT)); // Access the directory - if (dir_info == nullptr) - { + if (dir_info == nullptr) { // NO_LINT_DEBUG 
sql_print_warning("RocksDB: Could not access database directory: %s", path.c_str()); @@ -458,16 +396,14 @@ void Rdb_sst_info::init(const rocksdb::DB* const db) } // Scan through the files in the directory - const struct fileinfo* file_info= dir_info->dir_entry; - for (uint ii= 0; ii < dir_info->number_off_files; ii++, file_info++) - { + const struct fileinfo *file_info = dir_info->dir_entry; + for (uint ii = 0; ii < dir_info->number_off_files; ii++, file_info++) { // find any files ending with m_suffix ... - const std::string name= file_info->name; - const size_t pos= name.find(m_suffix); - if (pos != std::string::npos && name.size() - pos == m_suffix.size()) - { + const std::string name = file_info->name; + const size_t pos = name.find(m_suffix); + if (pos != std::string::npos && name.size() - pos == m_suffix.size()) { // ... and remove them - const std::string fullname= path + name; + const std::string fullname = path + name; my_delete(fullname.c_str(), MYF(0)); } } @@ -476,5 +412,5 @@ void Rdb_sst_info::init(const rocksdb::DB* const db) my_dirend(dir_info); } -std::string Rdb_sst_info::m_suffix= ".bulk_load.tmp"; -} // namespace myrocks +std::string Rdb_sst_info::m_suffix = ".bulk_load.tmp"; +} // namespace myrocks diff --git a/storage/rocksdb/rdb_sst_info.h b/storage/rocksdb/rdb_sst_info.h index 933357c8f08..45d44fc848b 100644 --- a/storage/rocksdb/rdb_sst_info.h +++ b/storage/rocksdb/rdb_sst_info.h @@ -33,78 +33,77 @@ namespace myrocks { class Rdb_sst_file { - private: - Rdb_sst_file(const Rdb_sst_file& p)= delete; - Rdb_sst_file& operator=(const Rdb_sst_file& p)= delete; - - rocksdb::DB* const m_db; - rocksdb::ColumnFamilyHandle* const m_cf; - const rocksdb::DBOptions& m_db_options; - rocksdb::SstFileWriter* m_sst_file_writer; - const std::string m_name; - const bool m_tracing; - - std::string generateKey(const std::string& key); - - public: - Rdb_sst_file(rocksdb::DB* const db, - rocksdb::ColumnFamilyHandle* const cf, - const rocksdb::DBOptions& db_options, 
const std::string& name, +private: + Rdb_sst_file(const Rdb_sst_file &p) = delete; + Rdb_sst_file &operator=(const Rdb_sst_file &p) = delete; + + rocksdb::DB *const m_db; + rocksdb::ColumnFamilyHandle *const m_cf; + const rocksdb::DBOptions &m_db_options; + rocksdb::SstFileWriter *m_sst_file_writer; + const std::string m_name; + const bool m_tracing; + + std::string generateKey(const std::string &key); + +public: + Rdb_sst_file(rocksdb::DB *const db, rocksdb::ColumnFamilyHandle *const cf, + const rocksdb::DBOptions &db_options, const std::string &name, const bool tracing); ~Rdb_sst_file(); rocksdb::Status open(); - rocksdb::Status put(const rocksdb::Slice& key, const rocksdb::Slice& value); + rocksdb::Status put(const rocksdb::Slice &key, const rocksdb::Slice &value); rocksdb::Status commit(); }; class Rdb_sst_info { - private: - Rdb_sst_info(const Rdb_sst_info& p)= delete; - Rdb_sst_info& operator=(const Rdb_sst_info& p)= delete; - - rocksdb::DB* const m_db; - rocksdb::ColumnFamilyHandle* const m_cf; - const rocksdb::DBOptions& m_db_options; - uint64_t m_curr_size; - uint64_t m_max_size; - uint m_sst_count; - std::string m_error_msg; - std::string m_prefix; - static std::string m_suffix; +private: + Rdb_sst_info(const Rdb_sst_info &p) = delete; + Rdb_sst_info &operator=(const Rdb_sst_info &p) = delete; + + rocksdb::DB *const m_db; + rocksdb::ColumnFamilyHandle *const m_cf; + const rocksdb::DBOptions &m_db_options; + uint64_t m_curr_size; + uint64_t m_max_size; + uint m_sst_count; + std::string m_error_msg; + std::string m_prefix; + static std::string m_suffix; #if defined(RDB_SST_INFO_USE_THREAD) - std::queue<Rdb_sst_file*> m_queue; - std::mutex m_mutex; - std::condition_variable m_cond; - std::thread* m_thread; - bool m_finished; + std::queue<Rdb_sst_file *> m_queue; + std::mutex m_mutex; + std::condition_variable m_cond; + std::thread *m_thread; + bool m_finished; #endif - Rdb_sst_file* m_sst_file; - const bool m_tracing; + Rdb_sst_file *m_sst_file; + const bool 
m_tracing; int open_new_sst_file(); void close_curr_sst_file(); - void set_error_msg(const std::string& msg); + void set_error_msg(const std::string &msg); #if defined(RDB_SST_INFO_USE_THREAD) void run_thread(); - static void thread_fcn(void* object); + static void thread_fcn(void *object); #endif - public: - Rdb_sst_info(rocksdb::DB* const db, const std::string& tablename, - const std::string& indexname, - rocksdb::ColumnFamilyHandle* const cf, - const rocksdb::DBOptions& db_options, const bool &tracing); +public: + Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename, + const std::string &indexname, + rocksdb::ColumnFamilyHandle *const cf, + const rocksdb::DBOptions &db_options, const bool &tracing); ~Rdb_sst_info(); - int put(const rocksdb::Slice& key, const rocksdb::Slice& value); + int put(const rocksdb::Slice &key, const rocksdb::Slice &value); int commit(); - const std::string& error_message() const { return m_error_msg; } + const std::string &error_message() const { return m_error_msg; } - static void init(const rocksdb::DB* const db); + static void init(const rocksdb::DB *const db); }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_threads.cc b/storage/rocksdb/rdb_threads.cc index 3f00bc13325..0bc590e4cf8 100644 --- a/storage/rocksdb/rdb_threads.cc +++ b/storage/rocksdb/rdb_threads.cc @@ -16,7 +16,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation +#pragma implementation // gcc: Class implementation #endif /* The C++ file's header */ @@ -24,58 +24,64 @@ namespace myrocks { -void* Rdb_thread::thread_func(void* const thread_ptr) -{ +void *Rdb_thread::thread_func(void *const thread_ptr) { DBUG_ASSERT(thread_ptr != nullptr); - Rdb_thread* const thread= static_cast<Rdb_thread* const>(thread_ptr); - if (!thread->m_run_once.exchange(true)) - { + Rdb_thread *const thread = static_cast<Rdb_thread 
*const>(thread_ptr); + if (!thread->m_run_once.exchange(true)) { thread->run(); thread->uninit(); } return nullptr; } - void Rdb_thread::init( #ifdef HAVE_PSI_INTERFACE - my_core::PSI_mutex_key stop_bg_psi_mutex_key, - my_core::PSI_cond_key stop_bg_psi_cond_key + my_core::PSI_mutex_key stop_bg_psi_mutex_key, + my_core::PSI_cond_key stop_bg_psi_cond_key #endif - ) -{ + ) { DBUG_ASSERT(!m_run_once); mysql_mutex_init(stop_bg_psi_mutex_key, &m_signal_mutex, MY_MUTEX_INIT_FAST); mysql_cond_init(stop_bg_psi_cond_key, &m_signal_cond, nullptr); } - -void Rdb_thread::uninit() -{ +void Rdb_thread::uninit() { mysql_mutex_destroy(&m_signal_mutex); mysql_cond_destroy(&m_signal_cond); } - -int Rdb_thread::create_thread( +int Rdb_thread::create_thread(const std::string &thread_name #ifdef HAVE_PSI_INTERFACE - PSI_thread_key background_psi_thread_key + , + PSI_thread_key background_psi_thread_key #endif - ) -{ - return mysql_thread_create(background_psi_thread_key, - &m_handle, nullptr, thread_func, this); -} + ) { + DBUG_ASSERT(!thread_name.empty()); + + int err = mysql_thread_create(background_psi_thread_key, &m_handle, nullptr, + thread_func, this); + + if (!err) { + /* + mysql_thread_create() ends up doing some work underneath and setting the + thread name as "my-func". This isn't what we want. Our intent is to name + the threads according to their purpose so that when displayed under the + debugger then they'll be more easily identifiable. Therefore we'll reset + the name if thread was successfully created. 
+ */ + err = pthread_setname_np(m_handle, thread_name.c_str()); + } + return err; +} -void Rdb_thread::signal(const bool &stop_thread) -{ +void Rdb_thread::signal(const bool &stop_thread) { mysql_mutex_lock(&m_signal_mutex); if (stop_thread) { - m_stop= true; + m_stop = true; } mysql_cond_signal(&m_signal_cond); mysql_mutex_unlock(&m_signal_mutex); } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_threads.h b/storage/rocksdb/rdb_threads.h index c06dba438c0..b7890b03576 100644 --- a/storage/rocksdb/rdb_threads.h +++ b/storage/rocksdb/rdb_threads.h @@ -16,6 +16,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #pragma once +/* C++ standard header files */ +#include <string> + /* MySQL includes */ #include "./my_global.h" #include <mysql/psi/mysql_table.h> @@ -26,91 +29,81 @@ namespace myrocks { -class Rdb_thread -{ - private: +class Rdb_thread { +private: // Disable Copying - Rdb_thread(const Rdb_thread&); - Rdb_thread& operator=(const Rdb_thread&); + Rdb_thread(const Rdb_thread &); + Rdb_thread &operator=(const Rdb_thread &); // Make sure we run only once std::atomic_bool m_run_once; - pthread_t m_handle; + pthread_t m_handle; - protected: - mysql_mutex_t m_signal_mutex; - mysql_cond_t m_signal_cond; - bool m_stop= false; +protected: + mysql_mutex_t m_signal_mutex; + mysql_cond_t m_signal_cond; + bool m_stop = false; - public: +public: Rdb_thread() : m_run_once(false) {} #ifdef HAVE_PSI_INTERFACE - void init(my_core::PSI_mutex_key stop_bg_psi_mutex_key, - my_core::PSI_cond_key stop_bg_psi_cond_key); - int create_thread( - my_core::PSI_thread_key background_psi_thread_key); + void init(my_core::PSI_mutex_key stop_bg_psi_mutex_key, + my_core::PSI_cond_key stop_bg_psi_cond_key); + int create_thread(const std::string &thread_name, + my_core::PSI_thread_key background_psi_thread_key); #else void init(); - int create_thread(); + int create_thread(const std::string &thread_name); #endif virtual void run(void) = 
0; - void signal(const bool &stop_thread= false); + void signal(const bool &stop_thread = false); - int join() - { - return pthread_join(m_handle, nullptr); - } + int join() { return pthread_join(m_handle, nullptr); } void uninit(); virtual ~Rdb_thread() {} - private: - static void* thread_func(void* const thread_ptr); +private: + static void *thread_func(void *const thread_ptr); }; - /** MyRocks background thread control N.B. This is on top of RocksDB's own background threads (@see rocksdb::CancelAllBackgroundWork()) */ -class Rdb_background_thread : public Rdb_thread -{ - private: - bool m_save_stats= false; +class Rdb_background_thread : public Rdb_thread { +private: + bool m_save_stats = false; - void reset() - { + void reset() { mysql_mutex_assert_owner(&m_signal_mutex); - m_stop= false; - m_save_stats= false; + m_stop = false; + m_save_stats = false; } - public: +public: virtual void run() override; - void request_save_stats() - { + void request_save_stats() { mysql_mutex_lock(&m_signal_mutex); - m_save_stats= true; + m_save_stats = true; mysql_mutex_unlock(&m_signal_mutex); } }; - /* Drop index thread control */ -struct Rdb_drop_index_thread : public Rdb_thread -{ +struct Rdb_drop_index_thread : public Rdb_thread { virtual void run() override; }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_utils.cc b/storage/rocksdb/rdb_utils.cc index 599f11c5681..900d0f9be19 100644 --- a/storage/rocksdb/rdb_utils.cc +++ b/storage/rocksdb/rdb_utils.cc @@ -32,14 +32,12 @@ namespace myrocks { /* Skip past any spaces in the input */ -const char* rdb_skip_spaces(const struct charset_info_st* const cs, - const char *str) -{ +const char *rdb_skip_spaces(const struct charset_info_st *const cs, + const char *str) { DBUG_ASSERT(cs != nullptr); DBUG_ASSERT(str != nullptr); - while (my_isspace(cs, *str)) - { + while (my_isspace(cs, *str)) { str++; } @@ -51,18 +49,15 @@ const char* rdb_skip_spaces(const struct charset_info_st* const cs, Note that 
str1 can be longer but we only compare up to the number of characters in str2. */ -bool rdb_compare_strings_ic(const char* const str1, const char* const str2) -{ +bool rdb_compare_strings_ic(const char *const str1, const char *const str2) { DBUG_ASSERT(str1 != nullptr); DBUG_ASSERT(str2 != nullptr); // Scan through the strings size_t ii; - for (ii = 0; str2[ii]; ii++) - { + for (ii = 0; str2[ii]; ii++) { if (toupper(static_cast<int>(str1[ii])) != - toupper(static_cast<int>(str2[ii]))) - { + toupper(static_cast<int>(str2[ii]))) { return false; } } @@ -74,11 +69,10 @@ bool rdb_compare_strings_ic(const char* const str1, const char* const str2) Scan through an input string looking for pattern, ignoring case and skipping all data enclosed in quotes. */ -const char* rdb_find_in_string(const char *str, const char *pattern, - bool * const succeeded) -{ - char quote = '\0'; - bool escape = false; +const char *rdb_find_in_string(const char *str, const char *pattern, + bool *const succeeded) { + char quote = '\0'; + bool escape = false; DBUG_ASSERT(str != nullptr); DBUG_ASSERT(pattern != nullptr); @@ -86,38 +80,30 @@ const char* rdb_find_in_string(const char *str, const char *pattern, *succeeded = false; - for ( ; *str; str++) - { + for (; *str; str++) { /* If we found a our starting quote character */ - if (*str == quote) - { + if (*str == quote) { /* If it was escaped ignore it */ - if (escape) - { + if (escape) { escape = false; } /* Otherwise we are now outside of the quoted string */ - else - { + else { quote = '\0'; } } /* Else if we are currently inside a quoted string? 
*/ - else if (quote != '\0') - { + else if (quote != '\0') { /* If so, check for the escape character */ escape = !escape && *str == '\\'; } /* Else if we found a quote we are starting a quoted string */ - else if (*str == '"' || *str == '\'' || *str == '`') - { + else if (*str == '"' || *str == '\'' || *str == '`') { quote = *str; } /* Else we are outside of a quoted string - look for our pattern */ - else - { - if (rdb_compare_strings_ic(str, pattern)) - { + else { + if (rdb_compare_strings_ic(str, pattern)) { *succeeded = true; return str; } @@ -132,10 +118,9 @@ const char* rdb_find_in_string(const char *str, const char *pattern, /* See if the next valid token matches the specified string */ -const char* rdb_check_next_token(const struct charset_info_st* const cs, - const char *str, const char* const pattern, - bool* const succeeded) -{ +const char *rdb_check_next_token(const struct charset_info_st *const cs, + const char *str, const char *const pattern, + bool *const succeeded) { DBUG_ASSERT(cs != nullptr); DBUG_ASSERT(str != nullptr); DBUG_ASSERT(pattern != nullptr); @@ -145,8 +130,7 @@ const char* rdb_check_next_token(const struct charset_info_st* const cs, str = rdb_skip_spaces(cs, str); // See if the next characters match the pattern - if (rdb_compare_strings_ic(str, pattern)) - { + if (rdb_compare_strings_ic(str, pattern)) { *succeeded = true; return str + strlen(pattern); } @@ -158,43 +142,35 @@ const char* rdb_check_next_token(const struct charset_info_st* const cs, /* Parse id */ -const char* rdb_parse_id(const struct charset_info_st* const cs, - const char *str, std::string * const id) -{ +const char *rdb_parse_id(const struct charset_info_st *const cs, + const char *str, std::string *const id) { DBUG_ASSERT(cs != nullptr); DBUG_ASSERT(str != nullptr); // Move past any spaces str = rdb_skip_spaces(cs, str); - if (*str == '\0') - { + if (*str == '\0') { return str; } char quote = '\0'; - if (*str == '`' || *str == '"') - { + if (*str == '`' || *str == 
'"') { quote = *str++; } - size_t len = 0; - const char* start = str; + size_t len = 0; + const char *start = str; - if (quote != '\0') - { - for ( ; ; ) - { - if (*str == '\0') - { + if (quote != '\0') { + for (;;) { + if (*str == '\0') { return str; } - if (*str == quote) - { + if (*str == quote) { str++; - if (*str != quote) - { + if (*str != quote) { break; } } @@ -202,27 +178,21 @@ const char* rdb_parse_id(const struct charset_info_st* const cs, str++; len++; } - } - else - { - while (!my_isspace(cs, *str) && *str != '(' && *str != ')' && - *str != '.' && *str != ',' && *str != '\0') - { + } else { + while (!my_isspace(cs, *str) && *str != '(' && *str != ')' && *str != '.' && + *str != ',' && *str != '\0') { str++; len++; } } // If the user requested the id create it and return it - if (id != nullptr) - { + if (id != nullptr) { *id = std::string(""); id->reserve(len); - while (len--) - { + while (len--) { *id += *start; - if (*start++ == quote) - { + if (*start++ == quote) { start++; } } @@ -234,8 +204,8 @@ const char* rdb_parse_id(const struct charset_info_st* const cs, /* Skip id */ -const char* rdb_skip_id(const struct charset_info_st* const cs, const char *str) -{ +const char *rdb_skip_id(const struct charset_info_st *const cs, + const char *str) { DBUG_ASSERT(cs != nullptr); DBUG_ASSERT(str != nullptr); @@ -243,19 +213,16 @@ const char* rdb_skip_id(const struct charset_info_st* const cs, const char *str) } static const std::size_t rdb_hex_bytes_per_char = 2; -static const std::array<char, 16> rdb_hexdigit = -{ - { '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' } -}; +static const std::array<char, 16> rdb_hexdigit = {{'0', '1', '2', '3', '4', '5', + '6', '7', '8', '9', 'a', 'b', + 'c', 'd', 'e', 'f'}}; /* Convert data into a hex string with optional maximum length. If the data is larger than the maximum length trancate it and append "..". 
*/ std::string rdb_hexdump(const char *data, const std::size_t data_len, - const std::size_t maxsize) -{ + const std::size_t maxsize) { DBUG_ASSERT(data != nullptr); // Count the elements in the string @@ -264,8 +231,7 @@ std::string rdb_hexdump(const char *data, const std::size_t data_len, std::size_t len = elems * rdb_hex_bytes_per_char; std::string str; - if (maxsize != 0 && len > maxsize) - { + if (maxsize != 0 && len > maxsize) { // If the amount of output is too large adjust the settings // and leave room for the ".." at the end elems = (maxsize - 2) / rdb_hex_bytes_per_char; @@ -276,34 +242,29 @@ std::string rdb_hexdump(const char *data, const std::size_t data_len, str.reserve(len); // Loop through the input data and build the output string - for (std::size_t ii = 0; ii < elems; ii++, data++) - { - uint8_t ch = (uint8_t) *data; + for (std::size_t ii = 0; ii < elems; ii++, data++) { + uint8_t ch = (uint8_t)*data; str += rdb_hexdigit[ch >> 4]; str += rdb_hexdigit[ch & 0x0F]; } // If we can't fit it all add the ".." 
- if (elems != data_len) - { + if (elems != data_len) { str += ".."; } return str; } - /* Attempt to access the database subdirectory to see if it exists */ -bool rdb_database_exists(const std::string& db_name) -{ - const std::string dir = std::string(mysql_real_data_home) + FN_DIRSEP - + db_name; - struct st_my_dir* const dir_info = my_dir(dir.c_str(), - MYF(MY_DONT_SORT | MY_WANT_STAT)); - if (dir_info == nullptr) - { +bool rdb_database_exists(const std::string &db_name) { + const std::string dir = + std::string(mysql_real_data_home) + FN_DIRSEP + db_name; + struct st_my_dir *const dir_info = + my_dir(dir.c_str(), MYF(MY_DONT_SORT | MY_WANT_STAT)); + if (dir_info == nullptr) { return false; } @@ -311,4 +272,4 @@ bool rdb_database_exists(const std::string& db_name) return true; } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/rdb_utils.h b/storage/rocksdb/rdb_utils.h index 7d63ff9c220..b337ed108d3 100644 --- a/storage/rocksdb/rdb_utils.h +++ b/storage/rocksdb/rdb_utils.h @@ -26,7 +26,7 @@ #include "rocksdb/slice.h" #ifdef HAVE_JEMALLOC - #include <jemalloc/jemalloc.h> +#include <jemalloc/jemalloc.h> #endif namespace myrocks { @@ -38,7 +38,7 @@ namespace myrocks { #ifndef interface #define interface struct -#endif // interface +#endif // interface /* Introduce C-style pseudo-namespaces, a handy way to make code more readble @@ -56,7 +56,7 @@ namespace myrocks { // to non-obvious MySQL functions, like the ones that do not start with well // known prefixes: "my_", "sql_", and "mysql_". 
#define my_core -#endif // my_core +#endif // my_core /* The intent behind a SHIP_ASSERT() macro is to have a mechanism for validating @@ -74,14 +74,14 @@ namespace myrocks { */ #ifndef SHIP_ASSERT -#define SHIP_ASSERT(expr) \ - do { \ - if (!(expr)) { \ - my_safe_printf_stderr("\nShip assert failure: \'%s\'\n", #expr); \ - abort_with_stack_traces(); \ - } \ +#define SHIP_ASSERT(expr) \ + do { \ + if (!(expr)) { \ + my_safe_printf_stderr("\nShip assert failure: \'%s\'\n", #expr); \ + abort_with_stack_traces(); \ + } \ } while (0) -#endif // SHIP_ASSERT +#endif // SHIP_ASSERT /* Assert a implies b. @@ -97,23 +97,50 @@ namespace myrocks { a and b must be both true or both false. */ #ifndef DBUG_ASSERT_IFF -#define DBUG_ASSERT_IFF(a, b) \ +#define DBUG_ASSERT_IFF(a, b) \ DBUG_ASSERT(static_cast<bool>(a) == static_cast<bool>(b)) #endif /* + Intent behind this macro is to avoid manually typing the function name every + time we want to add the debugging statement and use the compiler for this + work. This avoids typical refactoring problems when one renames a function, + but the tracing message doesn't get updated. + + We could use __func__ or __FUNCTION__ macros, but __PRETTY_FUNCTION__ + contains the signature of the function as well as its bare name and provides + therefore more context when interpreting the logs. +*/ +#define DBUG_ENTER_FUNC() DBUG_ENTER(__PRETTY_FUNCTION__) + +/* + Error handling pattern used across MySQL abides by the following rules: "All + functions that can report an error (usually an allocation error), should + return 0/FALSE/false on success, 1/TRUE/true on failure." + + https://dev.mysql.com/doc/internals/en/additional-suggestions.html has more + details. + + To increase the comprehension and readability of MyRocks codebase we'll use + constants similar to ones from C standard (EXIT_SUCCESS and EXIT_FAILURE) to + make sure that both failure and success paths are clearly identifiable. 
The + definitions of FALSE and TRUE come from <my_global.h>. +*/ +#define HA_EXIT_SUCCESS FALSE +#define HA_EXIT_FAILURE TRUE + +/* Generic constant. */ -const size_t RDB_MAX_HEXDUMP_LEN= 1000; +const size_t RDB_MAX_HEXDUMP_LEN = 1000; /* Helper function to get an NULL terminated uchar* out of a given MySQL String. */ -inline uchar* rdb_mysql_str_to_uchar_str(my_core::String *str) -{ +inline uchar *rdb_mysql_str_to_uchar_str(my_core::String *str) { DBUG_ASSERT(str != nullptr); - return reinterpret_cast<uchar*>(str->c_ptr()); + return reinterpret_cast<uchar *>(str->c_ptr()); } /* @@ -121,17 +148,15 @@ inline uchar* rdb_mysql_str_to_uchar_str(my_core::String *str) given STL string. */ -inline const uchar* rdb_std_str_to_uchar_ptr(const std::string &str) -{ - return reinterpret_cast<const uchar*>(str.data()); +inline const uchar *rdb_std_str_to_uchar_ptr(const std::string &str) { + return reinterpret_cast<const uchar *>(str.data()); } /* Helper function to convert seconds to milliseconds. */ -constexpr int rdb_convert_sec_to_ms(int sec) -{ +constexpr int rdb_convert_sec_to_ms(int sec) { return std::chrono::milliseconds(std::chrono::seconds(sec)).count(); } @@ -140,10 +165,9 @@ constexpr int rdb_convert_sec_to_ms(int sec) given RocksDB item. */ -inline const uchar* rdb_slice_to_uchar_ptr(const rocksdb::Slice *item) -{ +inline const uchar *rdb_slice_to_uchar_ptr(const rocksdb::Slice *item) { DBUG_ASSERT(item != nullptr); - return reinterpret_cast<const uchar*>(item->data()); + return reinterpret_cast<const uchar *>(item->data()); } /* @@ -152,12 +176,11 @@ inline const uchar* rdb_slice_to_uchar_ptr(const rocksdb::Slice *item) scenario for cases where it has been verified that this intervention has noticeable benefits. */ -inline int purge_all_jemalloc_arenas() -{ +inline int purge_all_jemalloc_arenas() { #ifdef HAVE_JEMALLOC unsigned narenas = 0; size_t sz = sizeof(unsigned); - char name[25] = { 0 }; + char name[25] = {0}; // Get the number of arenas first. 
Please see `jemalloc` documentation for // all the various options. @@ -184,28 +207,28 @@ inline int purge_all_jemalloc_arenas() Helper functions to parse strings. */ -const char* rdb_skip_spaces(const struct charset_info_st* const cs, +const char *rdb_skip_spaces(const struct charset_info_st *const cs, const char *str) - __attribute__((__nonnull__, __warn_unused_result__)); + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); -bool rdb_compare_strings_ic(const char* const str1, const char* const str2) - __attribute__((__nonnull__, __warn_unused_result__)); +bool rdb_compare_strings_ic(const char *const str1, const char *const str2) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); -const char* rdb_find_in_string(const char *str, const char *pattern, - bool * const succeeded) - __attribute__((__nonnull__, __warn_unused_result__)); +const char *rdb_find_in_string(const char *str, const char *pattern, + bool *const succeeded) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); -const char* rdb_check_next_token(const struct charset_info_st* const cs, - const char *str, const char* const pattern, - bool * const succeeded) - __attribute__((__nonnull__, __warn_unused_result__)); +const char *rdb_check_next_token(const struct charset_info_st *const cs, + const char *str, const char *const pattern, + bool *const succeeded) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); -const char* rdb_parse_id(const struct charset_info_st* const cs, - const char *str, std::string * const id) - __attribute__((__nonnull__(1, 2), __warn_unused_result__)); +const char *rdb_parse_id(const struct charset_info_st *const cs, + const char *str, std::string *const id) + MY_ATTRIBUTE((__nonnull__(1, 2), __warn_unused_result__)); -const char* rdb_skip_id(const struct charset_info_st* const cs, const char *str) - __attribute__((__nonnull__, __warn_unused_result__)); +const char *rdb_skip_id(const struct charset_info_st *const cs, const char *str) + MY_ATTRIBUTE((__nonnull__, 
__warn_unused_result__)); /* Helper functions to populate strings. @@ -213,11 +236,11 @@ const char* rdb_skip_id(const struct charset_info_st* const cs, const char *str) std::string rdb_hexdump(const char *data, const std::size_t data_len, const std::size_t maxsize = 0) - __attribute__((__nonnull__)); + MY_ATTRIBUTE((__nonnull__)); /* Helper function to see if a database exists */ -bool rdb_database_exists(const std::string& db_name); +bool rdb_database_exists(const std::string &db_name); -} // namespace myrocks +} // namespace myrocks |