diff options
author | Sergei Golubchik <serg@mariadb.org> | 2016-10-25 21:58:59 +0200 |
---|---|---|
committer | Sergei Golubchik <serg@mariadb.org> | 2016-10-25 21:58:59 +0200 |
commit | de9ea40f054c171e1a592b47b5e6443e81532fa9 (patch) | |
tree | 7f559cde4b9c70df8ff9d0cc4779cce11e5401f1 | |
parent | 675f27b382457de77d8a128124b132439eb5cfaf (diff) | |
parent | d9787aa29af3e77c5cd04defe0331c721542cff6 (diff) | |
download | mariadb-git-de9ea40f054c171e1a592b47b5e6443e81532fa9.tar.gz |
Merge branch 'merge/merge-tokudb-5.6' into 10.0
43 files changed, 1108 insertions, 162 deletions
diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt index 9820aa2217f..dad90fe96eb 100644 --- a/storage/tokudb/CMakeLists.txt +++ b/storage/tokudb/CMakeLists.txt @@ -1,4 +1,4 @@ -SET(TOKUDB_VERSION 5.6.32-78.1) +SET(TOKUDB_VERSION 5.6.33-79.0) # PerconaFT only supports x86-64 and cmake-2.8.9+ IF(CMAKE_VERSION VERSION_LESS "2.8.9") MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB") diff --git a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc index 576f902f6ae..7ede78b3c0d 100644 --- a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc +++ b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc @@ -422,6 +422,9 @@ static void print_db_env_struct (void) { "int (*set_checkpoint_pool_threads)(DB_ENV *, uint32_t)", "void (*set_check_thp)(DB_ENV *, bool new_val)", "bool (*get_check_thp)(DB_ENV *)", + "bool (*set_dir_per_db)(DB_ENV *, bool new_val)", + "bool (*get_dir_per_db)(DB_ENV *)", + "const char *(*get_data_dir)(DB_ENV *env)", NULL}; sort_and_dump_fields("db_env", true, extra); diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h b/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h index dc6aec9226d..05fb771de08 100644 --- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h +++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h @@ -138,6 +138,8 @@ struct cachefile { // nor attempt to open any cachefile with the same fname (dname) // until this cachefile has been fully closed and unlinked. bool unlink_on_close; + // If set then fclose will not be logged in recovery log. + bool skip_log_recover_on_close; int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. */ CACHETABLE cachetable; struct fileid fileid; diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc index 5bba977de1a..6d753805fa9 100644 --- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc +++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc @@ -467,6 +467,10 @@ toku_cachefile_fname_in_env (CACHEFILE cf) { return cf->fname_in_env; } +void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env) { + cf->fname_in_env = new_fname_in_env; +} + int toku_cachefile_get_fd (CACHEFILE cf) { return cf->fd; @@ -2903,6 +2907,18 @@ bool toku_cachefile_is_unlink_on_close(CACHEFILE cf) { return cf->unlink_on_close; } +void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf) { + cf->skip_log_recover_on_close = true; +} + +void toku_cachefile_do_log_recover_on_close(CACHEFILE cf) { + cf->skip_log_recover_on_close = false; +} + +bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf) { + return cf->skip_log_recover_on_close; +} + uint64_t toku_cachefile_size(CACHEFILE cf) { int64_t file_size; int fd = toku_cachefile_get_fd(cf); diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h index 148326562ab..3b3cb0a2d46 100644 --- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h +++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h @@ -500,12 +500,18 @@ int toku_cachefile_get_fd (CACHEFILE); // Return the filename char * toku_cachefile_fname_in_env (CACHEFILE cf); +void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env); + // Make it so when the cachefile closes, the underlying file is unlinked void toku_cachefile_unlink_on_close(CACHEFILE cf); // is this cachefile marked as unlink on close? bool toku_cachefile_is_unlink_on_close(CACHEFILE cf); +void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf); +void toku_cachefile_do_log_recover_on_close(CACHEFILE cf); +bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf); + // Return the logger associated with the cachefile struct tokulogger *toku_cachefile_logger(CACHEFILE cf); diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc index f131668889e..30a8710d7aa 100644 --- a/storage/tokudb/PerconaFT/ft/ft-ops.cc +++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc @@ -149,22 +149,23 @@ basement nodes, bulk fetch, and partial fetch: #include "ft/cachetable/checkpoint.h" #include "ft/cursor.h" -#include "ft/ft.h" #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-flusher.h" #include "ft/ft-internal.h" -#include "ft/msg.h" +#include "ft/ft.h" #include "ft/leafentry.h" #include "ft/logger/log-internal.h" +#include "ft/msg.h" #include "ft/node.h" #include "ft/serialize/block_table.h" -#include "ft/serialize/sub_block.h" #include "ft/serialize/ft-serialize.h" #include "ft/serialize/ft_layout_version.h" #include "ft/serialize/ft_node-serialize.h" +#include "ft/serialize/sub_block.h" #include "ft/txn/txn_manager.h" -#include "ft/ule.h" #include "ft/txn/xids.h" +#include "ft/ule.h" +#include "src/ydb-internal.h" #include <toku_race_tools.h> @@ -179,6 +180,7 @@ basement nodes, bulk fetch, and partial fetch: #include <stdint.h> +#include <memory> /* Status is intended for display to humans to help understand system behavior. * It does not need to be perfectly thread-safe. */ @@ -2593,12 +2595,104 @@ static inline int ft_open_maybe_direct(const char *filename, int oflag, int mode static const mode_t file_mode = S_IRUSR+S_IWUSR+S_IRGRP+S_IWGRP+S_IROTH+S_IWOTH; +inline bool toku_file_is_root(const char *path, const char *last_slash) { + return last_slash == path; +} + +static std::unique_ptr<char[], decltype(&toku_free)> toku_file_get_parent_dir( + const char *path) { + std::unique_ptr<char[], decltype(&toku_free)> result(nullptr, &toku_free); + + bool has_trailing_slash = false; + + /* Find the offset of the last slash */ + const char *last_slash = strrchr(path, OS_PATH_SEPARATOR); + + if (!last_slash) { + /* No slash in the path, return NULL */ + return result; + } + + /* Ok, there is a slash. Is there anything after it? */ + if (static_cast<size_t>(last_slash - path + 1) == strlen(path)) { + has_trailing_slash = true; + } + + /* Reduce repetative slashes. */ + while (last_slash > path && last_slash[-1] == OS_PATH_SEPARATOR) { + last_slash--; + } + + /* Check for the root of a drive. */ + if (toku_file_is_root(path, last_slash)) { + return result; + } + + /* If a trailing slash prevented the first strrchr() from trimming + the last component of the path, trim that component now. */ + if (has_trailing_slash) { + /* Back up to the previous slash. */ + last_slash--; + while (last_slash > path && last_slash[0] != OS_PATH_SEPARATOR) { + last_slash--; + } + + /* Reduce repetative slashes. */ + while (last_slash > path && last_slash[-1] == OS_PATH_SEPARATOR) { + last_slash--; + } + } + + /* Check for the root of a drive. */ + if (toku_file_is_root(path, last_slash)) { + return result; + } + + result.reset(toku_strndup(path, last_slash - path)); + return result; +} + +static bool toku_create_subdirs_if_needed(const char *path) { + static const mode_t dir_mode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | + S_IWGRP | S_IXGRP | S_IROTH | S_IXOTH; + + toku_struct_stat stat; + bool subdir_exists = true; + auto subdir = toku_file_get_parent_dir(path); + + if (!subdir.get()) + return true; + + if (toku_stat(subdir.get(), &stat) == -1) { + if (ENOENT == get_error_errno()) + subdir_exists = false; + else + return false; + } + + if (subdir_exists) { + if (!S_ISDIR(stat.st_mode)) + return false; + return true; + } + + if (!toku_create_subdirs_if_needed(subdir.get())) + return false; + + if (toku_os_mkdir(subdir.get(), dir_mode)) + return false; + + return true; +} + // open a file for use by the ft // Requires: File does not exist. static int ft_create_file(FT_HANDLE UU(ft_handle), const char *fname, int *fdp) { int r; int fd; int er; + if (!toku_create_subdirs_if_needed(fname)) + return get_error_errno(); fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode); assert(fd==-1); if ((er = get_maybe_error_errno()) != ENOENT) { @@ -4427,6 +4521,55 @@ void toku_ft_unlink(FT_HANDLE handle) { toku_cachefile_unlink_on_close(cf); } +int toku_ft_rename_iname(DB_TXN *txn, + const char *data_dir, + const char *old_iname, + const char *new_iname, + CACHETABLE ct) { + int r = 0; + + std::unique_ptr<char[], decltype(&toku_free)> new_iname_full(nullptr, + &toku_free); + std::unique_ptr<char[], decltype(&toku_free)> old_iname_full(nullptr, + &toku_free); + + new_iname_full.reset(toku_construct_full_name(2, data_dir, new_iname)); + old_iname_full.reset(toku_construct_full_name(2, data_dir, old_iname)); + + if (txn) { + BYTESTRING bs_old_name = {static_cast<uint32_t>(strlen(old_iname) + 1), + const_cast<char *>(old_iname)}; + BYTESTRING bs_new_name = {static_cast<uint32_t>(strlen(new_iname) + 1), + const_cast<char *>(new_iname)}; + FILENUM filenum = FILENUM_NONE; + { + CACHEFILE cf; + r = toku_cachefile_of_iname_in_env(ct, old_iname, &cf); + if (r != ENOENT) { + char *old_fname_in_cf = toku_cachefile_fname_in_env(cf); + toku_cachefile_set_fname_in_env(cf, toku_xstrdup(new_iname)); + toku_free(old_fname_in_cf); + filenum = toku_cachefile_filenum(cf); + } + } + toku_logger_save_rollback_frename( + db_txn_struct_i(txn)->tokutxn, &bs_old_name, &bs_new_name); + toku_log_frename(db_txn_struct_i(txn)->tokutxn->logger, + (LSN *)0, + 0, + toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn), + bs_old_name, + filenum, + bs_new_name); + } + + r = toku_os_rename(old_iname_full.get(), new_iname_full.get()); + if (r != 0) + return r; + r = toku_fsync_directory(new_iname_full.get()); + return r; +} + int toku_ft_get_fragmentation(FT_HANDLE ft_handle, TOKU_DB_FRAGMENTATION report) { int fd = toku_cachefile_get_fd(ft_handle->ft->cf); toku_ft_lock(ft_handle->ft); diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.h b/storage/tokudb/PerconaFT/ft/ft-ops.h index 313a74628ea..70cf045d43c 100644 --- a/storage/tokudb/PerconaFT/ft/ft-ops.h +++ b/storage/tokudb/PerconaFT/ft/ft-ops.h @@ -48,6 +48,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "ft/msg.h" #include "util/dbt.h" +#define OS_PATH_SEPARATOR '/' + typedef struct ft_handle *FT_HANDLE; int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result)); diff --git a/storage/tokudb/PerconaFT/ft/ft.cc b/storage/tokudb/PerconaFT/ft/ft.cc index 699fcc57603..7c94b4c59d3 100644 --- a/storage/tokudb/PerconaFT/ft/ft.cc +++ b/storage/tokudb/PerconaFT/ft/ft.cc @@ -253,7 +253,19 @@ static void ft_close(CACHEFILE cachefile, int fd, void *header_v, bool oplsn_val char* fname_in_env = toku_cachefile_fname_in_env(cachefile); assert(fname_in_env); BYTESTRING bs = {.len=(uint32_t) strlen(fname_in_env), .data=fname_in_env}; - toku_log_fclose(logger, &lsn, ft->h->dirty, bs, toku_cachefile_filenum(cachefile)); // flush the log on close (if new header is being written), otherwise it might not make it out. + if (!toku_cachefile_is_skip_log_recover_on_close(cachefile)) { + toku_log_fclose( + logger, + &lsn, + ft->h->dirty, + bs, + toku_cachefile_filenum(cachefile)); // flush the log on + // close (if new header + // is being written), + // otherwise it might + // not make it out. + toku_cachefile_do_log_recover_on_close(cachefile); + } } } if (ft->h->dirty) { // this is the only place this bit is tested (in currentheader) diff --git a/storage/tokudb/PerconaFT/ft/ft.h b/storage/tokudb/PerconaFT/ft/ft.h index d600e093bdc..7a3c4fa783c 100644 --- a/storage/tokudb/PerconaFT/ft/ft.h +++ b/storage/tokudb/PerconaFT/ft/ft.h @@ -53,6 +53,12 @@ typedef struct ft_options *FT_OPTIONS; void toku_ft_unlink(FT_HANDLE handle); void toku_ft_unlink_on_commit(FT_HANDLE handle, TOKUTXN txn); +int toku_ft_rename_iname(DB_TXN *txn, + const char *data_dir, + const char *old_iname, + const char *new_iname, + CACHETABLE ct); + void toku_ft_init_reflock(FT ft); void toku_ft_destroy_reflock(FT ft); void toku_ft_grab_reflock(FT ft); diff --git a/storage/tokudb/PerconaFT/ft/logger/logformat.cc b/storage/tokudb/PerconaFT/ft/logger/logformat.cc index 6f3baa81c86..49b61138803 100644 --- a/storage/tokudb/PerconaFT/ft/logger/logformat.cc +++ b/storage/tokudb/PerconaFT/ft/logger/logformat.cc @@ -90,6 +90,10 @@ const struct logtype rollbacks[] = { {"fcreate", 'F', FA{{"FILENUM", "filenum", 0}, {"BYTESTRING", "iname", 0}, NULLFIELD}, LOG_BEGIN_ACTION_NA}, + //rename file + {"frename", 'n', FA{{"BYTESTRING", "old_iname", 0}, + {"BYTESTRING", "new_iname", 0}, + NULLFIELD}, LOG_BEGIN_ACTION_NA}, // cmdinsert is used to insert a key-value pair into a DB. For rollback we don't need the data. {"cmdinsert", 'i', FA{ {"FILENUM", "filenum", 0}, @@ -195,6 +199,11 @@ const struct logtype logtypes[] = { {"fdelete", 'U', FA{{"TXNID_PAIR", "xid", 0}, {"FILENUM", "filenum", 0}, NULLFIELD}, SHOULD_LOG_BEGIN}, + {"frename", 'n', FA{{"TXNID_PAIR", "xid", 0}, + {"BYTESTRING", "old_iname", 0}, + {"FILENUM", "old_filenum", 0}, + {"BYTESTRING", "new_iname", 0}, + NULLFIELD}, IGNORE_LOG_BEGIN}, {"enq_insert", 'I', FA{{"FILENUM", "filenum", 0}, {"TXNID_PAIR", "xid", 0}, {"BYTESTRING", "key", 0}, diff --git a/storage/tokudb/PerconaFT/ft/logger/recover.cc b/storage/tokudb/PerconaFT/ft/logger/recover.cc index 38f29773bd6..a9c30c0e37a 100644 --- a/storage/tokudb/PerconaFT/ft/logger/recover.cc +++ b/storage/tokudb/PerconaFT/ft/logger/recover.cc @@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." +#include <memory> #include "ft/cachetable/cachetable.h" #include "ft/cachetable/checkpoint.h" #include "ft/ft.h" @@ -935,6 +936,83 @@ static int toku_recover_backward_fdelete (struct logtype_fdelete *UU(l), RECOVER return 0; } +static int toku_recover_frename(struct logtype_frename *l, RECOVER_ENV renv) { + assert(renv); + assert(renv->env); + + toku_struct_stat stat; + const char *data_dir = renv->env->get_data_dir(renv->env); + bool old_exist = true; + bool new_exist = true; + + assert(data_dir); + + struct file_map_tuple *tuple; + + std::unique_ptr<char[], decltype(&toku_free)> old_iname_full( + toku_construct_full_name(2, data_dir, l->old_iname.data), &toku_free); + std::unique_ptr<char[], decltype(&toku_free)> new_iname_full( + toku_construct_full_name(2, data_dir, l->new_iname.data), &toku_free); + + if (toku_stat(old_iname_full.get(), &stat) == -1) { + if (ENOENT == errno) + old_exist = false; + else + return 1; + } + + if (toku_stat(new_iname_full.get(), &stat) == -1) { + if (ENOENT == errno) + new_exist = false; + else + return 1; + } + + // Both old and new files can exist if: + // - rename() is not completed + // - fcreate was replayed during recovery + // 'Stalled cachefiles' container cachefile_list::m_stale_fileid contains + // closed but not yet evicted cachefiles and the key of this container is + // fs-dependent file id - (device id, inode number) pair. As it is supposed + // new file have not yet created during recovery process the 'stalled + // cachefile' container can contain only cache file of old file. + // To preserve the old cachefile file's id and keep it in + // 'stalled cachefiles' container the new file is removed + // and the old file is renamed. + if (old_exist && new_exist && + (toku_os_unlink(new_iname_full.get()) == -1 || + toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 || + toku_fsync_directory(old_iname_full.get()) == -1 || + toku_fsync_directory(new_iname_full.get()) == -1)) + return 1; + + if (old_exist && !new_exist && + (toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 || + toku_fsync_directory(old_iname_full.get()) == -1 || + toku_fsync_directory(new_iname_full.get()) == -1)) + return 1; + + if (file_map_find(&renv->fmap, l->old_filenum, &tuple) != DB_NOTFOUND) { + if (tuple->iname) + toku_free(tuple->iname); + tuple->iname = toku_xstrdup(l->new_iname.data); + } + + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + + if (txn) + toku_logger_save_rollback_frename(txn, &l->old_iname, &l->new_iname); + + return 0; +} + +static int toku_recover_backward_frename(struct logtype_frename *UU(l), + RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + static int toku_recover_enq_insert (struct logtype_enq_insert *l, RECOVER_ENV renv) { int r; TOKUTXN txn = NULL; diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h index 92f1e278e1a..eb8c953b08c 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h +++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h @@ -106,6 +106,7 @@ namespace MhsRbTree { static const uint64_t MHS_MAX_VAL = 0xffffffffffffffff; OUUInt64() : _value(0) {} OUUInt64(uint64_t s) : _value(s) {} + OUUInt64(const OUUInt64& o) : _value(o._value) {} bool operator<(const OUUInt64 &r) const { invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); return _value < r.ToInt(); @@ -182,15 +183,18 @@ namespace MhsRbTree { class Node { public: - struct BlockPair { + class BlockPair { + public: OUUInt64 _offset; OUUInt64 _size; BlockPair() : _offset(0), _size(0) {} BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {} - BlockPair(OUUInt64 o, OUUInt64 s) : _offset(o), _size(s) {} - int operator<(const struct BlockPair &rhs) const { + BlockPair(const BlockPair &o) + : _offset(o._offset), _size(o._size) {} + + int operator<(const BlockPair &rhs) const { return _offset < rhs._offset; } int operator<(const uint64_t &o) const { return _offset < o; } @@ -203,15 +207,15 @@ namespace MhsRbTree { }; EColor _color; - struct BlockPair _hole; - struct Pair _label; + BlockPair _hole; + Pair _label; Node *_left; Node *_right; Node *_parent; Node(EColor c, Node::BlockPair h, - struct Pair lb, + Pair lb, Node *l, Node *r, Node *p) diff --git a/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc index 85f29ce9813..cefe66335a6 100644 --- a/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc +++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc @@ -53,9 +53,10 @@ static void generate_random_input() { std::srand(unsigned(std::time(0))); // set some values: - for (uint64_t i = 1; i < N; ++i) { - input_vector.push_back({i, 0}); - old_vector[i] = {i, 0}; + for (uint64_t i = 0; i < N; ++i) { + MhsRbTree::Node::BlockPair bp = {i+1, 0}; + input_vector.push_back(bp); + old_vector[i] = bp; } // using built-in random generator: std::random_shuffle(input_vector.begin(), input_vector.end(), myrandom); diff --git a/storage/tokudb/PerconaFT/ft/txn/roll.cc b/storage/tokudb/PerconaFT/ft/txn/roll.cc index 90eee1e580a..9f3977743a0 100644 --- a/storage/tokudb/PerconaFT/ft/txn/roll.cc +++ b/storage/tokudb/PerconaFT/ft/txn/roll.cc @@ -38,13 +38,13 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. /* rollback and rollforward routines. */ - -#include "ft/ft.h" +#include <memory> #include "ft/ft-ops.h" +#include "ft/ft.h" #include "ft/log_header.h" #include "ft/logger/log-internal.h" -#include "ft/txn/xids.h" #include "ft/txn/rollback-apply.h" +#include "ft/txn/xids.h" // functionality provided by roll.c is exposed by an autogenerated // header file, logheader.h @@ -162,10 +162,122 @@ toku_rollback_fcreate (FILENUM filenum, // directory row lock for its dname) and we would not get this // far if there were other live handles. toku_cachefile_unlink_on_close(cf); + toku_cachefile_skip_log_recover_on_close(cf); done: return 0; } +int toku_commit_frename(BYTESTRING /* old_name */, + BYTESTRING /* new_iname */, + TOKUTXN /* txn */, + LSN UU(oplsn)) { + return 0; +} + +int toku_rollback_frename(BYTESTRING old_iname, + BYTESTRING new_iname, + TOKUTXN txn, + LSN UU(oplsn)) { + assert(txn); + assert(txn->logger); + assert(txn->logger->ct); + + CACHETABLE cachetable = txn->logger->ct; + + toku_struct_stat stat; + bool old_exist = true; + bool new_exist = true; + + std::unique_ptr<char[], decltype(&toku_free)> old_iname_full( + toku_cachetable_get_fname_in_cwd(cachetable, old_iname.data), + &toku_free); + std::unique_ptr<char[], decltype(&toku_free)> new_iname_full( + toku_cachetable_get_fname_in_cwd(cachetable, new_iname.data), + &toku_free); + + if (toku_stat(old_iname_full.get(), &stat) == -1) { + if (ENOENT == errno) + old_exist = false; + else + return 1; + } + + if (toku_stat(new_iname_full.get(), &stat) == -1) { + if (ENOENT == errno) + new_exist = false; + else + return 1; + } + + // Both old and new files can exist if: + // - rename() is not completed + // - fcreate was replayed during recovery + // 'Stalled cachefiles' container cachefile_list::m_stale_fileid contains + // closed but not yet evicted cachefiles and the key of this container is + // fs-dependent file id - (device id, inode number) pair. To preserve the + // new cachefile + // file's id and keep it in 'stalled cachefiles' container the old file is + // removed + // and the new file is renamed. + if (old_exist && new_exist && + (toku_os_unlink(old_iname_full.get()) == -1 || + toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 || + toku_fsync_directory(new_iname_full.get()) == -1 || + toku_fsync_directory(old_iname_full.get()) == -1)) + return 1; + + if (!old_exist && new_exist && + (toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 || + toku_fsync_directory(new_iname_full.get()) == -1 || + toku_fsync_directory(old_iname_full.get()) == -1)) + return 1; + + // it's ok if both files do not exist on recovery + if (!old_exist && !new_exist) + assert(txn->for_recovery); + + CACHEFILE cf; + int r = toku_cachefile_of_iname_in_env(cachetable, new_iname.data, &cf); + if (r != ENOENT) { + char *old_fname_in_cf = toku_cachefile_fname_in_env(cf); + toku_cachefile_set_fname_in_env(cf, toku_xstrdup(old_iname.data)); + toku_free(old_fname_in_cf); + // There is at least one case when fclose logging cause error: + // 1) start transaction + // 2) create ft 'a'(write "fcreate" in recovery log) + // 3) rename ft 'a' to 'b'(write "frename" in recovery log) + // 4) abort transaction: + // a) rollback rename ft (renames 'b' to 'a') + // b) rollback create ft (removes 'a'): + // invokes toku_cachefile_unlink_on_close - lazy unlink on file + // close, + // it just sets corresponding flag in cachefile object + // c) write "unlink" for 'a' in recovery log + // (when transaction is aborted all locks are released, + // when file lock is released the file is closed and unlinked if + // corresponding flag is set in cachefile object) + // 5) crash + // + // After this we have the following records in recovery log: + // - create ft 'a', + // - rename 'a' to 'b', + // - unlink 'a' + // + // On recovery: + // - create 'a' + // - rename 'a' to 'b' + // - unlink 'a' - as 'a' file does not exist we have crash on assert + // here + // + // There is no need to write "unlink" in recovery log in (4a) because + // 'a' will be removed + // on transaction rollback on recovery. + toku_cachefile_skip_log_recover_on_close(cf); + } + + return 0; +} + int find_ft_from_filenum (const FT &ft, const FILENUM &filenum); int find_ft_from_filenum (const FT &ft, const FILENUM &filenum) { FILENUM thisfnum = toku_cachefile_filenum(ft->cf); diff --git a/storage/tokudb/PerconaFT/portability/file.cc b/storage/tokudb/PerconaFT/portability/file.cc index 5332a2dff55..0e3efc1a12a 100644 --- a/storage/tokudb/PerconaFT/portability/file.cc +++ b/storage/tokudb/PerconaFT/portability/file.cc @@ -356,6 +356,12 @@ toku_os_close(int fd) { // if EINTR, retry until success return r; } +int toku_os_rename(const char *old_name, const char *new_name) { + return rename(old_name, new_name); +} + +int toku_os_unlink(const char *path) { return unlink(path); } + ssize_t toku_os_read(int fd, void *buf, size_t count) { ssize_t r; diff --git a/storage/tokudb/PerconaFT/portability/memory.cc b/storage/tokudb/PerconaFT/portability/memory.cc index 2de12699c61..5430ff84b70 100644 --- a/storage/tokudb/PerconaFT/portability/memory.cc +++ b/storage/tokudb/PerconaFT/portability/memory.cc @@ -313,6 +313,15 @@ toku_strdup(const char *s) { return (char *) toku_memdup(s, strlen(s)+1); } +char *toku_strndup(const char *s, size_t n) { + size_t s_size = strlen(s); + size_t bytes_to_copy = n > s_size ? s_size : n; + ++bytes_to_copy; + char *result = (char *)toku_memdup(s, bytes_to_copy); + result[bytes_to_copy - 1] = 0; + return result; +} + void toku_free(void *p) { if (p) { diff --git a/storage/tokudb/PerconaFT/portability/memory.h b/storage/tokudb/PerconaFT/portability/memory.h index 7780536f279..5ae652d39fc 100644 --- a/storage/tokudb/PerconaFT/portability/memory.h +++ b/storage/tokudb/PerconaFT/portability/memory.h @@ -125,7 +125,9 @@ size_t toku_malloc_usable_size(void *p) __attribute__((__visibility__("default") void *toku_memdup (const void *v, size_t len); /* Toku-version of strdup. Use this so that it calls toku_malloc() */ char *toku_strdup (const char *s) __attribute__((__visibility__("default"))); - +/* Toku-version of strndup. Use this so that it calls toku_malloc() */ +char *toku_strndup(const char *s, size_t n) + __attribute__((__visibility__("default"))); /* Copy memory. Analogous to strdup() Crashes instead of returning NULL */ void *toku_xmemdup (const void *v, size_t len) __attribute__((__visibility__("default"))); /* Toku-version of strdup. Use this so that it calls toku_xmalloc() Crashes instead of returning NULL */ diff --git a/storage/tokudb/PerconaFT/portability/toku_portability.h b/storage/tokudb/PerconaFT/portability/toku_portability.h index 921d3a309f6..f127b0fe172 100644 --- a/storage/tokudb/PerconaFT/portability/toku_portability.h +++ b/storage/tokudb/PerconaFT/portability/toku_portability.h @@ -246,6 +246,8 @@ int toku_os_open(const char *path, int oflag, int mode); int toku_os_open_direct(const char *path, int oflag, int mode); int toku_os_close(int fd); int toku_os_fclose(FILE * stream); +int toku_os_rename(const char *old_name, const char *new_name); +int toku_os_unlink(const char *path); ssize_t toku_os_read(int fd, void *buf, size_t count); ssize_t toku_os_pread(int fd, void *buf, size_t count, off_t offset); void toku_os_recursive_delete(const char *path); diff --git a/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt b/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt index 47f6aa44a75..c01a8f0d628 100644 --- a/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt @@ -108,11 +108,11 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) foreach(ov c d r) if (ov STREQUAL c) - set(gset 0) set(hset 0) + set(iset 0) else () - set(gset 0 1 2 3 4 5) - set(hset 0 1) + set(hset 0 1 2 3 4 5) + set(iset 0 1) endif () foreach(av 0 1) @@ -130,25 +130,27 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) foreach(dv ${dset}) foreach(ev ${eset}) foreach(fv 0 1) - foreach(gv ${gset}) + foreach(gv 0 1) foreach(hv ${hset}) - - if ((NOT ov STREQUAL c) AND (NOT cv) AND ((NOT bv) OR (NOT ev) OR (dv))) - set(iset 0 1) - else () - set(iset 0) - endif () - foreach(iv ${iset}) - set(testname "ydb/recovery_fileops_unit.${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}") - set(envdir "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}") - set(errfile "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}.ctest-errors") - add_test(NAME ${testname} - COMMAND run_recovery_fileops_unit.sh $<TARGET_FILE:recovery_fileops_unit.tdb> ${errfile} 137 - -O ${ov} -A ${av} -B ${bv} -C ${cv} -D ${dv} -E ${ev} -F ${fv} -G ${gv} -H ${hv} -I ${iv} - ) - setup_toku_test_properties(${testname} ${envdir}) - set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${errfile}") + + if ((NOT ov STREQUAL c) AND (NOT cv) AND ((NOT bv) OR (NOT ev) OR (dv))) + set(jset 0 1) + else () + set(jset 0) + endif () + + foreach(jv ${jset}) + set(testname "ydb/recovery_fileops_unit.${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}") + set(envdir "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}") + set(errfile "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}.ctest-errors") + add_test(NAME ${testname} + COMMAND run_recovery_fileops_unit.sh $<TARGET_FILE:recovery_fileops_unit.tdb> ${errfile} 137 + -O ${ov} -A ${av} -B ${bv} -C ${cv} -D ${dv} -E ${ev} -F ${fv} -G ${gv} -H ${hv} -I ${iv} -J ${jv} + ) + setup_toku_test_properties(${testname} ${envdir}) + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${errfile}") + endforeach(jv) endforeach(iv) endforeach(hv) endforeach(gv) diff --git a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc index 2c905c5ff12..cc99ab560d8 100644 --- a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc +++ b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc @@ -36,17 +36,17 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." -#include "test.h" -#include "toku_pthread.h" #include <db.h> -#include <sys/stat.h> #include <stdlib.h> - +#include <sys/stat.h> +#include "ft/logger/logger.h" +#include "test.h" +#include "toku_pthread.h" static int do_recover; static int do_crash; static char fileop; -static int choices['I'-'A'+1]; +static int choices['J' - 'A' + 1]; const int num_choices = sizeof(choices)/sizeof(choices[0]); static DB_TXN *txn; const char *oldname = "oldfoo"; @@ -58,11 +58,14 @@ static char *cmd; static void usage(void) { - fprintf(stderr, "Usage:\n%s [-v|-q]* [-h] (-c|-r) -O fileop -A# -B# -C# -D# -E# -F# [-G# -H# -I#]\n" - " fileop = c/r/d (create/rename/delete)\n" - " Where # is a single digit number > 0.\n" - " A-F are required for fileop=create\n" - " A-I are required for fileop=delete, fileop=rename\n", cmd); + fprintf(stderr, + "Usage:\n%s [-v|-q]* [-h] (-c|-r) -O fileop -A# -B# -C# -D# -E# " + "-F# -G# [-H# -I# -J#]\n" + " fileop = c/r/d (create/rename/delete)\n" + " Where # is a single digit number > 0.\n" + " A-G are required for fileop=create\n" + " A-I are required for fileop=delete, fileop=rename\n", + cmd); exit(1); } @@ -129,19 +132,18 @@ get_choice_flush_log_before_crash(void) { return get_bool_choice('F'); } -static int -get_choice_create_type(void) { - return get_x_choice('G', 6); -} +static int get_choice_dir_per_db(void) { return get_bool_choice('G'); } + +static int get_choice_create_type(void) { return get_x_choice('H', 6); } static int get_choice_txn_does_open_close_before_fileop(void) { - return get_bool_choice('H'); + return get_bool_choice('I'); } static int get_choice_lock_table_split_fcreate(void) { - int choice = get_bool_choice('I'); + int choice = get_bool_choice('J'); if (choice) assert(fileop_did_commit()); return choice; @@ -156,63 +158,65 @@ do_args(int argc, char * const argv[]) { choices[i] = -1; } - int c; - while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:X:")) != -1) { - switch(c) { - case 'v': - verbose++; - break; - case 'q': - verbose--; - if (verbose<0) verbose=0; - break; - case 'h': - case '?': - usage(); - break; - case 'c': - do_crash = 1; - break; - case 'r': - do_recover = 1; - break; - case 'O': - if (fileop != '\0') + char c; + while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:J:X:")) != -1) { + switch (c) { + case 'v': + verbose++; + break; + case 'q': + verbose--; + if (verbose < 0) + verbose = 0; + break; + case 'h': + case '?': usage(); - fileop = optarg[0]; - switch (fileop) { - case 'c': - case 'r': - case 'd': - break; - default: + break; + case 'c': + do_crash = 1; + break; + case 'r': + do_recover = 1; + break; + case 'O': + if (fileop != '\0') usage(); - break; - } - break; - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - case 'G': - case 'H': - case 'I': - if (fileop == '\0') - usage(); - int num; - num = atoi(optarg); - if (num < 0 || num > 9) - usage(); - choices[c - 'A'] = num; - break; - case 'X': - if (strcmp(optarg, "novalgrind") == 0) { - // provide a way for the shell script runner to pass an - // arg that suppresses valgrind on this child process + fileop = optarg[0]; + switch (fileop) { + case 'c': + case 'r': + case 'd': + break; + default: + usage(); + break; + } + break; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + if (fileop == '\0') + usage(); + int num; + num = atoi(optarg); + if (num < 0 || num > 9) + usage(); + choices[c - 'A'] = num; break; - } + case 'X': + if (strcmp(optarg, "novalgrind") == 0) { + // provide a way for the shell script runner to pass an + // arg that suppresses valgrind on this child process + break; + } // otherwise, fall through to an error default: usage(); @@ -222,7 +226,7 @@ do_args(int argc, char * const argv[]) { if (argc!=optind) { usage(); exit(1); } for (i = 0; i < num_choices; i++) { - if (i >= 'G' - 'A' && fileop == 'c') + if (i >= 'H' - 'A' && fileop == 'c') break; if (choices[i] == -1) usage(); @@ -261,6 +265,8 @@ static void env_startup(void) { int envflags = DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_CREATE | DB_PRIVATE | recover_flag; r = db_env_create(&env, 0); CKERR(r); + r = env->set_dir_per_db(env, get_choice_dir_per_db()); + CKERR(r); env->set_errfile(env, stderr); r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); @@ -625,8 +631,11 @@ recover_and_verify(void) { else if (did_create_commit_early()) expect_old_name = 1; } - verify_file_exists(oldname, expect_old_name); - verify_file_exists(newname, expect_new_name); + // We can't expect files existence until recovery log was not flushed + if ((get_choice_flush_log_before_crash())) { + verify_file_exists(oldname, expect_old_name); + verify_file_exists(newname, expect_new_name); + } env_shutdown(); } diff --git a/storage/tokudb/PerconaFT/src/ydb-internal.h b/storage/tokudb/PerconaFT/src/ydb-internal.h index 2d6c84126e1..d40f7795b0b 100644 --- a/storage/tokudb/PerconaFT/src/ydb-internal.h +++ b/storage/tokudb/PerconaFT/src/ydb-internal.h @@ -132,7 +132,8 @@ struct __toku_db_env_internal { int datadir_lockfd; int logdir_lockfd; int tmpdir_lockfd; - bool check_thp; // if set check if transparent huge pages are disables + bool check_thp; // if set check if transparent huge pages are disabled + bool dir_per_db; uint64_t (*get_loader_memory_size_callback)(void); uint64_t default_lock_timeout_msec; uint64_t (*get_lock_timeout_callback)(uint64_t default_lock_timeout_msec); diff --git a/storage/tokudb/PerconaFT/src/ydb.cc b/storage/tokudb/PerconaFT/src/ydb.cc index aed271bce40..3341f6d76c6 100644 --- a/storage/tokudb/PerconaFT/src/ydb.cc +++ b/storage/tokudb/PerconaFT/src/ydb.cc @@ -1298,6 +1298,22 @@ env_get_check_thp(DB_ENV * env) { return env->i->check_thp; } +static bool env_set_dir_per_db(DB_ENV *env, bool new_val) { + HANDLE_PANICKED_ENV(env); + bool r = env->i->dir_per_db; + env->i->dir_per_db = new_val; + return r; +} + +static bool env_get_dir_per_db(DB_ENV *env) { + HANDLE_PANICKED_ENV(env); + return env->i->dir_per_db; +} + +static const char *env_get_data_dir(DB_ENV *env) { + return env->i->real_data_dir; +} + static int env_dbremove(DB_ENV * env, DB_TXN *txn, const char *fname, const char *dbname, uint32_t flags); static int @@ -2700,6 +2716,9 @@ toku_env_create(DB_ENV ** envp, uint32_t flags) { USENV(do_backtrace); USENV(set_check_thp); USENV(get_check_thp); + USENV(set_dir_per_db); + USENV(get_dir_per_db); + USENV(get_data_dir); #undef USENV // unlocked methods @@ -3045,7 +3064,7 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co if (env_is_db_with_dname_open(env, newname)) { return toku_ydb_do_error(env, EINVAL, "Cannot rename dictionary; Dictionary with target name has an open handle.\n"); } - + DBT old_dname_dbt; DBT new_dname_dbt; DBT iname_dbt; @@ -3065,10 +3084,35 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co r = EEXIST; } else if (r == DB_NOTFOUND) { + DBT new_iname_dbt; + // Do not rename ft file if 'dir_per_db' option is not set + auto new_iname = + env->get_dir_per_db(env) + ? generate_iname_for_rename_or_open( + env, txn, newname, false) + : std::unique_ptr<char[], decltype(&toku_free)>( + toku_strdup(iname), &toku_free); + toku_fill_dbt( + &new_iname_dbt, new_iname.get(), strlen(new_iname.get()) + 1); + // remove old (dname,iname) and insert (newname,iname) in directory r = toku_db_del(env->i->directory, txn, &old_dname_dbt, DB_DELETE_ANY, true); if (r != 0) { goto exit; } - r = toku_db_put(env->i->directory, txn, &new_dname_dbt, &iname_dbt, 0, true); + + // Do not rename ft file if 'dir_per_db' option is not set + if (env->get_dir_per_db(env)) + r = toku_ft_rename_iname(txn, + env->get_data_dir(env), + iname, + new_iname.get(), + env->i->cachetable); + + r = toku_db_put(env->i->directory, + txn, + &new_dname_dbt, + &new_iname_dbt, + 0, + true); if (r != 0) { goto exit; } //Now that we have writelocks on both dnames, verify that there are still no handles open. (to prevent race conditions) @@ -3091,7 +3135,7 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co // otherwise, we're okay in marking this ft as remove on // commit. no new handles can open for this dictionary // because the txn has directory write locks on the dname - if (txn && !can_acquire_table_lock(env, txn, iname)) { + if (txn && !can_acquire_table_lock(env, txn, new_iname.get())) { r = DB_LOCK_NOTGRANTED; } // We don't do anything at the ft or cachetable layer for rename. diff --git a/storage/tokudb/PerconaFT/src/ydb_db.cc b/storage/tokudb/PerconaFT/src/ydb_db.cc index e5bd4e7d089..100d1bfa20b 100644 --- a/storage/tokudb/PerconaFT/src/ydb_db.cc +++ b/storage/tokudb/PerconaFT/src/ydb_db.cc @@ -83,8 +83,7 @@ ydb_db_layer_get_status(YDB_DB_LAYER_STATUS statp) { *statp = ydb_db_layer_status; } -static void -create_iname_hint(const char *dname, char *hint) { +void create_iname_hint(const char *dname, char *hint) { //Requires: size of hint array must be > strlen(dname) //Copy alphanumeric characters only. //Replace strings of non-alphanumeric characters with a single underscore. @@ -105,11 +104,43 @@ create_iname_hint(const char *dname, char *hint) { *hint = '\0'; } +void create_iname_hint_for_dbdir(const char *dname, char *hint) { + assert(dname); + if (*dname == '.') + ++dname; + if (*dname == '/') + ++dname; + bool underscored = false; + bool dbdir_is_parsed = false; + // Do not change the first '/' because this is + // delimiter which splits name into database dir + // and table dir. + while (*dname) { + if (isalnum(*dname) || (*dname == '/' && !dbdir_is_parsed)) { + char c = *dname++; + *hint++ = c; + if (c == '/') + dbdir_is_parsed = true; + underscored = false; + } else { + if (!underscored) + *hint++ = '_'; + dname++; + underscored = true; + } + } + *hint = '\0'; +} + // n < 0 means to ignore mark and ignore n // n >= 0 means to include mark ("_B_" or "_P_") with hex value of n in iname // (intended for use by loader, which will create many inames using one txnid). -static char * -create_iname(DB_ENV *env, uint64_t id1, uint64_t id2, char *hint, const char *mark, int n) { +char *create_iname(DB_ENV *env, + uint64_t id1, + uint64_t id2, + char *hint, + const char *mark, + int n) { int bytes; char inamebase[strlen(hint) + 8 + // hex file format version @@ -138,6 +169,34 @@ create_iname(DB_ENV *env, uint64_t id1, uint64_t id2, char *hint, const char *ma return rval; } +static uint64_t nontransactional_open_id = 0; + +std::unique_ptr<char[], decltype(&toku_free)> generate_iname_for_rename_or_open( + DB_ENV *env, + DB_TXN *txn, + const char *dname, + bool is_open) { + std::unique_ptr<char[], decltype(&toku_free)> result(nullptr, &toku_free); + char hint[strlen(dname) + 1]; + uint64_t id1 = 0; + uint64_t id2 = 0; + + if (txn) { + id1 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).parent_id64; + id2 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).child_id64; + } else if (is_open) + id1 = toku_sync_fetch_and_add(&nontransactional_open_id, 1); + + if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname)) + create_iname_hint_for_dbdir(dname, hint); + else + create_iname_hint(dname, hint); + + result.reset(create_iname(env, id1, id2, hint, NULL, -1)); + + return result; +} + static int toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYPE dbtype, uint32_t flags, int mode); // Effect: Do the work required of DB->close(). @@ -227,8 +286,6 @@ db_open_subdb(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTY return r; } -static uint64_t nontransactional_open_id = 0; - // inames are created here. // algorithm: // begin txn @@ -286,27 +343,15 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1); toku_init_dbt_flags(&iname_dbt, DB_DBT_REALLOC); r = toku_db_get(db->dbenv->i->directory, txn, &dname_dbt, &iname_dbt, DB_SERIALIZABLE); // allocates memory for iname - char *iname = (char *) iname_dbt.data; + std::unique_ptr<char[], decltype(&toku_free)> iname( + static_cast<char *>(iname_dbt.data), &toku_free); if (r == DB_NOTFOUND && !is_db_create) { r = ENOENT; } else if (r==0 && is_db_excl) { r = EEXIST; } else if (r == DB_NOTFOUND) { - char hint[strlen(dname) + 1]; - - // create iname and make entry in directory - uint64_t id1 = 0; - uint64_t id2 = 0; - - if (txn) { - id1 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).parent_id64; - id2 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).child_id64; - } else { - id1 = toku_sync_fetch_and_add(&nontransactional_open_id, 1); - } - create_iname_hint(dname, hint); - iname = create_iname(db->dbenv, id1, id2, hint, NULL, -1); // allocated memory for iname - toku_fill_dbt(&iname_dbt, iname, strlen(iname) + 1); + iname = generate_iname_for_rename_or_open(db->dbenv, txn, dname, true); + toku_fill_dbt(&iname_dbt, iname.get(), strlen(iname.get()) + 1); // // put_flags will be 0 for performance only, avoid unnecessary query // if we are creating a hot index, per #3166, we do not want the write lock in directory grabbed. @@ -318,16 +363,13 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP // we now have an iname if (r == 0) { - r = toku_db_open_iname(db, txn, iname, flags, mode); + r = toku_db_open_iname(db, txn, iname.get(), flags, mode); if (r == 0) { db->i->dname = toku_xstrdup(dname); env_note_db_opened(db->dbenv, db); // tell env that a new db handle is open (using dname) } } - if (iname) { - toku_free(iname); - } return r; } @@ -1181,7 +1223,10 @@ load_inames(DB_ENV * env, DB_TXN * txn, int N, DB * dbs[/*N*/], const char * new toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1); // now create new iname char hint[strlen(dname) + 1]; - create_iname_hint(dname, hint); + if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname)) + create_iname_hint_for_dbdir(dname, hint); + else + create_iname_hint(dname, hint); const char *new_iname = create_iname(env, xid.parent_id64, xid.child_id64, hint, mark, i); // allocates memory for iname_in_env new_inames_in_env[i] = new_iname; toku_fill_dbt(&iname_dbt, new_iname, strlen(new_iname) + 1); // iname_in_env goes in directory diff --git a/storage/tokudb/PerconaFT/src/ydb_db.h b/storage/tokudb/PerconaFT/src/ydb_db.h index 8b92dd1c3cb..8be28857c14 100644 --- a/storage/tokudb/PerconaFT/src/ydb_db.h +++ b/storage/tokudb/PerconaFT/src/ydb_db.h @@ -43,6 +43,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "ydb-internal.h" #include "ydb_txn.h" +#include <memory> + typedef enum { YDB_LAYER_DIRECTORY_WRITE_LOCKS = 0, /* total directory write locks taken */ YDB_LAYER_DIRECTORY_WRITE_LOCKS_FAIL, /* total directory write locks unable to be taken */ @@ -119,3 +121,17 @@ toku_db_destruct_autotxn(DB_TXN *txn, int r, bool changed) { } return r; } + +void create_iname_hint_for_dbdir(const char *dname, char *hint); +void create_iname_hint(const char *dname, char *hint); +char *create_iname(DB_ENV *env, + uint64_t id1, + uint64_t id2, + char *hint, + const char *mark, + int n); +std::unique_ptr<char[], decltype(&toku_free)> generate_iname_for_rename_or_open( + DB_ENV *env, + DB_TXN *txn, + const char *dname, + bool is_open); diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index e7dfbd810c2..1581a7b76df 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -531,6 +531,7 @@ static int tokudb_init_func(void *p) { db_env->change_fsync_log_period(db_env, tokudb::sysvars::fsync_log_period); db_env->set_lock_timeout_callback(db_env, tokudb_lock_timeout_callback); + db_env->set_dir_per_db(db_env, tokudb::sysvars::dir_per_db); db_env->set_loader_memory_size( db_env, diff --git a/storage/tokudb/mysql-test/tokudb/disabled.def b/storage/tokudb/mysql-test/tokudb/disabled.def index c98a8aa622a..ddefceb432e 100644 --- a/storage/tokudb/mysql-test/tokudb/disabled.def +++ b/storage/tokudb/mysql-test/tokudb/disabled.def @@ -28,3 +28,4 @@ type_timestamp_explicit: cluster_key_part: engine options on partitioned tables i_s_tokudb_lock_waits_released: unstable, race conditions i_s_tokudb_locks_released: unstable, race conditions +row_format: n/a diff --git a/storage/tokudb/mysql-test/tokudb/include/table_files_replace_pattern.inc b/storage/tokudb/mysql-test/tokudb/include/table_files_replace_pattern.inc new file mode 100644 index 00000000000..b10ad21dd95 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/include/table_files_replace_pattern.inc @@ -0,0 +1 @@ +--replace_regex /[a-z0-9]+_[a-z0-9]+_[a-z0-9]+(_[BP]_[a-z0-9]+){0,1}\./id./ /sqlx_[a-z0-9]+_[a-z0-9]+_/sqlx_nnnn_nnnn_/ /sqlx-[a-z0-9]+_[a-z0-9]+/sqlx-nnnn_nnnn/ /#p#/#P#/ /#sp#/#SP#/ /#tmp#/#TMP#/ diff --git a/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result b/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result new file mode 100644 index 00000000000..a36dbcb28c0 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result @@ -0,0 +1,10 @@ +SELECT @@tokudb_dir_per_db; +@@tokudb_dir_per_db +1 +TOKUDB_DATA_DIR_CHANGED +1 +CREATE DATABASE tokudb_test; +USE tokudb_test; +CREATE TABLE t (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY) ENGINE=tokudb; +DROP TABLE t; +DROP DATABASE tokudb_test; diff --git a/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result b/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result new file mode 100644 index 00000000000..371f97406c8 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result @@ -0,0 +1,180 @@ +######## +# tokudb_dir_per_db = 1 +######## +SET GLOBAL tokudb_dir_per_db= 1; +######## +# CREATE +######## +CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; +INSERT INTO t1 SET b = 10; +INSERT INTO t1 SET b = 20; +SELECT b FROM t1 ORDER BY a; +b +10 +20 +CREATE INDEX b ON t1 (b); +CREATE INDEX ab ON t1 (a,b); +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +t1_key_ab_id.tokudb +t1_key_b_id.tokudb +t1_main_id.tokudb +t1_status_id.tokudb +######## +# RENAME +######## +RENAME TABLE t1 TO t2; +SELECT b FROM t2 ORDER BY a; +b +10 +20 +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +t2_key_ab_id.tokudb +t2_key_b_id.tokudb +t2_main_id.tokudb +t2_status_id.tokudb +######## +# DROP +######## +DROP TABLE t2; +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +######## +# tokudb_dir_per_db = 0 +######## +SET GLOBAL tokudb_dir_per_db= 0; +######## +# CREATE +######## +CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; +INSERT INTO t1 SET b = 10; +INSERT INTO t1 SET b = 20; +SELECT b FROM t1 ORDER BY a; +b +10 +20 +CREATE INDEX b ON t1 (b); +CREATE INDEX ab ON t1 (a,b); +## Looking for *.tokudb files in data_dir +_test_t1_key_ab_id.tokudb +_test_t1_key_b_id.tokudb +_test_t1_main_id.tokudb +_test_t1_status_id.tokudb +## Looking for *.tokudb files in data_dir/test +######## +# RENAME +######## +RENAME TABLE t1 TO t2; +SELECT b FROM t2 ORDER BY a; +b +10 +20 +## Looking for *.tokudb files in data_dir +_test_t1_key_ab_id.tokudb +_test_t1_key_b_id.tokudb +_test_t1_main_id.tokudb +_test_t1_status_id.tokudb +## Looking for *.tokudb files in data_dir/test +######## +# DROP +######## +DROP TABLE t2; +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +######## +# CREATE on tokudb_dir_per_db = 0 and RENAME on tokudb_dir_per_db = 1 and vice versa +######## +######## +# tokudb_dir_per_db = (1 - 1); +######## +SET GLOBAL tokudb_dir_per_db= (1 - 1);; +######## +# CREATE +######## +CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; +INSERT INTO t1 SET b = 10; +INSERT INTO t1 SET b = 20; +SELECT b FROM t1 ORDER BY a; +b +10 +20 +CREATE INDEX b ON t1 (b); +CREATE INDEX ab ON t1 (a,b); +## Looking for *.tokudb files in data_dir +_test_t1_key_ab_id.tokudb +_test_t1_key_b_id.tokudb +_test_t1_main_id.tokudb +_test_t1_status_id.tokudb +## Looking for *.tokudb files in data_dir/test +######## +# tokudb_dir_per_db = 1 +######## +SET GLOBAL tokudb_dir_per_db= 1; +######## +# RENAME +######## +RENAME TABLE t1 TO t2; +SELECT b FROM t2 ORDER BY a; +b +10 +20 +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +t2_key_ab_id.tokudb +t2_key_b_id.tokudb +t2_main_id.tokudb +t2_status_id.tokudb +######## +# DROP +######## +DROP TABLE t2; +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +######## +# tokudb_dir_per_db = (1 - 0); +######## +SET GLOBAL tokudb_dir_per_db= (1 - 0);; +######## +# CREATE +######## +CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; +INSERT INTO t1 SET b = 10; +INSERT INTO t1 SET b = 20; +SELECT b FROM t1 ORDER BY a; +b +10 +20 +CREATE INDEX b ON t1 (b); +CREATE INDEX ab ON t1 (a,b); +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +t1_key_ab_id.tokudb +t1_key_b_id.tokudb +t1_main_id.tokudb +t1_status_id.tokudb +######## +# tokudb_dir_per_db = 0 +######## +SET GLOBAL tokudb_dir_per_db= 0; +######## +# RENAME +######## +RENAME TABLE t1 TO t2; +SELECT b FROM t2 ORDER BY a; +b +10 +20 +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +t1_key_ab_id.tokudb +t1_key_b_id.tokudb +t1_main_id.tokudb +t1_status_id.tokudb +######## +# DROP +######## +DROP TABLE t2; +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +SET GLOBAL tokudb_dir_per_db=default; diff --git a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result index 6f9592ddc1f..ecd4d077206 100644 --- a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result +++ b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result @@ -2,6 +2,7 @@ set default_storage_engine='tokudb'; set tokudb_prelock_empty=false; drop table if exists t; create table t (id int primary key); +t should be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; @@ -15,17 +16,21 @@ insert into t values (1); set autocommit=0; set tokudb_lock_timeout=600000; insert into t values (1); +should find the presence of a lock on 1st transaction select * from information_schema.tokudb_locks; locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main +should find the presence of a lock_wait on the 2nd transaction select * from information_schema.tokudb_lock_waits; requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main +should find the presence of two transactions select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; +verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction select * from information_schema.tokudb_locks; locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main @@ -33,6 +38,8 @@ select * from information_schema.tokudb_lock_waits; requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name ERROR 23000: Duplicate entry '1' for key 'PRIMARY' commit; +verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes +verify that the lock on the 2nd transaction has been released, should be be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; @@ -46,23 +53,28 @@ replace into t values (1); set autocommit=0; set tokudb_lock_timeout=600000; replace into t values (1); +should find the presence of a lock on 1st transaction select * from information_schema.tokudb_locks; locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main +should find the presence of a lock_wait on the 2nd transaction select * from information_schema.tokudb_lock_waits; requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main +should find the presence of two transactions select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; +verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction select * from information_schema.tokudb_locks; locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name commit; +verify that the lock on the 2nd transaction has been released, should be be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; diff --git a/storage/tokudb/mysql-test/tokudb/r/row_format.result b/storage/tokudb/mysql-test/tokudb/r/row_format.result new file mode 100644 index 00000000000..cb669148445 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/row_format.result @@ -0,0 +1,51 @@ +CREATE TABLE tokudb_row_format_test_1 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT; +CREATE TABLE tokudb_row_format_test_2 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST; +CREATE TABLE tokudb_row_format_test_3 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL; +CREATE TABLE tokudb_row_format_test_4 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED; +CREATE TABLE tokudb_row_format_test_5 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB; +CREATE TABLE tokudb_row_format_test_6 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA; +CREATE TABLE tokudb_row_format_test_7 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ; +CREATE TABLE tokudb_row_format_test_8 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name like 'tokudb_row_format_test%' ORDER BY table_name; +table_name row_format engine +tokudb_row_format_test_1 tokudb_zlib TokuDB +tokudb_row_format_test_2 tokudb_quicklz TokuDB +tokudb_row_format_test_3 tokudb_lzma TokuDB +tokudb_row_format_test_4 tokudb_uncompressed TokuDB +tokudb_row_format_test_5 tokudb_zlib TokuDB +tokudb_row_format_test_6 tokudb_lzma TokuDB +tokudb_row_format_test_7 tokudb_quicklz TokuDB +tokudb_row_format_test_8 tokudb_snappy TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_quicklz TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_lzma TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_uncompressed TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_zlib TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_snappy TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_quicklz TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_lzma TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_zlib TokuDB +DROP TABLE tokudb_row_format_test_1, tokudb_row_format_test_2, tokudb_row_format_test_3, tokudb_row_format_test_4, tokudb_row_format_test_5, tokudb_row_format_test_6, tokudb_row_format_test_7, tokudb_row_format_test_8; diff --git a/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt new file mode 100644 index 00000000000..a9090f4d115 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt @@ -0,0 +1 @@ +--loose-tokudb_data_dir="$MYSQL_TMP_DIR" --loose-tokudb-dir-per-db=1 diff --git a/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test new file mode 100644 index 00000000000..7f415a72515 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test @@ -0,0 +1,16 @@ +--source include/have_tokudb.inc + +SELECT @@tokudb_dir_per_db; + +--disable_query_log +--eval SELECT STRCMP(@@tokudb_data_dir, '$MYSQL_TMP_DIR') = 0 AS TOKUDB_DATA_DIR_CHANGED +--enable_query_log + +CREATE DATABASE tokudb_test; +USE tokudb_test; +CREATE TABLE t (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY) ENGINE=tokudb; + +--file_exists $MYSQL_TMP_DIR/tokudb_test + +DROP TABLE t; +DROP DATABASE tokudb_test; diff --git a/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test b/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test new file mode 100644 index 00000000000..b638b706d87 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test @@ -0,0 +1,76 @@ +source include/have_tokudb.inc; + +--let $DB= test +--let $DATADIR= `select @@datadir` +--let $i= 2 + +while ($i) { + --dec $i + --echo ######## + --echo # tokudb_dir_per_db = $i + --echo ######## + --eval SET GLOBAL tokudb_dir_per_db= $i + --echo ######## + --echo # CREATE + --echo ######## + CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; + INSERT INTO t1 SET b = 10; + INSERT INTO t1 SET b = 20; + SELECT b FROM t1 ORDER BY a; + CREATE INDEX b ON t1 (b); + CREATE INDEX ab ON t1 (a,b); + --source dir_per_db_show_table_files.inc + --echo ######## + --echo # RENAME + --echo ######## + RENAME TABLE t1 TO t2; + SELECT b FROM t2 ORDER BY a; + --source dir_per_db_show_table_files.inc + --echo ######## + --echo # DROP + --echo ######## + DROP TABLE t2; + --source dir_per_db_show_table_files.inc +} + +--echo ######## +--echo # CREATE on tokudb_dir_per_db = 0 and RENAME on tokudb_dir_per_db = 1 and vice versa +--echo ######## + +--let $i= 2 + +while ($i) { + --dec $i + --let $inv_i= (1 - $i); + --echo ######## + --echo # tokudb_dir_per_db = $inv_i + --echo ######## + --eval SET GLOBAL tokudb_dir_per_db= $inv_i + --echo ######## + --echo # CREATE + --echo ######## + CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; + INSERT INTO t1 SET b = 10; + INSERT INTO t1 SET b = 20; + SELECT b FROM t1 ORDER BY a; + CREATE INDEX b ON t1 (b); + CREATE INDEX ab ON t1 (a,b); + --source dir_per_db_show_table_files.inc + --echo ######## + --echo # tokudb_dir_per_db = $i + --echo ######## + --eval SET GLOBAL tokudb_dir_per_db= $i + --echo ######## + --echo # RENAME + --echo ######## + RENAME TABLE t1 TO t2; + SELECT b FROM t2 ORDER BY a; + --source dir_per_db_show_table_files.inc + --echo ######## + --echo # DROP + --echo ######## + DROP TABLE t2; + --source dir_per_db_show_table_files.inc +} + +SET GLOBAL tokudb_dir_per_db=default; diff --git a/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc b/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc new file mode 100644 index 00000000000..bdf7d5b235f --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc @@ -0,0 +1,9 @@ +--sorted_result + +--echo ## Looking for *.tokudb files in data_dir +--source include/table_files_replace_pattern.inc +--list_files $DATADIR *.tokudb + +--echo ## Looking for *.tokudb files in data_dir/$DB +--source include/table_files_replace_pattern.inc +--list_files $DATADIR/$DB/ *.tokudb diff --git a/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test b/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test index d8ce18b3aa7..6534175d619 100644 --- a/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test +++ b/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test @@ -17,7 +17,7 @@ create table t (id int primary key); # verify that txn_a insert (1) blocks txn_b insert (1) and txn_b gets a duplicate key error -# should be empty +--echo t should be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; select * from information_schema.tokudb_locks; select * from information_schema.tokudb_lock_waits; @@ -33,7 +33,7 @@ set autocommit=0; set tokudb_lock_timeout=600000; # set lock wait timeout to 10 minutes send insert into t values (1); -# should find the presence of a lock on 1st transaction +--echo should find the presence of a lock on 1st transaction connection default; let $wait_condition= select count(*)=1 from information_schema.processlist where info='insert into t values (1)' and state='update'; source include/wait_condition.inc; @@ -42,17 +42,17 @@ real_sleep 1; # delay a little to shorten the update -> write row -> lock wait r replace_column 1 TRX_ID 2 MYSQL_ID; select * from information_schema.tokudb_locks; -# should find the presence of a lock_wait on the 2nd transaction +--echo should find the presence of a lock_wait on the 2nd transaction replace_column 1 REQUEST_TRX_ID 2 BLOCK_TRX_ID 6 LOCK_WAITS_START_TIME; select * from information_schema.tokudb_lock_waits; -# should find the presence of two transactions +--echo should find the presence of two transactions replace_column 1 TRX_ID 2 MYSQL_ID; select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; connection conn_a; commit; -# verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction +--echo verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction let $wait_condition= select count(*)=1 from information_schema.tokudb_locks where locks_dname='./test/t-main'; source include/wait_condition.inc; @@ -69,10 +69,8 @@ connection default; disconnect conn_a; disconnect conn_b; -# verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes - -# verify that the lock on the 2nd transaction has been released -# should be be empty +--echo verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes +--echo verify that the lock on the 2nd transaction has been released, should be be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; select * from information_schema.tokudb_locks; select * from information_schema.tokudb_lock_waits; @@ -88,7 +86,7 @@ set autocommit=0; set tokudb_lock_timeout=600000; # set lock wait timeout to 10 minutes send replace into t values (1); -# should find the presence of a lock on 1st transaction +--echo should find the presence of a lock on 1st transaction connection default; let $wait_condition= select count(*)=1 from information_schema.processlist where info='replace into t values (1)' and state='update'; source include/wait_condition.inc; @@ -97,17 +95,19 @@ real_sleep 1; # delay a little to shorten the update -> write row -> lock wait r replace_column 1 TRX_ID 2 MYSQL_ID; select * from information_schema.tokudb_locks; -# should find the presence of a lock_wait on the 2nd transaction +--echo should find the presence of a lock_wait on the 2nd transaction replace_column 1 REQUEST_TRX_ID 2 BLOCK_TRX_ID 6 LOCK_WAITS_START_TIME; select * from information_schema.tokudb_lock_waits; -# should find the presence of two transactions +--echo should find the presence of two transactions replace_column 1 TRX_ID 2 MYSQL_ID; select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; connection conn_a; commit; -# verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction +--echo verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction +let $wait_condition= select count(*)=1 from information_schema.tokudb_locks where locks_dname='./test/t-main'; +source include/wait_condition.inc; replace_column 1 TRX_ID 2 MYSQL_ID; select * from information_schema.tokudb_locks; select * from information_schema.tokudb_lock_waits; @@ -120,8 +120,7 @@ connection default; disconnect conn_a; disconnect conn_b; -# verify that the lock on the 2nd transaction has been released -# should be be empty +--echo verify that the lock on the 2nd transaction has been released, should be be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; select * from information_schema.tokudb_locks; select * from information_schema.tokudb_lock_waits; diff --git a/storage/tokudb/mysql-test/tokudb/t/row_format.test b/storage/tokudb/mysql-test/tokudb/t/row_format.test new file mode 100644 index 00000000000..6533f8c06be --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/row_format.test @@ -0,0 +1,41 @@ +# +# Test TokuDB compression option additions to row_format +# +--source include/have_tokudb.inc + +CREATE TABLE tokudb_row_format_test_1 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT; +CREATE TABLE tokudb_row_format_test_2 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST; +CREATE TABLE tokudb_row_format_test_3 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL; +CREATE TABLE tokudb_row_format_test_4 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED; +CREATE TABLE tokudb_row_format_test_5 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB; +CREATE TABLE tokudb_row_format_test_6 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA; +CREATE TABLE tokudb_row_format_test_7 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ; +CREATE TABLE tokudb_row_format_test_8 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY; + +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name like 'tokudb_row_format_test%' ORDER BY table_name; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +DROP TABLE tokudb_row_format_test_1, tokudb_row_format_test_2, tokudb_row_format_test_3, tokudb_row_format_test_4, tokudb_row_format_test_5, tokudb_row_format_test_6, tokudb_row_format_test_7, tokudb_row_format_test_8; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result b/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result index 779d458221b..30e0bdbebd7 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result +++ b/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result @@ -23,6 +23,7 @@ set DEBUG_SYNC = 'tokudb_after_truncate_all_dictionarys SIGNAL closed WAIT_FOR d TRUNCATE TABLE t1; set global tokudb_debug_pause_background_job_manager = FALSE; set DEBUG_SYNC = 'now SIGNAL done'; +set DEBUG_SYNC = 'RESET'; drop table t1; set session tokudb_auto_analyze = @orig_auto_analyze; set session tokudb_analyze_in_background = @orig_in_background; @@ -32,4 +33,3 @@ set session tokudb_analyze_time = @orig_time; set global tokudb_cardinality_scale_percent = @orig_scale_percent; set session default_storage_engine = @orig_default_storage_engine; set global tokudb_debug_pause_background_job_manager = @orig_pause_background_job_manager; -set DEBUG_SYNC='reset'; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test b/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test index f56f93d1492..50434a79a00 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test @@ -40,6 +40,7 @@ insert into t1(b,c) values(0,0), (1,1), (2,2), (3,3); select database_name, table_name, job_type, job_params, scheduler from information_schema.tokudb_background_job_status; # lets flip to another connection +--source include/count_sessions.inc connect(conn1, localhost, root); # set up the DEBUG_SYNC point @@ -64,6 +65,7 @@ connection conn1; reap; connection default; disconnect conn1; +set DEBUG_SYNC = 'RESET'; drop table t1; set session tokudb_auto_analyze = @orig_auto_analyze; @@ -74,4 +76,4 @@ set session tokudb_analyze_time = @orig_time; set global tokudb_cardinality_scale_percent = @orig_scale_percent; set session default_storage_engine = @orig_default_storage_engine; set global tokudb_debug_pause_background_job_manager = @orig_pause_background_job_manager; -set DEBUG_SYNC='reset'; +--source include/wait_until_count_sessions.inc diff --git a/storage/tokudb/mysql-test/tokudb_parts/include/table_files_replace_pattern.inc b/storage/tokudb/mysql-test/tokudb_parts/include/table_files_replace_pattern.inc new file mode 100644 index 00000000000..b10ad21dd95 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb_parts/include/table_files_replace_pattern.inc @@ -0,0 +1 @@ +--replace_regex /[a-z0-9]+_[a-z0-9]+_[a-z0-9]+(_[BP]_[a-z0-9]+){0,1}\./id./ /sqlx_[a-z0-9]+_[a-z0-9]+_/sqlx_nnnn_nnnn_/ /sqlx-[a-z0-9]+_[a-z0-9]+/sqlx-nnnn_nnnn/ /#p#/#P#/ /#sp#/#SP#/ /#tmp#/#TMP#/ diff --git a/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test b/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test index be14d8814f0..f97235a0a2d 100644 --- a/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test +++ b/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test @@ -56,7 +56,7 @@ partition by range (a) insert into t1 values (1), (11), (21), (33); SELECT * FROM t1; SHOW CREATE TABLE t1; ---replace_result #p# #P# #sp# #SP# +--source include/table_files_replace_pattern.inc --list_files $MYSQLD_DATADIR/test SET DEBUG_SYNC='before_open_in_get_all_tables SIGNAL parked WAIT_FOR open'; @@ -82,7 +82,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p0 INTO disconnect con1; connection default; --reap ---replace_result #p# #P# #sp# #SP# +--source include/table_files_replace_pattern.inc --list_files $MYSQLD_DATADIR/test SHOW CREATE TABLE t1; SELECT * FROM t1; diff --git a/storage/tokudb/tokudb_sysvars.cc b/storage/tokudb/tokudb_sysvars.cc index 7cea749b4fb..b758929c10e 100644 --- a/storage/tokudb/tokudb_sysvars.cc +++ b/storage/tokudb/tokudb_sysvars.cc @@ -66,6 +66,7 @@ uint read_status_frequency = 0; my_bool strip_frm_data = FALSE; char* tmp_dir = NULL; uint write_status_frequency = 0; +my_bool dir_per_db = FALSE; char* version = (char*) TOKUDB_VERSION_STR; // file system reserve as a percentage of total disk space @@ -394,6 +395,18 @@ static MYSQL_SYSVAR_UINT( ~0U, 0); +static void tokudb_dir_per_db_update(THD* thd, + struct st_mysql_sys_var* sys_var, + void* var, const void* save) { + my_bool *value = (my_bool *) var; + *value = *(const my_bool *) save; + db_env->set_dir_per_db(db_env, *value); +} + +static MYSQL_SYSVAR_BOOL(dir_per_db, dir_per_db, + 0, "TokuDB store ft files in db directories", + NULL, tokudb_dir_per_db_update, FALSE); + #if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL static MYSQL_SYSVAR_STR( gdb_path, @@ -935,6 +948,7 @@ st_mysql_sys_var* system_variables[] = { MYSQL_SYSVAR(tmp_dir), MYSQL_SYSVAR(version), MYSQL_SYSVAR(write_status_frequency), + MYSQL_SYSVAR(dir_per_db), #if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL MYSQL_SYSVAR(gdb_path), diff --git a/storage/tokudb/tokudb_sysvars.h b/storage/tokudb/tokudb_sysvars.h index 3bd96f7c68d..7701f211729 100644 --- a/storage/tokudb/tokudb_sysvars.h +++ b/storage/tokudb/tokudb_sysvars.h @@ -101,6 +101,7 @@ extern uint read_status_frequency; extern my_bool strip_frm_data; extern char* tmp_dir; extern uint write_status_frequency; +extern my_bool dir_per_db; extern char* version; #if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL |