diff options
Diffstat (limited to 'storage')
48 files changed, 764 insertions, 170 deletions
diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc index 8bc34da77fc..c720213bb4f 100644 --- a/storage/federated/ha_federated.cc +++ b/storage/federated/ha_federated.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2004, 2014, Oracle and/or its affiliates. +/* Copyright (c) 2004, 2015, Oracle and/or its affiliates. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2904,7 +2904,7 @@ int ha_federated::info(uint flag) } - if (flag & HA_STATUS_AUTO) + if ((flag & HA_STATUS_AUTO) && mysql) stats.auto_increment_value= mysql->insert_id; mysql_free_result(result); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 8eeee43128a..fb7b4072341 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -5567,12 +5567,15 @@ ha_innobase::innobase_lock_autoinc(void) break; case AUTOINC_NEW_STYLE_LOCKING: - /* For simple (single/multi) row INSERTs, we fallback to the - old style only if another transaction has already acquired - the AUTOINC lock on behalf of a LOAD FILE or INSERT ... SELECT - etc. type of statement. */ + /* For simple (single/multi) row INSERTs/REPLACEs and RBR + events, we fallback to the old style only if another + transaction has already acquired the AUTOINC lock on + behalf of a LOAD FILE or INSERT ... SELECT etc. type of + statement. */ if (thd_sql_command(user_thd) == SQLCOM_INSERT - || thd_sql_command(user_thd) == SQLCOM_REPLACE) { + || thd_sql_command(user_thd) == SQLCOM_REPLACE + || thd_sql_command(user_thd) == SQLCOM_END // RBR event + ) { dict_table_t* table = prebuilt->table; /* Acquire the AUTOINC mutex. */ @@ -5581,9 +5584,11 @@ ha_innobase::innobase_lock_autoinc(void) /* We need to check that another transaction isn't already holding the AUTOINC lock on the table. */ if (table->n_waiting_or_granted_auto_inc_locks) { - /* Release the mutex to avoid deadlocks. */ + /* Release the mutex to avoid deadlocks and + fall back to old style locking. */ dict_table_autoinc_unlock(table); } else { + /* Do not fall back to old style locking. */ break; } } @@ -13664,7 +13669,7 @@ innobase_convert_to_filename_charset( /********************************************************************** Issue a warning that the row is too big. */ -extern "C" +extern "C" UNIV_INTERN void ib_warn_row_too_big(const dict_table_t* table) { diff --git a/storage/innobase/rem/rem0rec.c b/storage/innobase/rem/rem0rec.c index 3494d4e4773..5351690dcef 100644 --- a/storage/innobase/rem/rem0rec.c +++ b/storage/innobase/rem/rem0rec.c @@ -833,8 +833,7 @@ rec_get_converted_size_comp_prefix_low( } ut_ad(len <= col->len || col->mtype == DATA_BLOB - || col->mtype == DATA_VARMYSQL - || (col->len == 0 && col->mtype == DATA_VARCHAR)); + || (col->len == 0 && col->mtype == DATA_VARCHAR)); fixed_len = field->fixed_len; if (temp && fixed_len @@ -1261,8 +1260,7 @@ rec_convert_dtuple_to_rec_comp( *lens-- = (byte) len; } else { ut_ad(len <= dtype_get_len(type) - || dtype_get_mtype(type) == DATA_BLOB - || dtype_get_mtype(type) == DATA_VARMYSQL); + || dtype_get_mtype(type) == DATA_BLOB); if (len < 128 || (dtype_get_len(type) < 256 && dtype_get_mtype(type) != DATA_BLOB)) { diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt index efb26d255d3..29319491ecf 100644 --- a/storage/tokudb/CMakeLists.txt +++ b/storage/tokudb/CMakeLists.txt @@ -25,7 +25,7 @@ IF (HAVE_WVLA) ENDIF() ############################################ -SET(TOKUDB_VERSION "tokudb-7.5.4") +SET(TOKUDB_VERSION "tokudb-7.5.6") SET(TOKUDB_DEB_FILES "usr/lib/mysql/plugin/ha_tokudb.so\netc/mysql/conf.d/tokudb.cnf\nusr/bin/tokuftdump\nusr/share/doc/mariadb-galera-server-5.5/README-TOKUDB\nusr/share/doc/mariadb-galera-server-5.5/README.md" PARENT_SCOPE) SET(USE_BDB OFF CACHE BOOL "") MARK_AS_ADVANCED(BUILDNAME) diff --git a/storage/tokudb/README.md b/storage/tokudb/README.md index 2ab2e21a5a1..0d4f09d4c86 100644 --- a/storage/tokudb/README.md +++ b/storage/tokudb/README.md @@ -30,14 +30,14 @@ working MySQL or MariaDB with Tokutek patches, and with the TokuDB storage engine, called `make.mysql.bash`. This script will download copies of the needed source code from github and build everything. -To build MySQL 5.5.40 with TokuDB 7.5.3: +To build MySQL 5.5.41 with TokuDB 7.5.5: ```sh -scripts/make.mysql.bash --mysqlbuild=mysql-5.5.40-tokudb-7.5.3-linux-x86_64 +scripts/make.mysql.bash --mysqlbuild=mysql-5.5.41-tokudb-7.5.5-linux-x86_64 ``` -To build MariaDB 5.5.40 with TokuDB 7.5.3: +To build MariaDB 5.5.41 with TokuDB 7.5.5: ```sh -scripts/make.mysql.bash --mysqlbuild=mariadb-5.5.40-tokudb-7.5.3-linux-x86_64 +scripts/make.mysql.bash --mysqlbuild=mariadb-5.5.41-tokudb-7.5.5-linux-x86_64 ``` Before you start, make sure you have a C++11-compatible compiler (GCC >= @@ -59,6 +59,7 @@ repositories, run this: scripts/make.mysql.debug.env.bash ``` +We use gcc from devtoolset-1.1 on CentOS 5.9 for builds. Contribute ---------- diff --git a/storage/tokudb/ft-index/buildheader/make_tdb.cc b/storage/tokudb/ft-index/buildheader/make_tdb.cc index 88f8882df78..3f9a721d9aa 100644 --- a/storage/tokudb/ft-index/buildheader/make_tdb.cc +++ b/storage/tokudb/ft-index/buildheader/make_tdb.cc @@ -572,7 +572,7 @@ static void print_db_txn_struct (void) { STRUCT_SETUP(DB_TXN, abort, "int (*%s) (DB_TXN *)"); STRUCT_SETUP(DB_TXN, api_internal,"void *%s"); STRUCT_SETUP(DB_TXN, commit, "int (*%s) (DB_TXN*, uint32_t)"); - STRUCT_SETUP(DB_TXN, prepare, "int (*%s) (DB_TXN*, uint8_t gid[DB_GID_SIZE])"); + STRUCT_SETUP(DB_TXN, prepare, "int (*%s) (DB_TXN*, uint8_t gid[DB_GID_SIZE], uint32_t flags)"); STRUCT_SETUP(DB_TXN, discard, "int (*%s) (DB_TXN*, uint32_t)"); STRUCT_SETUP(DB_TXN, id, "uint32_t (*%s) (DB_TXN *)"); STRUCT_SETUP(DB_TXN, mgrp, "DB_ENV *%s /* In TokuFT, mgrp is a DB_ENV, not a DB_TXNMGR */"); @@ -581,11 +581,12 @@ static void print_db_txn_struct (void) { "int (*txn_stat)(DB_TXN *, struct txn_stat **)", "int (*commit_with_progress)(DB_TXN*, uint32_t, TXN_PROGRESS_POLL_FUNCTION, void*)", "int (*abort_with_progress)(DB_TXN*, TXN_PROGRESS_POLL_FUNCTION, void*)", - "int (*xa_prepare) (DB_TXN*, TOKU_XA_XID *)", + "int (*xa_prepare) (DB_TXN*, TOKU_XA_XID *, uint32_t flags)", "uint64_t (*id64) (DB_TXN*)", "void (*set_client_id)(DB_TXN *, uint64_t client_id)", "uint64_t (*get_client_id)(DB_TXN *)", "bool (*is_prepared)(DB_TXN *)", + "DB_TXN *(*get_child)(DB_TXN *)", NULL}; sort_and_dump_fields("db_txn", false, extra); } @@ -614,7 +615,7 @@ static void print_dbc_struct (void) { "int (*c_getf_set_range_reverse)(DBC *, uint32_t, DBT *, YDB_CALLBACK_FUNCTION, void *)", "int (*c_getf_set_range_with_bound)(DBC *, uint32_t, DBT *k, DBT *k_bound, YDB_CALLBACK_FUNCTION, void *)", "int (*c_set_bounds)(DBC*, const DBT*, const DBT*, bool pre_acquire, int out_of_range_error)", - "void (*c_set_check_interrupt_callback)(DBC*, bool (*)(void*), void *)", + "void (*c_set_check_interrupt_callback)(DBC*, bool (*)(void*, uint64_t deleted_rows), void *)", "void (*c_remove_restriction)(DBC*)", "char _internal[512]", NULL}; diff --git a/storage/tokudb/ft-index/ft/ft-internal.h b/storage/tokudb/ft-index/ft/ft-internal.h index 88fc5dca686..4a820d5682b 100644 --- a/storage/tokudb/ft-index/ft/ft-internal.h +++ b/storage/tokudb/ft-index/ft/ft-internal.h @@ -655,7 +655,7 @@ int toku_upgrade_msn_from_root_to_header(int fd, FT ft) __attribute__((nonnull)) // When lock_only is true, the callback only does optional lock tree locking. typedef int (*FT_GET_CALLBACK_FUNCTION)(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only); -typedef bool (*FT_CHECK_INTERRUPT_CALLBACK)(void *extra); +typedef bool (*FT_CHECK_INTERRUPT_CALLBACK)(void *extra, uint64_t deleted_rows); struct ft_cursor; int toku_ft_search(FT_HANDLE ft_handle, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, struct ft_cursor *ftcursor, bool can_bulk_fetch); diff --git a/storage/tokudb/ft-index/ft/ft-ops.cc b/storage/tokudb/ft-index/ft/ft-ops.cc index 34c9c46f1c6..b3c224afb7a 100644 --- a/storage/tokudb/ft-index/ft/ft-ops.cc +++ b/storage/tokudb/ft-index/ft/ft-ops.cc @@ -3387,7 +3387,7 @@ ok: ; idx++; if (idx >= bn->data_buffer.num_klpairs() || ((n_deleted % 64) == 0 && !search_continue(search, key, keylen))) { STATUS_INC(FT_CURSOR_SKIP_DELETED_LEAF_ENTRY, n_deleted); - if (ftcursor->interrupt_cb && ftcursor->interrupt_cb(ftcursor->interrupt_cb_extra)) { + if (ftcursor->interrupt_cb && ftcursor->interrupt_cb(ftcursor->interrupt_cb_extra, n_deleted)) { return TOKUDB_INTERRUPTED; } return DB_NOTFOUND; @@ -3396,7 +3396,7 @@ ok: ; case FT_SEARCH_RIGHT: if (idx == 0) { STATUS_INC(FT_CURSOR_SKIP_DELETED_LEAF_ENTRY, n_deleted); - if (ftcursor->interrupt_cb && ftcursor->interrupt_cb(ftcursor->interrupt_cb_extra)) { + if (ftcursor->interrupt_cb && ftcursor->interrupt_cb(ftcursor->interrupt_cb_extra, n_deleted)) { return TOKUDB_INTERRUPTED; } return DB_NOTFOUND; @@ -3410,6 +3410,8 @@ ok: ; assert_zero(r); // we just validated the index if (!le_val_is_del(le, ftcursor->is_snapshot_read, ftcursor->ttxn)) { STATUS_INC(FT_CURSOR_SKIP_DELETED_LEAF_ENTRY, n_deleted); + if (ftcursor->interrupt_cb) + ftcursor->interrupt_cb(ftcursor->interrupt_cb_extra, n_deleted); goto got_a_good_value; } } diff --git a/storage/tokudb/ft-index/ft/ft.cc b/storage/tokudb/ft-index/ft/ft.cc index bf99646351a..77948dfe358 100644 --- a/storage/tokudb/ft-index/ft/ft.cc +++ b/storage/tokudb/ft-index/ft/ft.cc @@ -1064,6 +1064,11 @@ garbage_helper(BLOCKNUM blocknum, int64_t UU(size), int64_t UU(address), void *e goto exit; } } + { + float a = info->used_space, b=info->total_space; + float percentage = (1 - (a / b)) * 100; + printf("LeafNode# %d has %d BasementNodes and %2.1f%% of the allocated space is garbage\n", (int)blocknum.b, node->n_children, percentage); + } exit: toku_ftnode_free(&node); toku_free(ndd); diff --git a/storage/tokudb/ft-index/ft/logger/recover.cc b/storage/tokudb/ft-index/ft/logger/recover.cc index 8dd7bf87624..cae7397651d 100644 --- a/storage/tokudb/ft-index/ft/logger/recover.cc +++ b/storage/tokudb/ft-index/ft/logger/recover.cc @@ -714,7 +714,7 @@ static int toku_recover_xstillopenprepared (struct logtype_xstillopenprepared *l } switch (renv->ss.ss) { case FORWARD_BETWEEN_CHECKPOINT_BEGIN_END: { - toku_txn_prepare_txn(txn, l->xa_xid); + toku_txn_prepare_txn(txn, l->xa_xid, 0); break; } case FORWARD_NEWER_CHECKPOINT_END: { @@ -778,7 +778,7 @@ static int toku_recover_xprepare (struct logtype_xprepare *l, RECOVER_ENV renv) assert(txn!=NULL); // Save the transaction - toku_txn_prepare_txn(txn, l->xa_xid); + toku_txn_prepare_txn(txn, l->xa_xid, 0); return 0; } diff --git a/storage/tokudb/ft-index/ft/txn/txn.cc b/storage/tokudb/ft-index/ft/txn/txn.cc index 216cb0d8dfd..18d5a6b67dd 100644 --- a/storage/tokudb/ft-index/ft/txn/txn.cc +++ b/storage/tokudb/ft-index/ft/txn/txn.cc @@ -558,7 +558,7 @@ static void copy_xid (TOKU_XA_XID *dest, TOKU_XA_XID *source) { memcpy(dest->data, source->data, source->gtrid_length+source->bqual_length); } -void toku_txn_prepare_txn (TOKUTXN txn, TOKU_XA_XID *xa_xid) { +void toku_txn_prepare_txn (TOKUTXN txn, TOKU_XA_XID *xa_xid, int nosync) { if (txn->parent || toku_txn_is_read_only(txn)) { // We do not prepare children. // @@ -573,7 +573,7 @@ void toku_txn_prepare_txn (TOKUTXN txn, TOKU_XA_XID *xa_xid) { txn->state = TOKUTXN_PREPARING; toku_txn_unlock_state(txn); // Do we need to do an fsync? - txn->do_fsync = (txn->force_fsync_on_commit || txn->roll_info.num_rollentries>0); + txn->do_fsync = txn->force_fsync_on_commit || (!nosync && txn->roll_info.num_rollentries>0); copy_xid(&txn->xa_xid, xa_xid); // This list will go away with #4683, so we wn't need the ydb lock for this anymore. toku_log_xprepare(txn->logger, &txn->do_fsync_lsn, 0, txn, txn->txnid, xa_xid); diff --git a/storage/tokudb/ft-index/ft/txn/txn.h b/storage/tokudb/ft-index/ft/txn/txn.h index c458df3b5b2..6381b5a7779 100644 --- a/storage/tokudb/ft-index/ft/txn/txn.h +++ b/storage/tokudb/ft-index/ft/txn/txn.h @@ -302,7 +302,7 @@ int toku_txn_abort_with_lsn(struct tokutxn *txn, LSN oplsn, int toku_txn_discard_txn(struct tokutxn *txn); -void toku_txn_prepare_txn (struct tokutxn *txn, TOKU_XA_XID *xid); +void toku_txn_prepare_txn (struct tokutxn *txn, TOKU_XA_XID *xid, int nosync); // Effect: Do the internal work of preparing a transaction (does not log the prepare record). void toku_txn_get_prepared_xa_xid(struct tokutxn *txn, TOKU_XA_XID *xa_xid); diff --git a/storage/tokudb/ft-index/src/tests/recover-5146.cc b/storage/tokudb/ft-index/src/tests/recover-5146.cc index c05f9effa7d..5e793830f1d 100644 --- a/storage/tokudb/ft-index/src/tests/recover-5146.cc +++ b/storage/tokudb/ft-index/src/tests/recover-5146.cc @@ -147,7 +147,7 @@ run_test(void) { uint8_t gid[DB_GID_SIZE]; memset(gid, 0, DB_GID_SIZE); gid[0]=42; - r = txn->prepare(txn, gid); CKERR(r); + r = txn->prepare(txn, gid, 0); CKERR(r); } r = env->txn_checkpoint(env, 0, 0, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/test-prepare.cc b/storage/tokudb/ft-index/src/tests/test-prepare.cc index 9033c633ea7..73d2105fa86 100644 --- a/storage/tokudb/ft-index/src/tests/test-prepare.cc +++ b/storage/tokudb/ft-index/src/tests/test-prepare.cc @@ -122,7 +122,7 @@ static void setup_env_and_prepare (DB_ENV **envp, const char *envdir, bool commi uint8_t gid[DB_GID_SIZE]; memset(gid, 0, DB_GID_SIZE); gid[0]=42; - CKERR(txn->prepare(txn, gid)); + CKERR(txn->prepare(txn, gid, 0)); if (commit) CKERR(txn->commit(txn, 0)); } diff --git a/storage/tokudb/ft-index/src/tests/test-prepare2.cc b/storage/tokudb/ft-index/src/tests/test-prepare2.cc index 8952f14cf31..12d78e4da7c 100644 --- a/storage/tokudb/ft-index/src/tests/test-prepare2.cc +++ b/storage/tokudb/ft-index/src/tests/test-prepare2.cc @@ -124,7 +124,7 @@ static void setup_env_and_prepare (DB_ENV **envp, const char *envdir, bool commi uint8_t gid[DB_GID_SIZE]; memset(gid, 0, DB_GID_SIZE); gid[0]=42; - CKERR(txn->prepare(txn, gid)); + CKERR(txn->prepare(txn, gid, 0)); if (commit) CKERR(txn->commit(txn, 0)); } diff --git a/storage/tokudb/ft-index/src/tests/test-prepare3.cc b/storage/tokudb/ft-index/src/tests/test-prepare3.cc index 3643d73f41a..30c5c728e94 100644 --- a/storage/tokudb/ft-index/src/tests/test-prepare3.cc +++ b/storage/tokudb/ft-index/src/tests/test-prepare3.cc @@ -147,7 +147,7 @@ static void setup_env_and_prepare (DB_ENV **envp, const char *envdir) { uint8_t gid[DB_GID_SIZE]; memset(gid, 0, DB_GID_SIZE); gid[0]='a'+tnum; - CKERR(txn->prepare(txn, gid)); + CKERR(txn->prepare(txn, gid, 0)); // Drop txn on the ground, since we will commit or abort it after recovery if (tnum==0) { //printf("commit %d\n", tnum); diff --git a/storage/tokudb/ft-index/src/tests/test-xa-prepare.cc b/storage/tokudb/ft-index/src/tests/test-xa-prepare.cc index e08e7361555..d3b1352f55d 100644 --- a/storage/tokudb/ft-index/src/tests/test-xa-prepare.cc +++ b/storage/tokudb/ft-index/src/tests/test-xa-prepare.cc @@ -125,7 +125,7 @@ static void setup_env_and_prepare (DB_ENV **envp, const char *envdir, bool commi .gtrid_length = 8, .bqual_length = 9}; for (int i=0; i<8+9; i++) x.data[i] = 42+i; - CKERR(txn->xa_prepare(txn, &x)); + CKERR(txn->xa_prepare(txn, &x, 0)); if (commit) CKERR(txn->commit(txn, 0)); } diff --git a/storage/tokudb/ft-index/src/tests/test5092.cc b/storage/tokudb/ft-index/src/tests/test5092.cc index 16652472b55..8ef31ae0340 100644 --- a/storage/tokudb/ft-index/src/tests/test5092.cc +++ b/storage/tokudb/ft-index/src/tests/test5092.cc @@ -118,7 +118,7 @@ static void setup_env_and_prepare (DB_ENV **envp, const char *envdir, bool commi uint8_t gid[DB_GID_SIZE]; memset(gid, 0, DB_GID_SIZE); gid[0]=42; - CKERR(txn->prepare(txn, gid)); + CKERR(txn->prepare(txn, gid, 0)); { int chk_r = db->close(db, 0); CKERR(chk_r); } if (commit) CKERR(txn->commit(txn, 0)); diff --git a/storage/tokudb/ft-index/src/tests/test_5015.cc b/storage/tokudb/ft-index/src/tests/test_5015.cc index 071b7f3660e..50900d342d0 100644 --- a/storage/tokudb/ft-index/src/tests/test_5015.cc +++ b/storage/tokudb/ft-index/src/tests/test_5015.cc @@ -133,7 +133,7 @@ test_main(int argc, char *const argv[]) { uint8_t gid[DB_GID_SIZE]; memset(gid, 0, DB_GID_SIZE); gid[0]='a'; - r = child_txn->prepare(child_txn, gid); + r = child_txn->prepare(child_txn, gid, 0); CKERR(r); r = env->txn_checkpoint(env, 0, 0, 0); diff --git a/storage/tokudb/ft-index/src/tests/test_cursor_interrupt.cc b/storage/tokudb/ft-index/src/tests/test_cursor_interrupt.cc index e992f86455b..5583b0283a6 100644 --- a/storage/tokudb/ft-index/src/tests/test_cursor_interrupt.cc +++ b/storage/tokudb/ft-index/src/tests/test_cursor_interrupt.cc @@ -95,12 +95,12 @@ PATENT RIGHTS GRANT: int num_interrupts_called; -static bool interrupt(void* extra UU()) { +static bool interrupt(void* extra UU(), uint64_t rows UU()) { num_interrupts_called++; return false; } -static bool interrupt_true(void* extra UU()) { +static bool interrupt_true(void* extra UU(), uint64_t rows UU()) { num_interrupts_called++; return true; } diff --git a/storage/tokudb/ft-index/src/tests/test_stress_hot_indexing.cc b/storage/tokudb/ft-index/src/tests/test_stress_hot_indexing.cc index fe237f063ab..d70f0411e01 100644 --- a/storage/tokudb/ft-index/src/tests/test_stress_hot_indexing.cc +++ b/storage/tokudb/ft-index/src/tests/test_stress_hot_indexing.cc @@ -211,7 +211,7 @@ cleanup: gid_count++; uint32_t *hi_gid_count_p = cast_to_typeof(hi_gid_count_p) hi_gid; // make gcc --happy about -Wstrict-aliasing *hi_gid_count_p = gid_count; - int rr = hi_txn->prepare(hi_txn, hi_gid); + int rr = hi_txn->prepare(hi_txn, hi_gid, 0); CKERR(rr); if (r || (random() % 2)) { rr = hi_txn->abort(hi_txn); diff --git a/storage/tokudb/ft-index/src/tests/test_txn_close_before_prepare_commit.cc b/storage/tokudb/ft-index/src/tests/test_txn_close_before_prepare_commit.cc index e3b715c4ce7..31267b17686 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_close_before_prepare_commit.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_close_before_prepare_commit.cc @@ -130,7 +130,7 @@ test_txn_close_before_prepare_commit (void) { uint8_t gid[DB_GID_SIZE]; memset(gid, 1, DB_GID_SIZE); - r = txn->prepare(txn, gid); assert(r == 0); + r = txn->prepare(txn, gid, 0); assert(r == 0); r = txn->commit(txn, 0); assert(r == 0); r = env->close(env, 0); assert(r == 0); diff --git a/storage/tokudb/ft-index/src/tests/threaded_stress_test_helpers.h b/storage/tokudb/ft-index/src/tests/threaded_stress_test_helpers.h index 2c2525a3165..f9da1693847 100644 --- a/storage/tokudb/ft-index/src/tests/threaded_stress_test_helpers.h +++ b/storage/tokudb/ft-index/src/tests/threaded_stress_test_helpers.h @@ -593,7 +593,7 @@ static void *worker(void *arg_v) { uint64_t gid_val = txn->id64(txn); uint64_t *gid_count_p = cast_to_typeof(gid_count_p) gid; // make gcc --happy about -Wstrict-aliasing *gid_count_p = gid_val; - int rr = txn->prepare(txn, gid); + int rr = txn->prepare(txn, gid, 0); assert_zero(rr); } if (r == 0) { diff --git a/storage/tokudb/ft-index/src/tests/xa-dirty-commit.cc b/storage/tokudb/ft-index/src/tests/xa-dirty-commit.cc index 126a7c1453e..008a41274ae 100644 --- a/storage/tokudb/ft-index/src/tests/xa-dirty-commit.cc +++ b/storage/tokudb/ft-index/src/tests/xa-dirty-commit.cc @@ -136,7 +136,7 @@ static void create_prepared_txn(void) { for (int i = 0; i < 8+9; i++) { xid.data[i] = i; } - r = txn->xa_prepare(txn, &xid); + r = txn->xa_prepare(txn, &xid, 0); CKERR(r); // discard the txn so that we can close the env and run xa recovery later diff --git a/storage/tokudb/ft-index/src/tests/xa-dirty-rollback.cc b/storage/tokudb/ft-index/src/tests/xa-dirty-rollback.cc index 2d13e559050..3f0debe4116 100644 --- a/storage/tokudb/ft-index/src/tests/xa-dirty-rollback.cc +++ b/storage/tokudb/ft-index/src/tests/xa-dirty-rollback.cc @@ -136,7 +136,7 @@ static void create_prepared_txn(void) { for (int i = 0; i < 8+9; i++) { xid.data[i] = i; } - r = txn->xa_prepare(txn, &xid); + r = txn->xa_prepare(txn, &xid, 0); CKERR(r); // discard the txn so that we can close the env and run xa recovery later diff --git a/storage/tokudb/ft-index/src/tests/xa-txn-discard-abort.cc b/storage/tokudb/ft-index/src/tests/xa-txn-discard-abort.cc index 3365a1bb139..95852c68192 100644 --- a/storage/tokudb/ft-index/src/tests/xa-txn-discard-abort.cc +++ b/storage/tokudb/ft-index/src/tests/xa-txn-discard-abort.cc @@ -138,7 +138,7 @@ static void create_prepared_txn(void) { for (int i = 0; i < 8+9; i++) { xid.data[i] = i; } - r = txn->xa_prepare(txn, &xid); + r = txn->xa_prepare(txn, &xid, 0); CKERR(r); // discard the txn so that we can close the env and run xa recovery later diff --git a/storage/tokudb/ft-index/src/tests/xa-txn-discard-commit.cc b/storage/tokudb/ft-index/src/tests/xa-txn-discard-commit.cc index c4d164017ae..a21f8813f90 100644 --- a/storage/tokudb/ft-index/src/tests/xa-txn-discard-commit.cc +++ b/storage/tokudb/ft-index/src/tests/xa-txn-discard-commit.cc @@ -139,7 +139,7 @@ static void create_prepared_txn(void) { for (int i = 0; i < 8+9; i++) { xid.data[i] = i; } - r = txn->xa_prepare(txn, &xid); + r = txn->xa_prepare(txn, &xid, 0); CKERR(r); // discard the txn so that we can close the env and run xa recovery later diff --git a/storage/tokudb/ft-index/src/ydb_cursor.cc b/storage/tokudb/ft-index/src/ydb_cursor.cc index 57f3b5808b6..eed519455fc 100644 --- a/storage/tokudb/ft-index/src/ydb_cursor.cc +++ b/storage/tokudb/ft-index/src/ydb_cursor.cc @@ -769,7 +769,7 @@ c_remove_restriction(DBC *dbc) { } static void -c_set_check_interrupt_callback(DBC* dbc, bool (*interrupt_callback)(void*), void *extra) { +c_set_check_interrupt_callback(DBC* dbc, bool (*interrupt_callback)(void*, uint64_t), void *extra) { toku_ft_cursor_set_check_interrupt_cb(dbc_ftcursor(dbc), interrupt_callback, extra); } diff --git a/storage/tokudb/ft-index/src/ydb_txn.cc b/storage/tokudb/ft-index/src/ydb_txn.cc index ce06e78b23f..82903849535 100644 --- a/storage/tokudb/ft-index/src/ydb_txn.cc +++ b/storage/tokudb/ft-index/src/ydb_txn.cc @@ -240,7 +240,7 @@ static int toku_txn_abort(DB_TXN * txn, return r; } -static int toku_txn_xa_prepare (DB_TXN *txn, TOKU_XA_XID *xid) { +static int toku_txn_xa_prepare (DB_TXN *txn, TOKU_XA_XID *xid, uint32_t flags) { int r = 0; if (!txn) { r = EINVAL; @@ -273,9 +273,11 @@ static int toku_txn_xa_prepare (DB_TXN *txn, TOKU_XA_XID *xid) { HANDLE_PANICKED_ENV(txn->mgrp); } assert(!db_txn_struct_i(txn)->child); + int nosync; + nosync = (flags & DB_TXN_NOSYNC)!=0 || (db_txn_struct_i(txn)->flags&DB_TXN_NOSYNC); TOKUTXN ttxn; ttxn = db_txn_struct_i(txn)->tokutxn; - toku_txn_prepare_txn(ttxn, xid); + toku_txn_prepare_txn(ttxn, xid, nosync); TOKULOGGER logger; logger = txn->mgrp->i->logger; LSN do_fsync_lsn; @@ -292,14 +294,14 @@ exit: // requires: must hold the multi operation lock. it is // released in toku_txn_xa_prepare before the fsync. -static int toku_txn_prepare (DB_TXN *txn, uint8_t gid[DB_GID_SIZE]) { +static int toku_txn_prepare (DB_TXN *txn, uint8_t gid[DB_GID_SIZE], uint32_t flags) { TOKU_XA_XID xid; TOKU_ANNOTATE_NEW_MEMORY(&xid, sizeof(xid)); xid.formatID=0x756b6f54; // "Toku" xid.gtrid_length=DB_GID_SIZE/2; // The maximum allowed gtrid length is 64. See the XA spec in source:/import/opengroup.org/C193.pdf page 20. xid.bqual_length=DB_GID_SIZE/2; // The maximum allowed bqual length is 64. memcpy(xid.data, gid, DB_GID_SIZE); - return toku_txn_xa_prepare(txn, &xid); + return toku_txn_xa_prepare(txn, &xid, flags); } static int toku_txn_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) { @@ -427,6 +429,10 @@ static bool toku_txn_is_prepared(DB_TXN *txn) { return toku_txn_get_state(ttxn) == TOKUTXN_PREPARING; } +static DB_TXN *toku_txn_get_child(DB_TXN *txn) { + return db_txn_struct_i(txn)->child; +} + static inline void txn_func_init(DB_TXN *txn) { #define STXN(name) txn->name = locked_txn_ ## name STXN(abort); @@ -444,6 +450,7 @@ static inline void txn_func_init(DB_TXN *txn) { #undef SUTXN txn->id64 = toku_txn_id64; txn->is_prepared = toku_txn_is_prepared; + txn->get_child = toku_txn_get_child; } // diff --git a/storage/tokudb/ft-index/tools/CMakeLists.txt b/storage/tokudb/ft-index/tools/CMakeLists.txt index 4ed0cb4cbdc..71c44df9acd 100644 --- a/storage/tokudb/ft-index/tools/CMakeLists.txt +++ b/storage/tokudb/ft-index/tools/CMakeLists.txt @@ -14,7 +14,7 @@ target_link_libraries(ftverify m) install( TARGETS tokuftdump - DESTINATION bin + DESTINATION ${INSTALL_BINDIR} COMPONENT Server ) diff --git a/storage/tokudb/ft-index/tools/tokuftdump.cc b/storage/tokudb/ft-index/tools/tokuftdump.cc index 3f73136fb5c..d680a3dd0d0 100644 --- a/storage/tokudb/ft-index/tools/tokuftdump.cc +++ b/storage/tokudb/ft-index/tools/tokuftdump.cc @@ -99,7 +99,10 @@ PATENT RIGHTS GRANT: #include <stdlib.h> #include <inttypes.h> #include <limits.h> - +#include <string> +#include <iostream> +#include <fstream> +#include <string.h> #include "ft/serialize/block_table.h" #include "ft/cachetable/cachetable.h" #include "ft/ft.h" @@ -108,8 +111,11 @@ PATENT RIGHTS GRANT: #include "ft/serialize/ft_node-serialize.h" #include "ft/node.h" +using namespace std; + static int do_dump_data = 1; static int do_interactive = 0; +static int do_json = 0; static int do_header = 0; static int do_fragmentation = 0; static int do_garbage = 0; @@ -118,10 +124,24 @@ static int do_rootnode = 0; static int do_node = 0; static BLOCKNUM do_node_num; static int do_tsv = 0; - static const char *arg0; static const char *fname; +//it holdes the messges count for each FT's node +typedef struct nodeMessage{ + int id; + int clean;//0=clean >=1 dirty + int *count;//holds the messages + nodeMessage *nextNode; +}NMC; +enum { maxline = 128}; + +static int printNodeMessagesToSTDout(NMC* ptr); + +static int printLevelSTDout(int *); + +static void treeToSTDout(NMC *msgs[], int height); + static void format_time(const uint64_t time_int, char *buf) { time_t timer = (time_t) time_int; ctime_r(&timer, buf); @@ -225,12 +245,202 @@ static void dump_header(FT ft) { printf(" estimated numbytes=%" PRId64 "\n", ft->in_memory_stats.numbytes); } +static int64_t getRootNode(FT ft) { + return ft->h->root_blocknum.b; +} + static int print_le(const void* key, const uint32_t keylen, const LEAFENTRY &le, const uint32_t idx UU(), void *const ai UU()) { print_klpair(stdout, key, keylen, le); printf("\n"); return 0; } +static int getHeight(int fd, BLOCKNUM blocknum, FT ft){ + FTNODE n; + FTNODE_DISK_DATA ndd = nullptr; + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + int r = toku_deserialize_ftnode_from (fd, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); + assert_zero(r); + assert(n!=0); + return n->height; +} + +static FTNODE getNode(int fd, BLOCKNUM blocknum, FT ft) { + FTNODE n; + FTNODE_DISK_DATA ndd = nullptr; + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + int r = toku_deserialize_ftnode_from (fd, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); + assert_zero(r);; + return n; +} + +static int countNodes(NMC *level){ + int count=0; + NMC *ptr=level; + while(ptr!=NULL){ + count++; + ptr=ptr->nextNode; + } + return count; +} + +static int * countMessages(NMC *level){ + int *counts=new int[16]; + for(int i=0;i<16;i++){ + counts[i]=0; + } + NMC *ptr=level; + while(ptr!=NULL){ + for(int i=0;i<16;i++){ + counts[i]+=ptr->count[i]; + } + ptr=ptr->nextNode; + } + return counts; +} + +static NMC * getLast(NMC *level){ + if (level==NULL) return NULL; + NMC *ptr=level; + while(ptr->nextNode!=NULL){ + ptr=ptr->nextNode; + } + return ptr; +} + +/* + * Prints the total messages at each to STDout + */ +static int printLevelSTDout(int *count){ + int isEmpty=0; + for(int j=0;j<16;j++){ + if(count[j]>0){ + cout <<count[j]<<" "; + isEmpty++; + switch (j) { + case FT_INSERT: printf("INSERT(s) "); break; + case FT_INSERT_NO_OVERWRITE: printf("INSERT_NO_OVERWRITE(s) "); break; + case FT_DELETE_ANY: printf("DELETE_ANY(s) "); break; + case FT_ABORT_ANY: printf("ABORT_ANY(s) "); break; + case FT_COMMIT_ANY: printf("COMMIT_ANY(s) "); break; + case FT_COMMIT_BROADCAST_ALL: printf("COMMIT_BROADCAST_ALL(s) "); break; + case FT_COMMIT_BROADCAST_TXN: printf("COMMIT_BROADCAST_TXN(s) "); break; + case FT_ABORT_BROADCAST_TXN: printf("ABORT_BROADCAST_TXN(s) "); break; + case FT_OPTIMIZE: printf("OPTIMIZE(s) "); break; + case FT_OPTIMIZE_FOR_UPGRADE: printf("OPTIMIZE_FOR_UPGRADE(s) "); break; + case FT_UPDATE: printf("UPDATE(s) "); break; + case FT_UPDATE_BROADCAST_ALL: printf("UPDATE_BROADCAST_ALL(s) "); break; + } + + } + } + return isEmpty; +} + +/* + * Prints the total # of messages in a node to STD output + */ +static int printNodeMessagesToSTDout(NMC *ptr){ + cout <<"\nNode :"<<ptr->id<<" has :"; + for(int j=0;j<16;j++){ + if(ptr->count[j]>0){ + cout <<ptr->count[j]<<" "; + switch (j) { + case FT_INSERT: printf("INSERT(s) "); break; + case FT_INSERT_NO_OVERWRITE: printf("INSERT_NO_OVERWRITE(s) "); break; + case FT_DELETE_ANY: printf("DELETE_ANY(s) "); break; + case FT_ABORT_ANY: printf("ABORT_ANY(s) "); break; + case FT_COMMIT_ANY: printf("COMMIT_ANY(s) "); break; + case FT_COMMIT_BROADCAST_ALL: printf("COMMIT_BROADCAST_ALL(s) "); break; + case FT_COMMIT_BROADCAST_TXN: printf("COMMIT_BROADCAST_TXN(s) "); break; + case FT_ABORT_BROADCAST_TXN: printf("ABORT_BROADCAST_TXN(s) "); break; + case FT_OPTIMIZE: printf("OPTIMIZE(s) "); break; + case FT_OPTIMIZE_FOR_UPGRADE: printf("OPTIMIZE_FOR_UPGRADE(s) "); break; + case FT_UPDATE: printf("UPDATE(s) "); break; + case FT_UPDATE_BROADCAST_ALL: printf("UPDATE_BROADCAST_ALL(s) "); break; + } + } + } + return 1; +} + +static void levelToSTDout(NMC *list, int level){ + NMC *ptr=list; + cout <<endl<<"Height : "<<level<<endl; + while(ptr!=NULL){ + if(ptr->clean!=0){ + printNodeMessagesToSTDout(ptr); + } + else{ + cout << "\nNode : "<<ptr->id<<" has no messages"; + } + ptr=ptr->nextNode; + } + cout <<endl; +} + +/* + * prints the tree total # of nodes and total # of messages at each height in : + * STDout in human readable format + */ +static void treeToSTDout(NMC *msgs[], int height){ + for(int i=height; i>=0 ; i--){ + cout <<"At height "<<i; + int *counts=countMessages(msgs[i]); + cout <<"\n Node Count: "<< countNodes(msgs[i])<<endl; + cout <<" Messages: "; + if(printLevelSTDout(counts)==0) cout <<"0\n"; + else cout <<endl; + } +} + +//traverse through the FT and report back the count of messages in every node +static void countMessagesInFT(int fd, BLOCKNUM blocknum, FT ft,NMC *msgs[]){ + FTNODE n=getNode(fd,blocknum,ft); + + NMC *last=NULL; + if(msgs[n->height]==NULL){ + last = msgs[n->height]=new NMC; + }else { + last=getLast(msgs[n->height]); + last->nextNode=new NMC; + last=last->nextNode; + } + last->id=blocknum.b; + last->count=new int[16]; + for(int i=0;i<16;i++){ + last->count[i]=0; + } + last->clean=0; + last->nextNode=NULL; + + if (n->height==0){ + toku_ftnode_free(&n); + return; + } + for(int i=0;i<n->n_children;i++){ + NONLEAF_CHILDINFO bnc = BNC(n, i); + if (n->height==1 && n->bp[i].ptr.tag==BCT_NULL){ + cout <<n->bp[i].ptr.tag; + } + auto dump_fn=[&](const ft_msg &msg, bool UU(is_fresh)) { + enum ft_msg_type type = (enum ft_msg_type) msg.type(); + last->count[type]++; + last->clean=1; + return 0; + }; + + bnc->msg_buffer.iterate(dump_fn); + + blocknum=make_blocknum(BP_BLOCKNUM(n, i).b); + countMessagesInFT(fd,blocknum,ft, msgs); + } + + toku_ftnode_free(&n); +} + static void dump_node(int fd, BLOCKNUM blocknum, FT ft) { FTNODE n; FTNODE_DISK_DATA ndd = nullptr; @@ -254,9 +464,9 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT ft) { printf(" layout_version_read_from_disk=%d\n", n->layout_version_read_from_disk); printf(" build_id=%d\n", n->build_id); printf(" max_msn_applied_to_node_on_disk=%" PRId64 " (0x%" PRIx64 ")\n", n->max_msn_applied_to_node_on_disk.msn, n->max_msn_applied_to_node_on_disk.msn); - printf(" io time %lf decompress time %lf deserialize time %lf\n", - tokutime_to_seconds(bfe.io_time), - tokutime_to_seconds(bfe.decompress_time), + printf(" io time %lf decompress time %lf deserialize time %lf\n", + tokutime_to_seconds(bfe.io_time), + tokutime_to_seconds(bfe.decompress_time), tokutime_to_seconds(bfe.deserialize_time)); printf(" n_children=%d\n", n->n_children); @@ -277,7 +487,7 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT ft) { if (n->height > 0) { printf("%" PRId64 "\n", BP_BLOCKNUM(n, i).b); NONLEAF_CHILDINFO bnc = BNC(n, i); - unsigned int n_bytes = toku_bnc_nbytesinbuf(bnc); + unsigned int n_bytes = toku_bnc_nbytesinbuf(bnc); int n_entries = toku_bnc_n_entries(bnc); if (n_bytes > 0 || n_entries > 0) { printf(" buffer contains %u bytes (%d items)\n", n_bytes, n_entries); @@ -402,8 +612,12 @@ static void dump_garbage_stats(int fd, FT ft) { uint64_t total_space = 0; uint64_t used_space = 0; toku_ft_get_garbage(ft, &total_space, &used_space); - printf("garbage total size\t%" PRIu64 "\n", total_space); - printf("garbage used size\t%" PRIu64 "\n", used_space); + printf("garbage total size :%20" PRIu64 "\n", total_space); + printf("garbage used size :%20" PRIu64 "\n", used_space); + float a=used_space,b=total_space; + + float percentage=((1-a/b)*100); + printf("Total garbage : %2.3f%%\n", percentage); } typedef struct __dump_node_extra { @@ -438,7 +652,7 @@ static void sub_block_deserialize(struct dump_sub_block *sb, unsigned char *sub_ static void verify_block(unsigned char *cp, uint64_t file_offset, uint64_t size) { // verify the header checksum const size_t node_header = 8 + sizeof (uint32_t) + sizeof (uint32_t) + sizeof (uint32_t); - + printf("%.8s layout_version=%u %u build=%d\n", cp, get_unaligned_uint32(cp+8), get_unaligned_uint32(cp+12), get_unaligned_uint32(cp+16)); unsigned char *sub_block_header = &cp[node_header]; @@ -544,7 +758,9 @@ static uint64_t getuint64(const char *f) { static void interactive_help(void) { fprintf(stderr, "help\n"); fprintf(stderr, "header\n"); - fprintf(stderr, "node NUMBER\n"); + cout <<"mr/MessagesReport [NUMBER] \n Reports messages for the level of the tree you want get more details about\n"; + cout <<"rf/readFile ft-file-name \n Switch to a different FT\n"; + fprintf(stderr, "node NUMBER \n"); fprintf(stderr, "bx OFFSET | block_translation OFFSET\n"); fprintf(stderr, "dumpdata 0|1\n"); fprintf(stderr, "fragmentation\n"); @@ -554,10 +770,160 @@ static void interactive_help(void) { fprintf(stderr, "quit\n"); } +static void freeNMC(NMC *msgs[], int height){ + for(int i=0;i<height;i++){ + if(msgs[i]!=NULL){ + delete(msgs[i]->count); + + while(msgs[i]->nextNode!=NULL){ + NMC* ptr=msgs[i]->nextNode; + msgs[i]=msgs[i]->nextNode; + delete ptr; + + } + msgs[i]=NULL; + } + } +} + +static void writeTree(NMC *msgs[],int height,char *name UU()){ + ofstream mytree ("/tmp/tree.txt",fstream::out); + if (mytree.is_open()){ + for(int i=height;i>=0;i--){ + NMC * ptr=msgs[i]; + mytree <<i<<endl; + while(ptr!=NULL){ + mytree << ptr->id<<"\t"; + if(ptr->clean!=0)mytree << "1"<<"\t"; + else mytree << "0"<<"\t"; + for(int j=0;j<15;j++)mytree << ptr->count[j]<<" "; + mytree << ptr->count[i]<<endl; + ptr=ptr->nextNode; + } + mytree <<endl; + } + } + else cout << "Unable to open file"; + mytree.close(); +} + +static void writeJson(NMC *msgs[],int height,const char *name){ + ofstream mytree (name,fstream::out); + if (mytree.is_open()){ + mytree <<"{\n \"FT\":["; + for(int i=height;i>=0;i--){ + NMC * ptr=msgs[i]; + mytree <<"{\n\"Level\": {\"Height\":\""<<i<<"\",\n \"Nodes\":["; + while(ptr!=NULL){ + mytree <<"{\"ID\":\""<< ptr->id<<"\","; + if(ptr->clean!=0){ + mytree <<"\"Messages\":["; + for(int j=0;j<16;j++) + { + mytree <<"{"; + switch (j) { + case FT_INSERT: mytree <<"\"INSERT\":\""<<ptr->count[j]<<"\""; break; + case FT_INSERT_NO_OVERWRITE: mytree <<"\"INSERT_NOVERWTE\":\""<<ptr->count[j]<<"\""; break; + case FT_DELETE_ANY: mytree <<"\"DELETE\":\""<<ptr->count[j]<<"\""; break; + case FT_ABORT_ANY: mytree <<"\"ABORT\":\""<<ptr->count[j]<<"\""; break; + case FT_COMMIT_ANY: mytree <<"\"COMMITY\":\""<<ptr->count[j]<<"\""; break; + case FT_COMMIT_BROADCAST_ALL: mytree <<"\"COMMIT_BROADCAST_ALL\":\""<<ptr->count[j]<<"\"" ; break; + case FT_COMMIT_BROADCAST_TXN: mytree <<"\"COMMIT_BROADCAST_TXN\":\""<<ptr->count[j]<<"\""; break; + case FT_ABORT_BROADCAST_TXN: mytree <<"\"ABORT_BROADCAST_TXN\":\""<<ptr->count[j]<<"\"";break; + case FT_OPTIMIZE: mytree <<"\"OPTIMIZE\":\""<<ptr->count[j]<<"\""; break; + case FT_OPTIMIZE_FOR_UPGRADE: mytree <<"\"OPTIMIZE_FOR_UPGRADE\":\""<<ptr->count[j]<<"\"";break; + case FT_UPDATE: mytree <<"\"UPDATE\":\""<<ptr->count[j]<<"\""; break; + case FT_UPDATE_BROADCAST_ALL: mytree <<"\"UPDATE_BROADCAST_ALL\":\""<<ptr->count[j]<<"\""; break; + } + mytree <<"}"; + if(j<15)mytree<<","; + } + + mytree <<"]}"; + + } + else { + mytree <<"\"Messages\":\""<< "0"<<"\"}"; + } + if(ptr->nextNode!=NULL)mytree <<",\n"; + else mytree <<"]}\n"; + ptr=ptr->nextNode; + } + mytree <<"\n}\n"; + if(i!=0)mytree <<",\n"; + } + mytree <<"\n]}\n"; + + } + else cout << "Unable to open file"; + mytree.close(); +} + +static void writeTree(NMC *msgs[],int height){ + ofstream mytree ("/tmp/tree1.txt",fstream::out); + if (mytree.is_open()){ + for(int i=height;i>=0;i--){ + NMC * ptr=msgs[i]; + mytree <<i<<endl; + while(ptr!=NULL){ + mytree << ptr->id<<","; + if(ptr->clean!=0)mytree << "1"<<","; + else mytree << "0"<<","; + for(int j=0;j<15;j++)mytree << ptr->count[j]<<","; + mytree << ptr->count[i]<<endl; + ptr=ptr->nextNode; + } + mytree <<".\""; + } + } + else cout << "Unable to open file"; + mytree.close(); +} + +static void FT_to_JSON(int fd, FT ft, CACHEFILE cf, const char * JsonFile){ + toku_ft_free(ft); + open_header(fd, &ft, cf); + int root=getRootNode(ft); + BLOCKNUM off = make_blocknum(root); + int height=getHeight(fd,off, ft); + NMC *msgs[height]; + for(int i=0;i<=height;i++){ + msgs[i]=NULL; + } + open_header(fd, &ft, cf); + root=getRootNode(ft); + off = make_blocknum(root); + countMessagesInFT(fd,off, ft,msgs); + cout <<"to STD output: \n"; + treeToSTDout(msgs,height); + writeTree(msgs,height); + cout<<"FT's json file was generated here:"; + if(JsonFile!=NULL) { + cout <<JsonFile; + writeJson(msgs,height,JsonFile); + } + else { + cout <<"./FT.json"; + writeJson(msgs,height,"./FT.json"); + } + cout<<endl; + freeNMC(msgs,height); + exit(0); +} + static void run_iteractive_loop(int fd, FT ft, CACHEFILE cf) { + toku_ft_free(ft); + open_header(fd, &ft, cf); + int root=getRootNode(ft); + BLOCKNUM off = make_blocknum(root); + int height=getHeight(fd,off, ft); + NMC *msgs[height]; + for(int i=0;i<=height;i++){ + msgs[i]=NULL; + } while (1) { - printf("ftdump>"); fflush(stdout); - enum { maxline = 64}; + printf("ftdump>"); + fflush(stdout); char line[maxline+1]; int r = readline(line, maxline); if (r == EOF) @@ -565,23 +931,57 @@ static void run_iteractive_loop(int fd, FT ft, CACHEFILE cf) { const int maxfields = 4; char *fields[maxfields]; int nfields = split_fields(line, fields, maxfields); - if (nfields == 0) + if (nfields == 0) continue; if (strcmp(fields[0], "help") == 0) { interactive_help(); } else if (strcmp(fields[0], "header") == 0) { toku_ft_free(ft); open_header(fd, &ft, cf); - dump_header(ft); + } else if (strcmp(fields[0], "rn") == 0||strcmp(fields[0], "rootNode")==0||strcmp(fields[0], "rootnode") == 0) { + printf("Root node :%d\n",root); } else if (strcmp(fields[0], "block") == 0 && nfields == 2) { BLOCKNUM blocknum = make_blocknum(getuint64(fields[1])); dump_block(fd, blocknum, ft); + }else if ((strcmp(fields[0], "readFile") == 0 ||strcmp(fields[0], "readfile") == 0 ||strcmp(fields[0], "rf") == 0 )&& nfields == 2) { + fname=fields[1]; + fd = open(fname, O_RDWR + O_BINARY); + toku_ft_free(ft); + open_header(fd, &ft, cf); + root=getRootNode(ft); + off = make_blocknum(root); + height=getHeight(fd,off, ft); + if (fd < 0) { + fprintf(stderr, "%s: can not open the FT dump %s errno %d\n", arg0, fname, errno); + continue; + } } else if (strcmp(fields[0], "node") == 0 && nfields == 2) { - BLOCKNUM off = make_blocknum(getuint64(fields[1])); + off = make_blocknum(getuint64(fields[1])); dump_node(fd, off, ft); - } else if (strcmp(fields[0], "dumpdata") == 0 && nfields == 2) { + }else if ((strcmp(fields[0], "mr") == 0||(strcmp(fields[0], "nc")) == 0 ||strcmp(fields[0], "messagesReport") == 0 )) { + freeNMC(msgs,height); + toku_ft_free(ft); + open_header(fd, &ft, cf); + root=getRootNode(ft); + off = make_blocknum(root); + countMessagesInFT(fd,off, ft,msgs); + int level=-1; + if(nfields == 2)level=getuint64(fields[1]); + if(level>=0){ + levelToSTDout(msgs[level], level); + } + else{ + cout <<"to STD output: \n"; + treeToSTDout(msgs,height); + } + writeTree(msgs,height); + writeTree(msgs,height, NULL); + + }else if (strcmp(fields[0], "dumpdata") == 0 && nfields == 2) { + do_dump_data = strtol(fields[1], NULL, 10); - } else if (strcmp(fields[0], "block_translation") == 0 || strcmp(fields[0], "bx") == 0) { + } + else if (strcmp(fields[0], "block_translation") == 0 || strcmp(fields[0], "bx") == 0) { uint64_t offset = 0; if (nfields == 2) offset = getuint64(fields[1]); @@ -590,7 +990,7 @@ static void run_iteractive_loop(int fd, FT ft, CACHEFILE cf) { dump_fragmentation(fd, ft, do_tsv); } else if (strcmp(fields[0], "nodesizes") == 0) { dump_nodesizes(fd, ft); - } else if (strcmp(fields[0], "garbage") == 0) { + } else if (strcmp(fields[0], "garbage") == 0||strcmp(fields[0], "g") == 0) { dump_garbage_stats(fd, ft); } else if (strcmp(fields[0], "file") == 0 && nfields >= 3) { uint64_t offset = getuint64(fields[1]); @@ -604,14 +1004,18 @@ static void run_iteractive_loop(int fd, FT ft, CACHEFILE cf) { unsigned char newc = getuint64(fields[2]); set_file(fd, offset, newc); } else if (strcmp(fields[0], "quit") == 0 || strcmp(fields[0], "q") == 0) { - break; + toku_ft_free(ft); + exit(0); } } + freeNMC(msgs,height); } static int usage(void) { fprintf(stderr, "Usage: %s ", arg0); fprintf(stderr, "--interactive "); + fprintf(stderr, "--support /path/to/fractal-tree/file \n\t an interactive way to see what messages and/or switch between FTs"); + fprintf(stderr, "--json /path/to/fractal-tree/file [output json file]\n\t if left empty an FT.json will be created automatically"); fprintf(stderr, "--nodata "); fprintf(stderr, "--dumpdata 0|1 "); fprintf(stderr, "--header "); @@ -632,10 +1036,15 @@ int main (int argc, const char *const argv[]) { while (argc>0) { if (strcmp(argv[0], "--interactive") == 0 || strcmp(argv[0], "--i") == 0) { do_interactive = 1; + } + else if ((strcmp(argv[0], "--json") == 0 || strcmp(argv[0], "--s")== 0)&& argc >= 2) { + do_json = 1; + fname=argv[1]; + argc--; argv++; + break; } else if (strcmp(argv[0], "--nodata") == 0) { do_dump_data = 0; } else if (strcmp(argv[0], "--dumpdata") == 0 && argc > 1) { - argc--; argv++; do_dump_data = atoi(argv[0]); } else if (strcmp(argv[0], "--header") == 0) { do_header = 1; @@ -660,39 +1069,39 @@ int main (int argc, const char *const argv[]) { } argc--; argv++; } - if (argc != 1) - return usage(); + if (argc != 1 && do_json==0) + return usage(); int r = toku_ft_layer_init(); assert_zero(r); - - fname = argv[0]; + if(fname==NULL)fname = argv[0]; int fd = open(fname, O_RDWR + O_BINARY); if (fd < 0) { fprintf(stderr, "%s: can not open %s errno %d\n", arg0, fname, errno); return 1; } - // create a cachefile for the header CACHETABLE ct = NULL; toku_cachetable_create(&ct, 1<<25, (LSN){0}, 0); - CACHEFILE cf = NULL; r = toku_cachetable_openfd (&cf, ct, fd, fname); assert_zero(r); - FT ft = NULL; open_header(fd, &ft, cf); - + if (do_json ) { + const char *arg=argv[1]; + FT_to_JSON(fd, ft, cf,arg); + } if (do_interactive) { run_iteractive_loop(fd, ft, cf); - } else { + } + else { if (do_header) { dump_header(ft); } if (do_rootnode) { dump_node(fd, ft->h->root_blocknum, ft); - } + } if (do_node) { dump_node(fd, do_node_num, ft); } @@ -708,14 +1117,12 @@ int main (int argc, const char *const argv[]) { if (!do_header && !do_rootnode && !do_fragmentation && !do_translation_table && !do_garbage) { printf("Block translation:"); ft->blocktable.dump_translation_table(stdout); - dump_header(ft); - struct __dump_node_extra info; info.fd = fd; info.ft = ft; ft->blocktable.iterate(block_table::TRANSLATION_CHECKPOINTED, - dump_node_wrapper, &info, true, true); + dump_node_wrapper, &info, true, true); } } toku_cachefile_close(&cf, false, ZERO_LSN); diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 66074ccb2fc..1f72e51b242 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -3272,7 +3272,7 @@ void ha_tokudb::start_bulk_insert(ha_rows rows) { lock_count = 0; if ((rows == 0 || rows > 1) && share->try_table_lock) { - if (get_prelock_empty(thd) && may_table_be_empty(transaction)) { + if (get_prelock_empty(thd) && may_table_be_empty(transaction) && transaction != NULL) { if (using_ignore || is_insert_ignore(thd) || thd->lex->duplicates != DUP_ERROR || table->s->next_number_key_offset) { acquire_table_lock(transaction, lock_write); @@ -3963,13 +3963,13 @@ int ha_tokudb::write_row(uchar * record) { goto cleanup; } } - txn = create_sub_trans ? sub_trans : transaction; - + if (tokudb_debug & TOKUDB_DEBUG_TXN) { + TOKUDB_HANDLER_TRACE("txn %p", txn); + } if (tokudb_debug & TOKUDB_DEBUG_CHECK_KEY) { test_row_packing(record,&prim_key,&row); } - if (loader) { error = loader->put(loader, &prim_key, &row); if (error) { @@ -4243,7 +4243,7 @@ int ha_tokudb::delete_row(const uchar * record) { bool has_null; THD* thd = ha_thd(); uint curr_num_DBs; - tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton);; + tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton); ha_statistic_increment(&SSV::ha_delete_count); @@ -4268,10 +4268,14 @@ int ha_tokudb::delete_row(const uchar * record) { goto cleanup; } + if (tokudb_debug & TOKUDB_DEBUG_TXN) { + TOKUDB_HANDLER_TRACE("all %p stmt %p sub_sp_level %p transaction %p", trx->all, trx->stmt, trx->sub_sp_level, transaction); + } + error = db_env->del_multiple( db_env, share->key_file[primary_key], - transaction, + transaction, &prim_key, &row, curr_num_DBs, @@ -7177,12 +7181,15 @@ To rename the table, make sure no transactions touch the table.", from, to); double ha_tokudb::scan_time() { TOKUDB_HANDLER_DBUG_ENTER(""); double ret_val = (double)stats.records / 3; + if (tokudb_debug & TOKUDB_DEBUG_RETURN) { + TOKUDB_HANDLER_TRACE("return %" PRIu64 " %f", (uint64_t) stats.records, ret_val); + } DBUG_RETURN(ret_val); } double ha_tokudb::keyread_time(uint index, uint ranges, ha_rows rows) { - TOKUDB_HANDLER_DBUG_ENTER(""); + TOKUDB_HANDLER_DBUG_ENTER("%u %u %" PRIu64, index, ranges, (uint64_t) rows); double ret_val; if (index == primary_key || key_is_clustering(&table->key_info[index])) { ret_val = read_time(index, ranges, rows); @@ -7200,6 +7207,9 @@ double ha_tokudb::keyread_time(uint index, uint ranges, ha_rows rows) (table->key_info[index].key_length + ref_length) + 1); ret_val = (rows + keys_per_block - 1)/ keys_per_block; + if (tokudb_debug & TOKUDB_DEBUG_RETURN) { + TOKUDB_HANDLER_TRACE("return %f", ret_val); + } DBUG_RETURN(ret_val); } @@ -7220,7 +7230,7 @@ double ha_tokudb::read_time( ha_rows rows ) { - TOKUDB_HANDLER_DBUG_ENTER(""); + TOKUDB_HANDLER_DBUG_ENTER("%u %u %" PRIu64, index, ranges, (uint64_t) rows); double total_scan; double ret_val; bool is_primary = (index == primary_key); @@ -7262,12 +7272,18 @@ double ha_tokudb::read_time( ret_val = is_clustering ? ret_val + 0.00001 : ret_val; cleanup: + if (tokudb_debug & TOKUDB_DEBUG_RETURN) { + TOKUDB_HANDLER_TRACE("return %f", ret_val); + } DBUG_RETURN(ret_val); } double ha_tokudb::index_only_read_time(uint keynr, double records) { - TOKUDB_HANDLER_DBUG_ENTER(""); + TOKUDB_HANDLER_DBUG_ENTER("%u %f", keynr, records); double ret_val = keyread_time(keynr, 1, (ha_rows)records); + if (tokudb_debug & TOKUDB_DEBUG_RETURN) { + TOKUDB_HANDLER_TRACE("return %f", ret_val); + } DBUG_RETURN(ret_val); } @@ -7342,7 +7358,7 @@ ha_rows ha_tokudb::records_in_range(uint keynr, key_range* start_key, key_range* cleanup: if (tokudb_debug & TOKUDB_DEBUG_RETURN) { - TOKUDB_HANDLER_TRACE("%" PRIu64 " %" PRIu64, (uint64_t) ret_val, rows); + TOKUDB_HANDLER_TRACE("return %" PRIu64 " %" PRIu64, (uint64_t) ret_val, rows); } DBUG_RETURN(ret_val); } diff --git a/storage/tokudb/ha_tokudb_admin.cc b/storage/tokudb/ha_tokudb_admin.cc index d6da45733a5..b109cd1b976 100644 --- a/storage/tokudb/ha_tokudb_admin.cc +++ b/storage/tokudb/ha_tokudb_admin.cc @@ -156,18 +156,47 @@ int ha_tokudb::analyze(THD *thd, HA_CHECK_OPT *check_opt) { bool is_unique = false; if (i == primary_key || (key_info->flags & HA_NOSAME)) is_unique = true; + uint64_t rows = 0; + uint64_t deleted_rows = 0; int error = tokudb::analyze_card(share->key_file[i], txn, is_unique, num_key_parts, &rec_per_key[total_key_parts], - tokudb_cmp_dbt_key_parts, analyze_progress, &analyze_progress_extra); + tokudb_cmp_dbt_key_parts, analyze_progress, &analyze_progress_extra, + &rows, &deleted_rows); + sql_print_information("tokudb analyze %d %" PRIu64 " %" PRIu64, error, rows, deleted_rows); if (error != 0 && error != ETIME) { result = HA_ADMIN_FAILED; - } else { - // debug - if (tokudb_debug & TOKUDB_DEBUG_ANALYZE) { - TOKUDB_HANDLER_TRACE("%s.%s.%s", - table_share->db.str, table_share->table_name.str, i == primary_key ? "primary" : table_share->key_info[i].name); - for (uint j = 0; j < num_key_parts; j++) - TOKUDB_HANDLER_TRACE("%lu", rec_per_key[total_key_parts+j]); - } + } + if (error != 0 && rows == 0 && deleted_rows > 0) { + result = HA_ADMIN_FAILED; + } + double f = THDVAR(thd, analyze_delete_fraction); + if (result == HA_ADMIN_FAILED || (double) deleted_rows > f * (rows + deleted_rows)) { + char name[256]; int namelen; + namelen = snprintf(name, sizeof name, "%.*s.%.*s.%s", + (int) table_share->db.length, table_share->db.str, + (int) table_share->table_name.length, table_share->table_name.str, + key_name); + thd->protocol->prepare_for_resend(); + thd->protocol->store(name, namelen, system_charset_info); + thd->protocol->store("analyze", 7, system_charset_info); + thd->protocol->store("info", 4, system_charset_info); + char rowmsg[256]; int rowmsglen; + rowmsglen = snprintf(rowmsg, sizeof rowmsg, "rows processed %" PRIu64 " rows deleted %" PRIu64, rows, deleted_rows); + thd->protocol->store(rowmsg, rowmsglen, system_charset_info); + thd->protocol->write(); + + sql_print_information("tokudb analyze on %.*s %.*s", + namelen, name, rowmsglen, rowmsg); + } + if (tokudb_debug & TOKUDB_DEBUG_ANALYZE) { + char name[256]; int namelen; + namelen = snprintf(name, sizeof name, "%.*s.%.*s.%s", + (int) table_share->db.length, table_share->db.str, + (int) table_share->table_name.length, table_share->table_name.str, + key_name); + TOKUDB_HANDLER_TRACE("%.*s rows %" PRIu64 " deleted %" PRIu64, + namelen, name, rows, deleted_rows); + for (uint j = 0; j < num_key_parts; j++) + TOKUDB_HANDLER_TRACE("%lu", rec_per_key[total_key_parts+j]); } total_key_parts += num_key_parts; } diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index c16a5b37e1c..c268780dd2a 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -790,7 +790,7 @@ extern "C" enum durability_properties thd_get_durability_property(const MYSQL_TH #endif // Determine if an fsync is used when a transaction is committed. -static bool tokudb_fsync_on_commit(THD *thd, tokudb_trx_data *trx, DB_TXN *txn) { +static bool tokudb_sync_on_commit(THD *thd, tokudb_trx_data *trx, DB_TXN *txn) { #if MYSQL_VERSION_ID >= 50600 // Check the client durability property which is set during 2PC if (thd_get_durability_property(thd) == HA_IGNORE_DURABILITY) @@ -801,17 +801,19 @@ static bool tokudb_fsync_on_commit(THD *thd, tokudb_trx_data *trx, DB_TXN *txn) if (txn->is_prepared(txn) && mysql_bin_log.is_open()) return false; #endif + if (tokudb_fsync_log_period > 0) + return false; return THDVAR(thd, commit_sync) != 0; } static int tokudb_commit(handlerton * hton, THD * thd, bool all) { - TOKUDB_DBUG_ENTER(""); + TOKUDB_DBUG_ENTER("%u", all); DBUG_PRINT("trans", ("ending transaction %s", all ? "all" : "stmt")); tokudb_trx_data *trx = (tokudb_trx_data *) thd_get_ha_data(thd, hton); DB_TXN **txn = all ? &trx->all : &trx->stmt; DB_TXN *this_txn = *txn; if (this_txn) { - uint32_t syncflag = tokudb_fsync_on_commit(thd, trx, this_txn) ? 0 : DB_TXN_NOSYNC; + uint32_t syncflag = tokudb_sync_on_commit(thd, trx, this_txn) ? 0 : DB_TXN_NOSYNC; if (tokudb_debug & TOKUDB_DEBUG_TXN) { TOKUDB_TRACE("commit trx %u txn %p syncflag %u", all, this_txn, syncflag); } @@ -821,11 +823,11 @@ static int tokudb_commit(handlerton * hton, THD * thd, bool all) { commit_txn_with_progress(this_txn, syncflag, thd); // test hook to induce a crash on a debug build DBUG_EXECUTE_IF("tokudb_crash_commit_after", DBUG_SUICIDE();); - if (this_txn == trx->sp_level) { - trx->sp_level = 0; - } - *txn = 0; + *txn = NULL; trx->sub_sp_level = NULL; + if (this_txn == trx->sp_level || trx->all == NULL) { + trx->sp_level = NULL; + } } else if (tokudb_debug & TOKUDB_DEBUG_TXN) { TOKUDB_TRACE("nothing to commit %d", all); @@ -835,7 +837,7 @@ static int tokudb_commit(handlerton * hton, THD * thd, bool all) { } static int tokudb_rollback(handlerton * hton, THD * thd, bool all) { - TOKUDB_DBUG_ENTER(""); + TOKUDB_DBUG_ENTER("%u", all); DBUG_PRINT("trans", ("aborting transaction %s", all ? "all" : "stmt")); tokudb_trx_data *trx = (tokudb_trx_data *) thd_get_ha_data(thd, hton); DB_TXN **txn = all ? &trx->all : &trx->stmt; @@ -846,11 +848,11 @@ static int tokudb_rollback(handlerton * hton, THD * thd, bool all) { } tokudb_cleanup_handlers(trx, this_txn); abort_txn_with_progress(this_txn, thd); - if (this_txn == trx->sp_level) { - trx->sp_level = 0; - } - *txn = 0; + *txn = NULL; trx->sub_sp_level = NULL; + if (this_txn == trx->sp_level || trx->all == NULL) { + trx->sp_level = NULL; + } } else { if (tokudb_debug & TOKUDB_DEBUG_TXN) { @@ -862,6 +864,13 @@ static int tokudb_rollback(handlerton * hton, THD * thd, bool all) { } #if TOKU_INCLUDE_XA +static bool tokudb_sync_on_prepare(void) { + // skip sync of log if fsync log period > 0 + if (tokudb_fsync_log_period > 0) + return false; + else + return true; +} static int tokudb_xa_prepare(handlerton* hton, THD* thd, bool all) { TOKUDB_DBUG_ENTER(""); @@ -876,6 +885,7 @@ static int tokudb_xa_prepare(handlerton* hton, THD* thd, bool all) { tokudb_trx_data *trx = (tokudb_trx_data *) thd_get_ha_data(thd, hton); DB_TXN* txn = all ? trx->all : trx->stmt; if (txn) { + uint32_t syncflag = tokudb_sync_on_prepare() ? 0 : DB_TXN_NOSYNC; if (tokudb_debug & TOKUDB_DEBUG_TXN) { TOKUDB_TRACE("doing txn prepare:%d:%p", all, txn); } @@ -884,7 +894,7 @@ static int tokudb_xa_prepare(handlerton* hton, THD* thd, bool all) { thd_get_xid(thd, (MYSQL_XID*) &thd_xid); // test hook to induce a crash on a debug build DBUG_EXECUTE_IF("tokudb_crash_prepare_before", DBUG_SUICIDE();); - r = txn->xa_prepare(txn, &thd_xid); + r = txn->xa_prepare(txn, &thd_xid, syncflag); // test hook to induce a crash on a debug build DBUG_EXECUTE_IF("tokudb_crash_prepare_after", DBUG_SUICIDE();); } @@ -949,7 +959,7 @@ cleanup: #endif static int tokudb_savepoint(handlerton * hton, THD * thd, void *savepoint) { - TOKUDB_DBUG_ENTER(""); + TOKUDB_DBUG_ENTER("%p", savepoint); int error; SP_INFO save_info = (SP_INFO)savepoint; tokudb_trx_data *trx = (tokudb_trx_data *) thd_get_ha_data(thd, hton); @@ -970,6 +980,9 @@ static int tokudb_savepoint(handlerton * hton, THD * thd, void *savepoint) { trx->sp_level = save_info->txn; save_info->in_sub_stmt = false; } + if (tokudb_debug & TOKUDB_DEBUG_TXN) { + TOKUDB_TRACE("begin txn %p", save_info->txn); + } save_info->trx = trx; error = 0; cleanup: @@ -977,7 +990,7 @@ cleanup: } static int tokudb_rollback_to_savepoint(handlerton * hton, THD * thd, void *savepoint) { - TOKUDB_DBUG_ENTER(""); + TOKUDB_DBUG_ENTER("%p", savepoint); int error; SP_INFO save_info = (SP_INFO)savepoint; DB_TXN* parent = NULL; @@ -985,6 +998,9 @@ static int tokudb_rollback_to_savepoint(handlerton * hton, THD * thd, void *save tokudb_trx_data *trx = (tokudb_trx_data *) thd_get_ha_data(thd, hton); parent = txn_to_rollback->parent; + if (tokudb_debug & TOKUDB_DEBUG_TXN) { + TOKUDB_TRACE("rollback txn %p", txn_to_rollback); + } if (!(error = txn_to_rollback->abort(txn_to_rollback))) { if (save_info->in_sub_stmt) { trx->sub_sp_level = parent; @@ -998,24 +1014,27 @@ static int tokudb_rollback_to_savepoint(handlerton * hton, THD * thd, void *save } static int tokudb_release_savepoint(handlerton * hton, THD * thd, void *savepoint) { - TOKUDB_DBUG_ENTER(""); - int error; - + TOKUDB_DBUG_ENTER("%p", savepoint); + int error = 0; SP_INFO save_info = (SP_INFO)savepoint; DB_TXN* parent = NULL; DB_TXN* txn_to_commit = save_info->txn; tokudb_trx_data *trx = (tokudb_trx_data *) thd_get_ha_data(thd, hton); parent = txn_to_commit->parent; - if (!(error = txn_to_commit->commit(txn_to_commit, 0))) { + if (tokudb_debug & TOKUDB_DEBUG_TXN) { + TOKUDB_TRACE("commit txn %p", txn_to_commit); + } + DB_TXN *child = txn_to_commit->get_child(txn_to_commit); + if (child == NULL && !(error = txn_to_commit->commit(txn_to_commit, 0))) { if (save_info->in_sub_stmt) { trx->sub_sp_level = parent; } else { trx->sp_level = parent; } - save_info->txn = NULL; } + save_info->txn = NULL; TOKUDB_DBUG_RETURN(error); } @@ -1457,6 +1476,7 @@ static struct st_mysql_sys_var *tokudb_system_variables[] = { MYSQL_SYSVAR(disable_slow_upsert), #endif MYSQL_SYSVAR(analyze_time), + MYSQL_SYSVAR(analyze_delete_fraction), MYSQL_SYSVAR(fsync_log_period), #if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL MYSQL_SYSVAR(gdb_path), diff --git a/storage/tokudb/hatoku_hton.h b/storage/tokudb/hatoku_hton.h index ff17ecc276d..71d78e57d63 100644 --- a/storage/tokudb/hatoku_hton.h +++ b/storage/tokudb/hatoku_hton.h @@ -316,16 +316,9 @@ static MYSQL_THDVAR_BOOL(disable_slow_upsert, ); #endif -static MYSQL_THDVAR_UINT(analyze_time, - 0, - "analyze time", - NULL, - NULL, - 5, // default - 0, // min - ~0U, // max - 1 // blocksize -); +static MYSQL_THDVAR_UINT(analyze_time, 0, "analyze time (seconds)", NULL /*check*/, NULL /*update*/, 5 /*default*/, 0 /*min*/, ~0U /*max*/, 1 /*blocksize*/); + +static MYSQL_THDVAR_DOUBLE(analyze_delete_fraction, 0, "fraction of rows allowed to be deleted", NULL /*check*/, NULL /*update*/, 1.0 /*def*/, 0 /*min*/, 1.0 /*max*/, 1); static void tokudb_checkpoint_lock(THD * thd); static void tokudb_checkpoint_unlock(THD * thd); @@ -430,7 +423,7 @@ static int tokudb_killed_callback(void) { return thd_killed(thd); } -static bool tokudb_killed_thd_callback(void *extra) { +static bool tokudb_killed_thd_callback(void *extra, uint64_t deleted_rows) { THD *thd = static_cast<THD *>(extra); return thd_killed(thd) != 0; } diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/5733_tokudb.result b/storage/tokudb/mysql-test/tokudb_bugs/r/5733_tokudb.result index 07e8b37c263..a05587cb0a6 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/r/5733_tokudb.result +++ b/storage/tokudb/mysql-test/tokudb_bugs/r/5733_tokudb.result @@ -10005,7 +10005,7 @@ insert into t values (9999,0); commit; explain select id from t where id>0 limit 10; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t index_or_range PRIMARY PRIMARY 8 NULL # Using where; Using index_or_range +1 SIMPLE t range_or_index PRIMARY PRIMARY 8 NULL # Using where; Using index explain select * from t where id>0 limit 10; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY PRIMARY 8 NULL # Using where diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/db817.result b/storage/tokudb/mysql-test/tokudb_bugs/r/db817.result new file mode 100644 index 00000000000..d69f0dabcb3 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb_bugs/r/db817.result @@ -0,0 +1,33 @@ +drop table if exists ti; +create table ti (id int primary key) engine=innodb; +begin; +insert into ti values (0); +savepoint b; +insert into ti values (1); +savepoint a2; +insert into ti values (2); +savepoint b; +insert into ti values (3); +rollback to a2; +commit; +select * from ti; +id +0 +1 +drop table if exists tt; +create table tt (id int primary key) engine=tokudb; +begin; +insert into tt values (0); +savepoint b; +insert into tt values (1); +savepoint a2; +insert into tt values (2); +savepoint b; +insert into tt values (3); +rollback to a2; +commit; +select * from tt; +id +0 +1 +drop table ti,tt; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/5733_tokudb.test b/storage/tokudb/mysql-test/tokudb_bugs/t/5733_tokudb.test index 2e30c839905..192004cb113 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/5733_tokudb.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/5733_tokudb.test @@ -20,9 +20,18 @@ while ($i < $n) { } commit; -# TokuDB may do index or range scan on this. Both are ok -replace_column 9 #; ---replace_result index index_or_range range index_or_range +# the plan for the following query should be a range scan. about 1 of 10 times, +# the plan is an index scan. the different scan type occurs because the query optimizer +# is handed different row counts by tokudb::records_in_range. the cost estimates made +# by the query optimizer are very close to begin with. sometimes, the cost of an index +# scan is less than the cost of a range scan. +# +# if a tokudb checkpoint occurs before this query is run, then the records_in_range +# function returns a larger than expected row estimate. +# +# column 4 is the join type (should be range or index) +# column 9 is the estimated key count +replace_column 4 range_or_index 9 #; explain select id from t where id>0 limit 10; replace_column 9 #; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/db817.test b/storage/tokudb/mysql-test/tokudb_bugs/t/db817.test new file mode 100644 index 00000000000..53c9edc3893 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/db817.test @@ -0,0 +1,38 @@ +# verify that duplicate savepoint names in innodb and tokudb work the same +source include/have_innodb.inc; +source include/have_tokudb.inc; +disable_warnings; +drop table if exists ti; +enable_warnings; +create table ti (id int primary key) engine=innodb; +begin; +insert into ti values (0); +savepoint b; +insert into ti values (1); +savepoint a2; +insert into ti values (2); +savepoint b; +insert into ti values (3); +rollback to a2; +commit; +select * from ti; + +disable_warnings; +drop table if exists tt; +enable_warnings; +create table tt (id int primary key) engine=tokudb; +begin; +insert into tt values (0); +savepoint b; +insert into tt values (1); +savepoint a2; +insert into tt values (2); +savepoint b; +insert into tt values (3); +rollback to a2; +commit; +select * from tt; + +drop table ti,tt; + + diff --git a/storage/tokudb/scripts/make.mysql.bash b/storage/tokudb/scripts/make.mysql.bash index a614424d9a0..c1259797590 100755 --- a/storage/tokudb/scripts/make.mysql.bash +++ b/storage/tokudb/scripts/make.mysql.bash @@ -117,6 +117,7 @@ elif [ $build_type = enterprise ] ; then github_download Tokutek/tokudb-backup-plugin $(git_tree $git_tag $backup_tree) tokudb-backup-plugin mv tokudb-backup-plugin plugin github_download Tokutek/backup-enterprise $(git_tree $git_tag $backup_tree) backup-enterprise + rm -rf plugin/tokudb-backup-plugin/backup mv backup-enterprise/backup plugin/tokudb-backup-plugin rm -rf backup-enterprise fi diff --git a/storage/tokudb/scripts/make.mysql.debug.env.bash b/storage/tokudb/scripts/make.mysql.debug.env.bash index 08ea19827bc..3f8b4e6c219 100755 --- a/storage/tokudb/scripts/make.mysql.debug.env.bash +++ b/storage/tokudb/scripts/make.mysql.debug.env.bash @@ -62,7 +62,7 @@ tokudbengine=tokudb-engine tokudbengine_tree=master ftindex=ft-index ftindex_tree=master -backup=backup-community +backup=tokudb-backup-plugin backup_tree=master cc=gcc cxx=g++ @@ -119,9 +119,9 @@ if [ $? != 0 ] ; then exit 1; fi ln -s ../../$tokudbengine/storage/tokudb tokudb if [ $? != 0 ] ; then exit 1; fi popd -pushd $mysql_tree +pushd $mysql_tree/plugin if [ $? != 0 ] ; then exit 1; fi -ln -s ../$backup/backup toku_backup +ln -s ../../$backup $backup if [ $? != 0 ] ; then exit 1; fi popd pushd $mysql_tree/scripts diff --git a/storage/tokudb/tokudb_card.h b/storage/tokudb/tokudb_card.h index 797c705bbaf..22e6fb9b3da 100644 --- a/storage/tokudb/tokudb_card.h +++ b/storage/tokudb/tokudb_card.h @@ -218,15 +218,32 @@ namespace tokudb { return error; } + struct analyze_card_cursor_callback_extra { + int (*analyze_progress)(void *extra, uint64_t rows); + void *analyze_extra; + uint64_t *rows; + uint64_t *deleted_rows; + }; + + bool analyze_card_cursor_callback(void *extra, uint64_t deleted_rows) { + analyze_card_cursor_callback_extra *a_extra = static_cast<analyze_card_cursor_callback_extra *>(extra); + *a_extra->deleted_rows += deleted_rows; + int r = a_extra->analyze_progress(a_extra->analyze_extra, *a_extra->rows); + sql_print_information("tokudb analyze_card_cursor_callback %u %" PRIu64 " %" PRIu64, r, *a_extra->deleted_rows, deleted_rows); + return r != 0; + } + // Compute records per key for all key parts of the ith key of the table. // For each key part, put records per key part in *rec_per_key_part[key_part_index]. // Returns 0 if success, otherwise an error number. // TODO statistical dives into the FT int analyze_card(DB *db, DB_TXN *txn, bool is_unique, uint64_t num_key_parts, uint64_t *rec_per_key_part, int (*key_compare)(DB *, const DBT *, const DBT *, uint), - int (*analyze_progress)(void *extra, uint64_t rows), void *progress_extra) { + int (*analyze_progress)(void *extra, uint64_t rows), void *progress_extra, + uint64_t *return_rows, uint64_t *return_deleted_rows) { int error = 0; uint64_t rows = 0; + uint64_t deleted_rows = 0; uint64_t unique_rows[num_key_parts]; if (is_unique && num_key_parts == 1) { // dont compute for unique keys with a single part. we already know the answer. @@ -235,6 +252,8 @@ namespace tokudb { DBC *cursor = NULL; error = db->cursor(db, txn, &cursor, 0); if (error == 0) { + analyze_card_cursor_callback_extra e = { analyze_progress, progress_extra, &rows, &deleted_rows }; + cursor->c_set_check_interrupt_callback(cursor, analyze_card_cursor_callback, &e); for (uint64_t i = 0; i < num_key_parts; i++) unique_rows[i] = 1; // stop looking when the entire dictionary was analyzed, or a cap on execution time was reached, or the analyze was killed. @@ -243,8 +262,8 @@ namespace tokudb { while (1) { error = cursor->c_get(cursor, &key, 0, DB_NEXT); if (error != 0) { - if (error == DB_NOTFOUND) - error = 0; // eof is not an error + if (error == DB_NOTFOUND || error == TOKUDB_INTERRUPTED) + error = 0; // not an error break; } rows++; @@ -287,10 +306,12 @@ namespace tokudb { } } // return cardinality - if (error == 0 || error == ETIME) { - for (uint64_t i = 0; i < num_key_parts; i++) - rec_per_key_part[i] = rows / unique_rows[i]; - } + if (return_rows) + *return_rows = rows; + if (return_deleted_rows) + *return_deleted_rows = deleted_rows; + for (uint64_t i = 0; i < num_key_parts; i++) + rec_per_key_part[i] = rows / unique_rows[i]; return error; } } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index fa1686b5eb9..c1488526b4e 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -6527,12 +6527,15 @@ ha_innobase::innobase_lock_autoinc(void) break; case AUTOINC_NEW_STYLE_LOCKING: - /* For simple (single/multi) row INSERTs, we fallback to the - old style only if another transaction has already acquired - the AUTOINC lock on behalf of a LOAD FILE or INSERT ... SELECT - etc. type of statement. */ + /* For simple (single/multi) row INSERTs/REPLACEs and RBR + events, we fallback to the old style only if another + transaction has already acquired the AUTOINC lock on + behalf of a LOAD FILE or INSERT ... SELECT etc. type of + statement. */ if (thd_sql_command(user_thd) == SQLCOM_INSERT - || thd_sql_command(user_thd) == SQLCOM_REPLACE) { + || thd_sql_command(user_thd) == SQLCOM_REPLACE + || thd_sql_command(user_thd) == SQLCOM_END // RBR event + ) { dict_table_t* table = prebuilt->table; /* Acquire the AUTOINC mutex. */ @@ -6541,9 +6544,11 @@ ha_innobase::innobase_lock_autoinc(void) /* We need to check that another transaction isn't already holding the AUTOINC lock on the table. */ if (table->n_waiting_or_granted_auto_inc_locks) { - /* Release the mutex to avoid deadlocks. */ + /* Release the mutex to avoid deadlocks and + fall back to old style locking. */ dict_table_autoinc_unlock(table); } else { + /* Do not fall back to old style locking. */ break; } } @@ -12782,10 +12787,8 @@ ha_innobase::cmp_ref( len1 = innobase_read_from_2_little_endian(ref1); len2 = innobase_read_from_2_little_endian(ref2); - ref1 += 2; - ref2 += 2; - result = ((Field_blob*)field)->cmp( ref1, len1, - ref2, len2); + result = ((Field_blob*)field)->cmp(ref1 + 2, len1, + ref2 + 2, len2); } else { result = field->key_cmp(ref1, ref2); } @@ -15390,7 +15393,7 @@ innobase_convert_to_filename_charset( /********************************************************************** Issue a warning that the row is too big. */ -extern "C" +extern "C" UNIV_INTERN void ib_warn_row_too_big(const dict_table_t* table) { diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index 48b477d45a9..ab7e8ba4b84 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -403,7 +403,7 @@ amount of increment. */ atomic_add_long_nv(ptr, amount) # define os_atomic_increment_uint64(ptr, amount) \ - atomic_add_64_nv(ptr, amount) + atomic_add_64_nv((uint64_t *) ptr, amount) /**********************************************************//** Returns the old value of *ptr, atomically sets *ptr to new_val */ diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 1154e2e2f42..a377911022e 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -64,10 +64,10 @@ component, i.e. we show M.N.P as M.N */ (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR) #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 37.0 +#define PERCONA_INNODB_VERSION 37.1 #endif -#define INNODB_VERSION_STR "5.5.41-MariaDB-" IB_TO_STR(PERCONA_INNODB_VERSION) +#define INNODB_VERSION_STR "5.5.42-MariaDB-" IB_TO_STR(PERCONA_INNODB_VERSION) #define REFMAN "http://dev.mysql.com/doc/refman/" \ IB_TO_STR(MYSQL_MAJOR_VERSION) "." \ diff --git a/storage/xtradb/lock/lock0lock.c b/storage/xtradb/lock/lock0lock.c index e36829daed6..ef617b03d71 100644 --- a/storage/xtradb/lock/lock0lock.c +++ b/storage/xtradb/lock/lock0lock.c @@ -5051,7 +5051,7 @@ loop: } } - if (!srv_print_innodb_lock_monitor && !srv_show_locks_held) { + if (!srv_print_innodb_lock_monitor || !srv_show_locks_held) { nth_trx++; goto loop; } diff --git a/storage/xtradb/rem/rem0rec.c b/storage/xtradb/rem/rem0rec.c index 3494d4e4773..a0e289e2163 100644 --- a/storage/xtradb/rem/rem0rec.c +++ b/storage/xtradb/rem/rem0rec.c @@ -833,7 +833,6 @@ rec_get_converted_size_comp_prefix_low( } ut_ad(len <= col->len || col->mtype == DATA_BLOB - || col->mtype == DATA_VARMYSQL || (col->len == 0 && col->mtype == DATA_VARCHAR)); fixed_len = field->fixed_len; @@ -1261,8 +1260,7 @@ rec_convert_dtuple_to_rec_comp( *lens-- = (byte) len; } else { ut_ad(len <= dtype_get_len(type) - || dtype_get_mtype(type) == DATA_BLOB - || dtype_get_mtype(type) == DATA_VARMYSQL); + || dtype_get_mtype(type) == DATA_BLOB); if (len < 128 || (dtype_get_len(type) < 256 && dtype_get_mtype(type) != DATA_BLOB)) { diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index 5bf2be6e33e..4318da10619 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -126,9 +126,16 @@ UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE; static os_file_t files[1000]; /** io_handler_thread parameters for thread identification */ -static ulint n[SRV_MAX_N_IO_THREADS + 8]; -/** io_handler_thread identifiers */ -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 8]; +static ulint n[SRV_MAX_N_IO_THREADS]; +/** io_handler_thread identifiers. The extra elements at the end are allocated +as follows: +SRV_MAX_N_IO_THREADS + 1: srv_master_thread +SRV_MAX_N_IO_THREADS + 2: srv_lock_timeout_thread +SRV_MAX_N_IO_THREADS + 3: srv_error_monitor_thread +SRV_MAX_N_IO_THREADS + 4: srv_monitor_thread +SRV_MAX_N_IO_THREADS + 5: srv_LRU_dump_restore_thread +SRV_MAX_N_IO_THREADS + 6: srv_redo_log_follow_thread */ +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7]; /** We use this mutex to test the return value of pthread_mutex_trylock on successful locking. HP-UX does NOT return 0, though Linux et al do. */ @@ -1197,7 +1204,7 @@ init_log_online(void) /* Create the thread that follows the redo log to output the changed page bitmap */ os_thread_create(&srv_redo_log_follow_thread, NULL, - thread_ids + 5 + SRV_MAX_N_IO_THREADS); + thread_ids + 6 + SRV_MAX_N_IO_THREADS); } } |