Diffstat (limited to 'sql/ha_ndbcluster.cc')
-rw-r--r-- | sql/ha_ndbcluster.cc | 7042
1 file changed, 5175 insertions, 1867 deletions
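
One change that recurs through the diff below is the rewritten ndb_to_mysql_error(): the NdbError now carries a pre-mapped MySQL code (mysql_code, with -1 meaning "unmapped, fall back to the native NDB code"), and a warning is pushed so SHOW WARNINGS and replication can tell temporary from permanent failures. The standalone sketch below only models that flow; FakeNdbError, push_warning_stub and map_ndb_error are illustrative stand-ins, not the real server or NDB API, and the real function also returns early, without a warning, for key-not-found and no-such-table.

// Simplified model of the new error-mapping flow (illustrative types only).
#include <cstdio>

struct FakeNdbError {
  int code;            // native NDB error code
  int mysql_code;      // pre-mapped MySQL error code, -1 if unmapped
  bool temporary;      // stands in for status == NdbError::TemporaryError
  const char *message;
};

static void push_warning_stub(const FakeNdbError &e)
{
  // Stand-in for push_warning_printf(); distinguishes temporary errors.
  std::printf("%s NDB error %d: %s\n",
              e.temporary ? "Temporary" : "Permanent", e.code, e.message);
}

static int map_ndb_error(const FakeNdbError &e)
{
  int error = e.mysql_code;
  if (error == -1)          // mapping missing: return the native NDB code
    error = e.code;
  push_warning_stub(e);     // always leave a trace for SHOW WARNINGS
  return error;
}

int main()
{
  FakeNdbError timeout = { 9999, -1, true, "example temporary NDB error" };
  std::printf("mapped to %d\n", map_ndb_error(timeout));
  return 0;
}
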
diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc index 78520e4c6d1..fea50aeecdb 100644 --- a/sql/ha_ndbcluster.cc +++ b/sql/ha_ndbcluster.cc @@ -24,58 +24,96 @@ #endif #include "mysql_priv.h" +#include "rpl_mi.h" -#ifdef HAVE_NDBCLUSTER_DB #include <my_dir.h> +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE #include "ha_ndbcluster.h" #include <ndbapi/NdbApi.hpp> #include "ha_ndbcluster_cond.h" +#include <../util/Bitmask.hpp> +#include <ndbapi/NdbIndexStat.hpp> + +#include "ha_ndbcluster_binlog.h" +#include "ha_ndbcluster_tables.h" + +#include <mysql/plugin.h> + +#ifdef ndb_dynamite +#undef assert +#define assert(x) do { if(x) break; ::printf("%s %d: assert failed: %s\n", __FILE__, __LINE__, #x); ::fflush(stdout); ::signal(SIGABRT,SIG_DFL); ::abort(); ::kill(::getpid(),6); ::kill(::getpid(),9); } while (0) +#endif // options from from mysqld.cc extern my_bool opt_ndb_optimized_node_selection; extern const char *opt_ndbcluster_connectstring; extern ulong opt_ndb_cache_check_time; +// ndb interface initialization/cleanup +#ifdef __cplusplus +extern "C" { +#endif +extern void ndb_init_internal(); +extern void ndb_end_internal(); +#ifdef __cplusplus +} +#endif + +const char *ndb_distribution_names[]= {"KEYHASH", "LINHASH", NullS}; +TYPELIB ndb_distribution_typelib= { array_elements(ndb_distribution_names)-1, + "", ndb_distribution_names, NULL }; +const char *opt_ndb_distribution= ndb_distribution_names[ND_KEYHASH]; +enum ndb_distribution opt_ndb_distribution_id= ND_KEYHASH; + // Default value for parallelism static const int parallelism= 0; // Default value for max number of transactions // createable against NDB from this handler -static const int max_transactions= 2; - -static const char *ha_ndb_ext=".ndb"; - -static int ndbcluster_close_connection(THD *thd); -static int ndbcluster_commit(THD *thd, bool all); -static int ndbcluster_rollback(THD *thd, bool all); - -handlerton ndbcluster_hton = { - "ndbcluster", - SHOW_OPTION_YES, - "Clustered, fault-tolerant, memory-based tables", - DB_TYPE_NDBCLUSTER, - ndbcluster_init, - 0, /* slot */ - 0, /* savepoint size */ - ndbcluster_close_connection, - NULL, /* savepoint_set */ - NULL, /* savepoint_rollback */ - NULL, /* savepoint_release */ - ndbcluster_commit, - ndbcluster_rollback, - NULL, /* prepare */ - NULL, /* recover */ - NULL, /* commit_by_xid */ - NULL, /* rollback_by_xid */ - NULL, /* create_cursor_read_view */ - NULL, /* set_cursor_read_view */ - NULL, /* close_cursor_read_view */ - HTON_CAN_RECREATE -}; +static const int max_transactions= 3; // should really be 2 but there is a transaction to much allocated when loch table is used -#define NDB_AUTO_INCREMENT_RETRIES 10 +static uint ndbcluster_partition_flags(); +static uint ndbcluster_alter_table_flags(uint flags); +static int ndbcluster_init(void *); +static int ndbcluster_end(handlerton *hton, ha_panic_function flag); +static bool ndbcluster_show_status(handlerton *hton, THD*, + stat_print_fn *, + enum ha_stat_type); +static int ndbcluster_alter_tablespace(handlerton *hton, + THD* thd, + st_alter_tablespace *info); +static int ndbcluster_fill_files_table(handlerton *hton, + THD *thd, + TABLE_LIST *tables, + COND *cond); + +handlerton *ndbcluster_hton; -#define NDB_INVALID_SCHEMA_OBJECT 241 +static handler *ndbcluster_create_handler(handlerton *hton, + TABLE_SHARE *table, + MEM_ROOT *mem_root) +{ + return new (mem_root) ha_ndbcluster(hton, table); +} + +static uint ndbcluster_partition_flags() +{ + return (HA_CAN_PARTITION | HA_CAN_UPDATE_PARTITION_KEY | + HA_CAN_PARTITION_UNIQUE | 
HA_USE_AUTO_PARTITION); +} + +static uint ndbcluster_alter_table_flags(uint flags) +{ + if (flags & ALTER_DROP_PARTITION) + return 0; + else + return (HA_ONLINE_ADD_INDEX | HA_ONLINE_DROP_INDEX | + HA_ONLINE_ADD_UNIQUE_INDEX | HA_ONLINE_DROP_UNIQUE_INDEX | + HA_PARTITION_FUNCTION_SUPPORTED); + +} + +#define NDB_AUTO_INCREMENT_RETRIES 10 #define ERR_PRINT(err) \ DBUG_PRINT("error", ("%d message: %s", err.code, err.message)) @@ -83,43 +121,46 @@ handlerton ndbcluster_hton = { #define ERR_RETURN(err) \ { \ const NdbError& tmp= err; \ - ERR_PRINT(tmp); \ + set_ndb_err(current_thd, tmp); \ DBUG_RETURN(ndb_to_mysql_error(&tmp)); \ } -// Typedefs for long names -typedef NdbDictionary::Column NDBCOL; -typedef NdbDictionary::Table NDBTAB; -typedef NdbDictionary::Index NDBINDEX; -typedef NdbDictionary::Dictionary NDBDICT; +#define ERR_BREAK(err, code) \ +{ \ + const NdbError& tmp= err; \ + set_ndb_err(current_thd, tmp); \ + code= ndb_to_mysql_error(&tmp); \ + break; \ +} -bool ndbcluster_inited= FALSE; +static int ndbcluster_inited= 0; +int ndbcluster_terminating= 0; static Ndb* g_ndb= NULL; -static Ndb_cluster_connection* g_ndb_cluster_connection= NULL; +Ndb_cluster_connection* g_ndb_cluster_connection= NULL; +uchar g_node_id_map[max_ndb_nodes]; // Handler synchronization pthread_mutex_t ndbcluster_mutex; // Table lock handling -static HASH ndbcluster_open_tables; +HASH ndbcluster_open_tables; -static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length, +static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length, my_bool not_used __attribute__((unused))); -static NDB_SHARE *get_share(const char *table_name); -static void free_share(NDB_SHARE *share); - -static int packfrm(const void *data, uint len, const void **pack_data, uint *pack_len); -static int unpackfrm(const void **data, uint *len, - const void* pack_data); - -static int ndb_get_table_statistics(ha_ndbcluster*, bool, Ndb*, const char *, +#ifdef HAVE_NDB_BINLOG +static int rename_share(NDB_SHARE *share, const char *new_key); +#endif +static int ndb_get_table_statistics(ha_ndbcluster*, bool, Ndb*, const NDBTAB *, struct Ndb_statistics *); + // Util thread variables -static pthread_t ndb_util_thread; +pthread_t ndb_util_thread; +int ndb_util_thread_running= 0; pthread_mutex_t LOCK_ndb_util_thread; pthread_cond_t COND_ndb_util_thread; +pthread_cond_t COND_ndb_util_ready; pthread_handler_t ndb_util_thread_func(void *arg); ulong ndb_cache_check_time; @@ -146,7 +187,9 @@ static long ndb_cluster_node_id= 0; static const char * ndb_connected_host= 0; static long ndb_connected_port= 0; static long ndb_number_of_replicas= 0; -static long ndb_number_of_data_nodes= 0; +long ndb_number_of_data_nodes= 0; +long ndb_number_of_ready_data_nodes= 0; +long ndb_connect_count= 0; static int update_status_variables(Ndb_cluster_connection *c) { @@ -154,11 +197,13 @@ static int update_status_variables(Ndb_cluster_connection *c) ndb_connected_port= c->get_connected_port(); ndb_connected_host= c->get_connected_host(); ndb_number_of_replicas= 0; - ndb_number_of_data_nodes= c->no_db_nodes(); + ndb_number_of_ready_data_nodes= c->get_no_ready(); + ndb_number_of_data_nodes= c->no_db_nodes(); + ndb_connect_count= c->get_connect_count(); return 0; } -struct show_var_st ndb_status_variables[]= { +SHOW_VAR ndb_status_variables[]= { {"cluster_node_id", (char*) &ndb_cluster_node_id, SHOW_LONG}, {"config_from_host", (char*) &ndb_connected_host, SHOW_CHAR_PTR}, {"config_from_port", (char*) &ndb_connected_port, SHOW_LONG}, @@ -171,80 +216,79 @@ struct show_var_st 
ndb_status_variables[]= { Error handling functions */ -struct err_code_mapping -{ - int ndb_err; - int my_err; - int show_warning; -}; +/* Note for merge: old mapping table, moved to storage/ndb/ndberror.c */ -static const err_code_mapping err_map[]= +static int ndb_to_mysql_error(const NdbError *ndberr) { - { 626, HA_ERR_KEY_NOT_FOUND, 0 }, - { 630, HA_ERR_FOUND_DUPP_KEY, 1 }, - { 893, HA_ERR_FOUND_DUPP_KEY, 1 }, - { 721, HA_ERR_TABLE_EXIST, 1 }, - { 4244, HA_ERR_TABLE_EXIST, 1 }, - - { 709, HA_ERR_NO_SUCH_TABLE, 0 }, + /* read the mysql mapped error code */ + int error= ndberr->mysql_code; - { 266, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - { 274, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - { 296, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - { 297, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - { 237, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - - { 623, HA_ERR_RECORD_FILE_FULL, 1 }, - { 624, HA_ERR_RECORD_FILE_FULL, 1 }, - { 625, HA_ERR_RECORD_FILE_FULL, 1 }, - { 826, HA_ERR_RECORD_FILE_FULL, 1 }, - { 827, HA_ERR_RECORD_FILE_FULL, 1 }, - { 832, HA_ERR_RECORD_FILE_FULL, 1 }, - - { 284, HA_ERR_TABLE_DEF_CHANGED, 0 }, - - {4000, HA_ERR_OUT_OF_MEM, 1 }, - {4009, HA_ERR_NO_CONNECTION, 1 }, - - { 0, 1, 0 }, - - { -1, -1, 1 } -}; + switch (error) + { + /* errors for which we do not add warnings, just return mapped error code + */ + case HA_ERR_NO_SUCH_TABLE: + case HA_ERR_KEY_NOT_FOUND: + return error; + /* Mapping missing, go with the ndb error code*/ + case -1: + error= ndberr->code; + break; + /* Mapping exists, go with the mapped code */ + default: + break; + } -static int ndb_to_mysql_error(const NdbError *err) -{ - uint i; - for (i=0; err_map[i].ndb_err != err->code && err_map[i].my_err != -1; i++); - if (err_map[i].show_warning) - { - // Push the NDB error message as warning + /* + Push the NDB error message as warning + - Used to be able to use SHOW WARNINGS toget more info on what the error is + - Used by replication to see if the error was temporary + */ + if (ndberr->status == NdbError::TemporaryError) push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, - ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - err->code, err->message, "NDB"); - } - if (err_map[i].my_err == -1) - return err->code; - return err_map[i].my_err; + ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG), + ndberr->code, ndberr->message, "NDB"); + else + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + ndberr->code, ndberr->message, "NDB"); + return error; } +int execute_no_commit_ignore_no_key(ha_ndbcluster *h, NdbTransaction *trans) +{ + if (trans->execute(NdbTransaction::NoCommit, + NdbOperation::AO_IgnoreError, + h->m_force_send) == -1) + return -1; + const NdbError &err= trans->getNdbError(); + if (err.classification != NdbError::NoError && + err.classification != NdbError::ConstraintViolation && + err.classification != NdbError::NoDataFound) + return -1; + + return 0; +} inline int execute_no_commit(ha_ndbcluster *h, NdbTransaction *trans, bool force_release) { h->release_completed_operations(trans, force_release); - return trans->execute(NdbTransaction::NoCommit, - NdbTransaction::AbortOnError, - h->m_force_send); + return h->m_ignore_no_key ? 
+ execute_no_commit_ignore_no_key(h,trans) : + trans->execute(NdbTransaction::NoCommit, + NdbOperation::AbortOnError, + h->m_force_send); } inline int execute_commit(ha_ndbcluster *h, NdbTransaction *trans) { return trans->execute(NdbTransaction::Commit, - NdbTransaction::AbortOnError, + NdbOperation::AbortOnError, h->m_force_send); } @@ -252,7 +296,7 @@ inline int execute_commit(THD *thd, NdbTransaction *trans) { return trans->execute(NdbTransaction::Commit, - NdbTransaction::AbortOnError, + NdbOperation::AbortOnError, thd->variables.ndb_force_send); } @@ -262,22 +306,34 @@ int execute_no_commit_ie(ha_ndbcluster *h, NdbTransaction *trans, { h->release_completed_operations(trans, force_release); return trans->execute(NdbTransaction::NoCommit, - NdbTransaction::AO_IgnoreError, + NdbOperation::AO_IgnoreError, h->m_force_send); } /* Place holder for ha_ndbcluster thread specific data */ +static +uchar *thd_ndb_share_get_key(THD_NDB_SHARE *thd_ndb_share, size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= sizeof(thd_ndb_share->key); + return (uchar*) &thd_ndb_share->key; +} + Thd_ndb::Thd_ndb() { ndb= new Ndb(g_ndb_cluster_connection, ""); lock_count= 0; + start_stmt_count= 0; count= 0; - all= NULL; - stmt= NULL; - error= 0; + trans= NULL; + m_error= FALSE; + m_error_code= 0; query_state&= NDB_QUERY_NORMAL; + options= 0; + (void) hash_init(&open_tables, &my_charset_bin, 5, 0, 0, + (hash_get_key)thd_ndb_share_get_key, 0, 0); } Thd_ndb::~Thd_ndb() @@ -301,15 +357,52 @@ Thd_ndb::~Thd_ndb() ndb= NULL; } changed_tables.empty(); + hash_free(&open_tables); } -inline -Thd_ndb * -get_thd_ndb(THD *thd) { return (Thd_ndb *) thd->ha_data[ndbcluster_hton.slot]; } - -inline void -set_thd_ndb(THD *thd, Thd_ndb *thd_ndb) { thd->ha_data[ndbcluster_hton.slot]= thd_ndb; } +Thd_ndb::init_open_tables() +{ + count= 0; + m_error= FALSE; + m_error_code= 0; + my_hash_reset(&open_tables); +} + +THD_NDB_SHARE * +Thd_ndb::get_open_table(THD *thd, const void *key) +{ + DBUG_ENTER("Thd_ndb::get_open_table"); + HASH_SEARCH_STATE state; + THD_NDB_SHARE *thd_ndb_share= + (THD_NDB_SHARE*)hash_first(&open_tables, (uchar *)&key, sizeof(key), &state); + while (thd_ndb_share && thd_ndb_share->key != key) + thd_ndb_share= (THD_NDB_SHARE*)hash_next(&open_tables, (uchar *)&key, sizeof(key), &state); + if (thd_ndb_share == 0) + { + thd_ndb_share= (THD_NDB_SHARE *) alloc_root(&thd->transaction.mem_root, + sizeof(THD_NDB_SHARE)); + if (!thd_ndb_share) + { + mem_alloc_error(sizeof(THD_NDB_SHARE)); + DBUG_RETURN(NULL); + } + thd_ndb_share->key= key; + thd_ndb_share->stat.last_count= count; + thd_ndb_share->stat.no_uncommitted_rows_count= 0; + thd_ndb_share->stat.records= ~(ha_rows)0; + my_hash_insert(&open_tables, (uchar *)thd_ndb_share); + } + else if (thd_ndb_share->stat.last_count != count) + { + thd_ndb_share->stat.last_count= count; + thd_ndb_share->stat.no_uncommitted_rows_count= 0; + thd_ndb_share->stat.records= ~(ha_rows)0; + } + DBUG_PRINT("exit", ("thd_ndb_share: 0x%lx key: 0x%lx", + (long) thd_ndb_share, (long) key)); + DBUG_RETURN(thd_ndb_share); +} inline Ndb *ha_ndbcluster::get_ndb() @@ -321,22 +414,44 @@ Ndb *ha_ndbcluster::get_ndb() * manage uncommitted insert/deletes during transactio to get records correct */ -struct Ndb_local_table_statistics { - int no_uncommitted_rows_count; - ulong last_count; - ha_rows records; -}; - void ha_ndbcluster::set_rec_per_key() { DBUG_ENTER("ha_ndbcluster::get_status_const"); - for (uint i=0 ; i < table->s->keys ; i++) + for (uint i=0 ; i < table_share->keys ; i++) { 
table->key_info[i].rec_per_key[table->key_info[i].key_parts-1]= 1; } DBUG_VOID_RETURN; } +ha_rows ha_ndbcluster::records() +{ + ha_rows retval; + DBUG_ENTER("ha_ndbcluster::records"); + struct Ndb_local_table_statistics *local_info= m_table_info; + DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d", + ((const NDBTAB *)m_table)->getTableId(), + local_info->no_uncommitted_rows_count)); + + Ndb *ndb= get_ndb(); + ndb->setDatabaseName(m_dbname); + struct Ndb_statistics stat; + if (ndb_get_table_statistics(this, TRUE, ndb, m_table, &stat) == 0) + { + retval= stat.row_count; + } + else + { + DBUG_RETURN(HA_POS_ERROR); + } + + THD *thd= current_thd; + if (get_thd_ndb(thd)->m_error) + local_info->no_uncommitted_rows_count= 0; + + DBUG_RETURN(retval + local_info->no_uncommitted_rows_count); +} + int ha_ndbcluster::records_update() { if (m_ha_not_exact_count) @@ -344,12 +459,10 @@ int ha_ndbcluster::records_update() DBUG_ENTER("ha_ndbcluster::records_update"); int result= 0; - struct Ndb_local_table_statistics *local_info= - (struct Ndb_local_table_statistics *)m_table_info; + struct Ndb_local_table_statistics *local_info= m_table_info; DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d", ((const NDBTAB *)m_table)->getTableId(), local_info->no_uncommitted_rows_count)); - // if (info->records == ~(ha_rows)0) { Ndb *ndb= get_ndb(); struct Ndb_statistics stat; @@ -357,21 +470,21 @@ int ha_ndbcluster::records_update() { return my_errno= HA_ERR_OUT_OF_MEM; } - result= ndb_get_table_statistics(this, true, ndb, m_tabname, &stat); + result= ndb_get_table_statistics(this, TRUE, ndb, m_table, &stat); if (result == 0) { - mean_rec_length= stat.row_size; - data_file_length= stat.fragment_memory; + stats.mean_rec_length= stat.row_size; + stats.data_file_length= stat.fragment_memory; local_info->records= stat.row_count; } } { THD *thd= current_thd; - if (get_thd_ndb(thd)->error) + if (get_thd_ndb(thd)->m_error) local_info->no_uncommitted_rows_count= 0; } - if(result==0) - records= local_info->records+ local_info->no_uncommitted_rows_count; + if (result == 0) + stats.records= local_info->records+ local_info->no_uncommitted_rows_count; DBUG_RETURN(result); } @@ -380,27 +493,8 @@ void ha_ndbcluster::no_uncommitted_rows_execute_failure() if (m_ha_not_exact_count) return; DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_execute_failure"); - get_thd_ndb(current_thd)->error= 1; - DBUG_VOID_RETURN; -} - -void ha_ndbcluster::no_uncommitted_rows_init(THD *thd) -{ - if (m_ha_not_exact_count) - return; - DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_init"); - struct Ndb_local_table_statistics *local_info= - (struct Ndb_local_table_statistics *)m_table_info; - Thd_ndb *thd_ndb= get_thd_ndb(thd); - if (local_info->last_count != thd_ndb->count) - { - local_info->last_count= thd_ndb->count; - local_info->no_uncommitted_rows_count= 0; - local_info->records= ~(ha_rows)0; - DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d", - ((const NDBTAB *)m_table)->getTableId(), - local_info->no_uncommitted_rows_count)); - } + get_thd_ndb(current_thd)->m_error= TRUE; + get_thd_ndb(current_thd)->m_error_code= 0; DBUG_VOID_RETURN; } @@ -409,8 +503,7 @@ void ha_ndbcluster::no_uncommitted_rows_update(int c) if (m_ha_not_exact_count) return; DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update"); - struct Ndb_local_table_statistics *local_info= - (struct Ndb_local_table_statistics *)m_table_info; + struct Ndb_local_table_statistics *local_info= m_table_info; local_info->no_uncommitted_rows_count+= c; DBUG_PRINT("info", ("id=%d, 
no_uncommitted_rows_count=%d", ((const NDBTAB *)m_table)->getTableId(), @@ -425,111 +518,66 @@ void ha_ndbcluster::no_uncommitted_rows_reset(THD *thd) DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_reset"); Thd_ndb *thd_ndb= get_thd_ndb(thd); thd_ndb->count++; - thd_ndb->error= 0; + thd_ndb->m_error= FALSE; DBUG_VOID_RETURN; } /* - Take care of the error that occured in NDB - - RETURN - 0 No error - # The mapped error code + Sets the latest ndb error code on the thd_ndb object such that it + can be retrieved later to know which ndb error caused the handler + error. */ - -void ha_ndbcluster::invalidate_dictionary_cache(bool global) +static void set_ndb_err(THD *thd, const NdbError &err) { - NDBDICT *dict= get_ndb()->getDictionary(); - DBUG_ENTER("invalidate_dictionary_cache"); - DBUG_PRINT("info", ("invalidating %s", m_tabname)); + DBUG_ENTER("set_ndb_err"); + ERR_PRINT(err); - if (global) + Thd_ndb *thd_ndb= get_thd_ndb(thd); + if (thd_ndb == NULL) + DBUG_VOID_RETURN; +#ifdef NOT_YET + /* + Check if error code is overwritten, in this case the original + failure cause will be lost. E.g. if 4350 error is given. So + push a warning so that it can be detected which is the root + error cause. + */ + if (thd_ndb->m_query_id == thd->query_id && + thd_ndb->m_error_code != 0 && + thd_ndb->m_error_code != err.code) { - const NDBTAB *tab= dict->getTable(m_tabname); - if (!tab) - DBUG_VOID_RETURN; - if (tab->getObjectStatus() == NdbDictionary::Object::Invalid) - { - // Global cache has already been invalidated - dict->removeCachedTable(m_tabname); - global= FALSE; - } - else - dict->invalidateTable(m_tabname); - } - else - dict->removeCachedTable(m_tabname); - table->s->version=0L; /* Free when thread is ready */ - /* Invalidate indexes */ - for (uint i= 0; i < table->s->keys; i++) - { - NDBINDEX *index = (NDBINDEX *) m_index[i].index; - NDBINDEX *unique_index = (NDBINDEX *) m_index[i].unique_index; - NDB_INDEX_TYPE idx_type= m_index[i].type; - - switch (idx_type) { - case PRIMARY_KEY_ORDERED_INDEX: - case ORDERED_INDEX: - if (global) - dict->invalidateIndex(index->getName(), m_tabname); - else - dict->removeCachedIndex(index->getName(), m_tabname); - break; - case UNIQUE_ORDERED_INDEX: - if (global) - dict->invalidateIndex(index->getName(), m_tabname); - else - dict->removeCachedIndex(index->getName(), m_tabname); - case UNIQUE_INDEX: - if (global) - dict->invalidateIndex(unique_index->getName(), m_tabname); - else - dict->removeCachedIndex(unique_index->getName(), m_tabname); - break; - case PRIMARY_KEY_INDEX: - case UNDEFINED_INDEX: - break; - } + char buf[FN_REFLEN]; + ndb_error_string(thd_ndb->m_error_code, buf, sizeof(buf)); + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + thd_ndb->m_error_code, buf, "NDB"); } +#endif + thd_ndb->m_query_id= thd->query_id; + thd_ndb->m_error_code= err.code; DBUG_VOID_RETURN; } int ha_ndbcluster::ndb_err(NdbTransaction *trans) { + THD *thd= current_thd; int res; NdbError err= trans->getNdbError(); DBUG_ENTER("ndb_err"); - ERR_PRINT(err); + set_ndb_err(thd, err); + switch (err.classification) { case NdbError::SchemaError: { + // TODO perhaps we need to do more here, invalidate also in the cache + m_table->setStatusInvalid(); /* Close other open handlers not used by any thread */ TABLE_LIST table_list; bzero((char*) &table_list,sizeof(table_list)); table_list.db= m_dbname; table_list.alias= table_list.table_name= m_tabname; - close_cached_tables(current_thd, 0, &table_list); - - invalidate_dictionary_cache(TRUE); - - if 
(err.code==284) - { - /* - Check if the table is _really_ gone or if the table has - been alterend and thus changed table id - */ - NDBDICT *dict= get_ndb()->getDictionary(); - DBUG_PRINT("info", ("Check if table %s is really gone", m_tabname)); - if (!(dict->getTable(m_tabname))) - { - err= dict->getNdbError(); - DBUG_PRINT("info", ("Table not found, error: %d", err.code)); - if (err.code != 709) - DBUG_RETURN(1); - } - DBUG_PRINT("info", ("Table exists but must have changed")); - } + close_cached_tables(thd, 0, &table_list); break; } default: @@ -551,8 +599,7 @@ int ha_ndbcluster::ndb_err(NdbTransaction *trans) const NDBINDEX *unique_index= (const NDBINDEX *) m_index[i].unique_index; if (unique_index && - unique_index->getIndexTable() && - (char *) unique_index->getIndexTable()->getTableId() == error_data) + (char *) unique_index->getObjectId() == (int) error_data) { dupkey= i; break; @@ -566,7 +613,7 @@ int ha_ndbcluster::ndb_err(NdbTransaction *trans) violations here, so we need to return MAX_KEY for non-primary to signal that key is unknown */ - m_dupkey= err.code == 630 ? table->s->primary_key : dupkey; + m_dupkey= err.code == 630 ? table_share->primary_key : dupkey; } else { @@ -589,7 +636,7 @@ bool ha_ndbcluster::get_error_message(int error, DBUG_ENTER("ha_ndbcluster::get_error_message"); DBUG_PRINT("enter", ("error: %d", error)); - Ndb *ndb= get_ndb(); + Ndb *ndb= check_ndb_in_thd(current_thd); if (!ndb) DBUG_RETURN(FALSE); @@ -645,15 +692,34 @@ static bool ndb_supported_type(enum_field_types type) /* + Check if MySQL field type forces var part in ndb storage +*/ +static bool field_type_forces_var_part(enum_field_types type) +{ + switch (type) { + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_VARCHAR: + return TRUE; + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_GEOMETRY: + return FALSE; + default: + return FALSE; + } +} + +/* Instruct NDB to set the value of the hidden primary key */ bool ha_ndbcluster::set_hidden_key(NdbOperation *ndb_op, - uint fieldnr, const byte *field_ptr) + uint fieldnr, const uchar *field_ptr) { DBUG_ENTER("set_hidden_key"); - DBUG_RETURN(ndb_op->equal(fieldnr, (char*)field_ptr, - NDB_HIDDEN_PRIMARY_KEY_LENGTH) != 0); + DBUG_RETURN(ndb_op->equal(fieldnr, (char*)field_ptr) != 0); } @@ -662,14 +728,14 @@ bool ha_ndbcluster::set_hidden_key(NdbOperation *ndb_op, */ int ha_ndbcluster::set_ndb_key(NdbOperation *ndb_op, Field *field, - uint fieldnr, const byte *field_ptr) + uint fieldnr, const uchar *field_ptr) { uint32 pack_len= field->pack_length(); DBUG_ENTER("set_ndb_key"); DBUG_PRINT("enter", ("%d: %s, ndb_type: %u, len=%d", fieldnr, field->field_name, field->type(), pack_len)); - DBUG_DUMP("key", (char*)field_ptr, pack_len); + DBUG_DUMP("key", field_ptr, pack_len); DBUG_ASSERT(ndb_supported_type(field->type())); DBUG_ASSERT(! 
(field->flags & BLOB_FLAG)); @@ -683,15 +749,16 @@ int ha_ndbcluster::set_ndb_key(NdbOperation *ndb_op, Field *field, */ int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, - uint fieldnr, bool *set_blob_value) + uint fieldnr, int row_offset, + bool *set_blob_value) { - const byte* field_ptr= field->ptr; - uint32 pack_len= field->pack_length(); + const uchar* field_ptr= field->ptr + row_offset; + uint32 pack_len= field->pack_length(); DBUG_ENTER("set_ndb_value"); - DBUG_PRINT("enter", ("%d: %s, type: %u, len=%d, is_null=%s", + DBUG_PRINT("enter", ("%d: %s type: %u len=%d is_null=%s", fieldnr, field->field_name, field->type(), - pack_len, field->is_null()?"Y":"N")); - DBUG_DUMP("value", (char*) field_ptr, pack_len); + pack_len, field->is_null(row_offset) ? "Y" : "N")); + DBUG_DUMP("value", field_ptr, pack_len); DBUG_ASSERT(ndb_supported_type(field->type())); { @@ -700,8 +767,8 @@ int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, if (pack_len == 0) { pack_len= sizeof(empty_field); - field_ptr= (byte *)&empty_field; - if (field->is_null()) + field_ptr= (uchar *)&empty_field; + if (field->is_null(row_offset)) empty_field= 0; else empty_field= 1; @@ -710,13 +777,14 @@ int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, { if (field->type() != MYSQL_TYPE_BIT) { - if (field->is_null()) + if (field->is_null(row_offset)) + { + DBUG_PRINT("info", ("field is NULL")); // Set value to NULL - DBUG_RETURN((ndb_op->setValue(fieldnr, - (char*)NULL, pack_len) != 0)); + DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL) != 0)); + } // Common implementation for most field types - DBUG_RETURN(ndb_op->setValue(fieldnr, - (char*)field_ptr, pack_len) != 0); + DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)field_ptr) != 0); } else // if (field->type() == MYSQL_TYPE_BIT) { @@ -725,42 +793,42 @@ int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, // Round up bit field length to nearest word boundry pack_len= ((pack_len + 3) >> 2) << 2; DBUG_ASSERT(pack_len <= 8); - if (field->is_null()) + if (field->is_null(row_offset)) // Set value to NULL - DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL, pack_len) != 0)); + DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL) != 0)); DBUG_PRINT("info", ("bit field")); - DBUG_DUMP("value", (char*)&bits, pack_len); + DBUG_DUMP("value", (uchar*)&bits, pack_len); #ifdef WORDS_BIGENDIAN /* store lsw first */ bits = ((bits >> 32) & 0x00000000FFFFFFFFLL) | ((bits << 32) & 0xFFFFFFFF00000000LL); #endif - DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)&bits, pack_len) != 0); + DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)&bits) != 0); } } // Blob type NdbBlob *ndb_blob= ndb_op->getBlobHandle(fieldnr); if (ndb_blob != NULL) { - if (field->is_null()) + if (field->is_null(row_offset)) DBUG_RETURN(ndb_blob->setNull() != 0); Field_blob *field_blob= (Field_blob*)field; // Get length and pointer to data uint32 blob_len= field_blob->get_length(field_ptr); - char* blob_ptr= NULL; + uchar* blob_ptr= NULL; field_blob->get_ptr(&blob_ptr); // Looks like NULL ptr signals length 0 blob if (blob_ptr == NULL) { DBUG_ASSERT(blob_len == 0); - blob_ptr= (char*)""; + blob_ptr= (uchar*)""; } - DBUG_PRINT("value", ("set blob ptr: %p len: %u", - blob_ptr, blob_len)); - DBUG_DUMP("value", (char*)blob_ptr, min(blob_len, 26)); + DBUG_PRINT("value", ("set blob ptr: 0x%lx len: %u", + (long) blob_ptr, blob_len)); + DBUG_DUMP("value", blob_ptr, min(blob_len, 26)); if (set_blob_value) *set_blob_value= TRUE; @@ -792,11 +860,20 @@ int 
g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg) if (ndb_blob->blobsNextBlob() != NULL) DBUG_RETURN(0); ha_ndbcluster *ha= (ha_ndbcluster *)arg; - DBUG_RETURN(ha->get_ndb_blobs_value(ndb_blob, ha->m_blobs_offset)); + int ret= get_ndb_blobs_value(ha->table, ha->m_value, + ha->m_blobs_buffer, ha->m_blobs_buffer_size, + ha->m_blobs_offset); + DBUG_RETURN(ret); } -int ha_ndbcluster::get_ndb_blobs_value(NdbBlob *last_ndb_blob, - my_ptrdiff_t ptrdiff) +/* + This routine is shared by injector. There is no common blobs buffer + so the buffer and length are passed by reference. Injector also + passes a record pointer diff. + */ +int get_ndb_blobs_value(TABLE* table, NdbValue* value_array, + uchar*& buffer, uint& buffer_size, + my_ptrdiff_t ptrdiff) { DBUG_ENTER("get_ndb_blobs_value"); @@ -808,48 +885,63 @@ int ha_ndbcluster::get_ndb_blobs_value(NdbBlob *last_ndb_blob, for (uint i= 0; i < table->s->fields; i++) { Field *field= table->field[i]; - NdbValue value= m_value[i]; - if (value.ptr != NULL && (field->flags & BLOB_FLAG)) + NdbValue value= value_array[i]; + if (! (field->flags & BLOB_FLAG)) + continue; + if (value.blob == NULL) { - Field_blob *field_blob= (Field_blob *)field; - NdbBlob *ndb_blob= value.blob; - Uint64 blob_len= 0; - if (ndb_blob->getLength(blob_len) != 0) - DBUG_RETURN(-1); + DBUG_PRINT("info",("[%u] skipped", i)); + continue; + } + Field_blob *field_blob= (Field_blob *)field; + NdbBlob *ndb_blob= value.blob; + int isNull; + if (ndb_blob->getNull(isNull) != 0) + ERR_RETURN(ndb_blob->getNdbError()); + if (isNull == 0) { + Uint64 len64= 0; + if (ndb_blob->getLength(len64) != 0) + ERR_RETURN(ndb_blob->getNdbError()); // Align to Uint64 - uint32 blob_size= blob_len; - if (blob_size % 8 != 0) - blob_size+= 8 - blob_size % 8; + uint32 size= len64; + if (size % 8 != 0) + size+= 8 - size % 8; if (loop == 1) { - char *buf= m_blobs_buffer + offset; + uchar *buf= buffer + offset; uint32 len= 0xffffffff; // Max uint32 - DBUG_PRINT("value", ("read blob ptr: 0x%lx len: %u", - (long)buf, (uint)blob_len)); if (ndb_blob->readData(buf, len) != 0) - DBUG_RETURN(-1); - DBUG_ASSERT(len == blob_len); + ERR_RETURN(ndb_blob->getNdbError()); + DBUG_PRINT("info", ("[%u] offset: %u buf: 0x%lx len=%u [ptrdiff=%d]", + i, offset, (long) buf, len, (int)ptrdiff)); + DBUG_ASSERT(len == len64); // Ugly hack assumes only ptr needs to be changed - field_blob->ptr+= ptrdiff; - field_blob->set_ptr(len, buf); - field_blob->ptr-= ptrdiff; + field_blob->set_ptr_offset(ptrdiff, len, buf); } - offset+= blob_size; + offset+= size; + } + else if (loop == 1) // undefined or null + { + // have to set length even in this case + uchar *buf= buffer + offset; // or maybe NULL + uint32 len= 0; + field_blob->set_ptr_offset(ptrdiff, len, buf); + DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull)); } } - if (loop == 0 && offset > m_blobs_buffer_size) + if (loop == 0 && offset > buffer_size) { - my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR)); - m_blobs_buffer_size= 0; - DBUG_PRINT("value", ("allocate blobs buffer size %u", offset)); - m_blobs_buffer= my_malloc(offset, MYF(MY_WME)); - if (m_blobs_buffer == NULL) + my_free(buffer, MYF(MY_ALLOW_ZERO_PTR)); + buffer_size= 0; + DBUG_PRINT("info", ("allocate blobs buffer size %u", offset)); + buffer= (uchar*) my_malloc(offset, MYF(MY_WME)); + if (buffer == NULL) { sql_print_error("ha_ndbcluster::get_ndb_blobs_value: " "my_malloc(%u) failed", offset); DBUG_RETURN(-1); } - m_blobs_buffer_size= offset; + buffer_size= offset; } } DBUG_RETURN(0); @@ -863,7 +955,7 @@ int 
ha_ndbcluster::get_ndb_blobs_value(NdbBlob *last_ndb_blob, */ int ha_ndbcluster::get_ndb_value(NdbOperation *ndb_op, Field *field, - uint fieldnr, byte* buf) + uint fieldnr, uchar* buf) { DBUG_ENTER("get_ndb_value"); DBUG_PRINT("enter", ("fieldnr: %d flags: %o", fieldnr, @@ -878,13 +970,13 @@ int ha_ndbcluster::get_ndb_value(NdbOperation *ndb_op, Field *field, { if (field->type() != MYSQL_TYPE_BIT) { - byte *field_buf; + uchar *field_buf; if (field->pack_length() != 0) field_buf= buf + (field->ptr - table->record[0]); else - field_buf= (byte *)&dummy_buf; + field_buf= (uchar *)&dummy_buf; m_value[fieldnr].rec= ndb_op->getValue(fieldnr, - field_buf); + (char*) field_buf); } else // if (field->type() == MYSQL_TYPE_BIT) { @@ -899,7 +991,7 @@ int ha_ndbcluster::get_ndb_value(NdbOperation *ndb_op, Field *field, if (ndb_blob != NULL) { // Set callback - m_blobs_offset= buf - (byte*) table->record[0]; + m_blobs_offset= buf - (uchar*) table->record[0]; void *arg= (void *)this; DBUG_RETURN(ndb_blob->setActiveHook(g_get_ndb_blobs_value, arg) != 0); } @@ -907,34 +999,39 @@ int ha_ndbcluster::get_ndb_value(NdbOperation *ndb_op, Field *field, } // Used for hidden key only - m_value[fieldnr].rec= ndb_op->getValue(fieldnr, m_ref); + m_value[fieldnr].rec= ndb_op->getValue(fieldnr, (char*) m_ref); DBUG_RETURN(m_value[fieldnr].rec == NULL); } +/* + Instruct NDB to fetch the partition id (fragment id) +*/ +int ha_ndbcluster::get_ndb_partition_id(NdbOperation *ndb_op) +{ + DBUG_ENTER("get_ndb_partition_id"); + DBUG_RETURN(ndb_op->getValue(NdbDictionary::Column::FRAGMENT, + (char *)&m_part_id) == NULL); +} /* Check if any set or get of blob value in current query. */ -bool ha_ndbcluster::uses_blob_value(bool all_fields) + +bool ha_ndbcluster::uses_blob_value() { - if (table->s->blob_fields == 0) + MY_BITMAP *bitmap; + uint *blob_index, *blob_index_end; + if (table_share->blob_fields == 0) return FALSE; - if (all_fields) - return TRUE; + + bitmap= m_write_op ? table->write_set : table->read_set; + blob_index= table_share->blob_field; + blob_index_end= blob_index + table_share->blob_fields; + do { - uint no_fields= table->s->fields; - int i; - THD *thd= current_thd; - // They always put blobs at the end.. - for (i= no_fields - 1; i >= 0; i--) - { - Field *field= table->field[i]; - if (thd->query_id == field->query_id) - { - return TRUE; - } - } - } + if (bitmap_is_set(bitmap, table->field[*blob_index]->field_index)) + return TRUE; + } while (++blob_index != blob_index_end); return FALSE; } @@ -945,82 +1042,84 @@ bool ha_ndbcluster::uses_blob_value(bool all_fields) IMPLEMENTATION - check that frm-file on disk is equal to frm-file of table accessed in NDB + + RETURN + 0 ok + -2 Meta data has changed; Re-read data and try again */ +int cmp_frm(const NDBTAB *ndbtab, const void *pack_data, + uint pack_length) +{ + DBUG_ENTER("cmp_frm"); + /* + Compare FrmData in NDB with frm file from disk. 
+ */ + if ((pack_length != ndbtab->getFrmLength()) || + (memcmp(pack_data, ndbtab->getFrmData(), pack_length))) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + int ha_ndbcluster::get_metadata(const char *path) { Ndb *ndb= get_ndb(); NDBDICT *dict= ndb->getDictionary(); const NDBTAB *tab; int error; - bool invalidating_ndb_table= FALSE; - DBUG_ENTER("get_metadata"); DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path)); - do { - const void *data= NULL, *pack_data= NULL; - uint length, pack_length; + DBUG_ASSERT(m_table == NULL); + DBUG_ASSERT(m_table_info == NULL); - if (!(tab= dict->getTable(m_tabname))) - ERR_RETURN(dict->getNdbError()); - // Check if thread has stale local cache - if (tab->getObjectStatus() == NdbDictionary::Object::Invalid) - { - invalidate_dictionary_cache(FALSE); - if (!(tab= dict->getTable(m_tabname))) - ERR_RETURN(dict->getNdbError()); - DBUG_PRINT("info", ("Table schema version: %d", tab->getObjectVersion())); - } - /* - Compare FrmData in NDB with frm file from disk. - */ - error= 0; - if (readfrm(path, &data, &length) || - packfrm(data, length, &pack_data, &pack_length)) - { - my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR)); - my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR)); - DBUG_RETURN(1); - } + uchar *data= NULL, *pack_data= NULL; + size_t length, pack_length; + + /* + Compare FrmData in NDB with frm file from disk. + */ + error= 0; + if (readfrm(path, &data, &length) || + packfrm(data, length, &pack_data, &pack_length)) + { + my_free(data, MYF(MY_ALLOW_ZERO_PTR)); + my_free(pack_data, MYF(MY_ALLOW_ZERO_PTR)); + DBUG_RETURN(1); + } - if ((pack_length != tab->getFrmLength()) || - (memcmp(pack_data, tab->getFrmData(), pack_length))) - { - if (!invalidating_ndb_table) - { - DBUG_PRINT("info", ("Invalidating table")); - invalidate_dictionary_cache(TRUE); - invalidating_ndb_table= TRUE; - } - else - { - DBUG_PRINT("error", - ("metadata, pack_length: %d getFrmLength: %d memcmp: %d", - pack_length, tab->getFrmLength(), - memcmp(pack_data, tab->getFrmData(), pack_length))); - DBUG_DUMP("pack_data", (char*)pack_data, pack_length); - DBUG_DUMP("frm", (char*)tab->getFrmData(), tab->getFrmLength()); - error= 3; - invalidating_ndb_table= FALSE; - } - } - else - { - invalidating_ndb_table= FALSE; - } - my_free((char*)data, MYF(0)); - my_free((char*)pack_data, MYF(0)); - } while (invalidating_ndb_table); + Ndb_table_guard ndbtab_g(dict, m_tabname); + if (!(tab= ndbtab_g.get_table())) + ERR_RETURN(dict->getNdbError()); + + if (get_ndb_share_state(m_share) != NSS_ALTERED + && cmp_frm(tab, pack_data, pack_length)) + { + DBUG_PRINT("error", + ("metadata, pack_length: %lu getFrmLength: %d memcmp: %d", + (ulong) pack_length, tab->getFrmLength(), + memcmp(pack_data, tab->getFrmData(), pack_length))); + DBUG_DUMP("pack_data", (uchar*) pack_data, pack_length); + DBUG_DUMP("frm", (uchar*) tab->getFrmData(), tab->getFrmLength()); + error= HA_ERR_TABLE_DEF_CHANGED; + } + my_free((char*)data, MYF(0)); + my_free((char*)pack_data, MYF(0)); if (error) - DBUG_RETURN(error); - - m_table_version= tab->getObjectVersion(); - m_table= (void *)tab; - m_table_info= NULL; // Set in external lock - - DBUG_RETURN(build_index_list(ndb, table, ILBP_OPEN)); + goto err; + + DBUG_PRINT("info", ("fetched table %s", tab->getName())); + m_table= tab; + if ((error= open_indexes(ndb, table, FALSE)) == 0) + { + ndbtab_g.release(); + DBUG_RETURN(0); + } +err: + ndbtab_g.invalidate(); + m_table= NULL; + DBUG_RETURN(error); } static int fix_unique_index_attr_order(NDB_INDEX_DATA &data, @@ -1032,7 +1131,7 @@ 
static int fix_unique_index_attr_order(NDB_INDEX_DATA &data, if (data.unique_index_attrid_map) my_free((char*)data.unique_index_attrid_map, MYF(0)); - data.unique_index_attrid_map= (unsigned char*)my_malloc(sz,MYF(MY_WME)); + data.unique_index_attrid_map= (uchar*)my_malloc(sz,MYF(MY_WME)); if (data.unique_index_attrid_map == 0) { sql_print_error("fix_unique_index_attr_order: my_malloc(%u) failure", @@ -1063,132 +1162,312 @@ static int fix_unique_index_attr_order(NDB_INDEX_DATA &data, DBUG_RETURN(0); } - - -int ha_ndbcluster::build_index_list(Ndb *ndb, TABLE *tab, enum ILBP phase) +/* + Create all the indexes for a table. + If any index should fail to be created, + the error is returned immediately +*/ +int ha_ndbcluster::create_indexes(Ndb *ndb, TABLE *tab) { uint i; int error= 0; const char *index_name; - char unique_index_name[FN_LEN]; - bool null_in_unique_index= false; - static const char* unique_suffix= "$unique"; KEY* key_info= tab->key_info; const char **key_name= tab->s->keynames.type_names; - NDBDICT *dict= ndb->getDictionary(); - DBUG_ENTER("ha_ndbcluster::build_index_list"); - - m_has_unique_index= FALSE; - // Save information about all known indexes + DBUG_ENTER("ha_ndbcluster::create_indexes"); + for (i= 0; i < tab->s->keys; i++, key_info++, key_name++) { index_name= *key_name; NDB_INDEX_TYPE idx_type= get_index_type_from_table(i); - m_index[i].type= idx_type; - if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX) + error= create_index(index_name, key_info, idx_type, i); + if (error) { - m_has_unique_index= TRUE; - strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS); - DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d", - unique_index_name, i)); + DBUG_PRINT("error", ("Failed to create index %u", i)); + break; } - // Create secondary indexes if in create phase - if (phase == ILBP_CREATE) + } + + DBUG_RETURN(error); +} + +static void ndb_init_index(NDB_INDEX_DATA &data) +{ + data.type= UNDEFINED_INDEX; + data.status= UNDEFINED; + data.unique_index= NULL; + data.index= NULL; + data.unique_index_attrid_map= NULL; + data.index_stat=NULL; + data.index_stat_cache_entries=0; + data.index_stat_update_freq=0; + data.index_stat_query_count=0; +} + +static void ndb_clear_index(NDB_INDEX_DATA &data) +{ + if (data.unique_index_attrid_map) + { + my_free((char*)data.unique_index_attrid_map, MYF(0)); + } + if (data.index_stat) + { + delete data.index_stat; + } + ndb_init_index(data); +} + +/* + Associate a direct reference to an index handle + with an index (for faster access) + */ +int ha_ndbcluster::add_index_handle(THD *thd, NDBDICT *dict, KEY *key_info, + const char *index_name, uint index_no) +{ + int error= 0; + NDB_INDEX_TYPE idx_type= get_index_type_from_table(index_no); + m_index[index_no].type= idx_type; + DBUG_ENTER("ha_ndbcluster::add_index_handle"); + DBUG_PRINT("enter", ("table %s", m_tabname)); + + if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX) + { + DBUG_PRINT("info", ("Get handle to index %s", index_name)); + const NDBINDEX *index; + do { - DBUG_PRINT("info", ("Creating index %u: %s", i, index_name)); - switch (idx_type){ - - case PRIMARY_KEY_INDEX: - // Do nothing, already created - break; - case PRIMARY_KEY_ORDERED_INDEX: - error= create_ordered_index(index_name, key_info); - break; - case UNIQUE_ORDERED_INDEX: - if (!(error= create_ordered_index(index_name, key_info))) - error= create_unique_index(unique_index_name, key_info); - break; - case UNIQUE_INDEX: - if (check_index_fields_not_null(i)) - { - 
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_NULL_COLUMN_IN_INDEX, - "Ndb does not support unique index on NULL valued attributes, index access with NULL value will become full table scan"); - null_in_unique_index= true; - } - error= create_unique_index(unique_index_name, key_info); - break; - case ORDERED_INDEX: - if (key_info->algorithm == HA_KEY_ALG_HASH) - { - push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, - ER_UNSUPPORTED_EXTENSION, - ER(ER_UNSUPPORTED_EXTENSION), - "Ndb does not support non-unique " - "hash based indexes"); - error= HA_ERR_UNSUPPORTED; - break; - } - error= create_ordered_index(index_name, key_info); - break; - default: - DBUG_ASSERT(FALSE); + index= dict->getIndexGlobal(index_name, *m_table); + if (!index) + ERR_RETURN(dict->getNdbError()); + DBUG_PRINT("info", ("index: 0x%lx id: %d version: %d.%d status: %d", + (long) index, + index->getObjectId(), + index->getObjectVersion() & 0xFFFFFF, + index->getObjectVersion() >> 24, + index->getObjectStatus())); + DBUG_ASSERT(index->getObjectStatus() == + NdbDictionary::Object::Retrieved); + break; + } while (1); + m_index[index_no].index= index; + // ordered index - add stats + NDB_INDEX_DATA& d=m_index[index_no]; + delete d.index_stat; + d.index_stat=NULL; + if (thd->variables.ndb_index_stat_enable) + { + d.index_stat=new NdbIndexStat(index); + d.index_stat_cache_entries=thd->variables.ndb_index_stat_cache_entries; + d.index_stat_update_freq=thd->variables.ndb_index_stat_update_freq; + d.index_stat_query_count=0; + d.index_stat->alloc_cache(d.index_stat_cache_entries); + DBUG_PRINT("info", ("index %s stat=on cache_entries=%u update_freq=%u", + index->getName(), + d.index_stat_cache_entries, + d.index_stat_update_freq)); + } else + { + DBUG_PRINT("info", ("index %s stat=off", index->getName())); + } + } + if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX) + { + char unique_index_name[FN_LEN]; + static const char* unique_suffix= "$unique"; + m_has_unique_index= TRUE; + strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS); + DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name)); + const NDBINDEX *index; + do + { + index= dict->getIndexGlobal(unique_index_name, *m_table); + if (!index) + ERR_RETURN(dict->getNdbError()); + DBUG_PRINT("info", ("index: 0x%lx id: %d version: %d.%d status: %d", + (long) index, + index->getObjectId(), + index->getObjectVersion() & 0xFFFFFF, + index->getObjectVersion() >> 24, + index->getObjectStatus())); + DBUG_ASSERT(index->getObjectStatus() == + NdbDictionary::Object::Retrieved); + break; + } while (1); + m_index[index_no].unique_index= index; + error= fix_unique_index_attr_order(m_index[index_no], index, key_info); + } + if (!error) + m_index[index_no].status= ACTIVE; + + DBUG_RETURN(error); +} + +/* + Associate index handles for each index of a table +*/ +int ha_ndbcluster::open_indexes(Ndb *ndb, TABLE *tab, bool ignore_error) +{ + uint i; + int error= 0; + THD *thd=current_thd; + NDBDICT *dict= ndb->getDictionary(); + KEY* key_info= tab->key_info; + const char **key_name= tab->s->keynames.type_names; + DBUG_ENTER("ha_ndbcluster::open_indexes"); + m_has_unique_index= FALSE; + for (i= 0; i < tab->s->keys; i++, key_info++, key_name++) + { + if ((error= add_index_handle(thd, dict, key_info, *key_name, i))) + if (ignore_error) + m_index[i].index= m_index[i].unique_index= NULL; + else break; + m_index[i].null_in_unique_index= FALSE; + if (check_index_fields_not_null(key_info)) + m_index[i].null_in_unique_index= 
TRUE; + } + + if (error && !ignore_error) + { + while (i > 0) + { + i--; + if (m_index[i].index) + { + dict->removeIndexGlobal(*m_index[i].index, 1); + m_index[i].index= NULL; } - if (error) + if (m_index[i].unique_index) { - DBUG_PRINT("error", ("Failed to create index %u", i)); - drop_table(); - break; + dict->removeIndexGlobal(*m_index[i].unique_index, 1); + m_index[i].unique_index= NULL; } } - // Add handles to index objects - if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX) - { - DBUG_PRINT("info", ("Get handle to index %s", index_name)); - const NDBINDEX *index= dict->getIndex(index_name, m_tabname); - if (!index) - ERR_RETURN(dict->getNdbError()); - m_index[i].index= (void *) index; + } + + DBUG_ASSERT(error == 0 || error == 4243); + + DBUG_RETURN(error); +} + +/* + Renumber indexes in index list by shifting out + indexes that are to be dropped + */ +void ha_ndbcluster::renumber_indexes(Ndb *ndb, TABLE *tab) +{ + uint i; + const char *index_name; + KEY* key_info= tab->key_info; + const char **key_name= tab->s->keynames.type_names; + DBUG_ENTER("ha_ndbcluster::renumber_indexes"); + + for (i= 0; i < tab->s->keys; i++, key_info++, key_name++) + { + index_name= *key_name; + NDB_INDEX_TYPE idx_type= get_index_type_from_table(i); + m_index[i].type= idx_type; + if (m_index[i].status == TO_BE_DROPPED) + { + DBUG_PRINT("info", ("Shifting index %s(%i) out of the list", + index_name, i)); + NDB_INDEX_DATA tmp; + uint j= i + 1; + // Shift index out of list + while(j != MAX_KEY && m_index[j].status != UNDEFINED) + { + tmp= m_index[j - 1]; + m_index[j - 1]= m_index[j]; + m_index[j]= tmp; + j++; + } } - if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX) + } + + DBUG_VOID_RETURN; +} + +/* + Drop all indexes that are marked for deletion +*/ +int ha_ndbcluster::drop_indexes(Ndb *ndb, TABLE *tab) +{ + uint i; + int error= 0; + const char *index_name; + KEY* key_info= tab->key_info; + NDBDICT *dict= ndb->getDictionary(); + DBUG_ENTER("ha_ndbcluster::drop_indexes"); + + for (i= 0; i < tab->s->keys; i++, key_info++) + { + NDB_INDEX_TYPE idx_type= get_index_type_from_table(i); + m_index[i].type= idx_type; + if (m_index[i].status == TO_BE_DROPPED) { - DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name)); - const NDBINDEX *index= dict->getIndex(unique_index_name, m_tabname); - if (!index) - ERR_RETURN(dict->getNdbError()); - m_index[i].unique_index= (void *) index; - error= fix_unique_index_attr_order(m_index[i], index, key_info); + const NdbDictionary::Index *index= m_index[i].index; + const NdbDictionary::Index *unique_index= m_index[i].unique_index; + + if (index) + { + index_name= index->getName(); + DBUG_PRINT("info", ("Dropping index %u: %s", i, index_name)); + // Drop ordered index from ndb + error= dict->dropIndexGlobal(*index); + if (!error) + { + dict->removeIndexGlobal(*index, 1); + m_index[i].index= NULL; + } + } + if (!error && unique_index) + { + index_name= unique_index->getName(); + DBUG_PRINT("info", ("Dropping unique index %u: %s", i, index_name)); + // Drop unique index from ndb + error= dict->dropIndexGlobal(*unique_index); + if (!error) + { + dict->removeIndexGlobal(*unique_index, 1); + m_index[i].unique_index= NULL; + } + } + if (error) + DBUG_RETURN(error); + ndb_clear_index(m_index[i]); + continue; } - if (idx_type == UNIQUE_INDEX && - phase != ILBP_CREATE && - check_index_fields_not_null(i)) - null_in_unique_index= true; - m_index[i].null_in_unique_index= null_in_unique_index; } DBUG_RETURN(error); } - /* Decode the type of an index 
from information provided in table object */ NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_table(uint inx) const { - bool is_hash_index= (table->key_info[inx].algorithm == HA_KEY_ALG_HASH); - if (inx == table->s->primary_key) - return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX; + return get_index_type_from_key(inx, table_share->key_info, + inx == table_share->primary_key); +} - return ((table->key_info[inx].flags & HA_NOSAME) ? +NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_key(uint inx, + KEY *key_info, + bool primary) const +{ + bool is_hash_index= (key_info[inx].algorithm == + HA_KEY_ALG_HASH); + if (primary) + return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX; + + return ((key_info[inx].flags & HA_NOSAME) ? (is_hash_index ? UNIQUE_INDEX : UNIQUE_ORDERED_INDEX) : ORDERED_INDEX); } -bool ha_ndbcluster::check_index_fields_not_null(uint inx) +bool ha_ndbcluster::check_index_fields_not_null(KEY* key_info) { - KEY* key_info= table->key_info + inx; KEY_PART_INFO* key_part= key_info->key_part; KEY_PART_INFO* end= key_part+key_info->key_parts; DBUG_ENTER("ha_ndbcluster::check_index_fields_not_null"); @@ -1197,56 +1476,63 @@ bool ha_ndbcluster::check_index_fields_not_null(uint inx) { Field* field= key_part->field; if (field->maybe_null()) - DBUG_RETURN(true); + DBUG_RETURN(TRUE); } - DBUG_RETURN(false); + DBUG_RETURN(FALSE); } -void ha_ndbcluster::release_metadata() +void ha_ndbcluster::release_metadata(THD *thd, Ndb *ndb) { uint i; DBUG_ENTER("release_metadata"); DBUG_PRINT("enter", ("m_tabname: %s", m_tabname)); - m_table= NULL; + NDBDICT *dict= ndb->getDictionary(); + int invalidate_indexes= 0; + if (thd && thd->lex && thd->lex->sql_command == SQLCOM_FLUSH) + { + invalidate_indexes = 1; + } + if (m_table != NULL) + { + if (m_table->getObjectStatus() == NdbDictionary::Object::Invalid) + invalidate_indexes= 1; + dict->removeTableGlobal(*m_table, invalidate_indexes); + } + // TODO investigate + DBUG_ASSERT(m_table_info == NULL); m_table_info= NULL; // Release index list for (i= 0; i < MAX_KEY; i++) { - m_index[i].unique_index= NULL; - m_index[i].index= NULL; - if (m_index[i].unique_index_attrid_map) + if (m_index[i].unique_index) + { + DBUG_ASSERT(m_table != NULL); + dict->removeIndexGlobal(*m_index[i].unique_index, invalidate_indexes); + } + if (m_index[i].index) { - my_free((char *)m_index[i].unique_index_attrid_map, MYF(0)); - m_index[i].unique_index_attrid_map= NULL; + DBUG_ASSERT(m_table != NULL); + dict->removeIndexGlobal(*m_index[i].index, invalidate_indexes); } + ndb_clear_index(m_index[i]); } + m_table= NULL; DBUG_VOID_RETURN; } int ha_ndbcluster::get_ndb_lock_type(enum thr_lock_type type) { - DBUG_ENTER("ha_ndbcluster::get_ndb_lock_type"); if (type >= TL_WRITE_ALLOW_WRITE) - { - DBUG_PRINT("info", ("Using exclusive lock")); - DBUG_RETURN(NdbOperation::LM_Exclusive); - } - else if (type == TL_READ_WITH_SHARED_LOCKS || - uses_blob_value(m_retrieve_all_fields)) - { - DBUG_PRINT("info", ("Using read lock")); - DBUG_RETURN(NdbOperation::LM_Read); - } - else - { - DBUG_PRINT("info", ("Using committed read")); - DBUG_RETURN(NdbOperation::LM_CommittedRead); - } + return NdbOperation::LM_Exclusive; + if (type == TL_READ_WITH_SHARED_LOCKS || + uses_blob_value()) + return NdbOperation::LM_Read; + return NdbOperation::LM_CommittedRead; } static const ulong index_type_flags[]= @@ -1311,13 +1597,13 @@ inline ulong ha_ndbcluster::index_flags(uint idx_no, uint part, bool all_parts) const { DBUG_ENTER("ha_ndbcluster::index_flags"); - DBUG_PRINT("info", 
("idx_no: %d", idx_no)); + DBUG_PRINT("enter", ("idx_no: %u", idx_no)); DBUG_ASSERT(get_index_type_from_table(idx_no) < index_flags_size); DBUG_RETURN(index_type_flags[get_index_type_from_table(idx_no)] | HA_KEY_SCAN_NOT_ROR); } -static void shrink_varchar(Field* field, const byte* & ptr, char* buf) +static void shrink_varchar(Field* field, const uchar* & ptr, uchar* buf) { if (field->type() == MYSQL_TYPE_VARCHAR && ptr != NULL) { Field_varstring* f= (Field_varstring*)field; @@ -1336,9 +1622,9 @@ static void shrink_varchar(Field* field, const byte* & ptr, char* buf) } } -int ha_ndbcluster::set_primary_key(NdbOperation *op, const byte *key) +int ha_ndbcluster::set_primary_key(NdbOperation *op, const uchar *key) { - KEY* key_info= table->key_info + table->s->primary_key; + KEY* key_info= table->key_info + table_share->primary_key; KEY_PART_INFO* key_part= key_info->key_part; KEY_PART_INFO* end= key_part+key_info->key_parts; DBUG_ENTER("set_primary_key"); @@ -1346,8 +1632,8 @@ int ha_ndbcluster::set_primary_key(NdbOperation *op, const byte *key) for (; key_part != end; key_part++) { Field* field= key_part->field; - const byte* ptr= key; - char buf[256]; + const uchar* ptr= key; + uchar buf[256]; shrink_varchar(field, ptr, buf); if (set_ndb_key(op, field, key_part->fieldnr-1, ptr)) @@ -1358,9 +1644,9 @@ int ha_ndbcluster::set_primary_key(NdbOperation *op, const byte *key) } -int ha_ndbcluster::set_primary_key_from_record(NdbOperation *op, const byte *record) +int ha_ndbcluster::set_primary_key_from_record(NdbOperation *op, const uchar *record) { - KEY* key_info= table->key_info + table->s->primary_key; + KEY* key_info= table->key_info + table_share->primary_key; KEY_PART_INFO* key_part= key_info->key_part; KEY_PART_INFO* end= key_part+key_info->key_parts; DBUG_ENTER("set_primary_key_from_record"); @@ -1383,14 +1669,10 @@ bool ha_ndbcluster::check_index_fields_in_write_set(uint keyno) uint i; DBUG_ENTER("check_index_fields_in_write_set"); - if (m_retrieve_all_fields) - { - DBUG_RETURN(true); - } for (i= 0; key_part != end; key_part++, i++) { Field* field= key_part->field; - if (field->query_id != current_thd->query_id) + if (!bitmap_is_set(table->write_set, field->field_index)) { DBUG_RETURN(false); } @@ -1399,7 +1681,8 @@ bool ha_ndbcluster::check_index_fields_in_write_set(uint keyno) DBUG_RETURN(true); } -int ha_ndbcluster::set_index_key_from_record(NdbOperation *op, const byte *record, uint keyno) +int ha_ndbcluster::set_index_key_from_record(NdbOperation *op, + const uchar *record, uint keyno) { KEY* key_info= table->key_info + keyno; KEY_PART_INFO* key_part= key_info->key_part; @@ -1420,7 +1703,7 @@ int ha_ndbcluster::set_index_key_from_record(NdbOperation *op, const byte *recor int ha_ndbcluster::set_index_key(NdbOperation *op, const KEY *key_info, - const byte * key_ptr) + const uchar * key_ptr) { DBUG_ENTER("ha_ndbcluster::set_index_key"); uint i; @@ -1430,8 +1713,8 @@ ha_ndbcluster::set_index_key(NdbOperation *op, for (i= 0; key_part != end; key_part++, i++) { Field* field= key_part->field; - const byte* ptr= key_part->null_bit ? key_ptr + 1 : key_ptr; - char buf[256]; + const uchar* ptr= key_part->null_bit ? 
key_ptr + 1 : key_ptr; + uchar buf[256]; shrink_varchar(field, ptr, buf); if (set_ndb_key(op, field, m_index[active_index].unique_index_attrid_map[i], ptr)) ERR_RETURN(m_active_trans->getNdbError()); @@ -1441,35 +1724,32 @@ ha_ndbcluster::set_index_key(NdbOperation *op, } inline -int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op) +int ha_ndbcluster::define_read_attrs(uchar* buf, NdbOperation* op) { uint i; - THD *thd= current_thd; - DBUG_ENTER("define_read_attrs"); // Define attributes to read - for (i= 0; i < table->s->fields; i++) + for (i= 0; i < table_share->fields; i++) { Field *field= table->field[i]; - if ((thd->query_id == field->query_id) || - ((field->flags & PRI_KEY_FLAG)) || - m_retrieve_all_fields) + if (bitmap_is_set(table->read_set, i) || + ((field->flags & PRI_KEY_FLAG))) { if (get_ndb_value(op, field, i, buf)) ERR_RETURN(op->getNdbError()); } - else + else { m_value[i].ptr= NULL; } } - if (table->s->primary_key == MAX_KEY) + if (table_share->primary_key == MAX_KEY) { DBUG_PRINT("info", ("Getting hidden key")); // Scanning table with no primary key - int hidden_no= table->s->fields; + int hidden_no= table_share->fields; #ifndef DBUG_OFF const NDBTAB *tab= (const NDBTAB *) m_table; if (!tab->getColumn(hidden_no)) @@ -1481,20 +1761,23 @@ int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op) DBUG_RETURN(0); } + /* Read one record from NDB using primary key */ -int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) +int ha_ndbcluster::pk_read(const uchar *key, uint key_len, uchar *buf, + uint32 part_id) { - uint no_fields= table->s->fields; + uint no_fields= table_share->fields; NdbConnection *trans= m_active_trans; NdbOperation *op; int res; DBUG_ENTER("pk_read"); DBUG_PRINT("enter", ("key_len: %u", key_len)); - DBUG_DUMP("key", (char*)key, key_len); + DBUG_DUMP("key", key, key_len); + m_write_op= FALSE; NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); @@ -1502,11 +1785,11 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) op->readTuple(lm) != 0) ERR_RETURN(trans->getNdbError()); - if (table->s->primary_key == MAX_KEY) + if (table_share->primary_key == MAX_KEY) { // This table has no primary key, use "hidden" primary key DBUG_PRINT("info", ("Using hidden key")); - DBUG_DUMP("key", (char*)key, 8); + DBUG_DUMP("key", key, 8); if (set_hidden_key(op, no_fields, key)) ERR_RETURN(trans->getNdbError()); @@ -1522,8 +1805,20 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) if ((res= define_read_attrs(buf, op))) DBUG_RETURN(res); - - if (execute_no_commit_ie(this,trans,false) != 0) + + if (m_use_partition_function) + { + op->setPartitionId(part_id); + // If table has user defined partitioning + // and no indexes, we need to read the partition id + // to support ORDER BY queries + if (table_share->primary_key == MAX_KEY && + get_ndb_partition_id(op)) + ERR_RETURN(trans->getNdbError()); + } + + if ((res = execute_no_commit_ie(this,trans,FALSE)) != 0 || + op->getNdbError().code) { table->status= STATUS_NOT_FOUND; DBUG_RETURN(ndb_err(trans)); @@ -1537,40 +1832,58 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) /* Read one complementing record from NDB using primary key from old_data + or hidden key */ -int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data) +int ha_ndbcluster::complemented_read(const uchar *old_data, uchar *new_data, + uint32 old_part_id) { - uint no_fields= table->s->fields, i; + uint no_fields= 
table_share->fields, i; NdbTransaction *trans= m_active_trans; NdbOperation *op; - THD *thd= current_thd; - DBUG_ENTER("complemented_pk_read"); + DBUG_ENTER("complemented_read"); + m_write_op= FALSE; - if (m_retrieve_all_fields) + if (bitmap_is_set_all(table->read_set)) + { // We have allready retrieved all fields, nothing to complement DBUG_RETURN(0); + } NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) || op->readTuple(lm) != 0) ERR_RETURN(trans->getNdbError()); - int res; - if ((res= set_primary_key_from_record(op, old_data))) - ERR_RETURN(trans->getNdbError()); + if (table_share->primary_key != MAX_KEY) + { + if (set_primary_key_from_record(op, old_data)) + ERR_RETURN(trans->getNdbError()); + } + else + { + // This table has no primary key, use "hidden" primary key + if (set_hidden_key(op, table->s->fields, m_ref)) + ERR_RETURN(op->getNdbError()); + } + + if (m_use_partition_function) + op->setPartitionId(old_part_id); + // Read all unreferenced non-key field(s) for (i= 0; i < no_fields; i++) { Field *field= table->field[i]; if (!((field->flags & PRI_KEY_FLAG) || - (thd->query_id == field->query_id))) + bitmap_is_set(table->read_set, i)) && + !bitmap_is_set(table->write_set, i)) { if (get_ndb_value(op, field, i, new_data)) ERR_RETURN(trans->getNdbError()); } } - if (execute_no_commit(this,trans,false) != 0) + + if (execute_no_commit(this,trans,FALSE) != 0) { table->status= STATUS_NOT_FOUND; DBUG_RETURN(ndb_err(trans)); @@ -1587,7 +1900,7 @@ int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data) { Field *field= table->field[i]; if (!((field->flags & PRI_KEY_FLAG) || - (thd->query_id == field->query_id))) + bitmap_is_set(table->read_set, i))) { m_value[i].ptr= NULL; } @@ -1616,7 +1929,7 @@ bool ha_ndbcluster::check_all_operations_for_error(NdbTransaction *trans, if (err.status != NdbError::Success) { if (ndb_to_mysql_error(&err) != (int) errcode) - DBUG_RETURN(false); + DBUG_RETURN(FALSE); if (op == last) break; op= trans->getNextCompletedOperation(op); } @@ -1647,10 +1960,10 @@ bool ha_ndbcluster::check_all_operations_for_error(NdbTransaction *trans, if (errcode == HA_ERR_KEY_NOT_FOUND) m_dupkey= table->s->primary_key; } - DBUG_RETURN(false); + DBUG_RETURN(FALSE); } } - DBUG_RETURN(true); + DBUG_RETURN(TRUE); } @@ -1659,7 +1972,7 @@ bool ha_ndbcluster::check_all_operations_for_error(NdbTransaction *trans, */ static int -check_null_in_record(const KEY* key_info, const byte *record) +check_null_in_record(const KEY* key_info, const uchar *record) { KEY_PART_INFO *curr_part, *end_part; curr_part= key_info->key_part; @@ -1686,7 +1999,7 @@ check_null_in_record(const KEY* key_info, const byte *record) * primary key or unique index values */ -int ha_ndbcluster::peek_indexed_rows(const byte *record, +int ha_ndbcluster::peek_indexed_rows(const uchar *record, NDB_WRITE_OP write_op) { NdbTransaction *trans= m_active_trans; @@ -1697,8 +2010,7 @@ int ha_ndbcluster::peek_indexed_rows(const byte *record, DBUG_ENTER("peek_indexed_rows"); NdbOperation::LockMode lm= - (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); - + (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); first= NULL; if (write_op != NDB_UPDATE && table->s->primary_key != MAX_KEY) { @@ -1712,6 +2024,22 @@ int ha_ndbcluster::peek_indexed_rows(const byte *record, first= op; if ((res= set_primary_key_from_record(op, record))) ERR_RETURN(trans->getNdbError()); + + if (m_use_partition_function) + { + uint32 part_id; + 
int error; + longlong func_value; + my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); + error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value); + dbug_tmp_restore_column_map(table->read_set, old_map); + if (error) + { + m_part_info->err_value= func_value; + DBUG_RETURN(error); + } + op->setPartitionId(part_id); + } } /* * Fetch any rows with colliding unique indexes @@ -1739,13 +2067,11 @@ int ha_ndbcluster::peek_indexed_rows(const byte *record, DBUG_PRINT("info", ("skipping check for key %u not in write_set", i)); continue; } - NdbIndexOperation *iop; - NDBINDEX *unique_index = (NDBINDEX *) m_index[i].unique_index; + const NDBINDEX *unique_index = m_index[i].unique_index; key_part= key_info->key_part; end= key_part + key_info->key_parts; - if (!(iop= trans->getNdbIndexOperation(unique_index, - (const NDBTAB *) m_table)) || + if (!(iop= trans->getNdbIndexOperation(unique_index, m_table)) || iop->readTuple(lm) != 0) ERR_RETURN(trans->getNdbError()); @@ -1757,7 +2083,7 @@ int ha_ndbcluster::peek_indexed_rows(const byte *record, } last= trans->getLastDefinedOperation(); if (first) - res= execute_no_commit_ie(this,trans,false); + res= execute_no_commit_ie(this,trans,FALSE); else { // Table has no keys @@ -1777,25 +2103,25 @@ int ha_ndbcluster::peek_indexed_rows(const byte *record, DBUG_RETURN(0); } + /* Read one record from NDB using unique secondary index */ -int ha_ndbcluster::unique_index_read(const byte *key, - uint key_len, byte *buf) +int ha_ndbcluster::unique_index_read(const uchar *key, + uint key_len, uchar *buf) { int res; NdbTransaction *trans= m_active_trans; NdbIndexOperation *op; DBUG_ENTER("ha_ndbcluster::unique_index_read"); DBUG_PRINT("enter", ("key_len: %u, index: %u", key_len, active_index)); - DBUG_DUMP("key", (char*)key, key_len); + DBUG_DUMP("key", key, key_len); NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); - if (!(op= trans->getNdbIndexOperation((NDBINDEX *) - m_index[active_index].unique_index, - (const NDBTAB *) m_table)) || + if (!(op= trans->getNdbIndexOperation(m_index[active_index].unique_index, + m_table)) || op->readTuple(lm) != 0) ERR_RETURN(trans->getNdbError()); @@ -1806,7 +2132,8 @@ int ha_ndbcluster::unique_index_read(const byte *key, if ((res= define_read_attrs(buf, op))) DBUG_RETURN(res); - if (execute_no_commit_ie(this,trans,false) != 0) + if (execute_no_commit_ie(this,trans,FALSE) != 0 || + op->getNdbError().code) { int err= ndb_err(trans); if(err==HA_ERR_KEY_NOT_FOUND) @@ -1829,7 +2156,7 @@ inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor) int local_check; NdbTransaction *trans= m_active_trans; - if (m_lock_tuple) + if (m_lock_tuple) { /* Lock level m_lock.type either TL_WRITE_ALLOW_WRITE @@ -1845,16 +2172,16 @@ inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor) if (!(op= m_active_cursor->lockCurrentTuple())) { /* purecov: begin inspected */ - m_lock_tuple= false; + m_lock_tuple= FALSE; ERR_RETURN(con_trans->getNdbError()); /* purecov: end */ } m_ops_pending++; } - m_lock_tuple= false; - + m_lock_tuple= FALSE; + bool contact_ndb= m_lock.type < TL_WRITE_ALLOW_WRITE && - m_lock.type != TL_READ_WITH_SHARED_LOCKS; + m_lock.type != TL_READ_WITH_SHARED_LOCKS;; do { DBUG_PRINT("info", ("Call nextResult, contact_ndb: %d", contact_ndb)); /* @@ -1862,7 +2189,7 @@ inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor) */ if (m_ops_pending && m_blobs_pending) { - if (execute_no_commit(this,trans,false) != 0) + if (execute_no_commit(this,trans,FALSE) != 
0) DBUG_RETURN(ndb_err(trans)); m_ops_pending= 0; m_blobs_pending= FALSE; @@ -1894,7 +2221,7 @@ inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor) { if (m_transaction_on) { - if (execute_no_commit(this,trans,false) != 0) + if (execute_no_commit(this,trans,FALSE) != 0) DBUG_RETURN(-1); } else @@ -1931,7 +2258,7 @@ inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor) */ -inline int ha_ndbcluster::next_result(byte *buf) +inline int ha_ndbcluster::next_result(uchar *buf) { int res; DBUG_ENTER("next_result"); @@ -1966,10 +2293,12 @@ inline int ha_ndbcluster::next_result(byte *buf) */ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, + uint inx, + bool rir, const key_range *keys[2], uint range_no) { - const KEY *const key_info= table->key_info + active_index; + const KEY *const key_info= table->key_info + inx; const uint key_parts= key_info->key_parts; uint key_tot_len[2]; uint tot_len; @@ -2008,10 +2337,10 @@ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, struct part_st { bool part_last; const key_range *key; - const byte *part_ptr; + const uchar *part_ptr; bool part_null; int bound_type; - const char* bound_ptr; + const uchar* bound_ptr; }; struct part_st part[2]; @@ -2034,7 +2363,10 @@ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, switch (p.key->flag) { case HA_READ_KEY_EXACT: - p.bound_type= NdbIndexScanOperation::BoundEQ; + if (! rir) + p.bound_type= NdbIndexScanOperation::BoundEQ; + else // differs for records_in_range + p.bound_type= NdbIndexScanOperation::BoundLE; break; // ascending case HA_READ_KEY_OR_NEXT: @@ -2116,15 +2448,15 @@ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, // Set bound if not done with this key if (p.key != NULL) { - DBUG_PRINT("info", ("key %d:%d offset=%d length=%d last=%d bound=%d", + DBUG_PRINT("info", ("key %d:%d offset: %d length: %d last: %d bound: %d", j, i, tot_len, part_len, p.part_last, p.bound_type)); - DBUG_DUMP("info", (const char*)p.part_ptr, part_store_len); + DBUG_DUMP("info", p.part_ptr, part_store_len); // Set bound if not cancelled via type -1 if (p.bound_type != -1) { - const char* ptr= p.bound_ptr; - char buf[256]; + const uchar* ptr= p.bound_ptr; + uchar buf[256]; shrink_varchar(field, ptr, buf); if (op->setBound(i, p.bound_type, ptr)) ERR_RETURN(op->getNdbError()); @@ -2143,7 +2475,8 @@ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, int ha_ndbcluster::ordered_index_scan(const key_range *start_key, const key_range *end_key, - bool sorted, bool descending, byte* buf) + bool sorted, bool descending, + uchar* buf, part_id_range *part_spec) { int res; bool restart; @@ -2154,6 +2487,7 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key, DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d", active_index, sorted, descending)); DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname)); + m_write_op= FALSE; // Check that sorted seems to be initialised DBUG_ASSERT(sorted == 0 || sorted == 1); @@ -2163,17 +2497,22 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key, restart= FALSE; NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); - bool need_pk = (lm == NdbOperation::LM_Read); - if (!(op= trans->getNdbIndexScanOperation((NDBINDEX *) - m_index[active_index].index, - (const NDBTAB *) m_table)) || - op->readTuples(lm, 0, parallelism, sorted, descending, false, need_pk)) + bool need_pk = (lm == NdbOperation::LM_Read); + if (!(op= trans->getNdbIndexScanOperation(m_index[active_index].index, + m_table)) || + 
op->readTuples(lm, 0, parallelism, sorted, descending, FALSE, need_pk)) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function && part_spec != NULL && + part_spec->start_part == part_spec->end_part) + op->setPartitionId(part_spec->start_part); m_active_cursor= op; } else { restart= TRUE; op= (NdbIndexScanOperation*)m_active_cursor; + if (m_use_partition_function && part_spec != NULL && + part_spec->start_part == part_spec->end_part) + op->setPartitionId(part_spec->start_part); DBUG_ASSERT(op->getSorted() == sorted); DBUG_ASSERT(op->getLockMode() == (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type)); @@ -2183,50 +2522,119 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key, { const key_range *keys[2]= { start_key, end_key }; - res= set_bounds(op, keys); + res= set_bounds(op, active_index, FALSE, keys); if (res) DBUG_RETURN(res); } - if (!restart && m_cond && m_cond->generate_scan_filter(op)) - DBUG_RETURN(ndb_err(trans)); - - if (!restart && (res= define_read_attrs(buf, op))) + if (!restart) { - DBUG_RETURN(res); + if (m_cond && m_cond->generate_scan_filter(op)) + DBUG_RETURN(ndb_err(trans)); + + if ((res= define_read_attrs(buf, op))) + { + DBUG_RETURN(res); + } + + // If table has user defined partitioning + // and no primary key, we need to read the partition id + // to support ORDER BY queries + if (m_use_partition_function && + (table_share->primary_key == MAX_KEY) && + (get_ndb_partition_id(op))) + ERR_RETURN(trans->getNdbError()); } - if (execute_no_commit(this,trans,false) != 0) + if (execute_no_commit(this,trans,FALSE) != 0) DBUG_RETURN(ndb_err(trans)); DBUG_RETURN(next_result(buf)); } +static +int +guess_scan_flags(NdbOperation::LockMode lm, + const NDBTAB* tab, const MY_BITMAP* readset) +{ + int flags= 0; + flags|= (lm == NdbOperation::LM_Read) ? 
NdbScanOperation::SF_KeyInfo : 0; + if (tab->checkColumns(0, 0) & 2) + { + int ret = tab->checkColumns(readset->bitmap, no_bytes_in_map(readset)); + + if (ret & 2) + { // If disk columns...use disk scan + flags |= NdbScanOperation::SF_DiskScan; + } + else if ((ret & 4) == 0 && (lm == NdbOperation::LM_Exclusive)) + { + // If no mem column is set and exclusive...guess disk scan + flags |= NdbScanOperation::SF_DiskScan; + } + } + return flags; +} + + /* Unique index scan in NDB (full table scan with scan filter) */ int ha_ndbcluster::unique_index_scan(const KEY* key_info, - const byte *key, + const uchar *key, uint key_len, - byte *buf) + uchar *buf) { int res; NdbScanOperation *op; NdbTransaction *trans= m_active_trans; + part_id_range part_spec; DBUG_ENTER("unique_index_scan"); DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname)); NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); - bool need_pk = (lm == NdbOperation::LM_Read); + int flags= guess_scan_flags(lm, m_table, table->read_set); if (!(op=trans->getNdbScanOperation((const NDBTAB *) m_table)) || - op->readTuples(lm, - (need_pk)?NdbScanOperation::SF_KeyInfo:0, - parallelism)) + op->readTuples(lm, flags, parallelism)) ERR_RETURN(trans->getNdbError()); m_active_cursor= op; + + if (m_use_partition_function) + { + part_spec.start_part= 0; + part_spec.end_part= m_part_info->get_tot_partitions() - 1; + prune_partition_set(table, &part_spec); + DBUG_PRINT("info", ("part_spec.start_part = %u, part_spec.end_part = %u", + part_spec.start_part, part_spec.end_part)); + /* + If partition pruning has found no partition in set + we can return HA_ERR_END_OF_FILE + If partition pruning has found exactly one partition in set + we can optimize scan to run towards that partition only. + */ + if (part_spec.start_part > part_spec.end_part) + { + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + else if (part_spec.start_part == part_spec.end_part) + { + /* + Only one partition is required to scan, if sorted is required we + don't need it any more since output from one ordered partitioned + index is always sorted. 
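The pruning block above boils down to a three-way decision: an empty pruned set means the scan can return HA_ERR_END_OF_FILE without ever contacting NDB, a single-partition set pins the scan to that partition via setPartitionId(), and anything else scans all partitions. Below is a minimal standalone sketch of that decision only; part_id_range and the outcome enum are simplified stand-ins, not the server's actual types.

// Sketch of the partition-pruning outcome used before starting a scan.
#include <cstdint>
#include <cstdio>

struct part_id_range { uint32_t start_part; uint32_t end_part; };

enum class ScanPlan { EmptyResult, SinglePartition, AllPartitions };

ScanPlan plan_scan(const part_id_range &spec) {
  if (spec.start_part > spec.end_part)
    return ScanPlan::EmptyResult;      // pruning removed every partition
  if (spec.start_part == spec.end_part)
    return ScanPlan::SinglePartition;  // pin the scan: op->setPartitionId(start)
  return ScanPlan::AllPartitions;      // scan all partitions as before
}

int main() {
  part_id_range pruned{3, 3};
  std::printf("%d\n", static_cast<int>(plan_scan(pruned)));  // SinglePartition
}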
+ */ + m_active_cursor->setPartitionId(part_spec.start_part); + } + // If table has user defined partitioning + // and no primary key, we need to read the partition id + // to support ORDER BY queries + if ((table_share->primary_key == MAX_KEY) && + (get_ndb_partition_id(op))) + ERR_RETURN(trans->getNdbError()); + } if (!m_cond) m_cond= new ha_ndbcluster_cond; if (!m_cond) @@ -2239,40 +2647,76 @@ int ha_ndbcluster::unique_index_scan(const KEY* key_info, if ((res= define_read_attrs(buf, op))) DBUG_RETURN(res); - if (execute_no_commit(this,trans,false) != 0) + if (execute_no_commit(this,trans,FALSE) != 0) DBUG_RETURN(ndb_err(trans)); DBUG_PRINT("exit", ("Scan started successfully")); DBUG_RETURN(next_result(buf)); } + /* Start full table scan in NDB */ -int ha_ndbcluster::full_table_scan(byte *buf) +int ha_ndbcluster::full_table_scan(uchar *buf) { int res; NdbScanOperation *op; NdbTransaction *trans= m_active_trans; + part_id_range part_spec; DBUG_ENTER("full_table_scan"); DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname)); + m_write_op= FALSE; NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); - bool need_pk = (lm == NdbOperation::LM_Read); - if (!(op=trans->getNdbScanOperation((const NDBTAB *) m_table)) || - op->readTuples(lm, - (need_pk)?NdbScanOperation::SF_KeyInfo:0, - parallelism)) + int flags= guess_scan_flags(lm, m_table, table->read_set); + if (!(op=trans->getNdbScanOperation(m_table)) || + op->readTuples(lm, flags, parallelism)) ERR_RETURN(trans->getNdbError()); m_active_cursor= op; + + if (m_use_partition_function) + { + part_spec.start_part= 0; + part_spec.end_part= m_part_info->get_tot_partitions() - 1; + prune_partition_set(table, &part_spec); + DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u", + part_spec.start_part, part_spec.end_part)); + /* + If partition pruning has found no partition in set + we can return HA_ERR_END_OF_FILE + If partition pruning has found exactly one partition in set + we can optimize scan to run towards that partition only. + */ + if (part_spec.start_part > part_spec.end_part) + { + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + else if (part_spec.start_part == part_spec.end_part) + { + /* + Only one partition is required to scan, if sorted is required we + don't need it any more since output from one ordered partitioned + index is always sorted. 
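Both scan paths in this hunk obtain their readTuples() flags from the new guess_scan_flags() helper instead of the old need_pk boolean: SF_KeyInfo is only requested for shared reads, and SF_DiskScan is chosen when the read set touches disk-resident columns, or when nothing in memory is read under an exclusive lock. The following is a standalone sketch of that decision; the enums and the column-check encoding are illustrative stand-ins for the NDB API, not its real definitions.

// Standalone sketch of the flag-guessing logic in guess_scan_flags().
#include <cstdio>

enum LockMode { LM_Read, LM_Exclusive };
enum ScanFlag { SF_KeyInfo = 1, SF_DiskScan = 2 };

// Hypothetical summary of checking the read set against the table's columns.
struct ColumnCheck {
  bool table_has_disk_columns;    // table defines any disk column at all
  bool readset_hits_disk_column;  // read set touches a disk column
  bool readset_hits_mem_column;   // read set touches a memory column
};

int guess_scan_flags(LockMode lm, const ColumnCheck &c) {
  int flags = 0;
  if (lm == LM_Read)
    flags |= SF_KeyInfo;               // key info only needed for shared reads
  if (c.table_has_disk_columns) {
    if (c.readset_hits_disk_column)
      flags |= SF_DiskScan;            // disk columns are read: scan on disk
    else if (!c.readset_hits_mem_column && lm == LM_Exclusive)
      flags |= SF_DiskScan;            // nothing in memory read + exclusive lock
  }
  return flags;
}

int main() {
  ColumnCheck c{true, false, false};
  std::printf("flags=%d\n", guess_scan_flags(LM_Exclusive, c));  // SF_DiskScan
}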
+ */ + m_active_cursor->setPartitionId(part_spec.start_part); + } + // If table has user defined partitioning + // and no primary key, we need to read the partition id + // to support ORDER BY queries + if ((table_share->primary_key == MAX_KEY) && + (get_ndb_partition_id(op))) + ERR_RETURN(trans->getNdbError()); + } + if (m_cond && m_cond->generate_scan_filter(op)) DBUG_RETURN(ndb_err(trans)); if ((res= define_read_attrs(buf, op))) DBUG_RETURN(res); - if (execute_no_commit(this,trans,false) != 0) + if (execute_no_commit(this,trans,FALSE) != 0) DBUG_RETURN(ndb_err(trans)); DBUG_PRINT("exit", ("Scan started successfully")); DBUG_RETURN(next_result(buf)); @@ -2281,26 +2725,30 @@ int ha_ndbcluster::full_table_scan(byte *buf) int ha_ndbcluster::set_auto_inc(Field *field) { - Ndb *ndb= get_ndb(); - Uint64 next_val= (Uint64) field->val_int() + 1; DBUG_ENTER("ha_ndbcluster::set_auto_inc"); + Ndb *ndb= get_ndb(); + bool read_bit= bitmap_is_set(table->read_set, field->field_index); + bitmap_set_bit(table->read_set, field->field_index); + Uint64 next_val= (Uint64) field->val_int() + 1; + if (!read_bit) + bitmap_clear_bit(table->read_set, field->field_index); #ifndef DBUG_OFF char buff[22]; DBUG_PRINT("info", ("Trying to set next auto increment value to %s", llstr(next_val, buff))); #endif - if (ndb->setAutoIncrementValue((const NDBTAB *) m_table, next_val, TRUE) + Ndb_tuple_id_range_guard g(m_share); + if (ndb->setAutoIncrementValue(m_table, g.range, next_val, TRUE) == -1) ERR_RETURN(ndb->getNdbError()); DBUG_RETURN(0); } - /* Insert one record into NDB */ -int ha_ndbcluster::write_row(byte *record) +int ha_ndbcluster::write_row(uchar *record) { bool has_auto_increment; uint i; @@ -2308,10 +2756,12 @@ int ha_ndbcluster::write_row(byte *record) NdbOperation *op; int res; THD *thd= table->in_use; - DBUG_ENTER("write_row"); + longlong func_value= 0; + DBUG_ENTER("ha_ndbcluster::write_row"); + m_write_op= TRUE; has_auto_increment= (table->next_number_field && record == table->record[0]); - if (table->s->primary_key != MAX_KEY) + if (table_share->primary_key != MAX_KEY) { /* * Increase any auto_incremented primary key @@ -2323,10 +2773,10 @@ int ha_ndbcluster::write_row(byte *record) m_skip_auto_increment= FALSE; if ((error= update_auto_increment())) DBUG_RETURN(error); - m_skip_auto_increment= !auto_increment_column_changed; + m_skip_auto_increment= (insert_id_for_cur_row == 0); } } - + /* * If IGNORE the ignore constraint violations on primary and unique keys */ @@ -2347,18 +2797,33 @@ int ha_ndbcluster::write_row(byte *record) DBUG_RETURN(peek_res); } - statistic_increment(thd->status_var.ha_write_count, &LOCK_status); + ha_statistic_increment(&SSV::ha_write_count); if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) table->timestamp_field->set_time(); - if (!(op= trans->getNdbOperation((const NDBTAB *) m_table))) + if (!(op= trans->getNdbOperation(m_table))) ERR_RETURN(trans->getNdbError()); res= (m_use_write) ? 
op->writeTuple() :op->insertTuple(); if (res != 0) ERR_RETURN(trans->getNdbError()); - if (table->s->primary_key == MAX_KEY) + if (m_use_partition_function) + { + uint32 part_id; + int error; + my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); + error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value); + dbug_tmp_restore_column_map(table->read_set, old_map); + if (error) + { + m_part_info->err_value= func_value; + DBUG_RETURN(error); + } + op->setPartitionId(part_id); + } + + if (table_share->primary_key == MAX_KEY) { // Table has hidden primary key Ndb *ndb= get_ndb(); @@ -2367,41 +2832,76 @@ int ha_ndbcluster::write_row(byte *record) int retry_sleep= 30; /* 30 milliseconds, transaction */ for (;;) { - if (ndb->getAutoIncrementValue((const NDBTAB *) m_table, - auto_value, 1) == -1) + Ndb_tuple_id_range_guard g(m_share); + if (ndb->getAutoIncrementValue(m_table, g.range, auto_value, 1) == -1) { - if (--retries && - ndb->getNdbError().status == NdbError::TemporaryError) - { - my_sleep(retry_sleep); - continue; - } - ERR_RETURN(ndb->getNdbError()); + if (--retries && + ndb->getNdbError().status == NdbError::TemporaryError) + { + my_sleep(retry_sleep); + continue; + } + ERR_RETURN(ndb->getNdbError()); } break; } - if (set_hidden_key(op, table->s->fields, (const byte*)&auto_value)) + if (set_hidden_key(op, table_share->fields, (const uchar*)&auto_value)) ERR_RETURN(op->getNdbError()); } - else + else { - if ((res= set_primary_key_from_record(op, record))) - return res; + int error; + if ((error= set_primary_key_from_record(op, record))) + DBUG_RETURN(error); } // Set non-key attribute(s) bool set_blob_value= FALSE; - for (i= 0; i < table->s->fields; i++) + my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); + for (i= 0; i < table_share->fields; i++) { Field *field= table->field[i]; if (!(field->flags & PRI_KEY_FLAG) && - set_ndb_value(op, field, i, &set_blob_value)) + (bitmap_is_set(table->write_set, i) || !m_use_write) && + set_ndb_value(op, field, i, record-table->record[0], &set_blob_value)) { m_skip_auto_increment= TRUE; + dbug_tmp_restore_column_map(table->read_set, old_map); ERR_RETURN(op->getNdbError()); } } + dbug_tmp_restore_column_map(table->read_set, old_map); + + if (m_use_partition_function) + { + /* + We need to set the value of the partition function value in + NDB since the NDB kernel doesn't have easy access to the function + to calculate the value. + */ + if (func_value >= INT_MAX32) + func_value= INT_MAX32; + uint32 part_func_value= (uint32)func_value; + uint no_fields= table_share->fields; + if (table_share->primary_key == MAX_KEY) + no_fields++; + op->setValue(no_fields, part_func_value); + } + if (unlikely(m_slow_path)) + { + /* + ignore TNTO_NO_LOGGING for slave thd. It is used to indicate + log-slave-updates option. This is instead handled in the + injector thread, by looking explicitly at the + opt_log_slave_updates flag. 
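For partitioned tables, write_row() (and later update_row()) stores the partition function value in the first column after the user-visible fields, one slot further if the table uses the hidden primary key, after clamping it to a 32-bit value. A small sketch of that index and value computation follows; the field counts in the example are illustrative.

// Sketch of where the partition-function value is placed and how it is clamped.
#include <cstdint>
#include <cstdio>

struct PartFuncCell { uint32_t column_no; uint32_t value; };

PartFuncCell part_func_cell(uint32_t user_fields, bool hidden_pk,
                            long long func_value) {
  if (func_value >= INT32_MAX)
    func_value = INT32_MAX;        // the NDB-side column holds a 32-bit value
  uint32_t col = user_fields;      // first column after the user fields
  if (hidden_pk)
    ++col;                         // hidden primary key occupies that slot
  return {col, static_cast<uint32_t>(func_value)};
}

int main() {
  PartFuncCell c = part_func_cell(5, true, 123);
  std::printf("setValue(%u, %u)\n", c.column_no, c.value);  // setValue(6, 123)
}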
+ */ + Thd_ndb *thd_ndb= get_thd_ndb(thd); + if (thd->slave_thread) + op->setAnyValue(thd->server_id); + else if (thd_ndb->trans_options & TNTO_NO_LOGGING) + op->setAnyValue(NDB_ANYVALUE_FOR_NOLOGGING); + } m_rows_changed++; /* @@ -2421,13 +2921,13 @@ int ha_ndbcluster::write_row(byte *record) { // Send rows to NDB DBUG_PRINT("info", ("Sending inserts to NDB, "\ - "rows_inserted:%d, bulk_insert_rows: %d", + "rows_inserted: %d bulk_insert_rows: %d", (int)m_rows_inserted, (int)m_bulk_insert_rows)); m_bulk_insert_not_flushed= FALSE; if (m_transaction_on) { - if (execute_no_commit(this,trans,false) != 0) + if (execute_no_commit(this,trans,FALSE) != 0) { m_skip_auto_increment= TRUE; no_uncommitted_rows_execute_failure(); @@ -2459,14 +2959,15 @@ int ha_ndbcluster::write_row(byte *record) } m_skip_auto_increment= TRUE; + DBUG_PRINT("exit",("ok")); DBUG_RETURN(0); } /* Compare if a key in a row has changed */ -int ha_ndbcluster::key_cmp(uint keynr, const byte * old_row, - const byte * new_row) +int ha_ndbcluster::key_cmp(uint keynr, const uchar * old_row, + const uchar * new_row) { KEY_PART_INFO *key_part=table->key_info[keynr].key_part; KEY_PART_INFO *end=key_part+table->key_info[keynr].key_parts; @@ -2482,8 +2983,8 @@ int ha_ndbcluster::key_cmp(uint keynr, const byte * old_row, if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) { - if (key_part->field->cmp_binary((char*) (old_row + key_part->offset), - (char*) (new_row + key_part->offset), + if (key_part->field->cmp_binary((old_row + key_part->offset), + (new_row + key_part->offset), (ulong) key_part->length)) return 1; } @@ -2501,17 +3002,20 @@ int ha_ndbcluster::key_cmp(uint keynr, const byte * old_row, Update one record in NDB using primary key */ -int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) +int ha_ndbcluster::update_row(const uchar *old_data, uchar *new_data) { - THD *thd= current_thd; + THD *thd= table->in_use; NdbTransaction *trans= m_active_trans; NdbScanOperation* cursor= m_active_cursor; NdbOperation *op; uint i; - int auto_res; - bool pk_update= (table->s->primary_key != MAX_KEY && - key_cmp(table->s->primary_key, old_data, new_data)); + uint32 old_part_id= 0, new_part_id= 0; + int error; + longlong func_value; + bool pk_update= (table_share->primary_key != MAX_KEY && + key_cmp(table_share->primary_key, old_data, new_data)); DBUG_ENTER("update_row"); + m_write_op= TRUE; /* * If IGNORE the ignore constraint violations on primary and unique keys, @@ -2531,25 +3035,37 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) DBUG_RETURN(peek_res); } - statistic_increment(thd->status_var.ha_update_count, &LOCK_status); + ha_statistic_increment(&SSV::ha_update_count); if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) { table->timestamp_field->set_time(); - // Set query_id so that field is really updated - table->timestamp_field->query_id= thd->query_id; + bitmap_set_bit(table->write_set, table->timestamp_field->field_index); + } + + if (m_use_partition_function && + (error= get_parts_for_update(old_data, new_data, table->record[0], + m_part_info, &old_part_id, &new_part_id, + &func_value))) + { + m_part_info->err_value= func_value; + DBUG_RETURN(error); } - /* Check for update of primary key for special handling */ - if (pk_update) + /* + * Check for update of primary key or partition change + * for special handling + */ + if (pk_update || old_part_id != new_part_id) { int read_res, insert_res, delete_res, undo_res; - DBUG_PRINT("info", ("primary key update, doing pk 
read+delete+insert")); + DBUG_PRINT("info", ("primary key update or partition change, " + "doing read+delete+insert")); // Get all old fields, since we optimize away fields not in query - read_res= complemented_pk_read(old_data, new_data); + read_res= complemented_read(old_data, new_data, old_part_id); if (read_res) { - DBUG_PRINT("info", ("pk read failed")); + DBUG_PRINT("info", ("read failed")); DBUG_RETURN(read_res); } // Delete old row @@ -2569,10 +3085,11 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) then we need to update the auto_increment counter */ if (table->found_next_number_field && - table->found_next_number_field->query_id == thd->query_id && - (auto_res= set_auto_inc(table->found_next_number_field))) + bitmap_is_set(table->write_set, + table->found_next_number_field->field_index) && + (error= set_auto_inc(table->found_next_number_field))) { - DBUG_RETURN(auto_res); + DBUG_RETURN(error); } insert_res= write_row(new_data); m_primary_key_update= FALSE; @@ -2583,7 +3100,7 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) { // Undo delete_row(old_data) m_primary_key_update= TRUE; - undo_res= write_row((byte *)old_data); + undo_res= write_row((uchar *)old_data); if (undo_res) push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, @@ -2601,10 +3118,11 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) then we need to update the auto_increment counter */ if (table->found_next_number_field && - table->found_next_number_field->query_id == thd->query_id && - (auto_res= set_auto_inc(table->found_next_number_field))) + bitmap_is_set(table->write_set, + table->found_next_number_field->field_index) && + (error= set_auto_inc(table->found_next_number_field))) { - DBUG_RETURN(auto_res); + DBUG_RETURN(error); } if (cursor) { @@ -2618,18 +3136,22 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) DBUG_PRINT("info", ("Calling updateTuple on cursor")); if (!(op= cursor->updateCurrentTuple())) ERR_RETURN(trans->getNdbError()); - m_lock_tuple= false; + m_lock_tuple= FALSE; m_ops_pending++; - if (uses_blob_value(FALSE)) + if (uses_blob_value()) m_blobs_pending= TRUE; + if (m_use_partition_function) + cursor->setPartitionId(new_part_id); } else { - if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) || + if (!(op= trans->getNdbOperation(m_table)) || op->updateTuple() != 0) ERR_RETURN(trans->getNdbError()); - if (table->s->primary_key == MAX_KEY) + if (m_use_partition_function) + op->setPartitionId(new_part_id); + if (table_share->primary_key == MAX_KEY) { // This table has no primary key, use "hidden" primary key DBUG_PRINT("info", ("Using hidden key")); @@ -2652,15 +3174,45 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) m_rows_changed++; // Set non-key attribute(s) - for (i= 0; i < table->s->fields; i++) + my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); + for (i= 0; i < table_share->fields; i++) { Field *field= table->field[i]; - if (((thd->query_id == field->query_id) || m_retrieve_all_fields) && + if (bitmap_is_set(table->write_set, i) && (!(field->flags & PRI_KEY_FLAG)) && - set_ndb_value(op, field, i)) + set_ndb_value(op, field, i, new_data - table->record[0])) + { + dbug_tmp_restore_column_map(table->read_set, old_map); ERR_RETURN(op->getNdbError()); + } } + dbug_tmp_restore_column_map(table->read_set, old_map); + if (m_use_partition_function) + { + if (func_value >= INT_MAX32) + func_value= INT_MAX32; + uint32 part_func_value= (uint32)func_value; + 
uint no_fields= table_share->fields; + if (table_share->primary_key == MAX_KEY) + no_fields++; + op->setValue(no_fields, part_func_value); + } + + if (unlikely(m_slow_path)) + { + /* + ignore TNTO_NO_LOGGING for slave thd. It is used to indicate + log-slave-updates option. This is instead handled in the + injector thread, by looking explicitly at the + opt_log_slave_updates flag. + */ + Thd_ndb *thd_ndb= get_thd_ndb(thd); + if (thd->slave_thread) + op->setAnyValue(thd->server_id); + else if (thd_ndb->trans_options & TNTO_NO_LOGGING) + op->setAnyValue(NDB_ANYVALUE_FOR_NOLOGGING); + } /* Execute update operation if we are not doing a scan for update and there exist UPDATE AFTER triggers @@ -2680,17 +3232,27 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) Delete one record from NDB, using primary key */ -int ha_ndbcluster::delete_row(const byte *record) +int ha_ndbcluster::delete_row(const uchar *record) { - THD *thd= current_thd; + THD *thd= table->in_use; NdbTransaction *trans= m_active_trans; NdbScanOperation* cursor= m_active_cursor; NdbOperation *op; + uint32 part_id; + int error; DBUG_ENTER("delete_row"); + m_write_op= TRUE; - statistic_increment(thd->status_var.ha_delete_count,&LOCK_status); + ha_statistic_increment(&SSV::ha_delete_count); m_rows_changed++; + if (m_use_partition_function && + (error= get_part_for_delete(record, table->record[0], m_part_info, + &part_id))) + { + DBUG_RETURN(error); + } + if (cursor) { /* @@ -2703,11 +3265,30 @@ int ha_ndbcluster::delete_row(const byte *record) DBUG_PRINT("info", ("Calling deleteTuple on cursor")); if (cursor->deleteCurrentTuple() != 0) ERR_RETURN(trans->getNdbError()); - m_lock_tuple= false; + m_lock_tuple= FALSE; m_ops_pending++; + if (m_use_partition_function) + cursor->setPartitionId(part_id); + no_uncommitted_rows_update(-1); + if (unlikely(m_slow_path)) + { + /* + ignore TNTO_NO_LOGGING for slave thd. It is used to indicate + log-slave-updates option. This is instead handled in the + injector thread, by looking explicitly at the + opt_log_slave_updates flag. + */ + Thd_ndb *thd_ndb= get_thd_ndb(thd); + if (thd->slave_thread) + ((NdbOperation *)trans->getLastDefinedOperation())-> + setAnyValue(thd->server_id); + else if (thd_ndb->trans_options & TNTO_NO_LOGGING) + ((NdbOperation *)trans->getLastDefinedOperation())-> + setAnyValue(NDB_ANYVALUE_FOR_NOLOGGING); + } if (!(m_primary_key_update || m_delete_cannot_batch)) // If deleting from cursor, NoCommit will be handled in next_result DBUG_RETURN(0); @@ -2715,13 +3296,16 @@ int ha_ndbcluster::delete_row(const byte *record) else { - if (!(op=trans->getNdbOperation((const NDBTAB *) m_table)) || + if (!(op=trans->getNdbOperation(m_table)) || op->deleteTuple() != 0) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function) + op->setPartitionId(part_id); + no_uncommitted_rows_update(-1); - if (table->s->primary_key == MAX_KEY) + if (table_share->primary_key == MAX_KEY) { // This table has no primary key, use "hidden" primary key DBUG_PRINT("info", ("Using hidden key")); @@ -2731,14 +3315,28 @@ int ha_ndbcluster::delete_row(const byte *record) } else { - int res; - if ((res= set_primary_key_from_record(op, record))) - return res; + if ((error= set_primary_key_from_record(op, record))) + DBUG_RETURN(error); + } + + if (unlikely(m_slow_path)) + { + /* + ignore TNTO_NO_LOGGING for slave thd. It is used to indicate + log-slave-updates option. This is instead handled in the + injector thread, by looking explicitly at the + opt_log_slave_updates flag. 
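On the slow path every modifying operation is tagged with an anyValue so the binlog injector can act on it later: a slave thread stamps its originating server_id (for log-slave-updates handling), while a transaction marked TNTO_NO_LOGGING stamps a reserved marker telling the injector to drop the change. The sketch below models only that choice; the marker constant and the thread state are simplified assumptions, not the server's definitions.

// Sketch of the anyValue tagging done by write_row/update_row/delete_row.
#include <cstdint>
#include <cstdio>
#include <optional>

constexpr uint32_t kAnyValueNoLogging = 0xFFFFFFFF;  // illustrative marker only

struct ThdState {
  bool slave_thread;
  bool trans_no_logging;   // corresponds to TNTO_NO_LOGGING being set
  uint32_t server_id;
};

std::optional<uint32_t> any_value_for(const ThdState &thd) {
  if (thd.slave_thread)
    return thd.server_id;        // injector sees the originating server
  if (thd.trans_no_logging)
    return kAnyValueNoLogging;   // injector keeps the change out of the binlog
  return std::nullopt;           // normal path: no tag attached
}

int main() {
  ThdState slave{true, false, 7};
  if (auto v = any_value_for(slave))
    std::printf("setAnyValue(%u)\n", *v);  // setAnyValue(7)
}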
+ */ + Thd_ndb *thd_ndb= get_thd_ndb(thd); + if (thd->slave_thread) + op->setAnyValue(thd->server_id); + else if (thd_ndb->trans_options & TNTO_NO_LOGGING) + op->setAnyValue(NDB_ANYVALUE_FOR_NOLOGGING); } } // Execute delete operation - if (execute_no_commit(this,trans,false) != 0) { + if (execute_no_commit(this,trans,FALSE) != 0) { no_uncommitted_rows_execute_failure(); DBUG_RETURN(ndb_err(trans)); } @@ -2759,37 +3357,67 @@ int ha_ndbcluster::delete_row(const byte *record) set to null. */ -void ha_ndbcluster::unpack_record(byte* buf) +void ndb_unpack_record(TABLE *table, NdbValue *value, + MY_BITMAP *defined, uchar *buf) { - uint row_offset= (uint) (buf - table->record[0]); - Field **field, **end; - NdbValue *value= m_value; - DBUG_ENTER("unpack_record"); + Field **p_field= table->field, *field= *p_field; + my_ptrdiff_t row_offset= (my_ptrdiff_t) (buf - table->record[0]); + my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); + DBUG_ENTER("ndb_unpack_record"); - end= table->field + table->s->fields; - - // Set null flag(s) - bzero(buf, table->s->null_bytes); - for (field= table->field; - field < end; - field++, value++) + /* + Set the filler bits of the null byte, since they are + not touched in the code below. + + The filler bits are the MSBs in the last null byte + */ + if (table->s->null_bytes > 0) + buf[table->s->null_bytes - 1]|= 256U - (1U << + table->s->last_null_bit_pos); + /* + Set null flag(s) + */ + for ( ; field; + p_field++, value++, field= *p_field) { + field->set_notnull(row_offset); if ((*value).ptr) { - if (! ((*field)->flags & BLOB_FLAG)) + if (!(field->flags & BLOB_FLAG)) { - if ((*value).rec->isNULL()) - (*field)->set_null(row_offset); - else if ((*field)->type() == MYSQL_TYPE_BIT) + int is_null= (*value).rec->isNULL(); + if (is_null) { - uint pack_len= (*field)->pack_length(); - if (pack_len < 5) + if (is_null > 0) + { + DBUG_PRINT("info",("[%u] NULL", + (*value).rec->getColumn()->getColumnNo())); + field->set_null(row_offset); + } + else + { + DBUG_PRINT("info",("[%u] UNDEFINED", + (*value).rec->getColumn()->getColumnNo())); + bitmap_clear_bit(defined, + (*value).rec->getColumn()->getColumnNo()); + } + } + else if (field->type() == MYSQL_TYPE_BIT) + { + Field_bit *field_bit= static_cast<Field_bit*>(field); + + /* + Move internal field pointer to point to 'buf'. Calling + the correct member function directly since we know the + type of the object. + */ + field_bit->Field_bit::move_field_offset(row_offset); + if (field->pack_length() < 5) { DBUG_PRINT("info", ("bit field H'%.8X", (*value).rec->u_32_value())); - ((Field_bit *) *field)->store((longlong) - (*value).rec->u_32_value(), - FALSE); + field_bit->Field_bit::store((longlong) (*value).rec->u_32_value(), + FALSE); } else { @@ -2799,56 +3427,95 @@ void ha_ndbcluster::unpack_record(byte* buf) #ifdef WORDS_BIGENDIAN /* lsw is stored first */ Uint32 *buf= (Uint32 *)(*value).rec->aRef(); - ((Field_bit *) *field)->store((((longlong)*buf) - & 0x000000000FFFFFFFFLL) - | - ((((longlong)*(buf+1)) << 32) - & 0xFFFFFFFF00000000LL), - TRUE); + field_bit->Field_bit::store((((longlong)*buf) + & 0x000000000FFFFFFFFLL) + | + ((((longlong)*(buf+1)) << 32) + & 0xFFFFFFFF00000000LL), + TRUE); #else - ((Field_bit *) *field)->store((longlong) - (*value).rec->u_64_value(), TRUE); + field_bit->Field_bit::store((longlong) + (*value).rec->u_64_value(), TRUE); #endif } + /* + Move back internal field pointer to point to original + value (usually record[0]). 
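Earlier in ndb_unpack_record() the unused filler bits of the last null byte are forced to 1 up front, since the per-column loop never touches them; the mask 256U - (1U << last_null_bit_pos) is exactly the set of bits above the last used null-bit position. A tiny demo of that mask, with an assumed bit position:

// Demo of the filler-bit mask applied to the last null byte.
#include <cstdint>
#include <cstdio>

uint8_t filler_mask(unsigned last_null_bit_pos) {
  // e.g. last_null_bit_pos == 3 -> used bits 0..2, filler bits 3..7 -> 0xF8
  return static_cast<uint8_t>(256U - (1U << last_null_bit_pos));
}

int main() {
  uint8_t last_null_byte = 0x05;             // whatever unpacking left there
  last_null_byte |= filler_mask(3);          // force the filler MSBs to 1
  std::printf("0x%02X\n", last_null_byte);   // prints 0xFD
}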
+ */ + field_bit->Field_bit::move_field_offset(-row_offset); + DBUG_PRINT("info",("[%u] SET", + (*value).rec->getColumn()->getColumnNo())); + DBUG_DUMP("info", field->ptr, field->pack_length()); + } + else + { + DBUG_PRINT("info",("[%u] SET", + (*value).rec->getColumn()->getColumnNo())); + DBUG_DUMP("info", field->ptr, field->pack_length()); } } else { - NdbBlob* ndb_blob= (*value).blob; - bool isNull= TRUE; + NdbBlob *ndb_blob= (*value).blob; + uint col_no = ndb_blob->getColumn()->getColumnNo(); + int isNull; + ndb_blob->getDefined(isNull); + if (isNull == 1) + { + DBUG_PRINT("info",("[%u] NULL", col_no)); + field->set_null(row_offset); + } + else if (isNull == -1) + { + DBUG_PRINT("info",("[%u] UNDEFINED", col_no)); + bitmap_clear_bit(defined, col_no); + } + else + { #ifndef DBUG_OFF - int ret= + // pointer vas set in get_ndb_blobs_value + Field_blob *field_blob= (Field_blob*)field; + uchar *ptr; + field_blob->get_ptr(&ptr, row_offset); + uint32 len= field_blob->get_length(row_offset); + DBUG_PRINT("info",("[%u] SET ptr: 0x%lx len: %u", + col_no, (long) ptr, len)); #endif - ndb_blob->getNull(isNull); - DBUG_ASSERT(ret == 0); - if (isNull) - (*field)->set_null(row_offset); + } } } } - + dbug_tmp_restore_column_map(table->write_set, old_map); + DBUG_VOID_RETURN; +} + +void ha_ndbcluster::unpack_record(uchar *buf) +{ + ndb_unpack_record(table, m_value, 0, buf); #ifndef DBUG_OFF // Read and print all values that was fetched - if (table->s->primary_key == MAX_KEY) + if (table_share->primary_key == MAX_KEY) { // Table with hidden primary key - int hidden_no= table->s->fields; + int hidden_no= table_share->fields; + const NDBTAB *tab= m_table; char buff[22]; - const NDBTAB *tab= (const NDBTAB *) m_table; const NDBCOL *hidden_col= tab->getColumn(hidden_no); const NdbRecAttr* rec= m_value[hidden_no].rec; DBUG_ASSERT(rec); - DBUG_PRINT("hidden", ("%d: %s \"%s\"", hidden_no, + DBUG_PRINT("hidden", ("%d: %s \"%s\"", hidden_no, hidden_col->getName(), llstr(rec->u_64_value(), buff))); } - //print_results(); + //DBUG_EXECUTE("value", print_results();); #endif - DBUG_VOID_RETURN; } /* Utility function to print/dump the fetched field + to avoid unnecessary work, wrap in DBUG_EXECUTE as in: + + DBUG_EXECUTE("value", print_results();); */ void ha_ndbcluster::print_results() @@ -2856,13 +3523,11 @@ void ha_ndbcluster::print_results() DBUG_ENTER("print_results"); #ifndef DBUG_OFF - if (!_db_on_) - DBUG_VOID_RETURN; char buf_type[MAX_FIELD_WIDTH], buf_val[MAX_FIELD_WIDTH]; String type(buf_type, sizeof(buf_type), &my_charset_bin); String val(buf_val, sizeof(buf_val), &my_charset_bin); - for (uint f= 0; f < table->s->fields; f++) + for (uint f= 0; f < table_share->fields; f++) { /* Use DBUG_PRINT since DBUG_FILE cannot be filtered out */ char buf[2000]; @@ -2910,17 +3575,19 @@ print_value: } -int ha_ndbcluster::index_init(uint index) +int ha_ndbcluster::index_init(uint index, bool sorted) { DBUG_ENTER("ha_ndbcluster::index_init"); - DBUG_PRINT("enter", ("index: %u", index)); - /* + DBUG_PRINT("enter", ("index: %u sorted: %d", index, sorted)); + active_index= index; + m_sorted= sorted; + /* Locks are are explicitly released in scan unless m_lock.type == TL_READ_HIGH_PRIORITY and no sub-sequent call to unlock_row() - */ - m_lock_tuple= false; - DBUG_RETURN(handler::index_init(index)); + */ + m_lock_tuple= FALSE; + DBUG_RETURN(0); } @@ -2931,17 +3598,16 @@ int ha_ndbcluster::index_end() } /** - * Check if key contains nullable columns + * Check if key contains null */ static int -check_null_in_key(const KEY* 
key_info, const byte *key, uint key_len) +check_null_in_key(const KEY* key_info, const uchar *key, uint key_len) { KEY_PART_INFO *curr_part, *end_part; - const byte* end_ptr= key + key_len; + const uchar* end_ptr= key + key_len; curr_part= key_info->key_part; end_part= curr_part + key_info->key_parts; - for (; curr_part != end_part && key < end_ptr; curr_part++) { @@ -2953,59 +3619,20 @@ check_null_in_key(const KEY* key_info, const byte *key, uint key_len) return 0; } -int ha_ndbcluster::index_read(byte *buf, - const byte *key, uint key_len, +int ha_ndbcluster::index_read(uchar *buf, + const uchar *key, uint key_len, enum ha_rkey_function find_flag) { + key_range start_key; + bool descending= FALSE; DBUG_ENTER("ha_ndbcluster::index_read"); DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d", active_index, key_len, find_flag)); - int error; - ndb_index_type type= get_index_type(active_index); - const KEY* key_info= table->key_info+active_index; - switch (type){ - case PRIMARY_KEY_ORDERED_INDEX: - case PRIMARY_KEY_INDEX: - if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len) - { - if (m_active_cursor && (error= close_scan())) - DBUG_RETURN(error); - DBUG_RETURN(pk_read(key, key_len, buf)); - } - else if (type == PRIMARY_KEY_INDEX) - { - DBUG_RETURN(1); - } - break; - case UNIQUE_ORDERED_INDEX: - case UNIQUE_INDEX: - if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len && - !check_null_in_key(key_info, key, key_len)) - { - if (m_active_cursor && (error= close_scan())) - DBUG_RETURN(error); - DBUG_RETURN(unique_index_read(key, key_len, buf)); - } - else if (type == UNIQUE_INDEX) - { - DBUG_RETURN(unique_index_scan(key_info, key, key_len, buf)); - } - break; - case ORDERED_INDEX: - break; - default: - case UNDEFINED_INDEX: - DBUG_ASSERT(FALSE); - DBUG_RETURN(1); - break; - } - - key_range start_key; start_key.key= key; start_key.length= key_len; start_key.flag= find_flag; - bool descending= FALSE; + descending= FALSE; switch (find_flag) { case HA_READ_KEY_OR_PREV: case HA_READ_BEFORE_KEY: @@ -3016,101 +3643,113 @@ int ha_ndbcluster::index_read(byte *buf, default: break; } - error= ordered_index_scan(&start_key, 0, TRUE, descending, buf); - DBUG_RETURN(error == HA_ERR_END_OF_FILE ? 
HA_ERR_KEY_NOT_FOUND : error); -} - - -int ha_ndbcluster::index_read_idx(byte *buf, uint index_no, - const byte *key, uint key_len, - enum ha_rkey_function find_flag) -{ - statistic_increment(current_thd->status_var.ha_read_key_count, &LOCK_status); - DBUG_ENTER("ha_ndbcluster::index_read_idx"); - DBUG_PRINT("enter", ("index_no: %u, key_len: %u", index_no, key_len)); - index_init(index_no); - DBUG_RETURN(index_read(buf, key, key_len, find_flag)); + DBUG_RETURN(read_range_first_to_buf(&start_key, 0, descending, + m_sorted, buf)); } -int ha_ndbcluster::index_next(byte *buf) +int ha_ndbcluster::index_next(uchar *buf) { DBUG_ENTER("ha_ndbcluster::index_next"); - statistic_increment(current_thd->status_var.ha_read_next_count, - &LOCK_status); + ha_statistic_increment(&SSV::ha_read_next_count); DBUG_RETURN(next_result(buf)); } -int ha_ndbcluster::index_prev(byte *buf) +int ha_ndbcluster::index_prev(uchar *buf) { DBUG_ENTER("ha_ndbcluster::index_prev"); - statistic_increment(current_thd->status_var.ha_read_prev_count, - &LOCK_status); + ha_statistic_increment(&SSV::ha_read_prev_count); DBUG_RETURN(next_result(buf)); } -int ha_ndbcluster::index_first(byte *buf) +int ha_ndbcluster::index_first(uchar *buf) { DBUG_ENTER("ha_ndbcluster::index_first"); - statistic_increment(current_thd->status_var.ha_read_first_count, - &LOCK_status); + ha_statistic_increment(&SSV::ha_read_first_count); // Start the ordered index scan and fetch the first row // Only HA_READ_ORDER indexes get called by index_first - DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf)); + DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf, NULL)); } -int ha_ndbcluster::index_last(byte *buf) +int ha_ndbcluster::index_last(uchar *buf) { DBUG_ENTER("ha_ndbcluster::index_last"); - statistic_increment(current_thd->status_var.ha_read_last_count,&LOCK_status); - DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf)); + ha_statistic_increment(&SSV::ha_read_last_count); + DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf, NULL)); } -int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len) +int ha_ndbcluster::index_read_last(uchar * buf, const uchar * key, uint key_len) { DBUG_ENTER("ha_ndbcluster::index_read_last"); DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST)); } -inline int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key, const key_range *end_key, - bool eq_r, bool sorted, - byte* buf) + bool desc, bool sorted, + uchar* buf) { - ndb_index_type type= get_index_type(active_index); -KEY* key_info; - int error= 1; + part_id_range part_spec; + ndb_index_type type= get_index_type(active_index); + const KEY* key_info= table->key_info+active_index; + int error; DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf"); - DBUG_PRINT("info", ("eq_r: %d, sorted: %d", eq_r, sorted)); + DBUG_PRINT("info", ("desc: %d, sorted: %d", desc, sorted)); + if (m_use_partition_function) + { + get_partition_set(table, buf, active_index, start_key, &part_spec); + DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u", + part_spec.start_part, part_spec.end_part)); + /* + If partition pruning has found no partition in set + we can return HA_ERR_END_OF_FILE + If partition pruning has found exactly one partition in set + we can optimize scan to run towards that partition only. 
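With index_read() reduced to a wrapper, read_range_first_to_buf() chooses the access path from the index type: an exact, full-length key on the primary key or a unique index becomes a single-row read, with HA_ERR_KEY_NOT_FOUND remapped to HA_ERR_END_OF_FILE so the range layer simply sees an empty result, and everything else falls back to a scan. The sketch below is a deliberately simplified model of that dispatch; the real code distinguishes more index variants.

// Simplified sketch of the access-path choice in read_range_first_to_buf().
#include <cstdio>

enum class IndexType { PrimaryKey, UniqueIndex, OrderedIndex };
enum class Access { PkRead, UniqueIndexRead, OrderedScan };

Access choose_access(IndexType type, bool exact_full_key, bool key_has_null) {
  switch (type) {
    case IndexType::PrimaryKey:
      if (exact_full_key) return Access::PkRead;
      break;
    case IndexType::UniqueIndex:
      if (exact_full_key && !key_has_null) return Access::UniqueIndexRead;
      break;
    case IndexType::OrderedIndex:
      break;
  }
  return Access::OrderedScan;   // fall back to an ordered/unique index scan
}

int main() {
  std::printf("%d\n", static_cast<int>(
      choose_access(IndexType::UniqueIndex, true, false)));  // UniqueIndexRead
}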
+ */ + if (part_spec.start_part > part_spec.end_part) + { + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + else if (part_spec.start_part == part_spec.end_part) + { + /* + Only one partition is required to scan, if sorted is required we + don't need it any more since output from one ordered partitioned + index is always sorted. + */ + sorted= FALSE; + } + } + + m_write_op= FALSE; switch (type){ case PRIMARY_KEY_ORDERED_INDEX: case PRIMARY_KEY_INDEX: - key_info= table->key_info + active_index; if (start_key && start_key->length == key_info->key_length && start_key->flag == HA_READ_KEY_EXACT) { if (m_active_cursor && (error= close_scan())) DBUG_RETURN(error); - error= pk_read(start_key->key, start_key->length, buf); + error= pk_read(start_key->key, start_key->length, buf, + part_spec.start_part); DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error); } break; case UNIQUE_ORDERED_INDEX: case UNIQUE_INDEX: - key_info= table->key_info + active_index; if (start_key && start_key->length == key_info->key_length && start_key->flag == HA_READ_KEY_EXACT && !check_null_in_key(key_info, start_key->key, start_key->length)) { if (m_active_cursor && (error= close_scan())) DBUG_RETURN(error); + error= unique_index_read(start_key->key, start_key->length, buf); DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error); } @@ -3123,25 +3762,19 @@ KEY* key_info; default: break; } - // Start the ordered index scan and fetch the first row - error= ordered_index_scan(start_key, end_key, sorted, FALSE, buf); - DBUG_RETURN(error); + DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf, + &part_spec)); } - int ha_ndbcluster::read_range_first(const key_range *start_key, const key_range *end_key, bool eq_r, bool sorted) { - byte* buf= table->record[0]; + uchar* buf= table->record[0]; DBUG_ENTER("ha_ndbcluster::read_range_first"); - - DBUG_RETURN(read_range_first_to_buf(start_key, - end_key, - eq_r, - sorted, - buf)); + DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE, + sorted, buf)); } int ha_ndbcluster::read_range_next() @@ -3167,7 +3800,7 @@ int ha_ndbcluster::rnd_init(bool scan) DBUG_RETURN(-1); } } - index_init(table->s->primary_key); + index_init(table_share->primary_key, 0); DBUG_RETURN(0); } @@ -3178,10 +3811,10 @@ int ha_ndbcluster::close_scan() m_multi_cursor= 0; if (!m_active_cursor && !m_multi_cursor) - DBUG_RETURN(1); + DBUG_RETURN(0); NdbScanOperation *cursor= m_active_cursor ? 
m_active_cursor : m_multi_cursor; - + if (m_lock_tuple) { /* @@ -3196,12 +3829,12 @@ int ha_ndbcluster::close_scan() if (!(op= cursor->lockCurrentTuple())) { - m_lock_tuple= false; + m_lock_tuple= FALSE; ERR_RETURN(trans->getNdbError()); } m_ops_pending++; } - m_lock_tuple= false; + m_lock_tuple= FALSE; if (m_ops_pending) { /* @@ -3209,7 +3842,7 @@ int ha_ndbcluster::close_scan() deleteing/updating transaction before closing the scan */ DBUG_PRINT("info", ("ops_pending: %ld", (long) m_ops_pending)); - if (execute_no_commit(this,trans,false) != 0) { + if (execute_no_commit(this,trans,FALSE) != 0) { no_uncommitted_rows_execute_failure(); DBUG_RETURN(ndb_err(trans)); } @@ -3228,11 +3861,10 @@ int ha_ndbcluster::rnd_end() } -int ha_ndbcluster::rnd_next(byte *buf) +int ha_ndbcluster::rnd_next(uchar *buf) { DBUG_ENTER("rnd_next"); - statistic_increment(current_thd->status_var.ha_read_rnd_next_count, - &LOCK_status); + ha_statistic_increment(&SSV::ha_read_rnd_next_count); if (!m_active_cursor) DBUG_RETURN(full_table_scan(buf)); @@ -3247,14 +3879,43 @@ int ha_ndbcluster::rnd_next(byte *buf) again */ -int ha_ndbcluster::rnd_pos(byte *buf, byte *pos) +int ha_ndbcluster::rnd_pos(uchar *buf, uchar *pos) { DBUG_ENTER("rnd_pos"); - statistic_increment(current_thd->status_var.ha_read_rnd_count, - &LOCK_status); + ha_statistic_increment(&SSV::ha_read_rnd_count); // The primary key for the record is stored in pos // Perform a pk_read using primary key "index" - DBUG_RETURN(pk_read(pos, ref_length, buf)); + { + part_id_range part_spec; + uint key_length= ref_length; + if (m_use_partition_function) + { + if (table_share->primary_key == MAX_KEY) + { + /* + The partition id has been fetched from ndb + and has been stored directly after the hidden key + */ + DBUG_DUMP("key+part", pos, key_length); + key_length= ref_length - sizeof(m_part_id); + part_spec.start_part= part_spec.end_part= *(uint32 *)(pos + key_length); + } + else + { + key_range key_spec; + KEY *key_info= table->key_info + table_share->primary_key; + key_spec.key= pos; + key_spec.length= key_length; + key_spec.flag= HA_READ_KEY_EXACT; + get_full_part_id_from_key(table, buf, key_info, + &key_spec, &part_spec); + DBUG_ASSERT(part_spec.start_part == part_spec.end_part); + } + DBUG_PRINT("info", ("partition id %u", part_spec.start_part)); + } + DBUG_DUMP("key", pos, key_length); + DBUG_RETURN(pk_read(pos, key_length, buf, part_spec.start_part)); + } } @@ -3264,17 +3925,20 @@ int ha_ndbcluster::rnd_pos(byte *buf, byte *pos) using "reference" in rnd_pos */ -void ha_ndbcluster::position(const byte *record) +void ha_ndbcluster::position(const uchar *record) { KEY *key_info; KEY_PART_INFO *key_part; KEY_PART_INFO *end; - byte *buff; + uchar *buff; + uint key_length; + DBUG_ENTER("position"); - if (table->s->primary_key != MAX_KEY) + if (table_share->primary_key != MAX_KEY) { - key_info= table->key_info + table->s->primary_key; + key_length= ref_length; + key_info= table->key_info + table_share->primary_key; key_part= key_info->key_part; end= key_part + key_info->key_parts; buff= ref; @@ -3292,7 +3956,7 @@ void ha_ndbcluster::position(const byte *record) } size_t len = key_part->length; - const byte * ptr = record + key_part->offset; + const uchar * ptr = record + key_part->offset; Field *field = key_part->field; if (field->type() == MYSQL_TYPE_VARCHAR) { @@ -3322,18 +3986,30 @@ void ha_ndbcluster::position(const byte *record) { // No primary key, get hidden key DBUG_PRINT("info", ("Getting hidden key")); + // If table has user defined partition save the 
partition id as well + if(m_use_partition_function) + { + DBUG_PRINT("info", ("Saving partition id %u", m_part_id)); + key_length= ref_length - sizeof(m_part_id); + memcpy(ref+key_length, (void *)&m_part_id, sizeof(m_part_id)); + } + else + key_length= ref_length; #ifndef DBUG_OFF int hidden_no= table->s->fields; - const NDBTAB *tab= (const NDBTAB *) m_table; + const NDBTAB *tab= m_table; const NDBCOL *hidden_col= tab->getColumn(hidden_no); DBUG_ASSERT(hidden_col->getPrimaryKey() && hidden_col->getAutoIncrement() && - ref_length == NDB_HIDDEN_PRIMARY_KEY_LENGTH); + key_length == NDB_HIDDEN_PRIMARY_KEY_LENGTH); #endif - memcpy(ref, m_ref, ref_length); + memcpy(ref, m_ref, key_length); } - - DBUG_DUMP("ref", (char*)ref, ref_length); +#ifndef DBUG_OFF + if (table_share->primary_key == MAX_KEY && m_use_partition_function) + DBUG_DUMP("key+part", ref, key_length+sizeof(m_part_id)); +#endif + DBUG_DUMP("ref", ref, key_length); DBUG_VOID_RETURN; } @@ -3356,7 +4032,7 @@ int ha_ndbcluster::info(uint flag) if (m_table_info) { if (m_ha_not_exact_count) - records= 100; + stats.records= 100; else result= records_update(); } @@ -3365,23 +4041,24 @@ int ha_ndbcluster::info(uint flag) if ((my_errno= check_ndb_connection())) DBUG_RETURN(my_errno); Ndb *ndb= get_ndb(); + ndb->setDatabaseName(m_dbname); struct Ndb_statistics stat; if (ndb->setDatabaseName(m_dbname)) { DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM); } if (current_thd->variables.ndb_use_exact_count && - (result= ndb_get_table_statistics(this, true, ndb, m_tabname, &stat)) + (result= ndb_get_table_statistics(this, TRUE, ndb, m_table, &stat)) == 0) { - mean_rec_length= stat.row_size; - data_file_length= stat.fragment_memory; - records= stat.row_count; + stats.mean_rec_length= stat.row_size; + stats.data_file_length= stat.fragment_memory; + stats.records= stat.row_count; } else { - mean_rec_length= 0; - records= 100; + stats.mean_rec_length= 0; + stats.records= 100; } } } @@ -3403,18 +4080,19 @@ int ha_ndbcluster::info(uint flag) if ((my_errno= check_ndb_connection())) DBUG_RETURN(my_errno); Ndb *ndb= get_ndb(); + Ndb_tuple_id_range_guard g(m_share); Uint64 auto_increment_value64; - if (ndb->readAutoIncrementValue((const NDBTAB *) m_table, + if (ndb->readAutoIncrementValue(m_table, g.range, auto_increment_value64) == -1) { const NdbError err= ndb->getNdbError(); sql_print_error("Error %lu in readAutoIncrementValue(): %s", (ulong) err.code, err.message); - auto_increment_value= ~(Uint64)0; + stats.auto_increment_value= ~(ulonglong)0; } else - auto_increment_value= (ulonglong)auto_increment_value64; + stats.auto_increment_value= (ulonglong)auto_increment_value64; } } @@ -3425,86 +4103,23 @@ int ha_ndbcluster::info(uint flag) } +void ha_ndbcluster::get_dynamic_partition_info(PARTITION_INFO *stat_info, + uint part_id) +{ + /* + This functions should be fixed. Suggested fix: to + implement ndb function which retrives the statistics + about ndb partitions. 
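The position()/rnd_pos() pair above extends the row reference for hidden-primary-key tables with user-defined partitioning: the reference holds the hidden key followed by the partition id, and rnd_pos() peels the partition id off the end before doing the pk read. The sizes used below (8-byte hidden key, 4-byte partition id) follow the patch but are treated here as assumptions of the sketch.

// Sketch of packing and unpacking a "ref" of hidden key + partition id.
#include <cstdint>
#include <cstdio>
#include <cstring>

struct RowRef {
  unsigned char bytes[8 + sizeof(uint32_t)];
};

RowRef make_ref(uint64_t hidden_key, uint32_t part_id) {
  RowRef ref;
  std::memcpy(ref.bytes, &hidden_key, sizeof hidden_key);
  std::memcpy(ref.bytes + sizeof hidden_key, &part_id, sizeof part_id);
  return ref;
}

void read_ref(const RowRef &ref, uint64_t *hidden_key, uint32_t *part_id) {
  std::memcpy(hidden_key, ref.bytes, sizeof *hidden_key);
  std::memcpy(part_id, ref.bytes + sizeof *hidden_key, sizeof *part_id);
}

int main() {
  RowRef ref = make_ref(0x1122334455667788ULL, 3);
  uint64_t key; uint32_t part;
  read_ref(&ref, &key, &part);
  std::printf("key=%llx part=%u\n", (unsigned long long)key, part);
}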
+ */ + bzero((char*) stat_info, sizeof(PARTITION_INFO)); + return; +} + + int ha_ndbcluster::extra(enum ha_extra_function operation) { DBUG_ENTER("extra"); switch (operation) { - case HA_EXTRA_NORMAL: /* Optimize for space (def) */ - DBUG_PRINT("info", ("HA_EXTRA_NORMAL")); - break; - case HA_EXTRA_QUICK: /* Optimize for speed */ - DBUG_PRINT("info", ("HA_EXTRA_QUICK")); - break; - case HA_EXTRA_RESET: /* Reset database to after open */ - DBUG_PRINT("info", ("HA_EXTRA_RESET")); - reset(); - break; - case HA_EXTRA_CACHE: /* Cash record in HA_rrnd() */ - DBUG_PRINT("info", ("HA_EXTRA_CACHE")); - break; - case HA_EXTRA_NO_CACHE: /* End cacheing of records (def) */ - DBUG_PRINT("info", ("HA_EXTRA_NO_CACHE")); - break; - case HA_EXTRA_NO_READCHECK: /* No readcheck on update */ - DBUG_PRINT("info", ("HA_EXTRA_NO_READCHECK")); - break; - case HA_EXTRA_READCHECK: /* Use readcheck (def) */ - DBUG_PRINT("info", ("HA_EXTRA_READCHECK")); - break; - case HA_EXTRA_KEYREAD: /* Read only key to database */ - DBUG_PRINT("info", ("HA_EXTRA_KEYREAD")); - break; - case HA_EXTRA_NO_KEYREAD: /* Normal read of records (def) */ - DBUG_PRINT("info", ("HA_EXTRA_NO_KEYREAD")); - break; - case HA_EXTRA_NO_USER_CHANGE: /* No user is allowed to write */ - DBUG_PRINT("info", ("HA_EXTRA_NO_USER_CHANGE")); - break; - case HA_EXTRA_KEY_CACHE: - DBUG_PRINT("info", ("HA_EXTRA_KEY_CACHE")); - break; - case HA_EXTRA_NO_KEY_CACHE: - DBUG_PRINT("info", ("HA_EXTRA_NO_KEY_CACHE")); - break; - case HA_EXTRA_WAIT_LOCK: /* Wait until file is avalably (def) */ - DBUG_PRINT("info", ("HA_EXTRA_WAIT_LOCK")); - break; - case HA_EXTRA_NO_WAIT_LOCK: /* If file is locked, return quickly */ - DBUG_PRINT("info", ("HA_EXTRA_NO_WAIT_LOCK")); - break; - case HA_EXTRA_WRITE_CACHE: /* Use write cache in ha_write() */ - DBUG_PRINT("info", ("HA_EXTRA_WRITE_CACHE")); - break; - case HA_EXTRA_FLUSH_CACHE: /* flush write_record_cache */ - DBUG_PRINT("info", ("HA_EXTRA_FLUSH_CACHE")); - break; - case HA_EXTRA_NO_KEYS: /* Remove all update of keys */ - DBUG_PRINT("info", ("HA_EXTRA_NO_KEYS")); - break; - case HA_EXTRA_KEYREAD_CHANGE_POS: /* Keyread, but change pos */ - DBUG_PRINT("info", ("HA_EXTRA_KEYREAD_CHANGE_POS")); /* xxxxchk -r must be used */ - break; - case HA_EXTRA_REMEMBER_POS: /* Remember pos for next/prev */ - DBUG_PRINT("info", ("HA_EXTRA_REMEMBER_POS")); - break; - case HA_EXTRA_RESTORE_POS: - DBUG_PRINT("info", ("HA_EXTRA_RESTORE_POS")); - break; - case HA_EXTRA_REINIT_CACHE: /* init cache from current record */ - DBUG_PRINT("info", ("HA_EXTRA_REINIT_CACHE")); - break; - case HA_EXTRA_FORCE_REOPEN: /* Datafile have changed on disk */ - DBUG_PRINT("info", ("HA_EXTRA_FORCE_REOPEN")); - break; - case HA_EXTRA_FLUSH: /* Flush tables to disk */ - DBUG_PRINT("info", ("HA_EXTRA_FLUSH")); - break; - case HA_EXTRA_NO_ROWS: /* Don't write rows */ - DBUG_PRINT("info", ("HA_EXTRA_NO_ROWS")); - break; - case HA_EXTRA_RESET_STATE: /* Reset positions */ - DBUG_PRINT("info", ("HA_EXTRA_RESET_STATE")); - break; case HA_EXTRA_IGNORE_DUP_KEY: /* Dup keys don't rollback everything*/ DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY")); DBUG_PRINT("info", ("Ignoring duplicate key")); @@ -3514,36 +4129,20 @@ int ha_ndbcluster::extra(enum ha_extra_function operation) DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_DUP_KEY")); m_ignore_dup_key= FALSE; break; - case HA_EXTRA_RETRIEVE_ALL_COLS: /* Retrieve all columns, not just those - where field->query_id is the same as - the current query id */ - DBUG_PRINT("info", ("HA_EXTRA_RETRIEVE_ALL_COLS")); - m_retrieve_all_fields= 
TRUE; + case HA_EXTRA_IGNORE_NO_KEY: + DBUG_PRINT("info", ("HA_EXTRA_IGNORE_NO_KEY")); + DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit")); + m_ignore_no_key= TRUE; break; - case HA_EXTRA_PREPARE_FOR_DELETE: - DBUG_PRINT("info", ("HA_EXTRA_PREPARE_FOR_DELETE")); - break; - case HA_EXTRA_PREPARE_FOR_UPDATE: /* Remove read cache if problems */ - DBUG_PRINT("info", ("HA_EXTRA_PREPARE_FOR_UPDATE")); - break; - case HA_EXTRA_PRELOAD_BUFFER_SIZE: - DBUG_PRINT("info", ("HA_EXTRA_PRELOAD_BUFFER_SIZE")); - break; - case HA_EXTRA_RETRIEVE_PRIMARY_KEY: - DBUG_PRINT("info", ("HA_EXTRA_RETRIEVE_PRIMARY_KEY")); - m_retrieve_primary_key= TRUE; - break; - case HA_EXTRA_CHANGE_KEY_TO_UNIQUE: - DBUG_PRINT("info", ("HA_EXTRA_CHANGE_KEY_TO_UNIQUE")); - break; - case HA_EXTRA_CHANGE_KEY_TO_DUP: - DBUG_PRINT("info", ("HA_EXTRA_CHANGE_KEY_TO_DUP")); - case HA_EXTRA_KEYREAD_PRESERVE_FIELDS: - DBUG_PRINT("info", ("HA_EXTRA_KEYREAD_PRESERVE_FIELDS")); + case HA_EXTRA_NO_IGNORE_NO_KEY: + DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_NO_KEY")); + DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit")); + m_ignore_no_key= FALSE; break; case HA_EXTRA_WRITE_CAN_REPLACE: DBUG_PRINT("info", ("HA_EXTRA_WRITE_CAN_REPLACE")); - if (!m_has_unique_index) + if (!m_has_unique_index || + current_thd->slave_thread) /* always set if slave, quick fix for bug 27378 */ { DBUG_PRINT("info", ("Turning ON use of write instead of insert")); m_use_write= TRUE; @@ -3578,13 +4177,21 @@ int ha_ndbcluster::reset() m_cond->cond_clear(); } + /* + Regular partition pruning will set the bitmap appropriately. + Some queries like ALTER TABLE doesn't use partition pruning and + thus the 'used_partitions' bitmap needs to be initialized + */ + if (m_part_info) + bitmap_set_all(&m_part_info->used_partitions); + /* reset flags set by extra calls */ - m_retrieve_all_fields= FALSE; - m_retrieve_primary_key= FALSE; m_ignore_dup_key= FALSE; m_use_write= FALSE; + m_ignore_no_key= FALSE; m_delete_cannot_batch= FALSE; m_update_cannot_batch= FALSE; + DBUG_RETURN(0); } @@ -3602,7 +4209,7 @@ int ha_ndbcluster::reset() void ha_ndbcluster::start_bulk_insert(ha_rows rows) { int bytes, batch; - const NDBTAB *tab= (const NDBTAB *) m_table; + const NDBTAB *tab= m_table; DBUG_ENTER("start_bulk_insert"); DBUG_PRINT("enter", ("rows: %d", (int)rows)); @@ -3660,12 +4267,12 @@ int ha_ndbcluster::end_bulk_insert() NdbTransaction *trans= m_active_trans; // Send rows to NDB DBUG_PRINT("info", ("Sending inserts to NDB, "\ - "rows_inserted:%d, bulk_insert_rows: %d", + "rows_inserted: %d bulk_insert_rows: %d", (int) m_rows_inserted, (int) m_bulk_insert_rows)); m_bulk_insert_not_flushed= FALSE; if (m_transaction_on) { - if (execute_no_commit(this, trans,false) != 0) + if (execute_no_commit(this, trans,FALSE) != 0) { no_uncommitted_rows_execute_failure(); my_errno= error= ndb_err(trans); @@ -3718,7 +4325,7 @@ const char** ha_ndbcluster::bas_ext() const double ha_ndbcluster::scan_time() { DBUG_ENTER("ha_ndbcluster::scan_time()"); - double res= rows2double(records*1000); + double res= rows2double(stats.records*1000); DBUG_PRINT("exit", ("table: %s value: %f", m_tabname, res)); DBUG_RETURN(res); @@ -3798,12 +4405,166 @@ THR_LOCK_DATA **ha_ndbcluster::store_lock(THD *thd, - refresh list of the indexes for the table if needed (if altered) */ +#ifdef HAVE_NDB_BINLOG +extern MASTER_INFO *active_mi; +static int ndbcluster_update_apply_status(THD *thd, int do_update) +{ + Thd_ndb *thd_ndb= get_thd_ndb(thd); + Ndb *ndb= thd_ndb->ndb; + NDBDICT *dict= 
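/*
  Editorial sketch, condensed from the surrounding function and not a
  separate API: the slave's applied position is recorded with a single
  NdbOperation on the apply status table, keyed on server_id. Column
  numbers and the varchar packing helper are the ones used in this patch.

      NdbOperation *op= trans->getNdbOperation(ndbtab);
      op->writeTuple();                                 // or updateTuple() when updating
      op->equal(0u, (Uint32) thd->server_id);           // primary key
      op->setValue(1u, (Uint64) 0);                     // epoch, only on first write
      ndb_pack_varchar(ndbtab->getColumn(2u), tmp_buf,  // log_name must be packed
                       log_name, strlen(log_name));
      op->setValue(2u, tmp_buf);
      op->setValue(3u, (Uint64) group_master_log_pos);  // start_pos
      // the operation is executed together with the surrounding commit
*/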
ndb->getDictionary(); + const NDBTAB *ndbtab; + NdbTransaction *trans= thd_ndb->trans; + ndb->setDatabaseName(NDB_REP_DB); + Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE); + if (!(ndbtab= ndbtab_g.get_table())) + { + return -1; + } + NdbOperation *op= 0; + int r= 0; + r|= (op= trans->getNdbOperation(ndbtab)) == 0; + DBUG_ASSERT(r == 0); + if (do_update) + r|= op->updateTuple(); + else + r|= op->writeTuple(); + DBUG_ASSERT(r == 0); + // server_id + r|= op->equal(0u, (Uint32)thd->server_id); + DBUG_ASSERT(r == 0); + if (!do_update) + { + // epoch + r|= op->setValue(1u, (Uint64)0); + DBUG_ASSERT(r == 0); + } + // log_name + char tmp_buf[FN_REFLEN]; + ndb_pack_varchar(ndbtab->getColumn(2u), tmp_buf, + active_mi->rli.group_master_log_name, + strlen(active_mi->rli.group_master_log_name)); + r|= op->setValue(2u, tmp_buf); + DBUG_ASSERT(r == 0); + // start_pos + r|= op->setValue(3u, (Uint64)active_mi->rli.group_master_log_pos); + DBUG_ASSERT(r == 0); + // end_pos + r|= op->setValue(4u, (Uint64)active_mi->rli.group_master_log_pos + + ((Uint64)active_mi->rli.future_event_relay_log_pos - + (Uint64)active_mi->rli.group_relay_log_pos)); + DBUG_ASSERT(r == 0); + return 0; +} +#endif /* HAVE_NDB_BINLOG */ + +void ha_ndbcluster::transaction_checks(THD *thd) +{ + if (thd->lex->sql_command == SQLCOM_LOAD) + { + m_transaction_on= FALSE; + /* Would be simpler if has_transactions() didn't always say "yes" */ + thd->transaction.all.modified_non_trans_table= + thd->transaction.stmt.modified_non_trans_table= TRUE; + } + else if (!thd->transaction.on) + m_transaction_on= FALSE; + else + m_transaction_on= thd->variables.ndb_use_transactions; +} + +int ha_ndbcluster::start_statement(THD *thd, + Thd_ndb *thd_ndb, + Ndb *ndb) +{ + DBUG_ENTER("ha_ndbcluster::start_statement"); + PRINT_OPTION_FLAGS(thd); + + trans_register_ha(thd, FALSE, ndbcluster_hton); + if (!thd_ndb->trans) + { + if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) + trans_register_ha(thd, TRUE, ndbcluster_hton); + DBUG_PRINT("trans",("Starting transaction")); + thd_ndb->trans= ndb->startTransaction(); + if (thd_ndb->trans == NULL) + ERR_RETURN(ndb->getNdbError()); + thd_ndb->init_open_tables(); + thd_ndb->query_state&= NDB_QUERY_NORMAL; + thd_ndb->trans_options= 0; + thd_ndb->m_slow_path= FALSE; + if (!(thd->options & OPTION_BIN_LOG) || + thd->variables.binlog_format == BINLOG_FORMAT_STMT) + { + thd_ndb->trans_options|= TNTO_NO_LOGGING; + thd_ndb->m_slow_path= TRUE; + } + else if (thd->slave_thread) + thd_ndb->m_slow_path= TRUE; + } + /* + If this is the start of a LOCK TABLE, a table look + should be taken on the table in NDB + + Check if it should be read or write lock + */ + if (thd->options & (OPTION_TABLE_LOCK)) + { + //lockThisTable(); + DBUG_PRINT("info", ("Locking the table..." )); + } + DBUG_RETURN(0); +} + +int ha_ndbcluster::init_handler_for_statement(THD *thd, Thd_ndb *thd_ndb) +{ + /* + This is the place to make sure this handler instance + has a started transaction. + + The transaction is started by the first handler on which + MySQL Server calls external lock + + Other handlers in the same stmt or transaction should use + the same NDB transaction. This is done by setting up the m_active_trans + pointer to point to the NDB transaction. 
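
    As an editorial illustration (condensed from start_statement and the
    code below, not a separate mechanism), the sharing amounts to:

      if (!thd_ndb->trans)                          // first handler in the statement
        thd_ndb->trans= ndb->startTransaction();    // creates the NDB transaction
      m_active_trans= thd_ndb->trans;               // every handler instance reuses it
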
+ */ + + DBUG_ENTER("ha_ndbcluster::init_handler_for_statement"); + // store thread specific data first to set the right context + m_force_send= thd->variables.ndb_force_send; + m_ha_not_exact_count= !thd->variables.ndb_use_exact_count; + m_autoincrement_prefetch= + (thd->variables.ndb_autoincrement_prefetch_sz > + NDB_DEFAULT_AUTO_PREFETCH) ? + (ha_rows) thd->variables.ndb_autoincrement_prefetch_sz + : (ha_rows) NDB_DEFAULT_AUTO_PREFETCH; + m_active_trans= thd_ndb->trans; + DBUG_ASSERT(m_active_trans); + // Start of transaction + m_rows_changed= 0; + m_ops_pending= 0; + m_slow_path= thd_ndb->m_slow_path; +#ifdef HAVE_NDB_BINLOG + if (unlikely(m_slow_path)) + { + if (m_share == ndb_apply_status_share && thd->slave_thread) + thd_ndb->trans_options|= TNTO_INJECTED_APPLY_STATUS; + } +#endif + // TODO remove double pointers... + if (!(m_thd_ndb_share= thd_ndb->get_open_table(thd, m_table))) + { + DBUG_RETURN(1); + } + m_table_info= &m_thd_ndb_share->stat; + DBUG_RETURN(0); +} + int ha_ndbcluster::external_lock(THD *thd, int lock_type) { int error=0; - NdbTransaction* trans= NULL; - DBUG_ENTER("external_lock"); + /* Check that this handler instance has a connection set up to the Ndb object of thd @@ -3814,150 +4575,23 @@ int ha_ndbcluster::external_lock(THD *thd, int lock_type) Thd_ndb *thd_ndb= get_thd_ndb(thd); Ndb *ndb= thd_ndb->ndb; - DBUG_PRINT("enter", ("thd: 0x%lx thd_ndb: 0x%lx thd_ndb->lock_count: %d", - (long) thd, (long) thd_ndb, thd_ndb->lock_count)); + DBUG_PRINT("enter", ("this: 0x%lx thd: 0x%lx thd_ndb: %lx " + "thd_ndb->lock_count: %d", + (long) this, (long) thd, (long) thd_ndb, + thd_ndb->lock_count)); if (lock_type != F_UNLCK) { DBUG_PRINT("info", ("lock_type != F_UNLCK")); - if (thd->lex->sql_command == SQLCOM_LOAD) - { - m_transaction_on= FALSE; - /* Would be simpler if has_transactions() didn't always say "yes" */ - thd->no_trans_update.all= thd->no_trans_update.stmt= TRUE; - } - else if (!thd->transaction.on) - m_transaction_on= FALSE; - else - m_transaction_on= thd->variables.ndb_use_transactions; + transaction_checks(thd); if (!thd_ndb->lock_count++) { - PRINT_OPTION_FLAGS(thd); - if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) - { - // Autocommit transaction - DBUG_ASSERT(!thd_ndb->stmt); - DBUG_PRINT("trans",("Starting transaction stmt")); - - trans= ndb->startTransaction(); - if (trans == NULL) - ERR_RETURN(ndb->getNdbError()); - no_uncommitted_rows_reset(thd); - thd_ndb->stmt= trans; - thd_ndb->query_state&= NDB_QUERY_NORMAL; - trans_register_ha(thd, FALSE, &ndbcluster_hton); - } - else - { - if (!thd_ndb->all) - { - // Not autocommit transaction - // A "master" transaction ha not been started yet - DBUG_PRINT("trans",("starting transaction, all")); - - trans= ndb->startTransaction(); - if (trans == NULL) - ERR_RETURN(ndb->getNdbError()); - no_uncommitted_rows_reset(thd); - thd_ndb->all= trans; - thd_ndb->query_state&= NDB_QUERY_NORMAL; - trans_register_ha(thd, TRUE, &ndbcluster_hton); - - /* - If this is the start of a LOCK TABLE, a table look - should be taken on the table in NDB - - Check if it should be read or write lock - */ - if (thd->options & (OPTION_TABLE_LOCK)) - { - //lockThisTable(); - DBUG_PRINT("info", ("Locking the table..." )); - } - - } - } - } - /* - This is the place to make sure this handler instance - has a started transaction. - - The transaction is started by the first handler on which - MySQL Server calls external lock - - Other handlers in the same stmt or transaction should use - the same NDB transaction. 
This is done by setting up the m_active_trans - pointer to point to the NDB transaction. - */ - - // store thread specific data first to set the right context - m_force_send= thd->variables.ndb_force_send; - m_ha_not_exact_count= !thd->variables.ndb_use_exact_count; - m_autoincrement_prefetch= - (thd->variables.ndb_autoincrement_prefetch_sz > - NDB_DEFAULT_AUTO_PREFETCH) ? - (ha_rows) thd->variables.ndb_autoincrement_prefetch_sz - : (ha_rows) NDB_DEFAULT_AUTO_PREFETCH; - m_active_trans= thd_ndb->all ? thd_ndb->all : thd_ndb->stmt; - DBUG_ASSERT(m_active_trans); - // Start of transaction - m_rows_changed= 0; - m_retrieve_all_fields= FALSE; - m_retrieve_primary_key= FALSE; - m_ops_pending= 0; - { - NDBDICT *dict= ndb->getDictionary(); - const NDBTAB *tab; - void *tab_info; - if (!(tab= dict->getTable(m_tabname, &tab_info))) - ERR_RETURN(dict->getNdbError()); - DBUG_PRINT("info", ("Table schema version: %d", - tab->getObjectVersion())); - // Check if thread has stale local cache - // New transaction must not use old tables... (trans != 0) - // Running might... - if ((trans && tab->getObjectStatus() != NdbDictionary::Object::Retrieved) - || tab->getObjectStatus() == NdbDictionary::Object::Invalid) - { - invalidate_dictionary_cache(FALSE); - if (!(tab= dict->getTable(m_tabname, &tab_info))) - ERR_RETURN(dict->getNdbError()); - DBUG_PRINT("info", ("Table schema version: %d", - tab->getObjectVersion())); - } - if (m_table_version < tab->getObjectVersion()) - { - /* - The table has been altered, caller has to retry - */ - NdbError err= ndb->getNdbError(NDB_INVALID_SCHEMA_OBJECT); - DBUG_RETURN(ndb_to_mysql_error(&err)); - } - if (m_table != (void *)tab) - { - m_table= (void *)tab; - m_table_version = tab->getObjectVersion(); - if ((my_errno= build_index_list(ndb, table, ILBP_OPEN))) - DBUG_RETURN(my_errno); - - const void *data= NULL, *pack_data= NULL; - uint length, pack_length; - if (readfrm(table->s->path, &data, &length) || - packfrm(data, length, &pack_data, &pack_length) || - pack_length != tab->getFrmLength() || - memcmp(pack_data, tab->getFrmData(), pack_length)) - { - my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR)); - my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR)); - NdbError err= ndb->getNdbError(NDB_INVALID_SCHEMA_OBJECT); - DBUG_RETURN(ndb_to_mysql_error(&err)); - } - my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR)); - my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR)); - } - m_table_info= tab_info; + if ((error= start_statement(thd, thd_ndb, ndb))) + goto error; } - no_uncommitted_rows_init(thd); + if ((error= init_handler_for_statement(thd, thd_ndb))) + goto error; + DBUG_RETURN(0); } else { @@ -3985,16 +4619,19 @@ int ha_ndbcluster::external_lock(THD *thd, int lock_type) DBUG_PRINT("trans", ("Last external_lock")); PRINT_OPTION_FLAGS(thd); - if (thd_ndb->stmt) + if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { - /* - Unlock is done without a transaction commit / rollback. - This happens if the thread didn't update any rows - We must in this case close the transaction to release resources - */ - DBUG_PRINT("trans",("ending non-updating transaction")); - ndb->closeTransaction(m_active_trans); - thd_ndb->stmt= NULL; + if (thd_ndb->trans) + { + /* + Unlock is done without a transaction commit / rollback. 
+ This happens if the thread didn't update any rows + We must in this case close the transaction to release resources + */ + DBUG_PRINT("trans",("ending non-updating transaction")); + ndb->closeTransaction(thd_ndb->trans); + thd_ndb->trans= NULL; + } } } m_table_info= NULL; @@ -4023,7 +4660,10 @@ int ha_ndbcluster::external_lock(THD *thd, int lock_type) if (m_ops_pending) DBUG_PRINT("warning", ("ops_pending != 0L")); m_ops_pending= 0; + DBUG_RETURN(0); } +error: + thd_ndb->lock_count--; DBUG_RETURN(error); } @@ -4039,7 +4679,7 @@ void ha_ndbcluster::unlock_row() DBUG_ENTER("unlock_row"); DBUG_PRINT("info", ("Unlocking row")); - m_lock_tuple= false; + m_lock_tuple= FALSE; DBUG_VOID_RETURN; } @@ -4055,27 +4695,20 @@ int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type) { int error=0; DBUG_ENTER("start_stmt"); - PRINT_OPTION_FLAGS(thd); Thd_ndb *thd_ndb= get_thd_ndb(thd); - NdbTransaction *trans= (thd_ndb->stmt)?thd_ndb->stmt:thd_ndb->all; - if (!trans){ + transaction_checks(thd); + if (!thd_ndb->start_stmt_count++) + { Ndb *ndb= thd_ndb->ndb; - DBUG_PRINT("trans",("Starting transaction stmt")); - trans= ndb->startTransaction(); - if (trans == NULL) - ERR_RETURN(ndb->getNdbError()); - no_uncommitted_rows_reset(thd); - thd_ndb->stmt= trans; - thd_ndb->query_state&= NDB_QUERY_NORMAL; - trans_register_ha(thd, FALSE, &ndbcluster_hton); + if ((error= start_statement(thd, thd_ndb, ndb))) + goto error; } - m_active_trans= trans; - // Start of statement - m_retrieve_all_fields= FALSE; - m_retrieve_primary_key= FALSE; - m_ops_pending= 0; - + if ((error= init_handler_for_statement(thd, thd_ndb))) + goto error; + DBUG_RETURN(0); +error: + thd_ndb->start_stmt_count--; DBUG_RETURN(error); } @@ -4084,34 +4717,55 @@ int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type) Commit a transaction started in NDB */ -int ndbcluster_commit(THD *thd, bool all) +static int ndbcluster_commit(handlerton *hton, THD *thd, bool all) { int res= 0; Thd_ndb *thd_ndb= get_thd_ndb(thd); Ndb *ndb= thd_ndb->ndb; - NdbTransaction *trans= all ? thd_ndb->all : thd_ndb->stmt; + NdbTransaction *trans= thd_ndb->trans; DBUG_ENTER("ndbcluster_commit"); - DBUG_PRINT("transaction",("%s", - trans == thd_ndb->stmt ? - "stmt" : "all")); - DBUG_ASSERT(ndb && trans); + DBUG_ASSERT(ndb); + PRINT_OPTION_FLAGS(thd); + DBUG_PRINT("enter", ("Commit %s", (all ? "all" : "stmt"))); + thd_ndb->start_stmt_count= 0; + if (trans == NULL || (!all && + thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) + { + /* + An odditity in the handler interface is that commit on handlerton + is called to indicate end of statement only in cases where + autocommit isn't used and the all flag isn't set. + + We also leave quickly when a transaction haven't even been started, + in this case we are safe that no clean up is needed. In this case + the MySQL Server could handle the query without contacting the + NDB kernel. 
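
    Put differently (editorial summary of the guard just below), the
    commit body only runs when there is both a started transaction and a
    real commit point:

      if (trans == NULL ||
          (!all && (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))))
        return 0;   // end-of-statement inside an open transaction: nothing to do
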
+ */ + DBUG_PRINT("info", ("Commit before start or end-of-statement only")); + DBUG_RETURN(0); + } + +#ifdef HAVE_NDB_BINLOG + if (unlikely(thd_ndb->m_slow_path)) + { + if (thd->slave_thread) + ndbcluster_update_apply_status + (thd, thd_ndb->trans_options & TNTO_INJECTED_APPLY_STATUS); + } +#endif /* HAVE_NDB_BINLOG */ if (execute_commit(thd,trans) != 0) { const NdbError err= trans->getNdbError(); const NdbOperation *error_op= trans->getNdbErrorOperation(); - ERR_PRINT(err); + set_ndb_err(thd, err); res= ndb_to_mysql_error(&err); if (res != -1) ndbcluster_print_error(res, error_op); } ndb->closeTransaction(trans); - - if (all) - thd_ndb->all= NULL; - else - thd_ndb->stmt= NULL; + thd_ndb->trans= NULL; /* Clear commit_count for tables changed by transaction */ NDB_SHARE* share; @@ -4135,34 +4789,35 @@ int ndbcluster_commit(THD *thd, bool all) Rollback a transaction started in NDB */ -int ndbcluster_rollback(THD *thd, bool all) +static int ndbcluster_rollback(handlerton *hton, THD *thd, bool all) { int res= 0; Thd_ndb *thd_ndb= get_thd_ndb(thd); Ndb *ndb= thd_ndb->ndb; - NdbTransaction *trans= all ? thd_ndb->all : thd_ndb->stmt; + NdbTransaction *trans= thd_ndb->trans; DBUG_ENTER("ndbcluster_rollback"); - DBUG_PRINT("transaction",("%s", - trans == thd_ndb->stmt ? - "stmt" : "all")); - DBUG_ASSERT(ndb && trans); + DBUG_ASSERT(ndb); + thd_ndb->start_stmt_count= 0; + if (trans == NULL || (!all && + thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) + { + /* Ignore end-of-statement until real rollback or commit is called */ + DBUG_PRINT("info", ("Rollback before start or end-of-statement only")); + DBUG_RETURN(0); + } if (trans->execute(NdbTransaction::Rollback) != 0) { const NdbError err= trans->getNdbError(); const NdbOperation *error_op= trans->getNdbErrorOperation(); - ERR_PRINT(err); + set_ndb_err(thd, err); res= ndb_to_mysql_error(&err); if (res != -1) ndbcluster_print_error(res, error_op); } ndb->closeTransaction(trans); - - if (all) - thd_ndb->all= NULL; - else - thd_ndb->stmt= NULL; + thd_ndb->trans= NULL; /* Clear list of tables changed by transaction */ thd_ndb->changed_tables.empty(); @@ -4467,105 +5122,93 @@ static int create_ndb_column(NDBCOL &col, /* Create a table in NDB Cluster - */ - -static void ndb_set_fragmentation(NDBTAB &tab, TABLE *form, uint pk_length) -{ - ha_rows max_rows= form->s->max_rows; - ha_rows min_rows= form->s->min_rows; - if (max_rows < min_rows) - max_rows= min_rows; - if (max_rows == (ha_rows)0) /* default setting, don't set fragmentation */ - return; - /** - * get the number of fragments right - */ - uint no_fragments; - { -#if MYSQL_VERSION_ID >= 50000 - uint acc_row_size= 25 + /*safety margin*/ 2; -#else - uint acc_row_size= pk_length*4; - /* add acc overhead */ - if (pk_length <= 8) /* main page will set the limit */ - acc_row_size+= 25 + /*safety margin*/ 2; - else /* overflow page will set the limit */ - acc_row_size+= 4 + /*safety margin*/ 4; -#endif - ulonglong acc_fragment_size= 512*1024*1024; - /* - * if not --with-big-tables then max_rows is ulong - * the warning in this case is misleading though - */ - ulonglong big_max_rows = (ulonglong)max_rows; -#if MYSQL_VERSION_ID >= 50100 - no_fragments= (big_max_rows*acc_row_size)/acc_fragment_size+1; -#else - no_fragments= ((big_max_rows*acc_row_size)/acc_fragment_size+1 - +1/*correct rounding*/)/2; -#endif - } - { - uint no_nodes= g_ndb_cluster_connection->no_db_nodes(); - NDBTAB::FragmentType ftype; - if (no_fragments > 2*no_nodes) - { - ftype= NDBTAB::FragAllLarge; - if (no_fragments > 
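/*
  Editorial note on the (now removed) estimate above: with the 5.0+
  constants, acc_row_size is 25 + 2 = 27 bytes and a fragment is sized
  at 512 MB, so for example max_rows of 100 million gives

      no_fragments = (100000000 * 27) / (512 * 1024 * 1024) + 1 = 6

  which is then compared against the number of data nodes to choose the
  FragAllSmall / FragAllMedium / FragAllLarge fragmentation type.
*/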
4*no_nodes) - push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, - "Ndb might have problems storing the max amount of rows specified"); - } - else if (no_fragments > no_nodes) - ftype= NDBTAB::FragAllMedium; - else - ftype= NDBTAB::FragAllSmall; - tab.setFragmentType(ftype); - } - tab.setMaxRows(max_rows); - tab.setMinRows(min_rows); -} +*/ int ha_ndbcluster::create(const char *name, TABLE *form, HA_CREATE_INFO *create_info) { + THD *thd= current_thd; NDBTAB tab; NDBCOL col; - uint pack_length, length, i, pk_length= 0; - const void *data= NULL, *pack_data= NULL; - char name2[FN_HEADLEN]; + size_t pack_length, length; + uint i, pk_length= 0; + uchar *data= NULL, *pack_data= NULL; bool create_from_engine= (create_info->table_options & HA_OPTION_CREATE_FROM_ENGINE); + bool is_truncate= (thd->lex->sql_command == SQLCOM_TRUNCATE); + char tablespace[FN_LEN]; + NdbDictionary::Table::SingleUserMode single_user_mode= NdbDictionary::Table::SingleUserModeLocked; DBUG_ENTER("ha_ndbcluster::create"); DBUG_PRINT("enter", ("name: %s", name)); - fn_format(name2, name, "", "",2); // Remove the .frm extension - set_dbname(name2); - set_tabname(name2); - if (current_thd->lex->sql_command == SQLCOM_TRUNCATE) + DBUG_ASSERT(*fn_rext((char*)name) == 0); + set_dbname(name); + set_tabname(name); + + if ((my_errno= check_ndb_connection())) + DBUG_RETURN(my_errno); + + Ndb *ndb= get_ndb(); + NDBDICT *dict= ndb->getDictionary(); + + if (is_truncate) { + { + Ndb_table_guard ndbtab_g(dict, m_tabname); + if (!(m_table= ndbtab_g.get_table())) + ERR_RETURN(dict->getNdbError()); + if ((get_tablespace_name(thd, tablespace, FN_LEN))) + create_info->tablespace= tablespace; + m_table= NULL; + } DBUG_PRINT("info", ("Dropping and re-creating table for TRUNCATE")); if ((my_errno= delete_table(name))) DBUG_RETURN(my_errno); } + table= form; if (create_from_engine) { /* - Table alreay exists in NDB and frm file has been created by + Table already exists in NDB and frm file has been created by caller. 
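
    (Editorial aside, condensed from the normal create path further down:
    the .frm produced by the server is compressed and stored inside the
    NDB dictionary object itself, so other mysqld nodes can discover it
    later:

      readfrm(name, &data, &length);                   // read the .frm from disk
      packfrm(data, length, &pack_data, &pack_length); // compress it
      tab.setFrm(pack_data, pack_length);              // attach it to the NDB table def

    with both buffers released via my_free() afterwards.)
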
Do Ndb specific stuff, such as create a .ndb file */ - my_errno= write_ndb_file(); + if ((my_errno= write_ndb_file(name))) + DBUG_RETURN(my_errno); +#ifdef HAVE_NDB_BINLOG + ndbcluster_create_binlog_setup(get_ndb(), name, strlen(name), + m_dbname, m_tabname, FALSE); +#endif /* HAVE_NDB_BINLOG */ DBUG_RETURN(my_errno); } +#ifdef HAVE_NDB_BINLOG + /* + Don't allow table creation unless + schema distribution table is setup + ( unless it is a creation of the schema dist table itself ) + */ + if (!ndb_schema_share) + { + if (!(strcmp(m_dbname, NDB_REP_DB) == 0 && + strcmp(m_tabname, NDB_SCHEMA_TABLE) == 0)) + { + DBUG_PRINT("info", ("Schema distribution table not setup")); + DBUG_RETURN(HA_ERR_NO_CONNECTION); + } + single_user_mode = NdbDictionary::Table::SingleUserModeReadWrite; + } +#endif /* HAVE_NDB_BINLOG */ + DBUG_PRINT("table", ("name: %s", m_tabname)); if (tab.setName(m_tabname)) { DBUG_RETURN(my_errno= errno); } tab.setLogging(!(create_info->options & HA_LEX_CREATE_TMP_TABLE)); - + tab.setSingleUserMode(single_user_mode); + // Save frm data for this table if (readfrm(name, &data, &length)) DBUG_RETURN(1); @@ -4574,12 +5217,64 @@ int ha_ndbcluster::create(const char *name, my_free((char*)data, MYF(0)); DBUG_RETURN(2); } - - DBUG_PRINT("info", ("setFrm data: 0x%lx len: %d", (long) pack_data, pack_length)); + DBUG_PRINT("info", + ("setFrm data: 0x%lx len: %lu", (long) pack_data, + (ulong) pack_length)); tab.setFrm(pack_data, pack_length); my_free((char*)data, MYF(0)); my_free((char*)pack_data, MYF(0)); + /* + Check for disk options + */ + if (create_info->storage_media == HA_SM_DISK) + { + if (create_info->tablespace) + tab.setTablespaceName(create_info->tablespace); + else + tab.setTablespaceName("DEFAULT-TS"); + } + else if (create_info->tablespace) + { + if (create_info->storage_media == HA_SM_MEMORY) + { + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_ILLEGAL_HA_CREATE_OPTION, + ER(ER_ILLEGAL_HA_CREATE_OPTION), + ndbcluster_hton_name, + "TABLESPACE currently only supported for " + "STORAGE DISK"); + DBUG_RETURN(HA_ERR_UNSUPPORTED); + } + tab.setTablespaceName(create_info->tablespace); + create_info->storage_media = HA_SM_DISK; //if use tablespace, that also means store on disk + } + + /* + Handle table row type + + Default is to let table rows have var part reference so that online + add column can be performed in the future. 
Explicitly setting row + type to fixed will omit var part reference, which will save data + memory in ndb, but at the cost of not being able to online add + column to this table + */ + switch (create_info->row_type) { + case ROW_TYPE_FIXED: + tab.setForceVarPart(FALSE); + break; + case ROW_TYPE_DYNAMIC: + /* fall through, treat as default */ + default: + /* fall through, treat as default */ + case ROW_TYPE_DEFAULT: + tab.setForceVarPart(TRUE); + break; + } + + /* + Setup columns + */ for (i= 0; i < form->s->fields; i++) { Field *field= form->field[i]; @@ -4588,6 +5283,33 @@ int ha_ndbcluster::create(const char *name, field->pack_length())); if ((my_errno= create_ndb_column(col, field, create_info))) DBUG_RETURN(my_errno); + + if (create_info->storage_media == HA_SM_DISK) + col.setStorageType(NdbDictionary::Column::StorageTypeDisk); + else + col.setStorageType(NdbDictionary::Column::StorageTypeMemory); + + switch (create_info->row_type) { + case ROW_TYPE_FIXED: + if (field_type_forces_var_part(field->type())) + { + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_ILLEGAL_HA_CREATE_OPTION, + ER(ER_ILLEGAL_HA_CREATE_OPTION), + ndbcluster_hton_name, + "Row format FIXED incompatible with " + "variable sized attribute"); + DBUG_RETURN(HA_ERR_UNSUPPORTED); + } + break; + case ROW_TYPE_DYNAMIC: + /* + Future: make columns dynamic in this case + */ + break; + default: + break; + } if (tab.addColumn(col)) { DBUG_RETURN(my_errno= errno); @@ -4595,7 +5317,17 @@ int ha_ndbcluster::create(const char *name, if (col.getPrimaryKey()) pk_length += (field->pack_length() + 3) / 4; } - + + KEY* key_info; + for (i= 0, key_info= form->key_info; i < form->s->keys; i++, key_info++) + { + KEY_PART_INFO *key_part= key_info->key_part; + KEY_PART_INFO *end= key_part + key_info->key_parts; + for (; key_part != end; key_part++) + tab.getColumn(key_part->fieldnr-1)->setStorageType( + NdbDictionary::Column::StorageTypeMemory); + } + // No primary key, create shadow key as 64 bit, auto increment if (form->s->primary_key == MAX_KEY) { @@ -4615,7 +5347,7 @@ int ha_ndbcluster::create(const char *name, } pk_length += 2; } - + // Make sure that blob tables don't have to big part size for (i= 0; i < form->s->fields; i++) { @@ -4649,39 +5381,282 @@ int ha_ndbcluster::create(const char *name, } } - ndb_set_fragmentation(tab, form, pk_length); - - if ((my_errno= check_ndb_connection())) + // Check partition info + partition_info *part_info= form->part_info; + if ((my_errno= set_up_partition_info(part_info, form, (void*)&tab))) + { DBUG_RETURN(my_errno); - + } + // Create the table in NDB - Ndb *ndb= get_ndb(); - NDBDICT *dict= ndb->getDictionary(); if (dict->createTable(tab) != 0) { const NdbError err= dict->getNdbError(); - ERR_PRINT(err); + set_ndb_err(thd, err); my_errno= ndb_to_mysql_error(&err); DBUG_RETURN(my_errno); } + + Ndb_table_guard ndbtab_g(dict, m_tabname); + // temporary set m_table during create + // reset at return + m_table= ndbtab_g.get_table(); + // TODO check also that we have the same frm... 
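/*
  Editorial sketch of the per-column decisions made above, names as in
  this patch: each column's storage follows the table's STORAGE clause,
  and ROW_FORMAT=FIXED is rejected for types that force a variable-sized
  part.

      if (create_info->storage_media == HA_SM_DISK)
        col.setStorageType(NdbDictionary::Column::StorageTypeDisk);
      else
        col.setStorageType(NdbDictionary::Column::StorageTypeMemory);

      if (create_info->row_type == ROW_TYPE_FIXED &&
          field_type_forces_var_part(field->type()))
        return HA_ERR_UNSUPPORTED;   // reported to the client via push_warning_printf
*/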
+ if (!m_table) + { + /* purecov: begin deadcode */ + const NdbError err= dict->getNdbError(); + set_ndb_err(thd, err); + my_errno= ndb_to_mysql_error(&err); + DBUG_RETURN(my_errno); + /* purecov: end */ + } + DBUG_PRINT("info", ("Table %s/%s created successfully", m_dbname, m_tabname)); // Create secondary indexes - my_errno= build_index_list(ndb, form, ILBP_CREATE); + my_errno= create_indexes(ndb, form); + + if (!my_errno) + my_errno= write_ndb_file(name); + else + { + /* + Failed to create an index, + drop the table (and all it's indexes) + */ + while (dict->dropTableGlobal(*m_table)) + { + switch (dict->getNdbError().status) + { + case NdbError::TemporaryError: + if (!thd->killed) + continue; // retry indefinitly + break; + default: + break; + } + break; + } + m_table = 0; + DBUG_RETURN(my_errno); + } +#ifdef HAVE_NDB_BINLOG if (!my_errno) - my_errno= write_ndb_file(); + { + NDB_SHARE *share= 0; + pthread_mutex_lock(&ndbcluster_mutex); + /* + First make sure we get a "fresh" share here, not an old trailing one... + */ + { + uint length= (uint) strlen(name); + if ((share= (NDB_SHARE*) hash_search(&ndbcluster_open_tables, + (uchar*) name, length))) + handle_trailing_share(share); + } + /* + get a new share + */ + + /* ndb_share reference create */ + if (!(share= get_share(name, form, TRUE, TRUE))) + { + sql_print_error("NDB: allocating table share for %s failed", name); + /* my_errno is set */ + } + else + { + DBUG_PRINT("NDB_SHARE", ("%s binlog create use_count: %u", + share->key, share->use_count)); + } + pthread_mutex_unlock(&ndbcluster_mutex); + + while (!IS_TMP_PREFIX(m_tabname)) + { + String event_name(INJECTOR_EVENT_LEN); + ndb_rep_event_name(&event_name,m_dbname,m_tabname); + int do_event_op= ndb_binlog_running; + if (!ndb_schema_share && + strcmp(share->db, NDB_REP_DB) == 0 && + strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0) + do_event_op= 1; + + /* + Always create an event for the table, as other mysql servers + expect it to be there. + */ + if (!ndbcluster_create_event(ndb, m_table, event_name.c_ptr(), share, + share && do_event_op ? 2 : 1/* push warning */)) + { + if (ndb_extra_logging) + sql_print_information("NDB Binlog: CREATE TABLE Event: %s", + event_name.c_ptr()); + if (share && + ndbcluster_create_event_ops(share, m_table, event_name.c_ptr())) + { + sql_print_error("NDB Binlog: FAILED CREATE TABLE event operations." + " Event: %s", name); + /* a warning has been issued to the client */ + } + } + /* + warning has been issued if ndbcluster_create_event failed + and (share && do_event_op) + */ + if (share && !do_event_op) + share->flags|= NSF_NO_BINLOG; + ndbcluster_log_schema_op(thd, share, + thd->query, thd->query_length, + share->db, share->table_name, + m_table->getObjectId(), + m_table->getObjectVersion(), + (is_truncate) ? 
+ SOT_TRUNCATE_TABLE : SOT_CREATE_TABLE, + 0, 0, 1); + break; + } + } +#endif /* HAVE_NDB_BINLOG */ + + m_table= 0; DBUG_RETURN(my_errno); } +int ha_ndbcluster::create_handler_files(const char *file, + const char *old_name, + int action_flag, + HA_CREATE_INFO *create_info) +{ + Ndb* ndb; + const NDBTAB *tab; + uchar *data= NULL, *pack_data= NULL; + size_t length, pack_length; + int error= 0; + + DBUG_ENTER("create_handler_files"); + + if (action_flag != CHF_INDEX_FLAG) + { + DBUG_RETURN(FALSE); + } + DBUG_PRINT("enter", ("file: %s", file)); + if (!(ndb= get_ndb())) + DBUG_RETURN(HA_ERR_NO_CONNECTION); + + NDBDICT *dict= ndb->getDictionary(); + if (!create_info->frm_only) + DBUG_RETURN(0); // Must be a create, ignore since frm is saved in create + + // TODO handle this + DBUG_ASSERT(m_table != 0); + + set_dbname(file); + set_tabname(file); + Ndb_table_guard ndbtab_g(dict, m_tabname); + DBUG_PRINT("info", ("m_dbname: %s, m_tabname: %s", m_dbname, m_tabname)); + if (!(tab= ndbtab_g.get_table())) + DBUG_RETURN(0); // Unkown table, must be temporary table + + DBUG_ASSERT(get_ndb_share_state(m_share) == NSS_ALTERED); + if (readfrm(file, &data, &length) || + packfrm(data, length, &pack_data, &pack_length)) + { + DBUG_PRINT("info", ("Missing frm for %s", m_tabname)); + my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR)); + error= 1; + } + else + { + DBUG_PRINT("info", ("Table %s has changed, altering frm in ndb", + m_tabname)); + NdbDictionary::Table new_tab= *tab; + new_tab.setFrm(pack_data, pack_length); + if (dict->alterTableGlobal(*tab, new_tab)) + { + set_ndb_err(current_thd, dict->getNdbError()); + error= ndb_to_mysql_error(&dict->getNdbError()); + } + my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR)); + } + + set_ndb_share_state(m_share, NSS_INITIAL); + /* ndb_share reference schema(?) free */ + DBUG_PRINT("NDB_SHARE", ("%s binlog schema(?) 
free use_count: %u", + m_share->key, m_share->use_count)); + free_share(&m_share); // Decrease ref_count + + DBUG_RETURN(error); +} + +int ha_ndbcluster::create_index(const char *name, KEY *key_info, + NDB_INDEX_TYPE idx_type, uint idx_no) +{ + int error= 0; + char unique_name[FN_LEN]; + static const char* unique_suffix= "$unique"; + DBUG_ENTER("ha_ndbcluster::create_ordered_index"); + DBUG_PRINT("info", ("Creating index %u: %s", idx_no, name)); + + if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX) + { + strxnmov(unique_name, FN_LEN, name, unique_suffix, NullS); + DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d", + unique_name, idx_no)); + } + + switch (idx_type){ + case PRIMARY_KEY_INDEX: + // Do nothing, already created + break; + case PRIMARY_KEY_ORDERED_INDEX: + error= create_ordered_index(name, key_info); + break; + case UNIQUE_ORDERED_INDEX: + if (!(error= create_ordered_index(name, key_info))) + error= create_unique_index(unique_name, key_info); + break; + case UNIQUE_INDEX: + if (check_index_fields_not_null(key_info)) + { + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_NULL_COLUMN_IN_INDEX, + "Ndb does not support unique index on NULL valued attributes, index access with NULL value will become full table scan"); + } + error= create_unique_index(unique_name, key_info); + break; + case ORDERED_INDEX: + if (key_info->algorithm == HA_KEY_ALG_HASH) + { + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_ILLEGAL_HA_CREATE_OPTION, + ER(ER_ILLEGAL_HA_CREATE_OPTION), + ndbcluster_hton_name, + "Ndb does not support non-unique " + "hash based indexes"); + error= HA_ERR_UNSUPPORTED; + break; + } + error= create_ordered_index(name, key_info); + break; + default: + DBUG_ASSERT(FALSE); + break; + } + + DBUG_RETURN(error); +} int ha_ndbcluster::create_ordered_index(const char *name, KEY *key_info) { DBUG_ENTER("ha_ndbcluster::create_ordered_index"); - DBUG_RETURN(create_index(name, key_info, FALSE)); + DBUG_RETURN(create_ndb_index(name, key_info, FALSE)); } int ha_ndbcluster::create_unique_index(const char *name, @@ -4689,7 +5664,7 @@ int ha_ndbcluster::create_unique_index(const char *name, { DBUG_ENTER("ha_ndbcluster::create_unique_index"); - DBUG_RETURN(create_index(name, key_info, TRUE)); + DBUG_RETURN(create_ndb_index(name, key_info, TRUE)); } @@ -4697,9 +5672,9 @@ int ha_ndbcluster::create_unique_index(const char *name, Create an index in NDB Cluster */ -int ha_ndbcluster::create_index(const char *name, - KEY *key_info, - bool unique) +int ha_ndbcluster::create_ndb_index(const char *name, + KEY *key_info, + bool unique) { Ndb *ndb= get_ndb(); NdbDictionary::Dictionary *dict= ndb->getDictionary(); @@ -4733,7 +5708,7 @@ int ha_ndbcluster::create_index(const char *name, } } - if (dict->createIndex(ndb_index)) + if (dict->createIndex(ndb_index, *m_table)) ERR_RETURN(dict->getNdbError()); // Success @@ -4742,14 +5717,112 @@ int ha_ndbcluster::create_index(const char *name, } /* + Prepare for an on-line alter table +*/ +void ha_ndbcluster::prepare_for_alter() +{ + /* ndb_share reference schema */ + ndbcluster_get_share(m_share); // Increase ref_count + DBUG_PRINT("NDB_SHARE", ("%s binlog schema use_count: %u", + m_share->key, m_share->use_count)); + set_ndb_share_state(m_share, NSS_ALTERED); +} + +/* + Add an index on-line to a table +*/ +int ha_ndbcluster::add_index(TABLE *table_arg, + KEY *key_info, uint num_of_keys) +{ + int error= 0; + uint idx; + DBUG_ENTER("ha_ndbcluster::add_index"); + DBUG_PRINT("enter", ("table 
%s", table_arg->s->table_name.str)); + DBUG_ASSERT(m_share->state == NSS_ALTERED); + + for (idx= 0; idx < num_of_keys; idx++) + { + KEY *key= key_info + idx; + KEY_PART_INFO *key_part= key->key_part; + KEY_PART_INFO *end= key_part + key->key_parts; + NDB_INDEX_TYPE idx_type= get_index_type_from_key(idx, key_info, false); + DBUG_PRINT("info", ("Adding index: '%s'", key_info[idx].name)); + // Add fields to key_part struct + for (; key_part != end; key_part++) + key_part->field= table->field[key_part->fieldnr]; + // Check index type + // Create index in ndb + if((error= create_index(key_info[idx].name, key, idx_type, idx))) + break; + } + if (error) + { + set_ndb_share_state(m_share, NSS_INITIAL); + /* ndb_share reference schema free */ + DBUG_PRINT("NDB_SHARE", ("%s binlog schema free use_count: %u", + m_share->key, m_share->use_count)); + free_share(&m_share); // Decrease ref_count + } + DBUG_RETURN(error); +} + +/* + Mark one or several indexes for deletion. and + renumber the remaining indexes +*/ +int ha_ndbcluster::prepare_drop_index(TABLE *table_arg, + uint *key_num, uint num_of_keys) +{ + DBUG_ENTER("ha_ndbcluster::prepare_drop_index"); + DBUG_ASSERT(m_share->state == NSS_ALTERED); + // Mark indexes for deletion + uint idx; + for (idx= 0; idx < num_of_keys; idx++) + { + DBUG_PRINT("info", ("ha_ndbcluster::prepare_drop_index %u", *key_num)); + m_index[*key_num++].status= TO_BE_DROPPED; + } + // Renumber indexes + THD *thd= current_thd; + Thd_ndb *thd_ndb= get_thd_ndb(thd); + Ndb *ndb= thd_ndb->ndb; + renumber_indexes(ndb, table_arg); + DBUG_RETURN(0); +} + +/* + Really drop all indexes marked for deletion +*/ +int ha_ndbcluster::final_drop_index(TABLE *table_arg) +{ + int error; + DBUG_ENTER("ha_ndbcluster::final_drop_index"); + DBUG_PRINT("info", ("ha_ndbcluster::final_drop_index")); + // Really drop indexes + THD *thd= current_thd; + Thd_ndb *thd_ndb= get_thd_ndb(thd); + Ndb *ndb= thd_ndb->ndb; + if((error= drop_indexes(ndb, table_arg))) + { + m_share->state= NSS_INITIAL; + /* ndb_share reference schema free */ + DBUG_PRINT("NDB_SHARE", ("%s binlog schema free use_count: %u", + m_share->key, m_share->use_count)); + free_share(&m_share); // Decrease ref_count + } + DBUG_RETURN(error); +} + +/* Rename a table in NDB Cluster */ int ha_ndbcluster::rename_table(const char *from, const char *to) { NDBDICT *dict; - char new_tabname[FN_HEADLEN]; + char old_dbname[FN_HEADLEN]; char new_dbname[FN_HEADLEN]; + char new_tabname[FN_HEADLEN]; const NDBTAB *orig_tab; int result; bool recreate_indexes= FALSE; @@ -4757,7 +5830,7 @@ int ha_ndbcluster::rename_table(const char *from, const char *to) DBUG_ENTER("ha_ndbcluster::rename_table"); DBUG_PRINT("info", ("Renaming %s to %s", from, to)); - set_dbname(from); + set_dbname(from, old_dbname); set_dbname(to, new_dbname); set_tabname(from); set_tabname(to, new_tabname); @@ -4766,106 +5839,352 @@ int ha_ndbcluster::rename_table(const char *from, const char *to) DBUG_RETURN(my_errno= HA_ERR_NO_CONNECTION); Ndb *ndb= get_ndb(); + ndb->setDatabaseName(old_dbname); dict= ndb->getDictionary(); - if (!(orig_tab= dict->getTable(m_tabname))) + Ndb_table_guard ndbtab_g(dict, m_tabname); + if (!(orig_tab= ndbtab_g.get_table())) ERR_RETURN(dict->getNdbError()); - // Check if thread has stale local cache - if (orig_tab->getObjectStatus() == NdbDictionary::Object::Invalid) + +#ifdef HAVE_NDB_BINLOG + int ndb_table_id= orig_tab->getObjectId(); + int ndb_table_version= orig_tab->getObjectVersion(); + + /* ndb_share reference temporary */ + NDB_SHARE *share= 
get_share(from, 0, FALSE); + if (share) { - dict->removeCachedTable(m_tabname); - if (!(orig_tab= dict->getTable(m_tabname))) - ERR_RETURN(dict->getNdbError()); + DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", + share->key, share->use_count)); + IF_DBUG(int r=) rename_share(share, to); + DBUG_ASSERT(r == 0); } - if (my_strcasecmp(system_charset_info, new_dbname, m_dbname)) +#endif + if (my_strcasecmp(system_charset_info, new_dbname, old_dbname)) { - dict->listIndexes(index_list, m_tabname); + dict->listIndexes(index_list, *orig_tab); recreate_indexes= TRUE; } - - m_table= (void *)orig_tab; // Change current database to that of target table set_dbname(to); if (ndb->setDatabaseName(m_dbname)) { ERR_RETURN(ndb->getNdbError()); } - if (!(result= alter_table_name(new_tabname))) + + NdbDictionary::Table new_tab= *orig_tab; + new_tab.setName(new_tabname); + if (dict->alterTableGlobal(*orig_tab, new_tab) != 0) + { + NdbError ndb_error= dict->getNdbError(); +#ifdef HAVE_NDB_BINLOG + if (share) + { + IF_DBUG(int ret=) rename_share(share, from); + DBUG_ASSERT(ret == 0); + /* ndb_share reference temporary free */ + DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", + share->key, share->use_count)); + free_share(&share); + } +#endif + ERR_RETURN(ndb_error); + } + + // Rename .ndb file + if ((result= handler::rename_table(from, to))) + { + // ToDo in 4.1 should rollback alter table... +#ifdef HAVE_NDB_BINLOG + if (share) + { + /* ndb_share reference temporary free */ + DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", + share->key, share->use_count)); + free_share(&share); + } +#endif + DBUG_RETURN(result); + } + +#ifdef HAVE_NDB_BINLOG + int is_old_table_tmpfile= 1; + if (share && share->op) + dict->forceGCPWait(); + + /* handle old table */ + if (!IS_TMP_PREFIX(m_tabname)) { - // Rename .ndb file - result= handler::rename_table(from, to); + is_old_table_tmpfile= 0; + String event_name(INJECTOR_EVENT_LEN); + ndb_rep_event_name(&event_name, from + sizeof(share_prefix) - 1, 0); + ndbcluster_handle_drop_table(ndb, event_name.c_ptr(), share, + "rename table"); + } + + if (!result && !IS_TMP_PREFIX(new_tabname)) + { + /* always create an event for the table */ + String event_name(INJECTOR_EVENT_LEN); + ndb_rep_event_name(&event_name, to + sizeof(share_prefix) - 1, 0); + Ndb_table_guard ndbtab_g2(dict, new_tabname); + const NDBTAB *ndbtab= ndbtab_g2.get_table(); + + if (!ndbcluster_create_event(ndb, ndbtab, event_name.c_ptr(), share, + share && ndb_binlog_running ? 2 : 1/* push warning */)) + { + if (ndb_extra_logging) + sql_print_information("NDB Binlog: RENAME Event: %s", + event_name.c_ptr()); + if (share && + ndbcluster_create_event_ops(share, ndbtab, event_name.c_ptr())) + { + sql_print_error("NDB Binlog: FAILED create event operations " + "during RENAME. 
Event %s", event_name.c_ptr()); + /* a warning has been issued to the client */ + } + } + /* + warning has been issued if ndbcluster_create_event failed + and (share && ndb_binlog_running) + */ + if (!is_old_table_tmpfile) + ndbcluster_log_schema_op(current_thd, share, + current_thd->query, current_thd->query_length, + old_dbname, m_tabname, + ndb_table_id, ndb_table_version, + SOT_RENAME_TABLE, + m_dbname, new_tabname, 1); } // If we are moving tables between databases, we need to recreate // indexes if (recreate_indexes) { - const NDBTAB *new_tab; - set_tabname(to); - if (!(new_tab= dict->getTable(m_tabname))) - ERR_RETURN(dict->getNdbError()); - - for (unsigned i = 0; i < index_list.count; i++) { + for (unsigned i = 0; i < index_list.count; i++) + { NDBDICT::List::Element& index_el = index_list.elements[i]; - set_dbname(from); - if (ndb->setDatabaseName(m_dbname)) - { - ERR_RETURN(ndb->getNdbError()); - } - const NDBINDEX * index= dict->getIndex(index_el.name, *new_tab); - set_dbname(to); - if (ndb->setDatabaseName(m_dbname)) - { - ERR_RETURN(ndb->getNdbError()); - } - DBUG_PRINT("info", ("Creating index %s/%s", - m_dbname, index->getName())); - dict->createIndex(*index); - DBUG_PRINT("info", ("Dropping index %s/%s", - m_dbname, index->getName())); - - set_dbname(from); - if (ndb->setDatabaseName(m_dbname)) - { - ERR_RETURN(ndb->getNdbError()); - } - dict->dropIndex(*index); + // Recreate any indexes not stored in the system database + if (my_strcasecmp(system_charset_info, + index_el.database, NDB_SYSTEM_DATABASE)) + { + set_dbname(from); + ndb->setDatabaseName(m_dbname); + const NDBINDEX * index= dict->getIndexGlobal(index_el.name, new_tab); + DBUG_PRINT("info", ("Creating index %s/%s", + index_el.database, index->getName())); + dict->createIndex(*index, new_tab); + DBUG_PRINT("info", ("Dropping index %s/%s", + index_el.database, index->getName())); + set_dbname(from); + ndb->setDatabaseName(m_dbname); + dict->dropIndexGlobal(*index); + } } } + if (share) + { + /* ndb_share reference temporary free */ + DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", + share->key, share->use_count)); + free_share(&share); + } +#endif DBUG_RETURN(result); } /* - Rename a table in NDB Cluster using alter table + Delete table from NDB Cluster + */ -int ha_ndbcluster::alter_table_name(const char *to) +/* static version which does not need a handler */ + +int +ha_ndbcluster::delete_table(ha_ndbcluster *h, Ndb *ndb, + const char *path, + const char *db, + const char *table_name) { - Ndb *ndb= get_ndb(); + THD *thd= current_thd; + DBUG_ENTER("ha_ndbcluster::ndbcluster_delete_table"); NDBDICT *dict= ndb->getDictionary(); - const NDBTAB *orig_tab= (const NDBTAB *) m_table; - DBUG_ENTER("alter_table_name_table"); + int ndb_table_id= 0; + int ndb_table_version= 0; +#ifdef HAVE_NDB_BINLOG + /* + Don't allow drop table unless + schema distribution table is setup + */ + if (!ndb_schema_share) + { + DBUG_PRINT("info", ("Schema distribution table not setup")); + DBUG_RETURN(HA_ERR_NO_CONNECTION); + } + /* ndb_share reference temporary */ + NDB_SHARE *share= get_share(path, 0, FALSE); + if (share) + { + DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", + share->key, share->use_count)); + } +#endif - NdbDictionary::Table new_tab= *orig_tab; - if (new_tab.setName(to)) + /* Drop the table from NDB */ + + int res= 0; + if (h && h->m_table) { - DBUG_RETURN(my_errno= errno); +retry_temporary_error1: + if (dict->dropTableGlobal(*h->m_table) == 0) + { + ndb_table_id= h->m_table->getObjectId(); + 
ndb_table_version= h->m_table->getObjectVersion(); + DBUG_PRINT("info", ("success 1")); + } + else + { + switch (dict->getNdbError().status) + { + case NdbError::TemporaryError: + if (!thd->killed) + goto retry_temporary_error1; // retry indefinitly + break; + default: + break; + } + set_ndb_err(thd, dict->getNdbError()); + res= ndb_to_mysql_error(&dict->getNdbError()); + DBUG_PRINT("info", ("error(1) %u", res)); + } + h->release_metadata(thd, ndb); + } + else + { + ndb->setDatabaseName(db); + while (1) + { + Ndb_table_guard ndbtab_g(dict, table_name); + if (ndbtab_g.get_table()) + { + retry_temporary_error2: + if (dict->dropTableGlobal(*ndbtab_g.get_table()) == 0) + { + ndb_table_id= ndbtab_g.get_table()->getObjectId(); + ndb_table_version= ndbtab_g.get_table()->getObjectVersion(); + DBUG_PRINT("info", ("success 2")); + break; + } + else + { + switch (dict->getNdbError().status) + { + case NdbError::TemporaryError: + if (!thd->killed) + goto retry_temporary_error2; // retry indefinitly + break; + default: + if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT) + { + ndbtab_g.invalidate(); + continue; + } + break; + } + } + } + set_ndb_err(thd, dict->getNdbError()); + res= ndb_to_mysql_error(&dict->getNdbError()); + DBUG_PRINT("info", ("error(2) %u", res)); + break; + } } - if (dict->alterTable(new_tab) != 0) - ERR_RETURN(dict->getNdbError()); - m_table= NULL; - m_table_info= NULL; - - DBUG_RETURN(0); -} + if (res) + { +#ifdef HAVE_NDB_BINLOG + /* the drop table failed for some reason, drop the share anyways */ + if (share) + { + pthread_mutex_lock(&ndbcluster_mutex); + if (share->state != NSS_DROPPED) + { + /* + The share kept by the server has not been freed, free it + */ + share->state= NSS_DROPPED; + /* ndb_share reference create free */ + DBUG_PRINT("NDB_SHARE", ("%s create free use_count: %u", + share->key, share->use_count)); + free_share(&share, TRUE); + } + /* ndb_share reference temporary free */ + DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", + share->key, share->use_count)); + free_share(&share, TRUE); + pthread_mutex_unlock(&ndbcluster_mutex); + } +#endif + DBUG_RETURN(res); + } +#ifdef HAVE_NDB_BINLOG + /* stop the logging of the dropped table, and cleanup */ -/* - Delete table from NDB Cluster + /* + drop table is successful even if table does not exist in ndb + and in case table was actually not dropped, there is no need + to force a gcp, and setting the event_name to null will indicate + that there is no event to be dropped + */ + int table_dropped= dict->getNdbError().code != 709; - */ + if (!IS_TMP_PREFIX(table_name) && share && + current_thd->lex->sql_command != SQLCOM_TRUNCATE) + { + ndbcluster_log_schema_op(thd, share, + thd->query, thd->query_length, + share->db, share->table_name, + ndb_table_id, ndb_table_version, + SOT_DROP_TABLE, 0, 0, 1); + } + else if (table_dropped && share && share->op) /* ndbcluster_log_schema_op + will do a force GCP */ + dict->forceGCPWait(); + + if (!IS_TMP_PREFIX(table_name)) + { + String event_name(INJECTOR_EVENT_LEN); + ndb_rep_event_name(&event_name, path + sizeof(share_prefix) - 1, 0); + ndbcluster_handle_drop_table(ndb, + table_dropped ? 
event_name.c_ptr() : 0, + share, "delete table"); + } + + if (share) + { + pthread_mutex_lock(&ndbcluster_mutex); + if (share->state != NSS_DROPPED) + { + /* + The share kept by the server has not been freed, free it + */ + share->state= NSS_DROPPED; + /* ndb_share reference create free */ + DBUG_PRINT("NDB_SHARE", ("%s create free use_count: %u", + share->key, share->use_count)); + free_share(&share, TRUE); + } + /* ndb_share reference temporary free */ + DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", + share->key, share->use_count)); + free_share(&share, TRUE); + pthread_mutex_unlock(&ndbcluster_mutex); + } +#endif + DBUG_RETURN(0); +} int ha_ndbcluster::delete_table(const char *name) { @@ -4874,57 +6193,36 @@ int ha_ndbcluster::delete_table(const char *name) set_dbname(name); set_tabname(name); +#ifdef HAVE_NDB_BINLOG + /* + Don't allow drop table unless + schema distribution table is setup + */ + if (!ndb_schema_share) + { + DBUG_PRINT("info", ("Schema distribution table not setup")); + DBUG_RETURN(HA_ERR_NO_CONNECTION); + } +#endif + if (check_ndb_connection()) DBUG_RETURN(HA_ERR_NO_CONNECTION); /* Call ancestor function to delete .ndb file */ handler::delete_table(name); - - /* Drop the table from NDB */ - DBUG_RETURN(drop_table()); -} - -/* - Drop table in NDB Cluster - */ - -int ha_ndbcluster::drop_table() -{ - THD *thd= current_thd; - Ndb *ndb= get_ndb(); - NdbDictionary::Dictionary *dict= ndb->getDictionary(); - - DBUG_ENTER("drop_table"); - DBUG_PRINT("enter", ("Deleting %s", m_tabname)); - - release_metadata(); - while (dict->dropTable(m_tabname)) - { - const NdbError err= dict->getNdbError(); - switch (err.status) - { - case NdbError::TemporaryError: - if (!thd->killed) - continue; // retry indefinitly - break; - default: - break; - } - ERR_RETURN(dict->getNdbError()); - } - - DBUG_RETURN(0); + DBUG_RETURN(delete_table(this, get_ndb(),name, m_dbname, m_tabname)); } -ulonglong ha_ndbcluster::get_auto_increment() -{ +void ha_ndbcluster::get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values) +{ uint cache_size; Uint64 auto_value; THD *thd= current_thd; - Uint64 step= thd->variables.auto_increment_increment; - Uint64 start= thd->variables.auto_increment_offset; DBUG_ENTER("get_auto_increment"); DBUG_PRINT("enter", ("m_tabname: %s", m_tabname)); Ndb *ndb= get_ndb(); @@ -4946,10 +6244,10 @@ ulonglong ha_ndbcluster::get_auto_increment() int retry_sleep= 30; /* 30 milliseconds, transaction */ for (;;) { + Ndb_tuple_id_range_guard g(m_share); if (m_skip_auto_increment && - ndb->readAutoIncrementValue((const NDBTAB *) m_table, auto_value) || - ndb->getAutoIncrementValue((const NDBTAB *) m_table, - auto_value, cache_size, step, start)) + ndb->readAutoIncrementValue(m_table, g.range, auto_value) || + ndb->getAutoIncrementValue(m_table, g.range, auto_value, cache_size, increment, offset)) { if (--retries && ndb->getNdbError().status == NdbError::TemporaryError) @@ -4960,11 +6258,15 @@ ulonglong ha_ndbcluster::get_auto_increment() const NdbError err= ndb->getNdbError(); sql_print_error("Error %lu in ::get_auto_increment(): %s", (ulong) err.code, err.message); - DBUG_RETURN(~(ulonglong) 0); + *first_value= ~(ulonglong) 0; + DBUG_VOID_RETURN; } break; } - DBUG_RETURN((longlong)auto_value); + *first_value= (longlong)auto_value; + /* From the point of view of MySQL, NDB reserves one row at a time */ + *nb_reserved_values= 1; + DBUG_VOID_RETURN; } @@ -4972,29 +6274,42 @@ ulonglong 
ha_ndbcluster::get_auto_increment() Constructor for the NDB Cluster table handler */ -ha_ndbcluster::ha_ndbcluster(TABLE *table_arg): - handler(&ndbcluster_hton, table_arg), +/* + Normal flags for binlogging is that ndb has HA_HAS_OWN_BINLOGGING + and preferes HA_BINLOG_ROW_CAPABLE + Other flags are set under certain circumstaces in table_flags() +*/ +#define HA_NDBCLUSTER_TABLE_FLAGS \ + HA_REC_NOT_IN_SEQ | \ + HA_NULL_IN_KEY | \ + HA_AUTO_PART_KEY | \ + HA_NO_PREFIX_CHAR_KEYS | \ + HA_NEED_READ_RANGE_BUFFER | \ + HA_CAN_GEOMETRY | \ + HA_CAN_BIT_FIELD | \ + HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | \ + HA_PRIMARY_KEY_REQUIRED_FOR_DELETE | \ + HA_PARTIAL_COLUMN_READ | \ + HA_HAS_OWN_BINLOGGING | \ + HA_BINLOG_ROW_CAPABLE | \ + HA_HAS_RECORDS + +ha_ndbcluster::ha_ndbcluster(handlerton *hton, TABLE_SHARE *table_arg): + handler(hton, table_arg), m_active_trans(NULL), m_active_cursor(NULL), m_table(NULL), - m_table_version(-1), m_table_info(NULL), - m_table_flags(HA_REC_NOT_IN_SEQ | - HA_NULL_IN_KEY | - HA_AUTO_PART_KEY | - HA_NO_PREFIX_CHAR_KEYS | - HA_NEED_READ_RANGE_BUFFER | - HA_CAN_GEOMETRY | - HA_CAN_BIT_FIELD | - HA_PARTIAL_COLUMN_READ | - HA_EXTERNAL_AUTO_INCREMENT), + m_table_flags(HA_NDBCLUSTER_TABLE_FLAGS), m_share(0), + m_part_info(NULL), + m_use_partition_function(FALSE), + m_sorted(FALSE), m_use_write(FALSE), m_ignore_dup_key(FALSE), m_has_unique_index(FALSE), m_primary_key_update(FALSE), - m_retrieve_all_fields(FALSE), - m_retrieve_primary_key(FALSE), + m_ignore_no_key(FALSE), m_rows_to_insert((ha_rows) 1), m_rows_inserted((ha_rows) 0), m_bulk_insert_rows((ha_rows) 1024), @@ -5023,32 +6338,44 @@ ha_ndbcluster::ha_ndbcluster(TABLE *table_arg): m_tabname[0]= '\0'; m_dbname[0]= '\0'; - records= ~(ha_rows)0; // uninitialized - block_size= 1024; + stats.records= ~(ha_rows)0; // uninitialized + stats.block_size= 1024; for (i= 0; i < MAX_KEY; i++) - { - m_index[i].type= UNDEFINED_INDEX; - m_index[i].unique_index= NULL; - m_index[i].index= NULL; - m_index[i].unique_index_attrid_map= NULL; - } + ndb_init_index(m_index[i]); DBUG_VOID_RETURN; } +int ha_ndbcluster::ha_initialise() +{ + DBUG_ENTER("ha_ndbcluster::ha_initialise"); + if (check_ndb_in_thd(current_thd)) + { + DBUG_RETURN(FALSE); + } + DBUG_RETURN(TRUE); +} + /* Destructor for NDB Cluster table handler */ ha_ndbcluster::~ha_ndbcluster() { + THD *thd= current_thd; + Ndb *ndb= thd ? 
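/*
  Editorial note on the new get_auto_increment() signature above: the
  handler now reports the value it obtained through out-parameters
  instead of a return value. Condensed from the code in this hunk:

      ndb->getAutoIncrementValue(m_table, g.range, auto_value,
                                 cache_size, increment, offset);
      *first_value= (ulonglong) auto_value;
      *nb_reserved_values= 1;   // NDB hands out one row's worth at a time
*/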
check_ndb_in_thd(thd) : g_ndb; DBUG_ENTER("~ha_ndbcluster"); if (m_share) - free_share(m_share); - release_metadata(); + { + /* ndb_share reference handler free */ + DBUG_PRINT("NDB_SHARE", ("%s handler free use_count: %u", + m_share->key, m_share->use_count)); + free_share(&m_share); + } + release_metadata(thd, ndb); my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR)); m_blobs_buffer= 0; @@ -5077,37 +6404,58 @@ ha_ndbcluster::~ha_ndbcluster() Open a table for further use - fetch metadata for this table from NDB - check that table exists + + RETURN + 0 ok + < 0 Table has changed */ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) { int res; KEY *key; - DBUG_ENTER("open"); - DBUG_PRINT("enter", ("name: %s mode: %d test_if_locked: %d", + DBUG_ENTER("ha_ndbcluster::open"); + DBUG_PRINT("enter", ("name: %s mode: %d test_if_locked: %d", name, mode, test_if_locked)); - // Setup ref_length to make room for the whole - // primary key to be written in the ref variable + /* + Setup ref_length to make room for the whole + primary key to be written in the ref variable + */ - if (table->s->primary_key != MAX_KEY) + if (table_share->primary_key != MAX_KEY) { - key= table->key_info+table->s->primary_key; + key= table->key_info+table_share->primary_key; ref_length= key->key_length; - DBUG_PRINT("info", (" ref_length: %d", ref_length)); } + else // (table_share->primary_key == MAX_KEY) + { + if (m_use_partition_function) + { + ref_length+= sizeof(m_part_id); + } + } + + DBUG_PRINT("info", ("ref_length: %d", ref_length)); + // Init table lock structure - if (!(m_share=get_share(name))) + /* ndb_share reference handler */ + if (!(m_share=get_share(name, table))) DBUG_RETURN(1); + DBUG_PRINT("NDB_SHARE", ("%s handler use_count: %u", + m_share->key, m_share->use_count)); thr_lock_data_init(&m_share->lock,&m_lock,(void*) 0); set_dbname(name); set_tabname(name); - if ((res= check_ndb_connection()) || + if ((res= check_ndb_connection()) || (res= get_metadata(name))) { - free_share(m_share); + /* ndb_share reference handler free */ + DBUG_PRINT("NDB_SHARE", ("%s handler free use_count: %u", + m_share->key, m_share->use_count)); + free_share(&m_share); m_share= 0; DBUG_RETURN(res); } @@ -5116,26 +6464,55 @@ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) Ndb *ndb= get_ndb(); if (ndb->setDatabaseName(m_dbname)) { + set_ndb_err(current_thd, ndb->getNdbError()); res= ndb_to_mysql_error(&ndb->getNdbError()); break; } struct Ndb_statistics stat; - res= ndb_get_table_statistics(NULL, false, ndb, m_tabname, &stat); - records= stat.row_count; + res= ndb_get_table_statistics(NULL, FALSE, ndb, m_table, &stat); + stats.mean_rec_length= stat.row_size; + stats.data_file_length= stat.fragment_memory; + stats.records= stat.row_count; if(!res) res= info(HA_STATUS_CONST); break; } if (res) { - free_share(m_share); + free_share(&m_share); m_share= 0; - release_metadata(); + release_metadata(current_thd, get_ndb()); DBUG_RETURN(res); } +#ifdef HAVE_NDB_BINLOG + if (!ndb_binlog_tables_inited && ndb_binlog_running) + table->db_stat|= HA_READ_ONLY; +#endif DBUG_RETURN(0); } +/* + Set partition info + + SYNOPSIS + set_part_info() + part_info + + RETURN VALUE + NONE + + DESCRIPTION + Set up partition info when handler object created +*/ + +void ha_ndbcluster::set_part_info(partition_info *part_info) +{ + m_part_info= part_info; + if (!(m_part_info->part_type == HASH_PARTITION && + m_part_info->list_of_part_fields && + !m_part_info->is_sub_partitioned())) + m_use_partition_function= TRUE; +} /* 
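
  (Editorial aside on open() above: for a table without an explicit
  primary key that also uses a user-defined partitioning function, the
  ref handed back by position() is the hidden key followed by the
  partition id, so the buffer is effectively sized as

      ref_length= NDB_HIDDEN_PRIMARY_KEY_LENGTH + sizeof(m_part_id);

  which matches the memcpy of m_part_id seen in the position() hunk
  earlier in this diff.)
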
Close the table @@ -5144,9 +6521,15 @@ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) int ha_ndbcluster::close(void) { - DBUG_ENTER("close"); - free_share(m_share); m_share= 0; - release_metadata(); + DBUG_ENTER("close"); + THD *thd= table->in_use; + Ndb *ndb= thd ? check_ndb_in_thd(thd) : g_ndb; + /* ndb_share reference handler free */ + DBUG_PRINT("NDB_SHARE", ("%s handler free use_count: %u", + m_share->key, m_share->use_count)); + free_share(&m_share); + m_share= 0; + release_metadata(thd, ndb); DBUG_RETURN(0); } @@ -5162,9 +6545,6 @@ Thd_ndb* ha_ndbcluster::seize_thd_ndb() my_errno= HA_ERR_OUT_OF_MEM; return NULL; } - thd_ndb->ndb->getDictionary()->set_local_table_data_size( - sizeof(Ndb_local_table_statistics) - ); if (thd_ndb->ndb->init(max_transactions) != 0) { ERR_PRINT(thd_ndb->ndb->getNdbError()); @@ -5225,7 +6605,7 @@ int ha_ndbcluster::check_ndb_connection(THD* thd) } -int ndbcluster_close_connection(THD *thd) +static int ndbcluster_close_connection(handlerton *hton, THD *thd) { Thd_ndb *thd_ndb= get_thd_ndb(thd); DBUG_ENTER("ndbcluster_close_connection"); @@ -5242,13 +6622,17 @@ int ndbcluster_close_connection(THD *thd) Try to discover one table from NDB */ -int ndbcluster_discover(THD* thd, const char *db, const char *name, - const void** frmblob, uint* frmlen) +int ndbcluster_discover(handlerton *hton, THD* thd, const char *db, + const char *name, + uchar **frmblob, + size_t *frmlen) { - uint len; - const void* data; - const NDBTAB* tab; + int error= 0; + NdbError ndb_error; + size_t len; + uchar* data= NULL; Ndb* ndb; + char key[FN_REFLEN]; DBUG_ENTER("ndbcluster_discover"); DBUG_PRINT("enter", ("db: %s, name: %s", db, name)); @@ -5259,34 +6643,88 @@ int ndbcluster_discover(THD* thd, const char *db, const char *name, ERR_RETURN(ndb->getNdbError()); } NDBDICT* dict= ndb->getDictionary(); - dict->set_local_table_data_size(sizeof(Ndb_local_table_statistics)); - dict->invalidateTable(name); - if (!(tab= dict->getTable(name))) - { - const NdbError err= dict->getNdbError(); - if (err.code == 709) - DBUG_RETURN(-1); - ERR_RETURN(err); + build_table_filename(key, sizeof(key), db, name, "", 0); + /* ndb_share reference temporary */ + NDB_SHARE *share= get_share(key, 0, FALSE); + if (share) + { + DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", + share->key, share->use_count)); } - DBUG_PRINT("info", ("Found table %s", tab->getName())); - - len= tab->getFrmLength(); - if (len == 0 || tab->getFrmData() == NULL) + if (share && get_ndb_share_state(share) == NSS_ALTERED) { - DBUG_PRINT("error", ("No frm data found.")); - DBUG_RETURN(1); + // Frm has been altered on disk, but not yet written to ndb + if (readfrm(key, &data, &len)) + { + DBUG_PRINT("error", ("Could not read frm")); + error= 1; + goto err; + } } - - if (unpackfrm(&data, &len, tab->getFrmData())) + else { - DBUG_PRINT("error", ("Could not unpack table")); - DBUG_RETURN(1); + Ndb_table_guard ndbtab_g(dict, name); + const NDBTAB *tab= ndbtab_g.get_table(); + if (!tab) + { + const NdbError err= dict->getNdbError(); + if (err.code == 709 || err.code == 723) + { + error= -1; + DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code)); + } + else + { + error= -1; + ndb_error= err; + DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code)); + } + goto err; + } + DBUG_PRINT("info", ("Found table %s", tab->getName())); + + len= tab->getFrmLength(); + if (len == 0 || tab->getFrmData() == NULL) + { + DBUG_PRINT("error", ("No frm data found.")); + error= 1; + goto err; + } + + if (unpackfrm(&data, 
&len, (uchar*) tab->getFrmData())) + { + DBUG_PRINT("error", ("Could not unpack table")); + error= 1; + goto err; + } } *frmlen= len; *frmblob= data; + if (share) + { + /* ndb_share reference temporary free */ + DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", + share->key, share->use_count)); + free_share(&share); + } + DBUG_RETURN(0); +err: + my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR)); + if (share) + { + /* ndb_share reference temporary free */ + DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", + share->key, share->use_count)); + free_share(&share); + } + if (ndb_error.code) + { + ERR_RETURN(ndb_error); + } + DBUG_RETURN(error); } /* @@ -5294,46 +6732,49 @@ int ndbcluster_discover(THD* thd, const char *db, const char *name, */ -int ndbcluster_table_exists_in_engine(THD* thd, const char *db, const char *name) +int ndbcluster_table_exists_in_engine(handlerton *hton, THD* thd, + const char *db, + const char *name) { - const NDBTAB* tab; Ndb* ndb; DBUG_ENTER("ndbcluster_table_exists_in_engine"); - DBUG_PRINT("enter", ("db: %s, name: %s", db, name)); + DBUG_PRINT("enter", ("db: %s name: %s", db, name)); if (!(ndb= check_ndb_in_thd(thd))) DBUG_RETURN(HA_ERR_NO_CONNECTION); - if (ndb->setDatabaseName(db)) - { - ERR_RETURN(ndb->getNdbError()); - } NDBDICT* dict= ndb->getDictionary(); - dict->set_local_table_data_size(sizeof(Ndb_local_table_statistics)); - dict->invalidateTable(name); - if (!(tab= dict->getTable(name))) - { + NdbDictionary::Dictionary::List list; + if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0) ERR_RETURN(dict->getNdbError()); + for (uint i= 0 ; i < list.count ; i++) + { + NdbDictionary::Dictionary::List::Element& elmt= list.elements[i]; + if (my_strcasecmp(system_charset_info, elmt.database, db)) + continue; + if (my_strcasecmp(system_charset_info, elmt.name, name)) + continue; + DBUG_PRINT("info", ("Found table")); + DBUG_RETURN(HA_ERR_TABLE_EXIST); } - - DBUG_PRINT("info", ("Found table %s", tab->getName())); - DBUG_RETURN(HA_ERR_TABLE_EXIST); + DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); } -extern "C" byte* tables_get_key(const char *entry, uint *length, +extern "C" uchar* tables_get_key(const char *entry, size_t *length, my_bool not_used __attribute__((unused))) { *length= strlen(entry); - return (byte*) entry; + return (uchar*) entry; } /* Drop a database in NDB Cluster - */ + NOTE add a dummy void function, since stupid handlerton is returning void instead of int... 
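The rewritten ndbcluster_table_exists_in_engine no longer fetches a single table from the dictionary; it lists all user tables and matches database and name case-insensitively, returning HA_ERR_TABLE_EXIST or HA_ERR_NO_SUCH_TABLE. A sketch of that lookup over a plain list, with a simple ASCII comparison standing in for my_strcasecmp with system_charset_info:

#include <cctype>
#include <cstddef>
#include <string>
#include <vector>

struct ListedTable { std::string database, name; };

static bool icase_eq(const std::string &a, const std::string &b)
{
  if (a.size() != b.size()) return false;
  for (std::size_t i = 0; i < a.size(); i++)
    if (std::tolower((unsigned char)a[i]) != std::tolower((unsigned char)b[i]))
      return false;
  return true;
}

// true mirrors HA_ERR_TABLE_EXIST, false mirrors HA_ERR_NO_SUCH_TABLE.
bool table_exists_in_engine(const std::vector<ListedTable> &listed,
                            const std::string &db, const std::string &name)
{
  for (const ListedTable &t : listed)
    if (icase_eq(t.database, db) && icase_eq(t.name, name))
      return true;
  return false;
}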
+*/ -int ndbcluster_drop_database(const char *path) +int ndbcluster_drop_database_impl(const char *path) { DBUG_ENTER("ndbcluster_drop_database"); THD *thd= current_thd; @@ -5348,25 +6789,28 @@ int ndbcluster_drop_database(const char *path) DBUG_PRINT("enter", ("db: %s", dbname)); if (!(ndb= check_ndb_in_thd(thd))) - DBUG_RETURN(HA_ERR_NO_CONNECTION); + DBUG_RETURN(-1); // List tables in NDB NDBDICT *dict= ndb->getDictionary(); if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0) - ERR_RETURN(dict->getNdbError()); + DBUG_RETURN(-1); for (i= 0 ; i < list.count ; i++) { - NdbDictionary::Dictionary::List::Element& t= list.elements[i]; - DBUG_PRINT("info", ("Found %s/%s in NDB", t.database, t.name)); + NdbDictionary::Dictionary::List::Element& elmt= list.elements[i]; + DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name)); // Add only tables that belongs to db - if (my_strcasecmp(system_charset_info, t.database, dbname)) + if (my_strcasecmp(system_charset_info, elmt.database, dbname)) continue; - DBUG_PRINT("info", ("%s must be dropped", t.name)); - drop_list.push_back(thd->strdup(t.name)); + DBUG_PRINT("info", ("%s must be dropped", elmt.name)); + drop_list.push_back(thd->strdup(elmt.name)); } // Drop any tables belonging to database + char full_path[FN_REFLEN]; + char *tmp= full_path + + build_table_filename(full_path, sizeof(full_path), dbname, "", "", 0); if (ndb->setDatabaseName(dbname)) { ERR_RETURN(ndb->getNdbError()); @@ -5374,32 +6818,200 @@ int ndbcluster_drop_database(const char *path) List_iterator_fast<char> it(drop_list); while ((tabname=it++)) { - while (dict->dropTable(tabname)) + tablename_to_filename(tabname, tmp, FN_REFLEN - (tmp - full_path)-1); + VOID(pthread_mutex_lock(&LOCK_open)); + if (ha_ndbcluster::delete_table(0, ndb, full_path, dbname, tabname)) { const NdbError err= dict->getNdbError(); - switch (err.status) + if (err.code != 709 && err.code != 723) { - case NdbError::TemporaryError: - if (!thd->killed) - continue; // retry indefinitly - break; - default: - break; - } - if (err.code != 709) // 709: No such table existed - { - ERR_PRINT(err); + set_ndb_err(thd, err); ret= ndb_to_mysql_error(&err); } - break; } + VOID(pthread_mutex_unlock(&LOCK_open)); } DBUG_RETURN(ret); } +static void ndbcluster_drop_database(handlerton *hton, char *path) +{ + DBUG_ENTER("ndbcluster_drop_database"); +#ifdef HAVE_NDB_BINLOG + /* + Don't allow drop database unless + schema distribution table is setup + */ + if (!ndb_schema_share) + { + DBUG_PRINT("info", ("Schema distribution table not setup")); + DBUG_VOID_RETURN; + //DBUG_RETURN(HA_ERR_NO_CONNECTION); + } +#endif + ndbcluster_drop_database_impl(path); +#ifdef HAVE_NDB_BINLOG + char db[FN_REFLEN]; + THD *thd= current_thd; + ha_ndbcluster::set_dbname(path, db); + ndbcluster_log_schema_op(thd, 0, + thd->query, thd->query_length, + db, "", 0, 0, SOT_DROP_DB, 0, 0, 0); +#endif + DBUG_VOID_RETURN; +} + +int ndb_create_table_from_engine(THD *thd, const char *db, + const char *table_name) +{ + LEX *old_lex= thd->lex, newlex; + thd->lex= &newlex; + newlex.current_select= NULL; + lex_start(thd); + int res= ha_create_table_from_engine(thd, db, table_name); + thd->lex= old_lex; + return res; +} -int ndbcluster_find_files(THD *thd,const char *db,const char *path, - const char *wild, bool dir, List<char> *files) +/* + find all tables in ndb and discover those needed +*/ +int ndbcluster_find_all_files(THD *thd) +{ + Ndb* ndb; + char key[FN_REFLEN]; + NDBDICT *dict; + int unhandled, retries= 5, skipped; + 
DBUG_ENTER("ndbcluster_find_all_files"); + + if (!(ndb= check_ndb_in_thd(thd))) + DBUG_RETURN(HA_ERR_NO_CONNECTION); + + dict= ndb->getDictionary(); + + LINT_INIT(unhandled); + LINT_INIT(skipped); + do + { + NdbDictionary::Dictionary::List list; + if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0) + ERR_RETURN(dict->getNdbError()); + unhandled= 0; + skipped= 0; + retries--; + for (uint i= 0 ; i < list.count ; i++) + { + NDBDICT::List::Element& elmt= list.elements[i]; + if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name)) + { + DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name)); + continue; + } + DBUG_PRINT("info", ("Found %s.%s in NDB", elmt.database, elmt.name)); + if (elmt.state != NDBOBJ::StateOnline && + elmt.state != NDBOBJ::StateBackup && + elmt.state != NDBOBJ::StateBuilding) + { + sql_print_information("NDB: skipping setup table %s.%s, in state %d", + elmt.database, elmt.name, elmt.state); + skipped++; + continue; + } + + ndb->setDatabaseName(elmt.database); + Ndb_table_guard ndbtab_g(dict, elmt.name); + const NDBTAB *ndbtab= ndbtab_g.get_table(); + if (!ndbtab) + { + if (retries == 0) + sql_print_error("NDB: failed to setup table %s.%s, error: %d, %s", + elmt.database, elmt.name, + dict->getNdbError().code, + dict->getNdbError().message); + unhandled++; + continue; + } + + if (ndbtab->getFrmLength() == 0) + continue; + + /* check if database exists */ + char *end= key + + build_table_filename(key, sizeof(key), elmt.database, "", "", 0); + if (my_access(key, F_OK)) + { + /* no such database defined, skip table */ + continue; + } + /* finalize construction of path */ + end+= tablename_to_filename(elmt.name, end, + sizeof(key)-(end-key)); + uchar *data= 0, *pack_data= 0; + size_t length, pack_length; + int discover= 0; + if (readfrm(key, &data, &length) || + packfrm(data, length, &pack_data, &pack_length)) + { + discover= 1; + sql_print_information("NDB: missing frm for %s.%s, discovering...", + elmt.database, elmt.name); + } + else if (cmp_frm(ndbtab, pack_data, pack_length)) + { + /* ndb_share reference temporary */ + NDB_SHARE *share= get_share(key, 0, FALSE); + if (share) + { + DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", + share->key, share->use_count)); + } + if (!share || get_ndb_share_state(share) != NSS_ALTERED) + { + discover= 1; + sql_print_information("NDB: mismatch in frm for %s.%s, discovering...", + elmt.database, elmt.name); + } + if (share) + { + /* ndb_share reference temporary free */ + DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", + share->key, share->use_count)); + free_share(&share); + } + } + my_free((char*) data, MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*) pack_data, MYF(MY_ALLOW_ZERO_PTR)); + + pthread_mutex_lock(&LOCK_open); + if (discover) + { + /* ToDo 4.1 database needs to be created if missing */ + if (ndb_create_table_from_engine(thd, elmt.database, elmt.name)) + { + /* ToDo 4.1 handle error */ + } + } +#ifdef HAVE_NDB_BINLOG + else + { + /* set up replication for this table */ + ndbcluster_create_binlog_setup(ndb, key, end-key, + elmt.database, elmt.name, + TRUE); + } +#endif + pthread_mutex_unlock(&LOCK_open); + } + } + while (unhandled && retries); + + DBUG_RETURN(-(skipped + unhandled)); +} + +int ndbcluster_find_files(handlerton *hton, THD *thd, + const char *db, + const char *path, + const char *wild, bool dir, List<LEX_STRING> *files) { DBUG_ENTER("ndbcluster_find_files"); DBUG_PRINT("enter", ("db: %s", db)); @@ -5408,7 +7020,7 @@ int ndbcluster_find_files(THD *thd,const char 
*db,const char *path, Ndb* ndb; char name[FN_REFLEN]; HASH ndb_tables, ok_tables; - NdbDictionary::Dictionary::List list; + NDBDICT::List list; if (!(ndb= check_ndb_in_thd(thd))) DBUG_RETURN(HA_ERR_NO_CONNECTION); @@ -5439,11 +7051,16 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, for (i= 0 ; i < list.count ; i++) { - NdbDictionary::Dictionary::List::Element& t= list.elements[i]; - DBUG_PRINT("info", ("Found %s/%s in NDB", t.database, t.name)); + NDBDICT::List::Element& elmt= list.elements[i]; + if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name)) + { + DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name)); + continue; + } + DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name)); // Add only tables that belongs to db - if (my_strcasecmp(system_charset_info, t.database, db)) + if (my_strcasecmp(system_charset_info, elmt.database, db)) continue; // Apply wildcard to list of tables in NDB @@ -5451,121 +7068,183 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, { if (lower_case_table_names) { - if (wild_case_compare(files_charset_info, t.name, wild)) + if (wild_case_compare(files_charset_info, elmt.name, wild)) continue; } - else if (wild_compare(t.name,wild,0)) + else if (wild_compare(elmt.name,wild,0)) continue; } - DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", t.name)); - my_hash_insert(&ndb_tables, (byte*)thd->strdup(t.name)); + DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", elmt.name)); + my_hash_insert(&ndb_tables, (uchar*)thd->strdup(elmt.name)); } - char *file_name; - List_iterator<char> it(*files); + LEX_STRING *file_name; + List_iterator<LEX_STRING> it(*files); List<char> delete_list; + char *file_name_str; while ((file_name=it++)) { - bool file_on_disk= false; - DBUG_PRINT("info", ("%s", file_name)); - if (hash_search(&ndb_tables, file_name, strlen(file_name))) + bool file_on_disk= FALSE; + DBUG_PRINT("info", ("%s", file_name->str)); + if (hash_search(&ndb_tables, (uchar*) file_name->str, file_name->length)) { - DBUG_PRINT("info", ("%s existed in NDB _and_ on disk ", file_name)); - file_on_disk= true; + build_table_filename(name, sizeof(name), db, file_name->str, reg_ext, 0); + if (my_access(name, F_OK)) + { + pthread_mutex_lock(&LOCK_open); + DBUG_PRINT("info", ("Table %s listed and need discovery", + file_name->str)); + if (ndb_create_table_from_engine(thd, db, file_name->str)) + { + pthread_mutex_unlock(&LOCK_open); + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_TABLE_EXISTS_ERROR, + "Discover of table %s.%s failed", + db, file_name->str); + continue; + } + pthread_mutex_unlock(&LOCK_open); + } + DBUG_PRINT("info", ("%s existed in NDB _and_ on disk ", file_name->str)); + file_on_disk= TRUE; } // Check for .ndb file with this name - (void)strxnmov(name, FN_REFLEN, - mysql_data_home,"/",db,"/",file_name,ha_ndb_ext,NullS); + build_table_filename(name, sizeof(name), db, file_name->str, ha_ndb_ext, 0); DBUG_PRINT("info", ("Check access for %s", name)); - if (access(name, F_OK)) + if (my_access(name, F_OK)) { DBUG_PRINT("info", ("%s did not exist on disk", name)); // .ndb file did not exist on disk, another table type if (file_on_disk) { - // Ignore this ndb table - gptr record= hash_search(&ndb_tables, file_name, strlen(file_name)); + // Ignore this ndb table + uchar *record= hash_search(&ndb_tables, (uchar*) file_name->str, + file_name->length); DBUG_ASSERT(record); hash_delete(&ndb_tables, record); push_warning_printf(current_thd, 
MYSQL_ERROR::WARN_LEVEL_WARN, ER_TABLE_EXISTS_ERROR, "Local table %s.%s shadows ndb table", - db, file_name); + db, file_name->str); } continue; } if (file_on_disk) { // File existed in NDB and as frm file, put in ok_tables list - my_hash_insert(&ok_tables, (byte*)file_name); + my_hash_insert(&ok_tables, (uchar*) file_name->str); continue; } DBUG_PRINT("info", ("%s existed on disk", name)); // The .ndb file exists on disk, but it's not in list of tables in ndb // Verify that handler agrees table is gone. - if (ndbcluster_table_exists_in_engine(thd, db, file_name) == HA_ERR_NO_SUCH_TABLE) + if (ndbcluster_table_exists_in_engine(hton, thd, db, file_name->str) == + HA_ERR_NO_SUCH_TABLE) { - DBUG_PRINT("info", ("NDB says %s does not exists", file_name)); + DBUG_PRINT("info", ("NDB says %s does not exists", file_name->str)); it.remove(); // Put in list of tables to remove from disk - delete_list.push_back(thd->strdup(file_name)); + delete_list.push_back(thd->strdup(file_name->str)); } } +#ifdef HAVE_NDB_BINLOG + /* setup logging to binlog for all discovered tables */ + { + char *end, *end1= name + + build_table_filename(name, sizeof(name), db, "", "", 0); + for (i= 0; i < ok_tables.records; i++) + { + file_name_str= (char*)hash_element(&ok_tables, i); + end= end1 + + tablename_to_filename(file_name_str, end1, sizeof(name) - (end1 - name)); + pthread_mutex_lock(&LOCK_open); + ndbcluster_create_binlog_setup(ndb, name, end-name, + db, file_name_str, TRUE); + pthread_mutex_unlock(&LOCK_open); + } + } +#endif + // Check for new files to discover DBUG_PRINT("info", ("Checking for new files to discover")); List<char> create_list; for (i= 0 ; i < ndb_tables.records ; i++) { - file_name= hash_element(&ndb_tables, i); - if (!hash_search(&ok_tables, file_name, strlen(file_name))) + file_name_str= (char*) hash_element(&ndb_tables, i); + if (!hash_search(&ok_tables, (uchar*) file_name_str, strlen(file_name_str))) { - DBUG_PRINT("info", ("%s must be discovered", file_name)); - // File is in list of ndb tables and not in ok_tables - // This table need to be created - create_list.push_back(thd->strdup(file_name)); + build_table_filename(name, sizeof(name), db, file_name_str, reg_ext, 0); + if (my_access(name, F_OK)) + { + DBUG_PRINT("info", ("%s must be discovered", file_name_str)); + // File is in list of ndb tables and not in ok_tables + // This table need to be created + create_list.push_back(thd->strdup(file_name_str)); + } } } - // Lock mutex before deleting and creating frm files - pthread_mutex_lock(&LOCK_open); - if (!global_read_lock) { // Delete old files List_iterator_fast<char> it3(delete_list); - while ((file_name=it3++)) + while ((file_name_str= it3++)) { - DBUG_PRINT("info", ("Remove table %s/%s", db, file_name)); + DBUG_PRINT("info", ("Remove table %s/%s", db, file_name_str)); // Delete the table and all related files TABLE_LIST table_list; bzero((char*) &table_list,sizeof(table_list)); table_list.db= (char*) db; - table_list.alias= table_list.table_name= (char*)file_name; + table_list.alias= table_list.table_name= (char*)file_name_str; (void)mysql_rm_table_part2(thd, &table_list, - /* if_exists */ FALSE, - /* drop_temporary */ FALSE, - /* drop_view */ FALSE, - /* dont_log_query*/ TRUE); + FALSE, /* if_exists */ + FALSE, /* drop_temporary */ + FALSE, /* drop_view */ + TRUE /* dont_log_query*/); + /* Clear error message that is returned when table is deleted */ thd->clear_error(); } } + pthread_mutex_lock(&LOCK_open); // Create new files List_iterator_fast<char> it2(create_list); - while 
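ndbcluster_find_files effectively reconciles three views of the database: tables listed in NDB, frm files on disk, and stale .ndb files. Tables present only in NDB end up on the create (discover) list, while tables that still have files locally but are gone from NDB end up on the delete list. A simplified sketch of that reconciliation with plain containers (the ok_tables/ndb_tables HASH bookkeeping and the binlog setup are left out):

#include <set>
#include <string>
#include <vector>

struct Reconciliation {
  std::vector<std::string> to_discover;  // create_list in the patch
  std::vector<std::string> to_delete;    // delete_list in the patch
};

Reconciliation reconcile(const std::set<std::string> &in_ndb,
                         const std::set<std::string> &frm_on_disk,
                         const std::set<std::string> &ndb_file_on_disk)
{
  Reconciliation r;
  for (const std::string &t : in_ndb)
    if (!frm_on_disk.count(t))
      r.to_discover.push_back(t);        // in NDB, no frm locally: discover it
  for (const std::string &t : ndb_file_on_disk)
    if (!in_ndb.count(t))
      r.to_delete.push_back(t);          // local leftovers for a dropped table
  return r;
}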
((file_name=it2++)) + while ((file_name_str=it2++)) { - DBUG_PRINT("info", ("Table %s need discovery", file_name)); - if (ha_create_table_from_engine(thd, db, file_name) == 0) - files->push_back(thd->strdup(file_name)); + DBUG_PRINT("info", ("Table %s need discovery", file_name_str)); + if (ndb_create_table_from_engine(thd, db, file_name_str) == 0) + { + LEX_STRING *tmp_file_name= 0; + tmp_file_name= thd->make_lex_string(tmp_file_name, file_name_str, + strlen(file_name_str), TRUE); + files->push_back(tmp_file_name); + } } - pthread_mutex_unlock(&LOCK_open); - + pthread_mutex_unlock(&LOCK_open); + hash_free(&ok_tables); hash_free(&ndb_tables); + + // Delete schema file from files + if (!strcmp(db, NDB_REP_DB)) + { + uint count = 0; + while (count++ < files->elements) + { + file_name = (LEX_STRING *)files->pop(); + if (!strcmp(file_name->str, NDB_SCHEMA_TABLE)) + { + DBUG_PRINT("info", ("skip %s.%s table, it should be hidden to user", + NDB_REP_DB, NDB_SCHEMA_TABLE)); + continue; + } + files->push_back(file_name); + } + } } // extra bracket to avoid gcc 2.95.3 warning DBUG_RETURN(0); } @@ -5579,17 +7258,73 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, /* Call back after cluster connect */ static int connect_callback() { + pthread_mutex_lock(&LOCK_ndb_util_thread); update_status_variables(g_ndb_cluster_connection); + + uint node_id, i= 0; + Ndb_cluster_connection_node_iter node_iter; + memset((void *)g_node_id_map, 0xFFFF, sizeof(g_node_id_map)); + while ((node_id= g_ndb_cluster_connection->get_next_node(node_iter))) + g_node_id_map[node_id]= i++; + + pthread_cond_signal(&COND_ndb_util_thread); + pthread_mutex_unlock(&LOCK_ndb_util_thread); return 0; } -bool ndbcluster_init() +extern int ndb_dictionary_is_mysqld; +extern pthread_mutex_t LOCK_plugin; + +static int ndbcluster_init(void *p) { int res; DBUG_ENTER("ndbcluster_init"); - if (have_ndbcluster != SHOW_OPTION_YES) - goto ndbcluster_init_error; + if (ndbcluster_inited) + DBUG_RETURN(FALSE); + + /* + Below we create new THD's. They'll need LOCK_plugin, but it's taken now by + plugin initialization code. Release it to avoid deadlocks. It's safe, as + there're no threads that may concurrently access plugin control structures. 
+ */ + pthread_mutex_unlock(&LOCK_plugin); + + pthread_mutex_init(&ndbcluster_mutex,MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_ndb_util_thread, MY_MUTEX_INIT_FAST); + pthread_cond_init(&COND_ndb_util_thread, NULL); + pthread_cond_init(&COND_ndb_util_ready, NULL); + ndb_util_thread_running= -1; + ndbcluster_terminating= 0; + ndb_dictionary_is_mysqld= 1; + ndbcluster_hton= (handlerton *)p; + + { + handlerton *h= ndbcluster_hton; + h->state= SHOW_OPTION_YES; + h->db_type= DB_TYPE_NDBCLUSTER; + h->close_connection= ndbcluster_close_connection; + h->commit= ndbcluster_commit; + h->rollback= ndbcluster_rollback; + h->create= ndbcluster_create_handler; /* Create a new handler */ + h->drop_database= ndbcluster_drop_database; /* Drop a database */ + h->panic= ndbcluster_end; /* Panic call */ + h->show_status= ndbcluster_show_status; /* Show status */ + h->alter_tablespace= ndbcluster_alter_tablespace; /* Show status */ + h->partition_flags= ndbcluster_partition_flags; /* Partition flags */ + h->alter_table_flags=ndbcluster_alter_table_flags; /* Alter table flags */ + h->fill_files_table= ndbcluster_fill_files_table; +#ifdef HAVE_NDB_BINLOG + ndbcluster_binlog_init_handlerton(); +#endif + h->flags= HTON_CAN_RECREATE | HTON_TEMPORARY_NOT_SUPPORTED; + h->discover= ndbcluster_discover; + h->find_files= ndbcluster_find_files; + h->table_exists_in_engine= ndbcluster_table_exists_in_engine; + } + + // Initialize ndb interface + ndb_init_internal(); // Set connectstring if specified if (opt_ndbcluster_connectstring != 0) @@ -5617,7 +7352,6 @@ bool ndbcluster_init() my_errno= HA_ERR_OUT_OF_MEM; goto ndbcluster_init_error; } - g_ndb->getDictionary()->set_local_table_data_size(sizeof(Ndb_local_table_statistics)); if (g_ndb->init() != 0) { ERR_PRINT (g_ndb->getNdbError()); @@ -5659,10 +7393,11 @@ bool ndbcluster_init() (void) hash_init(&ndbcluster_open_tables,system_charset_info,32,0,0, (hash_get_key) ndbcluster_get_key,0,0); - pthread_mutex_init(&ndbcluster_mutex,MY_MUTEX_INIT_FAST); - pthread_mutex_init(&LOCK_ndb_util_thread, MY_MUTEX_INIT_FAST); - pthread_cond_init(&COND_ndb_util_thread, NULL); - +#ifdef HAVE_NDB_BINLOG + /* start the ndb injector thread */ + if (ndbcluster_binlog_start()) + goto ndbcluster_init_error; +#endif /* HAVE_NDB_BINLOG */ ndb_cache_check_time = opt_ndb_cache_check_time; // Create utility thread @@ -5674,9 +7409,29 @@ bool ndbcluster_init() pthread_mutex_destroy(&ndbcluster_mutex); pthread_mutex_destroy(&LOCK_ndb_util_thread); pthread_cond_destroy(&COND_ndb_util_thread); + pthread_cond_destroy(&COND_ndb_util_ready); goto ndbcluster_init_error; } + + /* Wait for the util thread to start */ + pthread_mutex_lock(&LOCK_ndb_util_thread); + while (ndb_util_thread_running < 0) + pthread_cond_wait(&COND_ndb_util_ready, &LOCK_ndb_util_thread); + pthread_mutex_unlock(&LOCK_ndb_util_thread); + if (!ndb_util_thread_running) + { + DBUG_PRINT("error", ("ndb utility thread exited prematurely")); + hash_free(&ndbcluster_open_tables); + pthread_mutex_destroy(&ndbcluster_mutex); + pthread_mutex_destroy(&LOCK_ndb_util_thread); + pthread_cond_destroy(&COND_ndb_util_thread); + pthread_cond_destroy(&COND_ndb_util_ready); + goto ndbcluster_init_error; + } + + pthread_mutex_lock(&LOCK_plugin); + ndbcluster_inited= 1; DBUG_RETURN(FALSE); @@ -5687,29 +7442,48 @@ ndbcluster_init_error: if (g_ndb_cluster_connection) delete g_ndb_cluster_connection; g_ndb_cluster_connection= NULL; - have_ndbcluster= SHOW_OPTION_DISABLED; // If we couldn't use handler - DBUG_RETURN(TRUE); -} + ndbcluster_hton->state= 
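ndbcluster_init now starts the utility thread and blocks on COND_ndb_util_ready until ndb_util_thread_running leaves its initial -1 state, failing initialization if the thread exited immediately. A sketch of that startup handshake using std::thread and a condition variable as stand-ins for the pthread primitives in the patch:

#include <condition_variable>
#include <mutex>
#include <thread>

std::mutex lock_util;                 // stands in for LOCK_ndb_util_thread
std::condition_variable cond_ready;   // stands in for COND_ndb_util_ready
int util_thread_running = -1;         // -1 starting, 1 running, 0 exited

void util_thread()
{
  {
    std::lock_guard<std::mutex> g(lock_util);
    util_thread_running = 1;          // announce a successful start
  }
  cond_ready.notify_all();
  // ... periodic cache/statistics work would follow here ...
}

bool init_engine()
{
  std::thread t(util_thread);
  t.detach();
  std::unique_lock<std::mutex> g(lock_util);
  cond_ready.wait(g, [] { return util_thread_running >= 0; });
  return util_thread_running != 0;    // 0 means it exited prematurely
}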
SHOW_OPTION_DISABLED; // If we couldn't use handler + pthread_mutex_lock(&LOCK_plugin); -/* - End use of the NDB Cluster table handler - - free all global variables allocated by - ndbcluster_init() -*/ + DBUG_RETURN(TRUE); +} -bool ndbcluster_end() +static int ndbcluster_end(handlerton *hton, ha_panic_function type) { DBUG_ENTER("ndbcluster_end"); if (!ndbcluster_inited) DBUG_RETURN(0); + ndbcluster_inited= 0; + + /* wait for util thread to finish */ + sql_print_information("Stopping Cluster Utility thread"); + pthread_mutex_lock(&LOCK_ndb_util_thread); + ndbcluster_terminating= 1; + pthread_cond_signal(&COND_ndb_util_thread); + while (ndb_util_thread_running > 0) + pthread_cond_wait(&COND_ndb_util_ready, &LOCK_ndb_util_thread); + pthread_mutex_unlock(&LOCK_ndb_util_thread); - // Kill ndb utility thread - (void) pthread_mutex_lock(&LOCK_ndb_util_thread); - DBUG_PRINT("exit",("killing ndb util thread: %lx", ndb_util_thread)); - (void) pthread_cond_signal(&COND_ndb_util_thread); - (void) pthread_mutex_unlock(&LOCK_ndb_util_thread); + +#ifdef HAVE_NDB_BINLOG + { + pthread_mutex_lock(&ndbcluster_mutex); + while (ndbcluster_open_tables.records) + { + NDB_SHARE *share= + (NDB_SHARE*) hash_element(&ndbcluster_open_tables, 0); +#ifndef DBUG_OFF + fprintf(stderr, "NDB: table share %s with use_count %d not freed\n", + share->key, share->use_count); +#endif + ndbcluster_real_free_share(&share); + } + pthread_mutex_unlock(&ndbcluster_mutex); + } +#endif + hash_free(&ndbcluster_open_tables); if (g_ndb) { @@ -5732,14 +7506,29 @@ bool ndbcluster_end() delete g_ndb_cluster_connection; g_ndb_cluster_connection= NULL; - hash_free(&ndbcluster_open_tables); + // cleanup ndb interface + ndb_end_internal(); + pthread_mutex_destroy(&ndbcluster_mutex); pthread_mutex_destroy(&LOCK_ndb_util_thread); pthread_cond_destroy(&COND_ndb_util_thread); - ndbcluster_inited= 0; + pthread_cond_destroy(&COND_ndb_util_ready); DBUG_RETURN(0); } +void ha_ndbcluster::print_error(int error, myf errflag) +{ + DBUG_ENTER("ha_ndbcluster::print_error"); + DBUG_PRINT("enter", ("error: %d", error)); + + if (error == HA_ERR_NO_PARTITION_FOUND) + m_part_info->print_no_partition_found(table); + else + handler::print_error(error, errflag); + DBUG_VOID_RETURN; +} + + /* Static error print function called from static handler method ndbcluster_commit @@ -5749,11 +7538,13 @@ bool ndbcluster_end() void ndbcluster_print_error(int error, const NdbOperation *error_op) { DBUG_ENTER("ndbcluster_print_error"); - TABLE tab; + TABLE_SHARE share; const char *tab_name= (error_op) ? 
error_op->getTableName() : ""; - tab.alias= (char *) tab_name; - ha_ndbcluster error_handler(&tab); - tab.file= &error_handler; + share.db.str= (char*) ""; + share.db.length= 0; + share.table_name.str= (char *) tab_name; + share.table_name.length= strlen(tab_name); + ha_ndbcluster error_handler(ndbcluster_hton, &share); error_handler.print_error(error, MYF(0)); DBUG_VOID_RETURN; } @@ -5764,8 +7555,10 @@ void ndbcluster_print_error(int error, const NdbOperation *error_op) */ void ha_ndbcluster::set_dbname(const char *path_name, char *dbname) { - char *end, *ptr; - + char *end, *ptr, *tmp_name; + char tmp_buff[FN_REFLEN]; + + tmp_name= tmp_buff; /* Scan name from the end */ ptr= strend(path_name)-1; while (ptr >= path_name && *ptr != '\\' && *ptr != '/') { @@ -5777,18 +7570,19 @@ void ha_ndbcluster::set_dbname(const char *path_name, char *dbname) ptr--; } uint name_len= end - ptr; - memcpy(dbname, ptr + 1, name_len); - dbname[name_len]= '\0'; + memcpy(tmp_name, ptr + 1, name_len); + tmp_name[name_len]= '\0'; #ifdef __WIN__ /* Put to lower case */ - ptr= dbname; + ptr= tmp_name; while (*ptr != '\0') { *ptr= tolower(*ptr); ptr++; } #endif + filename_to_tablename(tmp_name, dbname, FN_REFLEN); } /* @@ -5807,8 +7601,10 @@ void ha_ndbcluster::set_dbname(const char *path_name) void ha_ndbcluster::set_tabname(const char *path_name, char * tabname) { - char *end, *ptr; - + char *end, *ptr, *tmp_name; + char tmp_buff[FN_REFLEN]; + + tmp_name= tmp_buff; /* Scan name from the end */ end= strend(path_name)-1; ptr= end; @@ -5816,17 +7612,18 @@ ha_ndbcluster::set_tabname(const char *path_name, char * tabname) ptr--; } uint name_len= end - ptr; - memcpy(tabname, ptr + 1, end - ptr); - tabname[name_len]= '\0'; + memcpy(tmp_name, ptr + 1, end - ptr); + tmp_name[name_len]= '\0'; #ifdef __WIN__ /* Put to lower case */ - ptr= tabname; + ptr= tmp_name; while (*ptr != '\0') { *ptr= tolower(*ptr); ptr++; } #endif + filename_to_tablename(tmp_name, tabname, FN_REFLEN); } /* @@ -5861,19 +7658,104 @@ ha_ndbcluster::records_in_range(uint inx, key_range *min_key, (max_key && max_key->length == key_length))) DBUG_RETURN(1); + if ((idx_type == PRIMARY_KEY_ORDERED_INDEX || + idx_type == UNIQUE_ORDERED_INDEX || + idx_type == ORDERED_INDEX) && + m_index[inx].index_stat != NULL) + { + NDB_INDEX_DATA& d=m_index[inx]; + const NDBINDEX* index= d.index; + Ndb* ndb=get_ndb(); + NdbTransaction* trans=NULL; + NdbIndexScanOperation* op=NULL; + int res=0; + Uint64 rows; + + do + { + // We must provide approx table rows + Uint64 table_rows=0; + Ndb_local_table_statistics *ndb_info= m_table_info; + if (ndb_info->records != ~(ha_rows)0 && ndb_info->records != 0) + { + table_rows = ndb_info->records; + DBUG_PRINT("info", ("use info->records: %lu", (ulong) table_rows)); + } + else + { + Ndb_statistics stat; + if ((res=ndb_get_table_statistics(this, TRUE, ndb, m_table, &stat))) + break; + table_rows=stat.row_count; + DBUG_PRINT("info", ("use db row_count: %lu", (ulong) table_rows)); + if (table_rows == 0) { + // Problem if autocommit=0 +#ifdef ndb_get_table_statistics_uses_active_trans + rows=0; + break; +#endif + } + } + + // Define scan op for the range + if ((trans=m_active_trans) == NULL || + trans->commitStatus() != NdbTransaction::Started) + { + DBUG_PRINT("info", ("no active trans")); + if (! (trans=ndb->startTransaction())) + ERR_BREAK(ndb->getNdbError(), res); + } + if (! 
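Both set_dbname() and set_tabname() scan the path backwards over the last two separators, lowercase the result on Windows, and then run it through filename_to_tablename(). A sketch of the same backwards scan on a plain string, leaving out the filename_to_tablename() decoding of encoded characters:

#include <cctype>
#include <cstddef>
#include <string>

// Split a path such as "./test/t1" into ("test", "t1").
void split_path(const std::string &path, std::string &dbname, std::string &tabname)
{
  auto is_sep = [](char c) { return c == '/' || c == '\\'; };
  std::size_t end = path.size();
  std::size_t pos = end;
  while (pos > 0 && !is_sep(path[pos - 1])) --pos;   // scan the table name
  tabname = path.substr(pos, end - pos);
  if (pos > 0) --pos;                                // step over the separator
  end = pos;
  while (pos > 0 && !is_sep(path[pos - 1])) --pos;   // scan the database name
  dbname = path.substr(pos, end - pos);
#ifdef _WIN32
  for (char &c : dbname)  c = (char)std::tolower((unsigned char)c);
  for (char &c : tabname) c = (char)std::tolower((unsigned char)c);
#endif
}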
(op=trans->getNdbIndexScanOperation(index, (NDBTAB*)m_table))) + ERR_BREAK(trans->getNdbError(), res); + if ((op->readTuples(NdbOperation::LM_CommittedRead)) == -1) + ERR_BREAK(op->getNdbError(), res); + const key_range *keys[2]={ min_key, max_key }; + if ((res=set_bounds(op, inx, TRUE, keys)) != 0) + break; + + // Decide if db should be contacted + int flags=0; + if (d.index_stat_query_count < d.index_stat_cache_entries || + (d.index_stat_update_freq != 0 && + d.index_stat_query_count % d.index_stat_update_freq == 0)) + { + DBUG_PRINT("info", ("force stat from db")); + flags|=NdbIndexStat::RR_UseDb; + } + if (d.index_stat->records_in_range(index, op, table_rows, &rows, flags) == -1) + ERR_BREAK(d.index_stat->getNdbError(), res); + d.index_stat_query_count++; + } while (0); + + if (trans != m_active_trans && rows == 0) + rows = 1; + if (trans != m_active_trans && trans != NULL) + ndb->closeTransaction(trans); + if (res != 0) + DBUG_RETURN(HA_POS_ERROR); + DBUG_RETURN(rows); + } + DBUG_RETURN(10); /* Good guess when you don't know anything */ } -ulong ha_ndbcluster::table_flags(void) const +ulonglong ha_ndbcluster::table_flags(void) const { + THD *thd= current_thd; + ulonglong f= m_table_flags; if (m_ha_not_exact_count) - return m_table_flags | HA_NOT_EXACT_COUNT; - else - return m_table_flags; + f= f & ~HA_STATS_RECORDS_IS_EXACT; + /* + To allow for logging of ndb tables during stmt based logging; + flag cabablity, but also turn off flag for OWN_BINLOGGING + */ + if (thd->variables.binlog_format == BINLOG_FORMAT_STMT) + f= (f | HA_BINLOG_STMT_CAPABLE) & ~HA_HAS_OWN_BINLOGGING; + return f; } const char * ha_ndbcluster::table_type() const { - return("ndbcluster"); + return("NDBCLUSTER"); } uint ha_ndbcluster::max_supported_record_length() const { @@ -5903,10 +7785,6 @@ bool ha_ndbcluster::low_byte_first() const return TRUE; #endif } -bool ha_ndbcluster::has_transactions() -{ - return TRUE; -} const char* ha_ndbcluster::index_type(uint key_number) { switch (get_index_type(key_number)) { @@ -5931,23 +7809,25 @@ uint8 ha_ndbcluster::table_cache_type() uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, Uint64 *commit_count) { - DBUG_ENTER("ndb_get_commitcount"); - char name[FN_REFLEN]; NDB_SHARE *share; - (void)strxnmov(name, FN_REFLEN, "./",dbname,"/",tabname,NullS); + DBUG_ENTER("ndb_get_commitcount"); + + build_table_filename(name, sizeof(name), dbname, tabname, "", 0); DBUG_PRINT("enter", ("name: %s", name)); pthread_mutex_lock(&ndbcluster_mutex); if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables, - (byte*) name, + (uchar*) name, strlen(name)))) { pthread_mutex_unlock(&ndbcluster_mutex); - DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables", - name)); + DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables", name)); DBUG_RETURN(1); } + /* ndb_share reference temporary, free below */ share->use_count++; + DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", + share->key, share->use_count)); pthread_mutex_unlock(&ndbcluster_mutex); pthread_mutex_lock(&share->mutex); @@ -5962,7 +7842,10 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, DBUG_PRINT("info", ("Getting commit_count: %s from share", llstr(share->commit_count, buff))); pthread_mutex_unlock(&share->mutex); - free_share(share); + /* ndb_share reference temporary free */ + DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", + share->key, share->use_count)); + free_share(&share); DBUG_RETURN(0); } } @@ -5978,10 +7861,17 @@ uint ndb_get_commitcount(THD *thd, char 
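table_flags() becomes session dependent: HA_STATS_RECORDS_IS_EXACT is cleared when exact counting is off, and under statement-based binlogging the handler advertises HA_BINLOG_STMT_CAPABLE while dropping HA_HAS_OWN_BINLOGGING. A sketch of that flag arithmetic; the bit values are hypothetical, the real constants live in the server headers:

#include <cstdint>

enum : std::uint64_t {
  HA_STATS_RECORDS_IS_EXACT = 1ULL << 0,   // placeholder values
  HA_BINLOG_STMT_CAPABLE    = 1ULL << 1,
  HA_HAS_OWN_BINLOGGING     = 1ULL << 2,
};

std::uint64_t effective_table_flags(std::uint64_t base_flags,
                                    bool not_exact_count,
                                    bool binlog_format_stmt)
{
  std::uint64_t f = base_flags;
  if (not_exact_count)
    f &= ~HA_STATS_RECORDS_IS_EXACT;       // row counts are only estimates
  if (binlog_format_stmt)
    f = (f | HA_BINLOG_STMT_CAPABLE) & ~HA_HAS_OWN_BINLOGGING;
  return f;
}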
*dbname, char *tabname, pthread_mutex_unlock(&share->mutex); struct Ndb_statistics stat; - if (ndb_get_table_statistics(NULL, false, ndb, tabname, &stat)) { - free_share(share); - DBUG_RETURN(1); + Ndb_table_guard ndbtab_g(ndb->getDictionary(), tabname); + if (ndbtab_g.get_table() == 0 + || ndb_get_table_statistics(NULL, FALSE, ndb, ndbtab_g.get_table(), &stat)) + { + /* ndb_share reference temporary free */ + DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", + share->key, share->use_count)); + free_share(&share); + DBUG_RETURN(1); + } } pthread_mutex_lock(&share->mutex); @@ -6001,7 +7891,10 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, *commit_count= 0; } pthread_mutex_unlock(&share->mutex); - free_share(share); + /* ndb_share reference temporary free */ + DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", + share->key, share->use_count)); + free_share(&share); DBUG_RETURN(0); } @@ -6124,14 +8017,14 @@ ha_ndbcluster::register_query_cache_table(THD *thd, if (!is_autocommit) { - DBUG_PRINT("exit", ("Can't register table during transaction")) + DBUG_PRINT("exit", ("Can't register table during transaction")); DBUG_RETURN(FALSE); } if (ndb_get_commitcount(thd, m_dbname, m_tabname, &commit_count)) { *engine_data= 0; - DBUG_PRINT("exit", ("Error, could not get commitcount")) + DBUG_PRINT("exit", ("Error, could not get commitcount")); DBUG_RETURN(FALSE); } *engine_data= commit_count; @@ -6149,192 +8042,421 @@ ha_ndbcluster::register_query_cache_table(THD *thd, data we want to or can share. */ -static byte* ndbcluster_get_key(NDB_SHARE *share,uint *length, +static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length, my_bool not_used __attribute__((unused))) { - *length=share->table_name_length; - return (byte*) share->table_name; + *length= share->key_length; + return (uchar*) share->key; +} + + +#ifndef DBUG_OFF + +static void print_share(const char* where, NDB_SHARE* share) +{ + fprintf(DBUG_FILE, + "%s %s.%s: use_count: %u, commit_count: %lu\n", + where, share->db, share->table_name, share->use_count, + (ulong) share->commit_count); + fprintf(DBUG_FILE, + " - key: %s, key_length: %d\n", + share->key, share->key_length); + +#ifdef HAVE_NDB_BINLOG + if (share->table) + fprintf(DBUG_FILE, + " - share->table: %p %s.%s\n", + share->table, share->table->s->db.str, + share->table->s->table_name.str); +#endif +} + + +static void print_ndbcluster_open_tables() +{ + DBUG_LOCK_FILE; + fprintf(DBUG_FILE, ">ndbcluster_open_tables\n"); + for (uint i= 0; i < ndbcluster_open_tables.records; i++) + print_share("", + (NDB_SHARE*)hash_element(&ndbcluster_open_tables, i)); + fprintf(DBUG_FILE, "<ndbcluster_open_tables\n"); + DBUG_UNLOCK_FILE; } -static NDB_SHARE* get_share(const char *table_name) +#endif + + +#define dbug_print_open_tables() \ + DBUG_EXECUTE("info", \ + print_ndbcluster_open_tables();); + +#define dbug_print_share(t, s) \ + DBUG_LOCK_FILE; \ + DBUG_EXECUTE("info", \ + print_share((t), (s));); \ + DBUG_UNLOCK_FILE; + + +#ifdef HAVE_NDB_BINLOG +/* + For some reason a share is still around, try to salvage the situation + by closing all cached tables. If the share still exists, there is an + error somewhere but only report this to the error log. Keep this + "trailing share" but rename it since there are still references to it + to avoid segmentation faults. There is a risk that the memory for + this trailing share leaks. 
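ndb_get_commitcount() serves the commit count from the NDB_SHARE when a usable value is cached there, and otherwise falls back to a table-statistics round trip and refreshes the share; the result feeds the query-cache validity check. A simplified sketch of that cache-or-fetch pattern, with a caller-supplied fetch function standing in for ndb_get_table_statistics():

#include <cstdint>
#include <mutex>

struct Share {
  std::mutex mutex;
  std::uint64_t commit_count = 0;   // 0 means "not known to be valid"
};

// fetch_from_ndb stands in for the statistics scan; returns false on error.
bool get_commitcount(Share &share, bool cache_usable,
                     bool (*fetch_from_ndb)(std::uint64_t &),
                     std::uint64_t &out)
{
  {
    std::lock_guard<std::mutex> g(share.mutex);
    if (cache_usable && share.commit_count != 0) {
      out = share.commit_count;      // served straight from the share
      return true;
    }
  }
  std::uint64_t fresh;
  if (!fetch_from_ndb(fresh))
    return false;                    // statistics scan failed
  std::lock_guard<std::mutex> g(share.mutex);
  share.commit_count = fresh;        // refresh the cached value
  out = fresh;
  return true;
}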
+ + Must be called with previous pthread_mutex_lock(&ndbcluster_mutex) +*/ +int handle_trailing_share(NDB_SHARE *share) { - NDB_SHARE *share; + THD *thd= current_thd; + static ulong trailing_share_id= 0; + DBUG_ENTER("handle_trailing_share"); + + /* ndb_share reference temporary, free below */ + ++share->use_count; + DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", + share->key, share->use_count)); + pthread_mutex_unlock(&ndbcluster_mutex); + + TABLE_LIST table_list; + bzero((char*) &table_list,sizeof(table_list)); + table_list.db= share->db; + table_list.alias= table_list.table_name= share->table_name; + safe_mutex_assert_owner(&LOCK_open); + close_cached_tables(thd, 0, &table_list, TRUE); + pthread_mutex_lock(&ndbcluster_mutex); - uint length=(uint) strlen(table_name); - if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables, - (byte*) table_name, - length))) + /* ndb_share reference temporary free */ + DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", + share->key, share->use_count)); + if (!--share->use_count) { - if ((share=(NDB_SHARE *) my_malloc(sizeof(*share)+length+1, - MYF(MY_WME | MY_ZEROFILL)))) - { - share->table_name_length=length; - share->table_name=(char*) (share+1); - strmov(share->table_name,table_name); - if (my_hash_insert(&ndbcluster_open_tables, (byte*) share)) - { - pthread_mutex_unlock(&ndbcluster_mutex); - my_free((gptr) share,0); - return 0; - } - thr_lock_init(&share->lock); - pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST); - share->commit_count= 0; - share->commit_count_lock= 0; + if (ndb_extra_logging) + sql_print_information("NDB_SHARE: trailing share " + "%s(connect_count: %u) " + "released by close_cached_tables at " + "connect_count: %u", + share->key, + share->connect_count, + g_ndb_cluster_connection->get_connect_count()); + ndbcluster_real_free_share(&share); + DBUG_RETURN(0); + } + + /* + share still exists, if share has not been dropped by server + release that share + */ + if (share->state != NSS_DROPPED) + { + share->state= NSS_DROPPED; + /* ndb_share reference create free */ + DBUG_PRINT("NDB_SHARE", ("%s create free use_count: %u", + share->key, share->use_count)); + --share->use_count; + + if (share->use_count == 0) + { + if (ndb_extra_logging) + sql_print_information("NDB_SHARE: trailing share " + "%s(connect_count: %u) " + "released after NSS_DROPPED check " + "at connect_count: %u", + share->key, + share->connect_count, + g_ndb_cluster_connection->get_connect_count()); + ndbcluster_real_free_share(&share); + DBUG_RETURN(0); } - else + } + + sql_print_warning("NDB_SHARE: %s already exists use_count=%d." + " Moving away for safety, but possible memleak.", + share->key, share->use_count); + dbug_print_open_tables(); + + /* + Ndb share has not been released as it should + */ +#ifdef NOT_YET + DBUG_ASSERT(FALSE); +#endif + + /* + This is probably an error. 
We can however save the situation + at the cost of a possible mem leak, by "renaming" the share + - First remove from hash + */ + hash_delete(&ndbcluster_open_tables, (uchar*) share); + + /* + now give it a new name, just a running number + if space is not enough allocate some more + */ + { + const uint min_key_length= 10; + if (share->key_length < min_key_length) { - DBUG_PRINT("error", ("Failed to alloc share")); - pthread_mutex_unlock(&ndbcluster_mutex); - sql_print_error("get_share: my_malloc(%u) failed", - (unsigned int)(sizeof(*share)+length+1)); - return 0; + share->key= (char*) alloc_root(&share->mem_root, min_key_length + 1); + share->key_length= min_key_length; } + share->key_length= + my_snprintf(share->key, min_key_length + 1, "#leak%lu", + trailing_share_id++); } - share->use_count++; + /* Keep it for possible the future trailing free */ + my_hash_insert(&ndbcluster_open_tables, (uchar*) share); - DBUG_PRINT("share", - ("table_name: %s length: %d use_count: %d commit_count: %lu", - share->table_name, share->table_name_length, share->use_count, - (ulong) share->commit_count)); - pthread_mutex_unlock(&ndbcluster_mutex); - return share; + DBUG_RETURN(0); } - -static void free_share(NDB_SHARE *share) +/* + Rename share is used during rename table. +*/ +static int rename_share(NDB_SHARE *share, const char *new_key) { + NDB_SHARE *tmp; pthread_mutex_lock(&ndbcluster_mutex); - if (!--share->use_count) + uint new_length= (uint) strlen(new_key); + DBUG_PRINT("rename_share", ("old_key: %s old__length: %d", + share->key, share->key_length)); + if ((tmp= (NDB_SHARE*) hash_search(&ndbcluster_open_tables, + (uchar*) new_key, new_length))) + handle_trailing_share(tmp); + + /* remove the share from hash */ + hash_delete(&ndbcluster_open_tables, (uchar*) share); + dbug_print_open_tables(); + + /* save old stuff if insert should fail */ + uint old_length= share->key_length; + char *old_key= share->key; + + /* + now allocate and set the new key, db etc + enough space for key, db, and table_name + */ + share->key= (char*) alloc_root(&share->mem_root, 2 * (new_length + 1)); + strmov(share->key, new_key); + share->key_length= new_length; + + if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share)) { - hash_delete(&ndbcluster_open_tables, (byte*) share); - thr_lock_delete(&share->lock); - pthread_mutex_destroy(&share->mutex); - my_free((gptr) share, MYF(0)); + // ToDo free the allocated stuff above? 
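rename_share() moves a share to its new key in ndbcluster_open_tables: a trailing share already sitting on the new key is dealt with first, the share is removed from the hash, re-keyed, and re-inserted, and if the insert fails the old key is restored. A sketch of that re-key-with-rollback step over a plain map; the real code also rewrites share->db and share->table_name and defers the binlog event rename:

#include <map>
#include <memory>
#include <string>

struct Share { std::string key; };

// Returns false and leaves the map unchanged when the target key is occupied
// or the source key is unknown (trailing-share salvage is not reproduced here).
bool rename_share(std::map<std::string, std::shared_ptr<Share>> &open_tables,
                  const std::string &old_key, const std::string &new_key)
{
  auto it = open_tables.find(old_key);
  if (it == open_tables.end())
    return false;
  if (open_tables.count(new_key))
    return false;
  std::shared_ptr<Share> share = it->second;
  open_tables.erase(it);                 // remove under the old key
  share->key = new_key;                  // re-key the share itself
  open_tables.emplace(new_key, share);   // insert under the new key
  return true;
}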
+ DBUG_PRINT("error", ("rename_share: my_hash_insert %s failed", + share->key)); + share->key= old_key; + share->key_length= old_length; + if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share)) + { + sql_print_error("rename_share: failed to recover %s", share->key); + DBUG_PRINT("error", ("rename_share: my_hash_insert %s failed", + share->key)); + } + dbug_print_open_tables(); + pthread_mutex_unlock(&ndbcluster_mutex); + return -1; } + dbug_print_open_tables(); + + share->db= share->key + new_length + 1; + ha_ndbcluster::set_dbname(new_key, share->db); + share->table_name= share->db + strlen(share->db) + 1; + ha_ndbcluster::set_tabname(new_key, share->table_name); + + dbug_print_share("rename_share:", share); + if (share->table) + { + if (share->op == 0) + { + share->table->s->db.str= share->db; + share->table->s->db.length= strlen(share->db); + share->table->s->table_name.str= share->table_name; + share->table->s->table_name.length= strlen(share->table_name); + } + } + /* else rename will be handled when the ALTER event comes */ + share->old_names= old_key; + // ToDo free old_names after ALTER EVENT + pthread_mutex_unlock(&ndbcluster_mutex); + return 0; } +#endif +/* + Increase refcount on existing share. + Always returns share and cannot fail. +*/ +NDB_SHARE *ndbcluster_get_share(NDB_SHARE *share) +{ + pthread_mutex_lock(&ndbcluster_mutex); + share->use_count++; + + dbug_print_open_tables(); + dbug_print_share("ndbcluster_get_share:", share); + pthread_mutex_unlock(&ndbcluster_mutex); + return share; +} /* - Internal representation of the frm blob - -*/ + Get a share object for key -struct frm_blob_struct -{ - struct frm_blob_header - { - uint ver; // Version of header - uint orglen; // Original length of compressed data - uint complen; // Compressed length of data, 0=uncompressed - } head; - char data[1]; -}; + Returns share for key, and increases the refcount on the share. 
+ create_if_not_exists == TRUE: + creates share if it does not alreade exist + returns 0 only due to out of memory, and then sets my_error + create_if_not_exists == FALSE: + returns 0 if share does not exist -static int packfrm(const void *data, uint len, - const void **pack_data, uint *pack_len) + have_lock == TRUE, pthread_mutex_lock(&ndbcluster_mutex) already taken +*/ + +NDB_SHARE *ndbcluster_get_share(const char *key, TABLE *table, + bool create_if_not_exists, + bool have_lock) { - int error; - ulong org_len, comp_len; - uint blob_len; - frm_blob_struct* blob; - DBUG_ENTER("packfrm"); - DBUG_PRINT("enter", ("data: 0x%lx len: %d", (long) data, len)); - - error= 1; - org_len= len; - if (my_compress((byte*)data, &org_len, &comp_len)) - { - sql_print_error("packfrm: my_compress(org_len: %u)", - (unsigned int)org_len); - goto err; - } + NDB_SHARE *share; + uint length= (uint) strlen(key); + DBUG_ENTER("ndbcluster_get_share"); + DBUG_PRINT("enter", ("key: '%s'", key)); - DBUG_PRINT("info", ("org_len: %lu comp_len: %lu", org_len, comp_len)); - DBUG_DUMP("compressed", (char*)data, org_len); - - error= 2; - blob_len= sizeof(frm_blob_struct::frm_blob_header)+org_len; - if (!(blob= (frm_blob_struct*) my_malloc(blob_len,MYF(MY_WME)))) + if (!have_lock) + pthread_mutex_lock(&ndbcluster_mutex); + if (!(share= (NDB_SHARE*) hash_search(&ndbcluster_open_tables, + (uchar*) key, + length))) { - sql_print_error("packfrm: my_malloc(%u)", blob_len); - goto err; + if (!create_if_not_exists) + { + DBUG_PRINT("error", ("get_share: %s does not exist", key)); + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); + DBUG_RETURN(0); + } + if ((share= (NDB_SHARE*) my_malloc(sizeof(*share), + MYF(MY_WME | MY_ZEROFILL)))) + { + MEM_ROOT **root_ptr= + my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC); + MEM_ROOT *old_root= *root_ptr; + init_sql_alloc(&share->mem_root, 1024, 0); + *root_ptr= &share->mem_root; // remember to reset before return + share->state= NSS_INITIAL; + /* enough space for key, db, and table_name */ + share->key= (char*) alloc_root(*root_ptr, 2 * (length + 1)); + share->key_length= length; + strmov(share->key, key); + if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share)) + { + free_root(&share->mem_root, MYF(0)); + my_free((uchar*) share, 0); + *root_ptr= old_root; + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); + DBUG_RETURN(0); + } + thr_lock_init(&share->lock); + pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST); + share->commit_count= 0; + share->commit_count_lock= 0; + share->db= share->key + length + 1; + ha_ndbcluster::set_dbname(key, share->db); + share->table_name= share->db + strlen(share->db) + 1; + ha_ndbcluster::set_tabname(key, share->table_name); +#ifdef HAVE_NDB_BINLOG + if (ndbcluster_binlog_init_share(share, table)) + { + DBUG_PRINT("error", ("get_share: %s could not init share", key)); + ndbcluster_real_free_share(&share); + *root_ptr= old_root; + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); + DBUG_RETURN(0); + } +#endif + *root_ptr= old_root; + } + else + { + DBUG_PRINT("error", ("get_share: failed to alloc share")); + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(*share)); + DBUG_RETURN(0); + } } - // Store compressed blob in machine independent format - int4store((char*)(&blob->head.ver), 1); - int4store((char*)(&blob->head.orglen), comp_len); - int4store((char*)(&blob->head.complen), org_len); - - // Copy frm data into blob, already in machine independent format - 
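ndbcluster_get_share() looks the key up under ndbcluster_mutex, optionally creates the share (key, db and table name are carved out of the share's mem_root), and always bumps use_count; its counterpart ndbcluster_free_share(), shown further down, drops the count and destroys the share when it reaches zero. A sketch of that get/free pairing with standard containers in place of the HASH and MEM_ROOT machinery:

#include <map>
#include <memory>
#include <mutex>
#include <string>

struct Share {
  std::string key;
  unsigned use_count = 0;
};

static std::mutex open_tables_mutex;   // stands in for ndbcluster_mutex
static std::map<std::string, std::unique_ptr<Share>> open_tables;

Share *get_share(const std::string &key, bool create_if_not_exists)
{
  std::lock_guard<std::mutex> g(open_tables_mutex);
  auto it = open_tables.find(key);
  if (it == open_tables.end()) {
    if (!create_if_not_exists)
      return nullptr;                                       // caller sees "no share"
    it = open_tables.emplace(key, std::make_unique<Share>()).first;
    it->second->key = key;
  }
  it->second->use_count++;              // every caller holds one reference
  return it->second.get();
}

void free_share(Share *&share)
{
  std::lock_guard<std::mutex> g(open_tables_mutex);
  if (--share->use_count == 0) {
    std::string key = share->key;       // copy: the share dies with the erase
    open_tables.erase(key);
  }
  share = nullptr;
}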
memcpy(blob->data, data, org_len); - - *pack_data= blob; - *pack_len= blob_len; - error= 0; - - DBUG_PRINT("exit", ("pack_data: 0x%lx pack_len: %d", (long) *pack_data, - *pack_len)); -err: - DBUG_RETURN(error); - + share->use_count++; + + dbug_print_open_tables(); + dbug_print_share("ndbcluster_get_share:", share); + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); + DBUG_RETURN(share); } -static int unpackfrm(const void **unpack_data, uint *unpack_len, - const void *pack_data) +void ndbcluster_real_free_share(NDB_SHARE **share) { - const frm_blob_struct *blob= (frm_blob_struct*)pack_data; - byte *data; - ulong complen, orglen, ver; - DBUG_ENTER("unpackfrm"); - DBUG_PRINT("enter", ("pack_data: 0x%lx", (long) pack_data)); + DBUG_ENTER("ndbcluster_real_free_share"); + dbug_print_share("ndbcluster_real_free_share:", *share); - complen= uint4korr((char*)&blob->head.complen); - orglen= uint4korr((char*)&blob->head.orglen); - ver= uint4korr((char*)&blob->head.ver); - - DBUG_PRINT("blob",("ver: %lu complen: %lu orglen: %lu", - ver,complen,orglen)); - DBUG_DUMP("blob->data", (char*) blob->data, complen); - - if (ver != 1) - { - sql_print_error("unpackfrm: ver != 1"); - DBUG_RETURN(1); - } - if (!(data= my_malloc(max(orglen, complen), MYF(MY_WME)))) - { - sql_print_error("unpackfrm: my_malloc(%u)", - (unsigned int)max(orglen, complen)); - DBUG_RETURN(HA_ERR_OUT_OF_MEM); - } - memcpy(data, blob->data, complen); - - if (my_uncompress(data, &complen, &orglen)) - { - my_free((char*)data, MYF(0)); - sql_print_error("unpackfrm: my_uncompress(complen: %u, orglen: %u)", - (unsigned int)complen, (unsigned int)orglen); - DBUG_RETURN(3); - } + hash_delete(&ndbcluster_open_tables, (uchar*) *share); + thr_lock_delete(&(*share)->lock); + pthread_mutex_destroy(&(*share)->mutex); + +#ifdef HAVE_NDB_BINLOG + if ((*share)->table) + { + // (*share)->table->mem_root is freed by closefrm + closefrm((*share)->table, 0); + // (*share)->table_share->mem_root is freed by free_table_share + free_table_share((*share)->table_share); +#ifndef DBUG_OFF + bzero((uchar*)(*share)->table_share, sizeof(*(*share)->table_share)); + bzero((uchar*)(*share)->table, sizeof(*(*share)->table)); + (*share)->table_share= 0; + (*share)->table= 0; +#endif + } +#endif + free_root(&(*share)->mem_root, MYF(0)); + my_free((uchar*) *share, MYF(0)); + *share= 0; - *unpack_data= data; - *unpack_len= complen; + dbug_print_open_tables(); + DBUG_VOID_RETURN; +} - DBUG_PRINT("exit", ("frmdata: 0x%lx len: %d", (long) *unpack_data, - *unpack_len)); - DBUG_RETURN(0); +void ndbcluster_free_share(NDB_SHARE **share, bool have_lock) +{ + if (!have_lock) + pthread_mutex_lock(&ndbcluster_mutex); + if ((*share)->util_lock == current_thd) + (*share)->util_lock= 0; + if (!--(*share)->use_count) + { + ndbcluster_real_free_share(share); + } + else + { + dbug_print_open_tables(); + dbug_print_share("ndbcluster_free_share:", *share); + } + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); } + static int -ndb_get_table_statistics(ha_ndbcluster* file, bool report_error, Ndb* ndb, - const char* table, +ndb_get_table_statistics(ha_ndbcluster* file, bool report_error, Ndb* ndb, const NDBTAB *ndbtab, struct Ndb_statistics * ndbstat) { NdbTransaction* pTrans; @@ -6346,11 +8468,13 @@ ndb_get_table_statistics(ha_ndbcluster* file, bool report_error, Ndb* ndb, char buff[22], buff2[22], buff3[22], buff4[22]; #endif DBUG_ENTER("ndb_get_table_statistics"); - DBUG_PRINT("enter", ("table: %s", table)); + DBUG_PRINT("enter", ("table: %s", ndbtab->getName())); + + 
DBUG_ASSERT(ndbtab != 0); do { - Uint64 rows, commits, mem; + Uint64 rows, commits, fixed_mem, var_mem; Uint32 size; Uint32 count= 0; Uint64 sum_rows= 0; @@ -6366,7 +8490,7 @@ ndb_get_table_statistics(ha_ndbcluster* file, bool report_error, Ndb* ndb, goto retry; } - if ((pOp= pTrans->getNdbScanOperation(table)) == NULL) + if ((pOp= pTrans->getNdbScanOperation(ndbtab)) == NULL) { error= pTrans->getNdbError(); goto retry; @@ -6387,10 +8511,13 @@ ndb_get_table_statistics(ha_ndbcluster* file, bool report_error, Ndb* ndb, pOp->getValue(NdbDictionary::Column::ROW_COUNT, (char*)&rows); pOp->getValue(NdbDictionary::Column::COMMIT_COUNT, (char*)&commits); pOp->getValue(NdbDictionary::Column::ROW_SIZE, (char*)&size); - pOp->getValue(NdbDictionary::Column::FRAGMENT_MEMORY, (char*)&mem); + pOp->getValue(NdbDictionary::Column::FRAGMENT_FIXED_MEMORY, + (char*)&fixed_mem); + pOp->getValue(NdbDictionary::Column::FRAGMENT_VARSIZED_MEMORY, + (char*)&var_mem); if (pTrans->execute(NdbTransaction::NoCommit, - NdbTransaction::AbortOnError, + NdbOperation::AbortOnError, TRUE) == -1) { error= pTrans->getNdbError(); @@ -6403,7 +8530,7 @@ ndb_get_table_statistics(ha_ndbcluster* file, bool report_error, Ndb* ndb, sum_commits+= commits; if (sum_row_size < size) sum_row_size= size; - sum_mem+= mem; + sum_mem+= fixed_mem + var_mem; count++; } @@ -6458,6 +8585,7 @@ retry: my_sleep(retry_sleep); continue; } + set_ndb_err(current_thd, error); break; } while(1); DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr, @@ -6470,17 +8598,17 @@ retry: that the table with this name is a ndb table */ -int ha_ndbcluster::write_ndb_file() +int ha_ndbcluster::write_ndb_file(const char *name) { File file; bool error=1; char path[FN_REFLEN]; DBUG_ENTER("write_ndb_file"); - DBUG_PRINT("enter", ("db: %s, name: %s", m_dbname, m_tabname)); + DBUG_PRINT("enter", ("name: %s", name)); - (void)strxnmov(path, FN_REFLEN, - mysql_data_home,"/",m_dbname,"/",m_tabname,ha_ndb_ext,NullS); + (void)strxnmov(path, FN_REFLEN-1, + mysql_data_home,"/",name,ha_ndb_ext,NullS); if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0) { @@ -6524,19 +8652,19 @@ ha_ndbcluster::null_value_index_search(KEY_MULTI_RANGE *ranges, KEY* key_info= table->key_info + active_index; KEY_MULTI_RANGE *range= ranges; ulong reclength= table->s->reclength; - byte *curr= (byte*)buffer->buffer; - byte *end_of_buffer= (byte*)buffer->buffer_end; + uchar *curr= (uchar*)buffer->buffer; + uchar *end_of_buffer= (uchar*)buffer->buffer_end; for (; range<end_range && curr+reclength <= end_of_buffer; range++) { - const byte *key= range->start_key.key; + const uchar *key= range->start_key.key; uint key_len= range->start_key.length; if (check_null_in_key(key_info, key, key_len)) - DBUG_RETURN(true); + DBUG_RETURN(TRUE); curr += reclength; } - DBUG_RETURN(false); + DBUG_RETURN(FALSE); } int @@ -6546,10 +8674,11 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, bool sorted, HANDLER_BUFFER *buffer) { + m_write_op= FALSE; int res; KEY* key_info= table->key_info + active_index; NDB_INDEX_TYPE cur_index_type= get_index_type(active_index); - ulong reclength= table->s->reclength; + ulong reclength= table_share->reclength; NdbOperation* op; Thd_ndb *thd_ndb= get_thd_ndb(current_thd); DBUG_ENTER("ha_ndbcluster::read_multi_range_first"); @@ -6557,8 +8686,8 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, /** * blobs and unique hash index with NULL can't be batched currently */ - if (uses_blob_value(m_retrieve_all_fields) || 
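ndb_get_table_statistics() now reads FRAGMENT_FIXED_MEMORY and FRAGMENT_VARSIZED_MEMORY instead of the old single memory pseudo column, and still sums rows, commits and memory over all fragments while keeping the largest row size. A sketch of that per-fragment accumulation over plain data:

#include <cstdint>
#include <vector>

struct FragmentStats {
  std::uint64_t rows, commits, fixed_mem, var_mem;
  std::uint32_t row_size;
};

struct TableStats {
  std::uint64_t row_count = 0, commit_count = 0, fragment_memory = 0;
  std::uint32_t row_size = 0;
};

TableStats accumulate(const std::vector<FragmentStats> &fragments)
{
  TableStats s;
  for (const FragmentStats &f : fragments) {
    s.row_count       += f.rows;
    s.commit_count    += f.commits;
    s.fragment_memory += f.fixed_mem + f.var_mem;  // both memory kinds count
    if (s.row_size < f.row_size)
      s.row_size = f.row_size;                     // keep the widest row seen
  }
  return s;
}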
- (cur_index_type == UNIQUE_INDEX && + if (uses_blob_value() || + (cur_index_type == UNIQUE_INDEX && has_null_in_unique_index(active_index) && null_value_index_search(ranges, ranges+range_count, buffer)) || m_delete_cannot_batch || m_update_cannot_batch) @@ -6598,56 +8727,83 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, /** * Variables for loop */ - byte *curr= (byte*)buffer->buffer; - byte *end_of_buffer= (byte*)buffer->buffer_end; + uchar *curr= (uchar*)buffer->buffer; + uchar *end_of_buffer= (uchar*)buffer->buffer_end; NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); bool need_pk = (lm == NdbOperation::LM_Read); - const NDBTAB *tab= (const NDBTAB *) m_table; - const NDBINDEX *unique_idx= (NDBINDEX *) m_index[active_index].unique_index; - const NDBINDEX *idx= (NDBINDEX *) m_index[active_index].index; + const NDBTAB *tab= m_table; + const NDBINDEX *unique_idx= m_index[active_index].unique_index; + const NDBINDEX *idx= m_index[active_index].index; const NdbOperation* lastOp= m_active_trans->getLastDefinedOperation(); NdbIndexScanOperation* scanOp= 0; for (; multi_range_curr<multi_range_end && curr+reclength <= end_of_buffer; multi_range_curr++) { + part_id_range part_spec; + if (m_use_partition_function) + { + get_partition_set(table, curr, active_index, + &multi_range_curr->start_key, + &part_spec); + DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u", + part_spec.start_part, part_spec.end_part)); + /* + If partition pruning has found no partition in set + we can skip this scan + */ + if (part_spec.start_part > part_spec.end_part) + { + /* + We can skip this partition since the key won't fit into any + partition + */ + curr += reclength; + multi_range_curr->range_flag |= SKIP_RANGE; + continue; + } + } switch (cur_index_type) { case PRIMARY_KEY_ORDERED_INDEX: if (!(multi_range_curr->start_key.length == key_info->key_length && - multi_range_curr->start_key.flag == HA_READ_KEY_EXACT)) - goto range; - /* fall through */ + multi_range_curr->start_key.flag == HA_READ_KEY_EXACT)) + goto range; + // else fall through case PRIMARY_KEY_INDEX: + { multi_range_curr->range_flag |= UNIQUE_RANGE; if ((op= m_active_trans->getNdbOperation(tab)) && !op->readTuple(lm) && !set_primary_key(op, multi_range_curr->start_key.key) && !define_read_attrs(curr, op) && - (op->setAbortOption(AO_IgnoreError), TRUE)) + (!m_use_partition_function || + (op->setPartitionId(part_spec.start_part), TRUE))) curr += reclength; else ERR_RETURN(op ? 
op->getNdbError() : m_active_trans->getNdbError()); break; + } + break; case UNIQUE_ORDERED_INDEX: if (!(multi_range_curr->start_key.length == key_info->key_length && - multi_range_curr->start_key.flag == HA_READ_KEY_EXACT && - !check_null_in_key(key_info, multi_range_curr->start_key.key, - multi_range_curr->start_key.length))) - goto range; - /* fall through */ + multi_range_curr->start_key.flag == HA_READ_KEY_EXACT && + !check_null_in_key(key_info, multi_range_curr->start_key.key, + multi_range_curr->start_key.length))) + goto range; + // else fall through case UNIQUE_INDEX: + { multi_range_curr->range_flag |= UNIQUE_RANGE; if ((op= m_active_trans->getNdbIndexOperation(unique_idx, tab)) && - !op->readTuple(lm) && - !set_index_key(op, key_info, multi_range_curr->start_key.key) && - !define_read_attrs(curr, op) && - (op->setAbortOption(AO_IgnoreError), TRUE)) - curr += reclength; + !op->readTuple(lm) && + !set_index_key(op, key_info, multi_range_curr->start_key.key) && + !define_read_attrs(curr, op)) + curr += reclength; else - ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError()); + ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError()); break; - case ORDERED_INDEX: - { + } + case ORDERED_INDEX: { range: multi_range_curr->range_flag &= ~(uint)UNIQUE_RANGE; if (scanOp == 0) @@ -6681,7 +8837,8 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, const key_range *keys[2]= { &multi_range_curr->start_key, &multi_range_curr->end_key }; - if ((res= set_bounds(scanOp, keys, multi_range_curr-ranges))) + if ((res= set_bounds(scanOp, active_index, FALSE, keys, + multi_range_curr-ranges))) DBUG_RETURN(res); break; } @@ -6700,7 +8857,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, * This as we don't want mysqld to reuse the buffer when we read * the remaining ranges */ - buffer->end_of_used_area= (byte*)buffer->buffer_end; + buffer->end_of_used_area= (uchar*)buffer->buffer_end; } else { @@ -6712,18 +8869,18 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, */ m_current_multi_operation= lastOp ? 
lastOp->next() : m_active_trans->getFirstDefinedOperation(); - if (!(res= execute_no_commit_ie(this, m_active_trans, true))) + if (!(res= execute_no_commit_ie(this, m_active_trans,true))) { m_multi_range_defined= multi_range_curr; multi_range_curr= ranges; - m_multi_range_result_ptr= (byte*)buffer->buffer; + m_multi_range_result_ptr= (uchar*)buffer->buffer; DBUG_RETURN(read_multi_range_next(found_range_p)); } ERR_RETURN(m_active_trans->getNdbError()); } #if 0 -#define DBUG_MULTI_RANGE(x) printf("read_multi_range_next: case %d\n", x); +#define DBUG_MULTI_RANGE(x) DBUG_PRINT("info", ("read_multi_range_next: case %d\n", x)); #else #define DBUG_MULTI_RANGE(x) #endif @@ -6734,19 +8891,26 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) DBUG_ENTER("ha_ndbcluster::read_multi_range_next"); if (m_disable_multi_read) { + DBUG_MULTI_RANGE(11); DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p)); } int res; int range_no; - ulong reclength= table->s->reclength; + ulong reclength= table_share->reclength; const NdbOperation* op= m_current_multi_operation; for (;multi_range_curr < m_multi_range_defined; multi_range_curr++) { + DBUG_MULTI_RANGE(12); + if (multi_range_curr->range_flag & SKIP_RANGE) + continue; if (multi_range_curr->range_flag & UNIQUE_RANGE) { if (op->getNdbError().code == 0) + { + DBUG_MULTI_RANGE(13); goto found_next; + } op= m_active_trans->getNextCompletedOperation(op); m_multi_range_result_ptr += reclength; @@ -6763,6 +8927,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) } else { + DBUG_MULTI_RANGE(14); goto close_scan; } } @@ -6794,8 +8959,9 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) DBUG_MULTI_RANGE(6); // First fetch from cursor DBUG_ASSERT(range_no == -1); - if ((res= m_multi_cursor->nextResult(true))) + if ((res= m_multi_cursor->nextResult(TRUE))) { + DBUG_MULTI_RANGE(15); goto close_scan; } multi_range_curr--; // Will be increased in for-loop @@ -6823,12 +8989,14 @@ close_scan: } else { + DBUG_MULTI_RANGE(9); DBUG_RETURN(ndb_err(m_active_trans)); } } if (multi_range_curr == multi_range_end) { + DBUG_MULTI_RANGE(16); Thd_ndb *thd_ndb= get_thd_ndb(current_thd); thd_ndb->query_state&= NDB_QUERY_NORMAL; DBUG_RETURN(HA_ERR_END_OF_FILE); @@ -6880,7 +9048,7 @@ ha_ndbcluster::setup_recattr(const NdbRecAttr* curr) Field **field, **end; NdbValue *value= m_value; - end= table->field + table->s->fields; + end= table->field + table_share->fields; for (field= table->field; field < end; field++, value++) { @@ -6918,17 +9086,13 @@ ha_ndbcluster::update_table_comment( { return((char*)comment); } - NDBDICT* dict= ndb->getDictionary(); - const NDBTAB* tab; - if (!(tab= dict->getTable(m_tabname))) - { - return((char*)comment); - } + const NDBTAB* tab= m_table; + DBUG_ASSERT(tab != NULL); char *str; const char *fmt="%s%snumber_of_replicas: %d"; const unsigned fmt_len_plus_extra= length + strlen(fmt); - if ((str= my_malloc(fmt_len_plus_extra, MYF(0))) == NULL) + if ((str= (char*) my_malloc(fmt_len_plus_extra, MYF(0))) == NULL) { sql_print_error("ha_ndbcluster::update_table_comment: " "my_malloc(%u) failed", (unsigned int)fmt_len_plus_extra); @@ -6946,12 +9110,16 @@ ha_ndbcluster::update_table_comment( pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) { THD *thd; /* needs to be first for thread_stack */ - Ndb* ndb; struct timespec abstime; + Thd_ndb *thd_ndb; + uint share_list_size= 0; + NDB_SHARE **share_list= NULL; my_thread_init(); DBUG_ENTER("ndb_util_thread"); 
DBUG_PRINT("enter", ("ndb_cache_check_time: %lu", ndb_cache_check_time)); + + pthread_mutex_lock(&LOCK_ndb_util_thread); thd= new THD; /* note that contructor of THD uses DBUG_ */ if (thd == NULL) @@ -6960,45 +9128,111 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) DBUG_RETURN(NULL); } THD_CHECK_SENTRY(thd); - ndb= new Ndb(g_ndb_cluster_connection, ""); - if (ndb == NULL) - { - thd->cleanup(); - delete thd; - DBUG_RETURN(NULL); - } pthread_detach_this_thread(); ndb_util_thread= pthread_self(); thd->thread_stack= (char*)&thd; /* remember where our stack is */ - if (thd->store_globals() || (ndb->init() != 0)) + if (thd->store_globals()) + goto ndb_util_thread_fail; + thd->init_for_queries(); + thd->version=refresh_version; + thd->main_security_ctx.host_or_ip= ""; + thd->client_capabilities = 0; + my_net_init(&thd->net, 0); + thd->main_security_ctx.master_access= ~0; + thd->main_security_ctx.priv_user = 0; + + CHARSET_INFO *charset_connection; + charset_connection= get_charset_by_csname("utf8", + MY_CS_PRIMARY, MYF(MY_WME)); + thd->variables.character_set_client= charset_connection; + thd->variables.character_set_results= charset_connection; + thd->variables.collation_connection= charset_connection; + thd->update_charset(); + + /* Signal successful initialization */ + ndb_util_thread_running= 1; + pthread_cond_signal(&COND_ndb_util_ready); + pthread_mutex_unlock(&LOCK_ndb_util_thread); + + /* + wait for mysql server to start + */ + pthread_mutex_lock(&LOCK_server_started); + while (!mysqld_server_started) { - thd->cleanup(); - delete thd; - delete ndb; - DBUG_RETURN(NULL); + set_timespec(abstime, 1); + pthread_cond_timedwait(&COND_server_started, &LOCK_server_started, + &abstime); + if (ndbcluster_terminating) + { + pthread_mutex_unlock(&LOCK_server_started); + pthread_mutex_lock(&LOCK_ndb_util_thread); + goto ndb_util_thread_end; + } } + pthread_mutex_unlock(&LOCK_server_started); - uint share_list_size= 0; - NDB_SHARE **share_list= NULL; - set_timespec(abstime, 0); - for (;;) + /* + Wait for cluster to start + */ + pthread_mutex_lock(&LOCK_ndb_util_thread); + while (!ndb_cluster_node_id && (ndbcluster_hton->slot != ~(uint)0)) { + /* ndb not connected yet */ + pthread_cond_wait(&COND_ndb_util_thread, &LOCK_ndb_util_thread); + if (ndbcluster_terminating) + goto ndb_util_thread_end; + } + pthread_mutex_unlock(&LOCK_ndb_util_thread); - if (abort_loop) - break; /* Shutting down server */ + /* Get thd_ndb for this thread */ + if (!(thd_ndb= ha_ndbcluster::seize_thd_ndb())) + { + sql_print_error("Could not allocate Thd_ndb object"); + pthread_mutex_lock(&LOCK_ndb_util_thread); + goto ndb_util_thread_end; + } + set_thd_ndb(thd, thd_ndb); + thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP; +#ifdef HAVE_NDB_BINLOG + if (ndb_extra_logging && ndb_binlog_running) + sql_print_information("NDB Binlog: Ndb tables initially read only."); + /* create tables needed by the replication */ + ndbcluster_setup_binlog_table_shares(thd); +#else + /* + Get all table definitions from the storage node + */ + ndbcluster_find_all_files(thd); +#endif + + set_timespec(abstime, 0); + for (;;) + { pthread_mutex_lock(&LOCK_ndb_util_thread); - pthread_cond_timedwait(&COND_ndb_util_thread, - &LOCK_ndb_util_thread, - &abstime); + if (!ndbcluster_terminating) + pthread_cond_timedwait(&COND_ndb_util_thread, + &LOCK_ndb_util_thread, + &abstime); + if (ndbcluster_terminating) /* Shutting down server */ + goto ndb_util_thread_end; pthread_mutex_unlock(&LOCK_ndb_util_thread); - +#ifdef NDB_EXTRA_DEBUG_UTIL_THREAD 
DBUG_PRINT("ndb_util_thread", ("Started, ndb_cache_check_time: %lu", ndb_cache_check_time)); +#endif - if (abort_loop) - break; /* Shutting down server */ +#ifdef HAVE_NDB_BINLOG + /* + Check that the ndb_apply_status_share and ndb_schema_share + have been created. + If not try to create it + */ + if (!ndb_binlog_tables_inited) + ndbcluster_setup_binlog_table_shares(thd); +#endif if (ndb_cache_check_time == 0) { @@ -7010,7 +9244,7 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) /* Lock mutex and fill list with pointers to all open tables */ NDB_SHARE *share; pthread_mutex_lock(&ndbcluster_mutex); - uint i, record_count= ndbcluster_open_tables.records; + uint i, open_count, record_count= ndbcluster_open_tables.records; if (share_list_size < record_count) { NDB_SHARE ** new_share_list= new NDB_SHARE * [record_count]; @@ -7025,62 +9259,82 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) share_list_size= record_count; share_list= new_share_list; } - for (i= 0; i < record_count; i++) + for (i= 0, open_count= 0; i < record_count; i++) { share= (NDB_SHARE *)hash_element(&ndbcluster_open_tables, i); +#ifdef HAVE_NDB_BINLOG + if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0)) + <= 0) + continue; // injector thread is the only user, skip statistics + share->util_lock= current_thd; // Mark that util thread has lock +#endif /* HAVE_NDB_BINLOG */ + /* ndb_share reference temporary, free below */ share->use_count++; /* Make sure the table can't be closed */ + DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", + share->key, share->use_count)); DBUG_PRINT("ndb_util_thread", ("Found open table[%d]: %s, use_count: %d", i, share->table_name, share->use_count)); /* Store pointer to table */ - share_list[i]= share; + share_list[open_count++]= share; } pthread_mutex_unlock(&ndbcluster_mutex); /* Iterate through the open files list */ - for (i= 0; i < record_count; i++) + for (i= 0; i < open_count; i++) { share= share_list[i]; - /* Split tab- and dbname */ - char buf[FN_REFLEN]; - char *tabname, *db; - uint length= dirname_length(share->table_name); - tabname= share->table_name+length; - memcpy(buf, share->table_name, length-1); - buf[length-1]= 0; - db= buf+dirname_length(buf); +#ifdef HAVE_NDB_BINLOG + if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0)) + <= 1) + { + /* + Util thread and injector thread is the only user, skip statistics + */ + /* ndb_share reference temporary free */ + DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", + share->key, share->use_count)); + free_share(&share); + continue; + } +#endif /* HAVE_NDB_BINLOG */ DBUG_PRINT("ndb_util_thread", - ("Fetching commit count for: %s", - share->table_name)); + ("Fetching commit count for: %s", share->key)); - /* Contact NDB to get commit count for table */ struct Ndb_statistics stat; uint lock; pthread_mutex_lock(&share->mutex); lock= share->commit_count_lock; pthread_mutex_unlock(&share->mutex); - if (ndb->setDatabaseName(db)) - { - goto loop_next; - } - if (ndb_get_table_statistics(NULL, false, ndb, tabname, &stat) == 0) { + /* Contact NDB to get commit count for table */ + Ndb* ndb= thd_ndb->ndb; + if (ndb->setDatabaseName(share->db)) + { + goto loop_next; + } + Ndb_table_guard ndbtab_g(ndb->getDictionary(), share->table_name); + if (ndbtab_g.get_table() && + ndb_get_table_statistics(NULL, FALSE, ndb, + ndbtab_g.get_table(), &stat) == 0) + { #ifndef DBUG_OFF - char buff[22], buff2[22]; + char buff[22], buff2[22]; #endif - 
DBUG_PRINT("ndb_util_thread", - ("Table: %s commit_count: %s rows: %s", - share->table_name, - llstr(stat.commit_count, buff), - llstr(stat.row_count, buff2))); - } - else - { - DBUG_PRINT("ndb_util_thread", - ("Error: Could not get commit count for table %s", - share->table_name)); - stat.commit_count= 0; + DBUG_PRINT("info", + ("Table: %s commit_count: %s rows: %s", + share->key, + llstr(stat.commit_count, buff), + llstr(stat.row_count, buff2))); + } + else + { + DBUG_PRINT("ndb_util_thread", + ("Error: Could not get commit count for table %s", + share->key)); + stat.commit_count= 0; + } } loop_next: pthread_mutex_lock(&share->mutex); @@ -7088,8 +9342,10 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) share->commit_count= stat.commit_count; pthread_mutex_unlock(&share->mutex); - /* Decrease the use count and possibly free share */ - free_share(share); + /* ndb_share reference temporary free */ + DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", + share->key, share->use_count)); + free_share(&share); } next: /* Calculate new time to wake up */ @@ -7114,64 +9370,26 @@ next: } } + pthread_mutex_lock(&LOCK_ndb_util_thread); + +ndb_util_thread_end: + net_end(&thd->net); +ndb_util_thread_fail: if (share_list) delete [] share_list; thd->cleanup(); delete thd; - delete ndb; + + /* signal termination */ + ndb_util_thread_running= 0; + pthread_cond_signal(&COND_ndb_util_ready); + pthread_mutex_unlock(&LOCK_ndb_util_thread); DBUG_PRINT("exit", ("ndb_util_thread")); my_thread_end(); pthread_exit(0); DBUG_RETURN(NULL); } -int -ndbcluster_show_status(THD* thd) -{ - Protocol *protocol= thd->protocol; - DBUG_ENTER("ndbcluster_show_status"); - - if (have_ndbcluster != SHOW_OPTION_YES) - { - my_message(ER_NOT_SUPPORTED_YET, - "Cannot call SHOW NDBCLUSTER STATUS because skip-ndbcluster is " - "defined", - MYF(0)); - DBUG_RETURN(TRUE); - } - - List<Item> field_list; - field_list.push_back(new Item_empty_string("free_list", 255)); - field_list.push_back(new Item_return_int("created", 10,MYSQL_TYPE_LONG)); - field_list.push_back(new Item_return_int("free", 10,MYSQL_TYPE_LONG)); - field_list.push_back(new Item_return_int("sizeof", 10,MYSQL_TYPE_LONG)); - - if (protocol->send_fields(&field_list, - Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) - DBUG_RETURN(TRUE); - - if (get_thd_ndb(thd) && get_thd_ndb(thd)->ndb) - { - Ndb* ndb= (get_thd_ndb(thd))->ndb; - Ndb::Free_list_usage tmp; - tmp.m_name= 0; - while (ndb->get_free_list_usage(&tmp)) - { - protocol->prepare_for_resend(); - - protocol->store(tmp.m_name, &my_charset_bin); - protocol->store((uint)tmp.m_created); - protocol->store((uint)tmp.m_free); - protocol->store((uint)tmp.m_sizeof); - if (protocol->write()) - DBUG_RETURN(TRUE); - } - } - send_eof(thd); - - DBUG_RETURN(FALSE); -} - /* Condition pushdown */ @@ -7218,4 +9436,1094 @@ ha_ndbcluster::cond_pop() m_cond->cond_pop(); } -#endif /* HAVE_NDBCLUSTER_DB */ + +/* + get table space info for SHOW CREATE TABLE +*/ +char* ha_ndbcluster::get_tablespace_name(THD *thd, char* name, uint name_len) +{ + Ndb *ndb= check_ndb_in_thd(thd); + NDBDICT *ndbdict= ndb->getDictionary(); + NdbError ndberr; + Uint32 id; + ndb->setDatabaseName(m_dbname); + const NDBTAB *ndbtab= m_table; + DBUG_ASSERT(ndbtab != NULL); + if (!ndbtab->getTablespace(&id)) + { + return 0; + } + { + NdbDictionary::Tablespace ts= ndbdict->getTablespace(id); + ndberr= ndbdict->getNdbError(); + if(ndberr.classification != NdbError::NoError) + goto err; + DBUG_PRINT("info", ("Found tablespace '%s'", 
ts.getName())); + if (name) + { + strxnmov(name, name_len, ts.getName(), NullS); + return name; + } + else + return (my_strdup(ts.getName(), MYF(0))); + } +err: + if (ndberr.status == NdbError::TemporaryError) + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG), + ndberr.code, ndberr.message, "NDB"); + else + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + ndberr.code, ndberr.message, "NDB"); + return 0; +} + +/* + Implements the SHOW NDB STATUS command. +*/ +bool +ndbcluster_show_status(handlerton *hton, THD* thd, stat_print_fn *stat_print, + enum ha_stat_type stat_type) +{ + char buf[IO_SIZE]; + uint buflen; + DBUG_ENTER("ndbcluster_show_status"); + + if (stat_type != HA_ENGINE_STATUS) + { + DBUG_RETURN(FALSE); + } + + update_status_variables(g_ndb_cluster_connection); + buflen= + my_snprintf(buf, sizeof(buf), + "cluster_node_id=%ld, " + "connected_host=%s, " + "connected_port=%ld, " + "number_of_data_nodes=%ld, " + "number_of_ready_data_nodes=%ld, " + "connect_count=%ld", + ndb_cluster_node_id, + ndb_connected_host, + ndb_connected_port, + ndb_number_of_data_nodes, + ndb_number_of_ready_data_nodes, + ndb_connect_count); + if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length, + STRING_WITH_LEN("connection"), buf, buflen)) + DBUG_RETURN(TRUE); + + if (get_thd_ndb(thd) && get_thd_ndb(thd)->ndb) + { + Ndb* ndb= (get_thd_ndb(thd))->ndb; + Ndb::Free_list_usage tmp; + tmp.m_name= 0; + while (ndb->get_free_list_usage(&tmp)) + { + buflen= + my_snprintf(buf, sizeof(buf), + "created=%u, free=%u, sizeof=%u", + tmp.m_created, tmp.m_free, tmp.m_sizeof); + if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length, + tmp.m_name, strlen(tmp.m_name), buf, buflen)) + DBUG_RETURN(TRUE); + } + } +#ifdef HAVE_NDB_BINLOG + ndbcluster_show_status_binlog(thd, stat_print, stat_type); +#endif + + DBUG_RETURN(FALSE); +} + + +/* + Create a table in NDB Cluster + */ +static uint get_no_fragments(ulonglong max_rows) +{ +#if MYSQL_VERSION_ID >= 50000 + uint acc_row_size= 25 + /*safety margin*/ 2; +#else + uint acc_row_size= pk_length*4; + /* add acc overhead */ + if (pk_length <= 8) /* main page will set the limit */ + acc_row_size+= 25 + /*safety margin*/ 2; + else /* overflow page will set the limit */ + acc_row_size+= 4 + /*safety margin*/ 4; +#endif + ulonglong acc_fragment_size= 512*1024*1024; +#if MYSQL_VERSION_ID >= 50100 + return (max_rows*acc_row_size)/acc_fragment_size+1; +#else + return ((max_rows*acc_row_size)/acc_fragment_size+1 + +1/*correct rounding*/)/2; +#endif +} + + +/* + Routine to adjust default number of partitions to always be a multiple + of number of nodes and never more than 4 times the number of nodes. + +*/ +static bool adjusted_frag_count(uint no_fragments, uint no_nodes, + uint &reported_frags) +{ + uint i= 0; + reported_frags= no_nodes; + while (reported_frags < no_fragments && ++i < 4 && + (reported_frags + no_nodes) < MAX_PARTITIONS) + reported_frags+= no_nodes; + return (reported_frags < no_fragments); +} + +int ha_ndbcluster::get_default_no_partitions(HA_CREATE_INFO *create_info) +{ + ha_rows max_rows, min_rows; + if (create_info) + { + max_rows= create_info->max_rows; + min_rows= create_info->min_rows; + } + else + { + max_rows= table_share->max_rows; + min_rows= table_share->min_rows; + } + uint reported_frags; + uint no_fragments= + get_no_fragments(max_rows >= min_rows ? 
max_rows : min_rows); + uint no_nodes= g_ndb_cluster_connection->no_db_nodes(); + if (adjusted_frag_count(no_fragments, no_nodes, reported_frags)) + { + push_warning(current_thd, + MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "Ndb might have problems storing the max amount of rows specified"); + } + return (int)reported_frags; +} + + +/* + Set-up auto-partitioning for NDB Cluster + + SYNOPSIS + set_auto_partitions() + part_info Partition info struct to set-up + + RETURN VALUE + NONE + + DESCRIPTION + Set-up auto partitioning scheme for tables that didn't define any + partitioning. We'll use PARTITION BY KEY() in this case which + translates into partition by primary key if a primary key exists + and partition by hidden key otherwise. +*/ + +void ha_ndbcluster::set_auto_partitions(partition_info *part_info) +{ + DBUG_ENTER("ha_ndbcluster::set_auto_partitions"); + part_info->list_of_part_fields= TRUE; + part_info->part_type= HASH_PARTITION; + switch (opt_ndb_distribution_id) + { + case ND_KEYHASH: + part_info->linear_hash_ind= FALSE; + break; + case ND_LINHASH: + part_info->linear_hash_ind= TRUE; + break; + } + DBUG_VOID_RETURN; +} + + +int ha_ndbcluster::set_range_data(void *tab_ref, partition_info *part_info) +{ + NDBTAB *tab= (NDBTAB*)tab_ref; + int32 *range_data= (int32*)my_malloc(part_info->no_parts*sizeof(int32), + MYF(0)); + uint i; + int error= 0; + bool unsigned_flag= part_info->part_expr->unsigned_flag; + DBUG_ENTER("set_range_data"); + + if (!range_data) + { + mem_alloc_error(part_info->no_parts*sizeof(int32)); + DBUG_RETURN(1); + } + for (i= 0; i < part_info->no_parts; i++) + { + longlong range_val= part_info->range_int_array[i]; + if (unsigned_flag) + range_val-= 0x8000000000000000ULL; + if (range_val < INT_MIN32 || range_val >= INT_MAX32) + { + if ((i != part_info->no_parts - 1) || + (range_val != LONGLONG_MAX)) + { + my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB"); + error= 1; + goto error; + } + range_val= INT_MAX32; + } + range_data[i]= (int32)range_val; + } + tab->setRangeListData(range_data, sizeof(int32)*part_info->no_parts); +error: + my_free((char*)range_data, MYF(0)); + DBUG_RETURN(error); +} + +int ha_ndbcluster::set_list_data(void *tab_ref, partition_info *part_info) +{ + NDBTAB *tab= (NDBTAB*)tab_ref; + int32 *list_data= (int32*)my_malloc(part_info->no_list_values * 2 + * sizeof(int32), MYF(0)); + uint32 *part_id, i; + int error= 0; + bool unsigned_flag= part_info->part_expr->unsigned_flag; + DBUG_ENTER("set_list_data"); + + if (!list_data) + { + mem_alloc_error(part_info->no_list_values*2*sizeof(int32)); + DBUG_RETURN(1); + } + for (i= 0; i < part_info->no_list_values; i++) + { + LIST_PART_ENTRY *list_entry= &part_info->list_array[i]; + longlong list_val= list_entry->list_value; + if (unsigned_flag) + list_val-= 0x8000000000000000ULL; + if (list_val < INT_MIN32 || list_val > INT_MAX32) + { + my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB"); + error= 1; + goto error; + } + list_data[2*i]= (int32)list_val; + part_id= (uint32*)&list_data[2*i+1]; + *part_id= list_entry->partition_id; + } + tab->setRangeListData(list_data, 2*sizeof(int32)*part_info->no_list_values); +error: + my_free((char*)list_data, MYF(0)); + DBUG_RETURN(error); +} + +/* + User defined partitioning set-up. We need to check how many fragments the + user wants defined and which node groups to put those into. Later we also + want to attach those partitions to a tablespace. + + All the functionality of the partition function, partition limits and so + forth are entirely handled by the MySQL Server. 
There is one exception to + this rule for PARTITION BY KEY where NDB handles the hash function and + this type can thus be handled transparently also by NDB API program. + For RANGE, HASH and LIST and subpartitioning the NDB API programs must + implement the function to map to a partition. +*/ + +uint ha_ndbcluster::set_up_partition_info(partition_info *part_info, + TABLE *table, + void *tab_par) +{ + uint16 frag_data[MAX_PARTITIONS]; + char *ts_names[MAX_PARTITIONS]; + ulong fd_index= 0, i, j; + NDBTAB *tab= (NDBTAB*)tab_par; + NDBTAB::FragmentType ftype= NDBTAB::UserDefined; + partition_element *part_elem; + bool first= TRUE; + uint tot_ts_name_len; + List_iterator<partition_element> part_it(part_info->partitions); + int error; + DBUG_ENTER("ha_ndbcluster::set_up_partition_info"); + + if (part_info->part_type == HASH_PARTITION && + part_info->list_of_part_fields == TRUE) + { + Field **fields= part_info->part_field_array; + + if (part_info->linear_hash_ind) + ftype= NDBTAB::DistrKeyLin; + else + ftype= NDBTAB::DistrKeyHash; + + for (i= 0; i < part_info->part_field_list.elements; i++) + { + NDBCOL *col= tab->getColumn(fields[i]->field_index); + DBUG_PRINT("info",("setting dist key on %s", col->getName())); + col->setPartitionKey(TRUE); + } + } + else + { + if (!current_thd->variables.new_mode) + { + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_ILLEGAL_HA_CREATE_OPTION, + ER(ER_ILLEGAL_HA_CREATE_OPTION), + ndbcluster_hton_name, + "LIST, RANGE and HASH partition disabled by default," + " use --new option to enable"); + DBUG_RETURN(HA_ERR_UNSUPPORTED); + } + /* + Create a shadow field for those tables that have user defined + partitioning. This field stores the value of the partition + function such that NDB can handle reorganisations of the data + even when the MySQL Server isn't available to assist with + calculation of the partition function value. 
+ */ + NDBCOL col; + DBUG_PRINT("info", ("Generating partition func value field")); + col.setName("$PART_FUNC_VALUE"); + col.setType(NdbDictionary::Column::Int); + col.setLength(1); + col.setNullable(FALSE); + col.setPrimaryKey(FALSE); + col.setAutoIncrement(FALSE); + tab->addColumn(col); + if (part_info->part_type == RANGE_PARTITION) + { + if ((error= set_range_data((void*)tab, part_info))) + { + DBUG_RETURN(error); + } + } + else if (part_info->part_type == LIST_PARTITION) + { + if ((error= set_list_data((void*)tab, part_info))) + { + DBUG_RETURN(error); + } + } + } + tab->setFragmentType(ftype); + i= 0; + tot_ts_name_len= 0; + do + { + uint ng; + part_elem= part_it++; + if (!part_info->is_sub_partitioned()) + { + ng= part_elem->nodegroup_id; + if (first && ng == UNDEF_NODEGROUP) + ng= 0; + ts_names[fd_index]= part_elem->tablespace_name; + frag_data[fd_index++]= ng; + } + else + { + List_iterator<partition_element> sub_it(part_elem->subpartitions); + j= 0; + do + { + part_elem= sub_it++; + ng= part_elem->nodegroup_id; + if (first && ng == UNDEF_NODEGROUP) + ng= 0; + ts_names[fd_index]= part_elem->tablespace_name; + frag_data[fd_index++]= ng; + } while (++j < part_info->no_subparts); + } + first= FALSE; + } while (++i < part_info->no_parts); + tab->setDefaultNoPartitionsFlag(part_info->use_default_no_partitions); + tab->setLinearFlag(part_info->linear_hash_ind); + { + ha_rows max_rows= table_share->max_rows; + ha_rows min_rows= table_share->min_rows; + if (max_rows < min_rows) + max_rows= min_rows; + if (max_rows != (ha_rows)0) /* default setting, don't set fragmentation */ + { + tab->setMaxRows(max_rows); + tab->setMinRows(min_rows); + } + } + tab->setTablespaceNames(ts_names, fd_index*sizeof(char*)); + tab->setFragmentCount(fd_index); + tab->setFragmentData(&frag_data, fd_index*2); + DBUG_RETURN(0); +} + + +bool ha_ndbcluster::check_if_incompatible_data(HA_CREATE_INFO *create_info, + uint table_changes) +{ + DBUG_ENTER("ha_ndbcluster::check_if_incompatible_data"); + uint i; + const NDBTAB *tab= (const NDBTAB *) m_table; + + if (current_thd->variables.ndb_use_copying_alter_table) + { + DBUG_PRINT("info", ("On-line alter table disabled")); + DBUG_RETURN(COMPATIBLE_DATA_NO); + } + + int pk= 0; + int ai= 0; + + if (create_info->tablespace) + create_info->storage_media = HA_SM_DISK; + else + create_info->storage_media = HA_SM_MEMORY; + + for (i= 0; i < table->s->fields; i++) + { + Field *field= table->field[i]; + const NDBCOL *col= tab->getColumn(i); + if (col->getStorageType() == NDB_STORAGETYPE_MEMORY && create_info->storage_media != HA_SM_MEMORY || + col->getStorageType() == NDB_STORAGETYPE_DISK && create_info->storage_media != HA_SM_DISK) + { + DBUG_PRINT("info", ("Column storage media is changed")); + DBUG_RETURN(COMPATIBLE_DATA_NO); + } + + if (field->flags & FIELD_IS_RENAMED) + { + DBUG_PRINT("info", ("Field has been renamed, copy table")); + DBUG_RETURN(COMPATIBLE_DATA_NO); + } + if ((field->flags & FIELD_IN_ADD_INDEX) && + col->getStorageType() == NdbDictionary::Column::StorageTypeDisk) + { + DBUG_PRINT("info", ("add/drop index not supported for disk stored column")); + DBUG_RETURN(COMPATIBLE_DATA_NO); + } + + if (field->flags & PRI_KEY_FLAG) + pk=1; + if (field->flags & FIELD_IN_ADD_INDEX) + ai=1; + } + + char tablespace_name[FN_LEN]; + if (get_tablespace_name(current_thd, tablespace_name, FN_LEN)) + { + if (create_info->tablespace) + { + if (strcmp(create_info->tablespace, tablespace_name)) + { + DBUG_PRINT("info", ("storage media is changed, old tablespace=%s, new tablespace=%s", 
+ tablespace_name, create_info->tablespace)); + DBUG_RETURN(COMPATIBLE_DATA_NO); + } + } + else + { + DBUG_PRINT("info", ("storage media is changed, old is DISK and tablespace=%s, new is MEM", + tablespace_name)); + DBUG_RETURN(COMPATIBLE_DATA_NO); + } + } + else + { + if (create_info->storage_media != HA_SM_MEMORY) + { + DBUG_PRINT("info", ("storage media is changed, old is MEM, new is DISK and tablespace=%s", + create_info->tablespace)); + DBUG_RETURN(COMPATIBLE_DATA_NO); + } + } + + if (table_changes != IS_EQUAL_YES) + DBUG_RETURN(COMPATIBLE_DATA_NO); + + /** + * Changing from/to primary key + * + * This is _not_ correct, but check_if_incompatible_data-interface + * doesnt give more info, so I guess that we can't do any + * online add index if not using primary key + * + * This as mysql will handle a unique not null index as primary + * even wo/ user specifiying it... :-( + * + */ + if ((table_share->primary_key == MAX_KEY && pk) || + (table_share->primary_key != MAX_KEY && !pk) || + (table_share->primary_key == MAX_KEY && !pk && ai)) + { + DBUG_RETURN(COMPATIBLE_DATA_NO); + } + + /* Check that auto_increment value was not changed */ + if ((create_info->used_fields & HA_CREATE_USED_AUTO) && + create_info->auto_increment_value != 0) + DBUG_RETURN(COMPATIBLE_DATA_NO); + + /* Check that row format didn't change */ + if ((create_info->used_fields & HA_CREATE_USED_AUTO) && + get_row_type() != create_info->row_type) + DBUG_RETURN(COMPATIBLE_DATA_NO); + + DBUG_RETURN(COMPATIBLE_DATA_YES); +} + +bool set_up_tablespace(st_alter_tablespace *alter_info, + NdbDictionary::Tablespace *ndb_ts) +{ + ndb_ts->setName(alter_info->tablespace_name); + ndb_ts->setExtentSize(alter_info->extent_size); + ndb_ts->setDefaultLogfileGroup(alter_info->logfile_group_name); + return FALSE; +} + +bool set_up_datafile(st_alter_tablespace *alter_info, + NdbDictionary::Datafile *ndb_df) +{ + if (alter_info->max_size > 0) + { + my_error(ER_TABLESPACE_AUTO_EXTEND_ERROR, MYF(0)); + return TRUE; + } + ndb_df->setPath(alter_info->data_file_name); + ndb_df->setSize(alter_info->initial_size); + ndb_df->setTablespace(alter_info->tablespace_name); + return FALSE; +} + +bool set_up_logfile_group(st_alter_tablespace *alter_info, + NdbDictionary::LogfileGroup *ndb_lg) +{ + ndb_lg->setName(alter_info->logfile_group_name); + ndb_lg->setUndoBufferSize(alter_info->undo_buffer_size); + return FALSE; +} + +bool set_up_undofile(st_alter_tablespace *alter_info, + NdbDictionary::Undofile *ndb_uf) +{ + ndb_uf->setPath(alter_info->undo_file_name); + ndb_uf->setSize(alter_info->initial_size); + ndb_uf->setLogfileGroup(alter_info->logfile_group_name); + return FALSE; +} + +int ndbcluster_alter_tablespace(handlerton *hton, + THD* thd, st_alter_tablespace *alter_info) +{ + int is_tablespace= 0; + NdbError err; + NDBDICT *dict; + int error; + const char *errmsg; + Ndb *ndb; + DBUG_ENTER("ha_ndbcluster::alter_tablespace"); + LINT_INIT(errmsg); + + ndb= check_ndb_in_thd(thd); + if (ndb == NULL) + { + DBUG_RETURN(HA_ERR_NO_CONNECTION); + } + dict= ndb->getDictionary(); + + switch (alter_info->ts_cmd_type){ + case (CREATE_TABLESPACE): + { + error= ER_CREATE_FILEGROUP_FAILED; + + NdbDictionary::Tablespace ndb_ts; + NdbDictionary::Datafile ndb_df; + NdbDictionary::ObjectId objid; + if (set_up_tablespace(alter_info, &ndb_ts)) + { + DBUG_RETURN(1); + } + if (set_up_datafile(alter_info, &ndb_df)) + { + DBUG_RETURN(1); + } + errmsg= "TABLESPACE"; + if (dict->createTablespace(ndb_ts, &objid)) + { + DBUG_PRINT("error", ("createTablespace returned %d", error)); 
+ goto ndberror; + } + DBUG_PRINT("alter_info", ("Successfully created Tablespace")); + errmsg= "DATAFILE"; + if (dict->createDatafile(ndb_df)) + { + err= dict->getNdbError(); + NdbDictionary::Tablespace tmp= dict->getTablespace(ndb_ts.getName()); + if (dict->getNdbError().code == 0 && + tmp.getObjectId() == objid.getObjectId() && + tmp.getObjectVersion() == objid.getObjectVersion()) + { + dict->dropTablespace(tmp); + } + + DBUG_PRINT("error", ("createDatafile returned %d", error)); + goto ndberror2; + } + is_tablespace= 1; + break; + } + case (ALTER_TABLESPACE): + { + error= ER_ALTER_FILEGROUP_FAILED; + if (alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_ADD_FILE) + { + NdbDictionary::Datafile ndb_df; + if (set_up_datafile(alter_info, &ndb_df)) + { + DBUG_RETURN(1); + } + errmsg= " CREATE DATAFILE"; + if (dict->createDatafile(ndb_df)) + { + goto ndberror; + } + } + else if(alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_DROP_FILE) + { + NdbDictionary::Tablespace ts= dict->getTablespace(alter_info->tablespace_name); + NdbDictionary::Datafile df= dict->getDatafile(0, alter_info->data_file_name); + NdbDictionary::ObjectId objid; + df.getTablespaceId(&objid); + if (ts.getObjectId() == objid.getObjectId() && + strcmp(df.getPath(), alter_info->data_file_name) == 0) + { + errmsg= " DROP DATAFILE"; + if (dict->dropDatafile(df)) + { + goto ndberror; + } + } + else + { + DBUG_PRINT("error", ("No such datafile")); + my_error(ER_ALTER_FILEGROUP_FAILED, MYF(0), " NO SUCH FILE"); + DBUG_RETURN(1); + } + } + else + { + DBUG_PRINT("error", ("Unsupported alter tablespace: %d", + alter_info->ts_alter_tablespace_type)); + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + is_tablespace= 1; + break; + } + case (CREATE_LOGFILE_GROUP): + { + error= ER_CREATE_FILEGROUP_FAILED; + NdbDictionary::LogfileGroup ndb_lg; + NdbDictionary::Undofile ndb_uf; + NdbDictionary::ObjectId objid; + if (alter_info->undo_file_name == NULL) + { + /* + REDO files in LOGFILE GROUP not supported yet + */ + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + if (set_up_logfile_group(alter_info, &ndb_lg)) + { + DBUG_RETURN(1); + } + errmsg= "LOGFILE GROUP"; + if (dict->createLogfileGroup(ndb_lg, &objid)) + { + goto ndberror; + } + DBUG_PRINT("alter_info", ("Successfully created Logfile Group")); + if (set_up_undofile(alter_info, &ndb_uf)) + { + DBUG_RETURN(1); + } + errmsg= "UNDOFILE"; + if (dict->createUndofile(ndb_uf)) + { + err= dict->getNdbError(); + NdbDictionary::LogfileGroup tmp= dict->getLogfileGroup(ndb_lg.getName()); + if (dict->getNdbError().code == 0 && + tmp.getObjectId() == objid.getObjectId() && + tmp.getObjectVersion() == objid.getObjectVersion()) + { + dict->dropLogfileGroup(tmp); + } + goto ndberror2; + } + break; + } + case (ALTER_LOGFILE_GROUP): + { + error= ER_ALTER_FILEGROUP_FAILED; + if (alter_info->undo_file_name == NULL) + { + /* + REDO files in LOGFILE GROUP not supported yet + */ + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + NdbDictionary::Undofile ndb_uf; + if (set_up_undofile(alter_info, &ndb_uf)) + { + DBUG_RETURN(1); + } + errmsg= "CREATE UNDOFILE"; + if (dict->createUndofile(ndb_uf)) + { + goto ndberror; + } + break; + } + case (DROP_TABLESPACE): + { + error= ER_DROP_FILEGROUP_FAILED; + errmsg= "TABLESPACE"; + if (dict->dropTablespace(dict->getTablespace(alter_info->tablespace_name))) + { + goto ndberror; + } + is_tablespace= 1; + break; + } + case (DROP_LOGFILE_GROUP): + { + error= ER_DROP_FILEGROUP_FAILED; + errmsg= "LOGFILE GROUP"; + if 
(dict->dropLogfileGroup(dict->getLogfileGroup(alter_info->logfile_group_name))) + { + goto ndberror; + } + break; + } + case (CHANGE_FILE_TABLESPACE): + { + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + case (ALTER_ACCESS_MODE_TABLESPACE): + { + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + default: + { + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + } +#ifdef HAVE_NDB_BINLOG + if (is_tablespace) + ndbcluster_log_schema_op(thd, 0, + thd->query, thd->query_length, + "", alter_info->tablespace_name, + 0, 0, + SOT_TABLESPACE, 0, 0, 0); + else + ndbcluster_log_schema_op(thd, 0, + thd->query, thd->query_length, + "", alter_info->logfile_group_name, + 0, 0, + SOT_LOGFILE_GROUP, 0, 0, 0); +#endif + DBUG_RETURN(FALSE); + +ndberror: + err= dict->getNdbError(); +ndberror2: + set_ndb_err(thd, err); + ndb_to_mysql_error(&err); + + my_error(error, MYF(0), errmsg); + DBUG_RETURN(1); +} + + +bool ha_ndbcluster::get_no_parts(const char *name, uint *no_parts) +{ + Ndb *ndb; + NDBDICT *dict; + int err; + DBUG_ENTER("ha_ndbcluster::get_no_parts"); + LINT_INIT(err); + + set_dbname(name); + set_tabname(name); + for (;;) + { + if (check_ndb_connection()) + { + err= HA_ERR_NO_CONNECTION; + break; + } + ndb= get_ndb(); + ndb->setDatabaseName(m_dbname); + Ndb_table_guard ndbtab_g(dict= ndb->getDictionary(), m_tabname); + if (!ndbtab_g.get_table()) + ERR_BREAK(dict->getNdbError(), err); + *no_parts= ndbtab_g.get_table()->getFragmentCount(); + DBUG_RETURN(FALSE); + } + + print_error(err, MYF(0)); + DBUG_RETURN(TRUE); +} + +static int ndbcluster_fill_files_table(handlerton *hton, + THD *thd, + TABLE_LIST *tables, + COND *cond) +{ + TABLE* table= tables->table; + Ndb *ndb= check_ndb_in_thd(thd); + NdbDictionary::Dictionary* dict= ndb->getDictionary(); + NdbDictionary::Dictionary::List dflist; + NdbError ndberr; + uint i; + DBUG_ENTER("ndbcluster_fill_files_table"); + + dict->listObjects(dflist, NdbDictionary::Object::Datafile); + ndberr= dict->getNdbError(); + if (ndberr.classification != NdbError::NoError) + ERR_RETURN(ndberr); + + for (i= 0; i < dflist.count; i++) + { + NdbDictionary::Dictionary::List::Element& elt = dflist.elements[i]; + Ndb_cluster_connection_node_iter iter; + uint id; + + g_ndb_cluster_connection->init_get_next_node(iter); + + while ((id= g_ndb_cluster_connection->get_next_node(iter))) + { + init_fill_schema_files_row(table); + NdbDictionary::Datafile df= dict->getDatafile(id, elt.name); + ndberr= dict->getNdbError(); + if(ndberr.classification != NdbError::NoError) + { + if (ndberr.classification == NdbError::SchemaError) + continue; + + if (ndberr.classification == NdbError::UnknownResultError) + continue; + + ERR_RETURN(ndberr); + } + NdbDictionary::Tablespace ts= dict->getTablespace(df.getTablespace()); + ndberr= dict->getNdbError(); + if (ndberr.classification != NdbError::NoError) + { + if (ndberr.classification == NdbError::SchemaError) + continue; + ERR_RETURN(ndberr); + } + + table->field[IS_FILES_FILE_NAME]->set_notnull(); + table->field[IS_FILES_FILE_NAME]->store(elt.name, strlen(elt.name), + system_charset_info); + table->field[IS_FILES_FILE_TYPE]->set_notnull(); + table->field[IS_FILES_FILE_TYPE]->store("DATAFILE",8, + system_charset_info); + table->field[IS_FILES_TABLESPACE_NAME]->set_notnull(); + table->field[IS_FILES_TABLESPACE_NAME]->store(df.getTablespace(), + strlen(df.getTablespace()), + system_charset_info); + table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull(); + table->field[IS_FILES_LOGFILE_GROUP_NAME]-> + store(ts.getDefaultLogfileGroup(), + 
strlen(ts.getDefaultLogfileGroup()), + system_charset_info); + table->field[IS_FILES_ENGINE]->set_notnull(); + table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name, + ndbcluster_hton_name_length, + system_charset_info); + + table->field[IS_FILES_FREE_EXTENTS]->set_notnull(); + table->field[IS_FILES_FREE_EXTENTS]->store(df.getFree() + / ts.getExtentSize()); + table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull(); + table->field[IS_FILES_TOTAL_EXTENTS]->store(df.getSize() + / ts.getExtentSize()); + table->field[IS_FILES_EXTENT_SIZE]->set_notnull(); + table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize()); + table->field[IS_FILES_INITIAL_SIZE]->set_notnull(); + table->field[IS_FILES_INITIAL_SIZE]->store(df.getSize()); + table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull(); + table->field[IS_FILES_MAXIMUM_SIZE]->store(df.getSize()); + table->field[IS_FILES_VERSION]->set_notnull(); + table->field[IS_FILES_VERSION]->store(df.getObjectVersion()); + + table->field[IS_FILES_ROW_FORMAT]->set_notnull(); + table->field[IS_FILES_ROW_FORMAT]->store("FIXED", 5, system_charset_info); + + char extra[30]; + int len= my_snprintf(extra, sizeof(extra), "CLUSTER_NODE=%u", id); + table->field[IS_FILES_EXTRA]->set_notnull(); + table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info); + schema_table_store_record(thd, table); + } + } + + NdbDictionary::Dictionary::List uflist; + dict->listObjects(uflist, NdbDictionary::Object::Undofile); + ndberr= dict->getNdbError(); + if (ndberr.classification != NdbError::NoError) + ERR_RETURN(ndberr); + + for (i= 0; i < uflist.count; i++) + { + NdbDictionary::Dictionary::List::Element& elt= uflist.elements[i]; + Ndb_cluster_connection_node_iter iter; + unsigned id; + + g_ndb_cluster_connection->init_get_next_node(iter); + + while ((id= g_ndb_cluster_connection->get_next_node(iter))) + { + NdbDictionary::Undofile uf= dict->getUndofile(id, elt.name); + ndberr= dict->getNdbError(); + if (ndberr.classification != NdbError::NoError) + { + if (ndberr.classification == NdbError::SchemaError) + continue; + if (ndberr.classification == NdbError::UnknownResultError) + continue; + ERR_RETURN(ndberr); + } + NdbDictionary::LogfileGroup lfg= + dict->getLogfileGroup(uf.getLogfileGroup()); + ndberr= dict->getNdbError(); + if (ndberr.classification != NdbError::NoError) + { + if (ndberr.classification == NdbError::SchemaError) + continue; + ERR_RETURN(ndberr); + } + + init_fill_schema_files_row(table); + table->field[IS_FILES_FILE_NAME]->set_notnull(); + table->field[IS_FILES_FILE_NAME]->store(elt.name, strlen(elt.name), + system_charset_info); + table->field[IS_FILES_FILE_TYPE]->set_notnull(); + table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8, + system_charset_info); + NdbDictionary::ObjectId objid; + uf.getLogfileGroupId(&objid); + table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull(); + table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(uf.getLogfileGroup(), + strlen(uf.getLogfileGroup()), + system_charset_info); + table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull(); + table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(objid.getObjectId()); + table->field[IS_FILES_ENGINE]->set_notnull(); + table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name, + ndbcluster_hton_name_length, + system_charset_info); + + table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull(); + table->field[IS_FILES_TOTAL_EXTENTS]->store(uf.getSize()/4); + table->field[IS_FILES_EXTENT_SIZE]->set_notnull(); + table->field[IS_FILES_EXTENT_SIZE]->store(4); + + 
table->field[IS_FILES_INITIAL_SIZE]->set_notnull(); + table->field[IS_FILES_INITIAL_SIZE]->store(uf.getSize()); + table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull(); + table->field[IS_FILES_MAXIMUM_SIZE]->store(uf.getSize()); + + table->field[IS_FILES_VERSION]->set_notnull(); + table->field[IS_FILES_VERSION]->store(uf.getObjectVersion()); + + char extra[100]; + int len= my_snprintf(extra,sizeof(extra),"CLUSTER_NODE=%u;UNDO_BUFFER_SIZE=%lu", + id, (ulong) lfg.getUndoBufferSize()); + table->field[IS_FILES_EXTRA]->set_notnull(); + table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info); + schema_table_store_record(thd, table); + } + } + + // now for LFGs + NdbDictionary::Dictionary::List lfglist; + dict->listObjects(lfglist, NdbDictionary::Object::LogfileGroup); + ndberr= dict->getNdbError(); + if (ndberr.classification != NdbError::NoError) + ERR_RETURN(ndberr); + + for (i= 0; i < lfglist.count; i++) + { + NdbDictionary::Dictionary::List::Element& elt= lfglist.elements[i]; + + NdbDictionary::LogfileGroup lfg= dict->getLogfileGroup(elt.name); + ndberr= dict->getNdbError(); + if (ndberr.classification != NdbError::NoError) + { + if (ndberr.classification == NdbError::SchemaError) + continue; + ERR_RETURN(ndberr); + } + + init_fill_schema_files_row(table); + table->field[IS_FILES_FILE_TYPE]->set_notnull(); + table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8, + system_charset_info); + + table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull(); + table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(elt.name, + strlen(elt.name), + system_charset_info); + table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull(); + table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(lfg.getObjectId()); + table->field[IS_FILES_ENGINE]->set_notnull(); + table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name, + ndbcluster_hton_name_length, + system_charset_info); + + table->field[IS_FILES_FREE_EXTENTS]->set_notnull(); + table->field[IS_FILES_FREE_EXTENTS]->store(lfg.getUndoFreeWords()); + table->field[IS_FILES_EXTENT_SIZE]->set_notnull(); + table->field[IS_FILES_EXTENT_SIZE]->store(4); + + table->field[IS_FILES_VERSION]->set_notnull(); + table->field[IS_FILES_VERSION]->store(lfg.getObjectVersion()); + + char extra[100]; + int len= my_snprintf(extra,sizeof(extra), + "UNDO_BUFFER_SIZE=%lu", + (ulong) lfg.getUndoBufferSize()); + table->field[IS_FILES_EXTRA]->set_notnull(); + table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info); + schema_table_store_record(thd, table); + } + DBUG_RETURN(0); +} + +SHOW_VAR ndb_status_variables_export[]= { + {"Ndb", (char*) &ndb_status_variables, SHOW_ARRAY}, + {NullS, NullS, SHOW_LONG} +}; + +struct st_mysql_storage_engine ndbcluster_storage_engine= +{ MYSQL_HANDLERTON_INTERFACE_VERSION }; + +mysql_declare_plugin(ndbcluster) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &ndbcluster_storage_engine, + ndbcluster_hton_name, + "MySQL AB", + "Clustered, fault-tolerant tables", + PLUGIN_LICENSE_GPL, + ndbcluster_init, /* Plugin Init */ + NULL, /* Plugin Deinit */ + 0x0100 /* 1.0 */, + ndb_status_variables_export,/* status variables */ + NULL, /* system variables */ + NULL /* config options */ +} +mysql_declare_plugin_end; + +#endif |
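The default-partition arithmetic introduced above in get_no_fragments() and adjusted_frag_count() is easy to sanity-check by hand. The following is a minimal standalone sketch, not part of the patch, that reproduces that arithmetic for the MYSQL_VERSION_ID >= 50100 branch; the MAX_PARTITIONS value (1024), the row count and the node count are assumptions chosen only for illustration.

// Standalone sketch of the default fragment-count calculation shown in the
// patch above.  MAX_PARTITIONS, the row count and the node count are
// assumed values, not taken from the source tree.
#include <cstdio>

typedef unsigned long long ulonglong;
typedef unsigned int uint;

static const uint MAX_PARTITIONS= 1024;   // assumption for this sketch

static uint get_no_fragments(ulonglong max_rows)
{
  uint acc_row_size= 25 + /*safety margin*/ 2;
  ulonglong acc_fragment_size= 512*1024*1024;
  // post-5.1 branch: one ~512MB ACC fragment per slice of rows, plus one
  return (uint)((max_rows*acc_row_size)/acc_fragment_size + 1);
}

static bool adjusted_frag_count(uint no_fragments, uint no_nodes,
                                uint &reported_frags)
{
  // round up to a multiple of the node count, at most 4 * nodes
  uint i= 0;
  reported_frags= no_nodes;
  while (reported_frags < no_fragments && ++i < 4 &&
         (reported_frags + no_nodes) < MAX_PARTITIONS)
    reported_frags+= no_nodes;
  return (reported_frags < no_fragments);   // true => warning is pushed
}

int main()
{
  // 100M rows on a 4-node cluster: 100e6 * 27 bytes / 512MB -> 6 fragments,
  // rounded up to the next multiple of 4 nodes -> 8 partitions reported.
  uint frags= get_no_fragments(100000000ULL);
  uint reported;
  bool warn= adjusted_frag_count(frags, 4, reported);
  printf("fragments=%u reported=%u warn=%d\n", frags, reported, warn);
  return 0;
}

With these made-up numbers the six computed fragments are rounded up to eight, a multiple of the four data nodes, and the "Ndb might have problems storing the max amount of rows specified" warning is not raised.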
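Similarly, the INFORMATION_SCHEMA.FILES rows built in ndbcluster_fill_files_table() report datafile extents simply as bytes divided by the tablespace extent size, while undo log files are reported with a fixed extent size of 4. The hypothetical helper below, with invented sizes, only makes that unit conversion explicit; it is not code from the patch.

// Hypothetical helper, not from the patch: shows how the datafile rows in
// ndbcluster_fill_files_table() derive their extent columns.  The sizes
// below are invented for the example.
#include <cstdio>

typedef unsigned long long Uint64;
typedef unsigned int Uint32;

struct DatafileExample
{
  Uint64 size_bytes;    // corresponds to df.getSize()
  Uint64 free_bytes;    // corresponds to df.getFree()
  Uint32 extent_size;   // corresponds to ts.getExtentSize()
};

static void print_files_row(const DatafileExample &df)
{
  // FREE_EXTENTS / TOTAL_EXTENTS are bytes divided by the extent size;
  // INITIAL_SIZE and MAXIMUM_SIZE are the raw datafile size in bytes.
  printf("FREE_EXTENTS=%llu TOTAL_EXTENTS=%llu EXTENT_SIZE=%u "
         "INITIAL_SIZE=%llu MAXIMUM_SIZE=%llu\n",
         df.free_bytes / df.extent_size,
         df.size_bytes / df.extent_size,
         df.extent_size,
         df.size_bytes,
         df.size_bytes);
}

int main()
{
  DatafileExample df= { 128ULL*1024*1024, 96ULL*1024*1024, 1024*1024 };
  print_files_row(df);   // 96 free out of 128 one-megabyte extents
  return 0;
}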