summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorunknown <mronstrom@mysql.com>2005-07-18 13:31:02 +0200
committerunknown <mronstrom@mysql.com>2005-07-18 13:31:02 +0200
commitcd483c5520949ee9840628b68cd78b9a8c88e6b5 (patch)
tree49a4797f25aaf50e6e6c5ab9d193608d969a612e /sql
parent22545f477752987c8f70c0bc4740d2e8b67a6578 (diff)
downloadmariadb-git-cd483c5520949ee9840628b68cd78b9a8c88e6b5.tar.gz
Patch for push of wl1354 Partitioning
Diffstat (limited to 'sql')
-rw-r--r--sql/Makefile.am3
-rw-r--r--sql/field.cc43
-rw-r--r--sql/field.h19
-rw-r--r--sql/ha_berkeley.cc8
-rw-r--r--sql/ha_berkeley.h4
-rw-r--r--sql/ha_federated.cc2
-rw-r--r--sql/ha_federated.h2
-rw-r--r--sql/ha_innodb.cc5
-rw-r--r--sql/ha_innodb.h2
-rw-r--r--sql/ha_ndbcluster.cc535
-rw-r--r--sql/ha_ndbcluster.h31
-rw-r--r--sql/ha_partition.cc3162
-rw-r--r--sql/ha_partition.h916
-rw-r--r--sql/handler.cc97
-rw-r--r--sql/handler.h249
-rw-r--r--sql/item_subselect.cc4
-rw-r--r--sql/key.cc83
-rw-r--r--sql/lex.h13
-rw-r--r--sql/mysql_priv.h19
-rw-r--r--sql/mysqld.cc18
-rw-r--r--sql/opt_range.cc43
-rw-r--r--sql/opt_sum.cc4
-rw-r--r--sql/records.cc68
-rw-r--r--sql/set_var.cc1
-rw-r--r--sql/share/errmsg.txt78
-rw-r--r--sql/sp.cc4
-rw-r--r--sql/sql_acl.cc10
-rw-r--r--sql/sql_base.cc41
-rw-r--r--sql/sql_handler.cc6
-rw-r--r--sql/sql_help.cc4
-rw-r--r--sql/sql_lex.cc1
-rw-r--r--sql/sql_lex.h3
-rw-r--r--sql/sql_partition.cc3117
-rw-r--r--sql/sql_select.cc67
-rw-r--r--sql/sql_select.h1
-rw-r--r--sql/sql_show.cc34
-rw-r--r--sql/sql_table.cc134
-rw-r--r--sql/sql_update.cc11
-rw-r--r--sql/sql_yacc.yy458
-rw-r--r--sql/table.cc44
-rw-r--r--sql/table.h7
-rw-r--r--sql/tztime.cc10
-rw-r--r--sql/unireg.cc58
-rw-r--r--sql/unireg.h1
44 files changed, 9106 insertions, 314 deletions
diff --git a/sql/Makefile.am b/sql/Makefile.am
index b1b14db4cb7..2eab9052ba7 100644
--- a/sql/Makefile.am
+++ b/sql/Makefile.am
@@ -63,7 +63,7 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \
parse_file.h sql_view.h sql_trigger.h \
examples/ha_example.h examples/ha_archive.h \
examples/ha_tina.h ha_blackhole.h \
- ha_federated.h
+ ha_federated.h ha_partition.h
mysqld_SOURCES = sql_lex.cc sql_handler.cc \
item.cc item_sum.cc item_buff.cc item_func.cc \
item_cmpfunc.cc item_strfunc.cc item_timefunc.cc \
@@ -100,6 +100,7 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc \
sp_cache.cc parse_file.cc sql_trigger.cc \
examples/ha_example.cc examples/ha_archive.cc \
examples/ha_tina.cc ha_blackhole.cc \
+ ha_partition.cc sql_partition.cc \
ha_federated.cc
gen_lex_hash_SOURCES = gen_lex_hash.cc
diff --git a/sql/field.cc b/sql/field.cc
index 1a3b0f70498..0a8f0a39262 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -6311,7 +6311,8 @@ my_decimal *Field_varstring::val_decimal(my_decimal *decimal_value)
}
-int Field_varstring::cmp(const char *a_ptr, const char *b_ptr)
+int Field_varstring::cmp_max(const char *a_ptr, const char *b_ptr,
+ uint max_len)
{
uint a_length, b_length;
int diff;
@@ -6326,6 +6327,8 @@ int Field_varstring::cmp(const char *a_ptr, const char *b_ptr)
a_length= uint2korr(a_ptr);
b_length= uint2korr(b_ptr);
}
+ set_if_smaller(a_length, max_len);
+ set_if_smaller(b_length, max_len);
diff= field_charset->coll->strnncollsp(field_charset,
(const uchar*) a_ptr+
length_bytes,
@@ -6956,13 +6959,16 @@ int Field_blob::cmp(const char *a,uint32 a_length, const char *b,
}
-int Field_blob::cmp(const char *a_ptr, const char *b_ptr)
+int Field_blob::cmp_max(const char *a_ptr, const char *b_ptr,
+ uint max_length)
{
char *blob1,*blob2;
memcpy_fixed(&blob1,a_ptr+packlength,sizeof(char*));
memcpy_fixed(&blob2,b_ptr+packlength,sizeof(char*));
- return Field_blob::cmp(blob1,get_length(a_ptr),
- blob2,get_length(b_ptr));
+ uint a_len= get_length(a_ptr), b_len= get_length(b_ptr);
+ set_if_smaller(a_len, max_length);
+ set_if_smaller(b_len, max_length);
+ return Field_blob::cmp(blob1,a_len,blob2,b_len);
}
@@ -7979,6 +7985,35 @@ my_decimal *Field_bit::val_decimal(my_decimal *deciaml_value)
}
+/*
+ Compare two bit fields using pointers within the record.
+ SYNOPSIS
+ cmp_max()
+ a Pointer to field->ptr in first record
+ b Pointer to field->ptr in second record
+ max_len Maximum length used in index
+ DESCRIPTION
+ This method is used from key_rec_cmp used by merge sorts used
+ by partitioned index read and later other similar places.
+ The a and b pointer must be pointers to the field in a record
+ (not the table->record[0] necessarily)
+*/
+int Field_bit::cmp_max(const char *a, const char *b, uint max_len)
+{
+ my_ptrdiff_t a_diff= a - ptr;
+ my_ptrdiff_t b_diff= b - ptr;
+ if (bit_len)
+ {
+ int flag;
+ uchar bits_a= get_rec_bits(bit_ptr+a_diff, bit_ofs, bit_len);
+ uchar bits_b= get_rec_bits(bit_ptr+b_diff, bit_ofs, bit_len);
+ if ((flag= (int) (bits_a - bits_b)))
+ return flag;
+ }
+ return memcmp(a, b, field_length);
+}
+
+
int Field_bit::key_cmp(const byte *str, uint length)
{
if (bit_len)
diff --git a/sql/field.h b/sql/field.h
index 3f6b88198db..9b6df35de43 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -87,7 +87,7 @@ public:
utype unireg_check;
uint32 field_length; // Length of field
uint field_index; // field number in fields array
- uint16 flags;
+ uint32 flags;
/* fieldnr is the id of the field (first field = 1) as is also
used in key_part.
*/
@@ -154,6 +154,8 @@ public:
virtual enum_field_types type() const =0;
virtual enum_field_types real_type() const { return type(); }
inline int cmp(const char *str) { return cmp(ptr,str); }
+ virtual int cmp_max(const char *a, const char *b, uint max_len)
+ { return cmp(a, b); }
virtual int cmp(const char *,const char *)=0;
virtual int cmp_binary(const char *a,const char *b, uint32 max_length=~0L)
{ return memcmp(a,b,pack_length()); }
@@ -1059,7 +1061,11 @@ public:
longlong val_int(void);
String *val_str(String*,String *);
my_decimal *val_decimal(my_decimal *);
- int cmp(const char *,const char*);
+ int cmp_max(const char *, const char *, uint max_length);
+ int cmp(const char *a,const char*b)
+ {
+ return cmp_max(a, b, ~0);
+ }
void sort_string(char *buff,uint length);
void get_key_image(char *buff,uint length, imagetype type);
void set_key_image(char *buff,uint length);
@@ -1115,7 +1121,9 @@ public:
longlong val_int(void);
String *val_str(String*,String *);
my_decimal *val_decimal(my_decimal *);
- int cmp(const char *,const char*);
+ int cmp_max(const char *, const char *, uint max_length);
+ int cmp(const char *a,const char*b)
+ { return cmp_max(a, b, ~0); }
int cmp(const char *a, uint32 a_length, const char *b, uint32 b_length);
int cmp_binary(const char *a,const char *b, uint32 max_length=~0L);
int key_cmp(const byte *,const byte*);
@@ -1139,6 +1147,10 @@ public:
{
memcpy_fixed(str,ptr+packlength,sizeof(char*));
}
+ inline void get_ptr(char **str, uint row_offset)
+ {
+ memcpy_fixed(str,ptr+packlength+row_offset,sizeof(char*));
+ }
inline void set_ptr(char *length,char *data)
{
memcpy(ptr,length,packlength);
@@ -1307,6 +1319,7 @@ public:
my_decimal *val_decimal(my_decimal *);
int cmp(const char *a, const char *b)
{ return cmp_binary(a, b); }
+ int cmp_max(const char *a, const char *b, uint max_length);
int key_cmp(const byte *a, const byte *b)
{ return cmp_binary((char *) a, (char *) b); }
int key_cmp(const byte *str, uint length);
diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc
index 568fb727e63..6b283079072 100644
--- a/sql/ha_berkeley.cc
+++ b/sql/ha_berkeley.cc
@@ -1360,7 +1360,7 @@ int ha_berkeley::delete_row(const byte * record)
}
-int ha_berkeley::index_init(uint keynr)
+int ha_berkeley::index_init(uint keynr, bool sorted)
{
int error;
DBUG_ENTER("ha_berkeley::index_init");
@@ -1638,7 +1638,7 @@ int ha_berkeley::rnd_init(bool scan)
{
DBUG_ENTER("rnd_init");
current_row.flags=DB_DBT_REALLOC;
- DBUG_RETURN(index_init(primary_key));
+ DBUG_RETURN(index_init(primary_key, 0));
}
int ha_berkeley::rnd_end()
@@ -2146,7 +2146,7 @@ ulonglong ha_berkeley::get_auto_increment()
(void) ha_berkeley::extra(HA_EXTRA_KEYREAD);
/* Set 'active_index' */
- ha_berkeley::index_init(table->s->next_number_index);
+ ha_berkeley::index_init(table->s->next_number_index, 0);
if (!table->s->next_number_key_offset)
{ // Autoincrement at key-start
@@ -2485,7 +2485,7 @@ void ha_berkeley::get_status()
if (!(share->status & STATUS_PRIMARY_KEY_INIT))
{
(void) extra(HA_EXTRA_KEYREAD);
- index_init(primary_key);
+ index_init(primary_key, 0);
if (!index_last(table->record[1]))
share->auto_ident=uint5korr(current_ident);
index_end();
diff --git a/sql/ha_berkeley.h b/sql/ha_berkeley.h
index f6376939445..596a59b4d43 100644
--- a/sql/ha_berkeley.h
+++ b/sql/ha_berkeley.h
@@ -98,7 +98,7 @@ class ha_berkeley: public handler
const char **bas_ext() const;
ulong table_flags(void) const { return int_table_flags; }
uint max_supported_keys() const { return MAX_KEY-1; }
- uint extra_rec_buf_length() { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; }
+ uint extra_rec_buf_length() const { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; }
ha_rows estimate_rows_upper_bound();
const key_map *keys_to_use_for_scanning() { return &key_map_full; }
bool has_transactions() { return 1;}
@@ -109,7 +109,7 @@ class ha_berkeley: public handler
int write_row(byte * buf);
int update_row(const byte * old_data, byte * new_data);
int delete_row(const byte * buf);
- int index_init(uint index);
+ int index_init(uint index, bool sorted);
int index_end();
int index_read(byte * buf, const byte * key,
uint key_len, enum ha_rkey_function find_flag);
diff --git a/sql/ha_federated.cc b/sql/ha_federated.cc
index bbcae0613e7..1cec6faea04 100644
--- a/sql/ha_federated.cc
+++ b/sql/ha_federated.cc
@@ -1512,7 +1512,7 @@ int ha_federated::index_read_idx(byte *buf, uint index, const byte *key,
}
/* Initialized at each key walk (called multiple times unlike rnd_init()) */
-int ha_federated::index_init(uint keynr)
+int ha_federated::index_init(uint keynr, bool sorted)
{
DBUG_ENTER("ha_federated::index_init");
DBUG_PRINT("info",
diff --git a/sql/ha_federated.h b/sql/ha_federated.h
index f084976718c..5908fe5afba 100644
--- a/sql/ha_federated.h
+++ b/sql/ha_federated.h
@@ -154,7 +154,7 @@ public:
int write_row(byte * buf);
int update_row(const byte * old_data, byte * new_data);
int delete_row(const byte * buf);
- int index_init(uint keynr);
+ int index_init(uint keynr, bool sorted);
int index_read(byte * buf, const byte * key,
uint key_len, enum ha_rkey_function find_flag);
int index_read_idx(byte * buf, uint idx, const byte * key,
diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc
index 4167b7c2dde..1dfc64c9137 100644
--- a/sql/ha_innodb.cc
+++ b/sql/ha_innodb.cc
@@ -3595,7 +3595,8 @@ int
ha_innobase::index_init(
/*====================*/
/* out: 0 or error number */
- uint keynr) /* in: key (index) number */
+ uint keynr, /* in: key (index) number */
+ bool sorted) /* in: 1 if result MUST be sorted according to index */
{
int error = 0;
DBUG_ENTER("index_init");
@@ -6646,7 +6647,7 @@ ha_innobase::innobase_read_and_init_auto_inc(
}
(void) extra(HA_EXTRA_KEYREAD);
- index_init(table->s->next_number_index);
+ index_init(table->s->next_number_index, 1);
/* Starting from 5.0.9, we use a consistent read to read the auto-inc
column maximum value. This eliminates the spurious deadlocks caused
diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h
index 98496e748b4..150eae63730 100644
--- a/sql/ha_innodb.h
+++ b/sql/ha_innodb.h
@@ -136,7 +136,7 @@ class ha_innobase: public handler
int delete_row(const byte * buf);
void unlock_row();
- int index_init(uint index);
+ int index_init(uint index, bool sorted);
int index_end();
int index_read(byte * buf, const byte * key,
uint key_len, enum ha_rkey_function find_flag);
diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc
index 7d182d07087..1f830342b73 100644
--- a/sql/ha_ndbcluster.cc
+++ b/sql/ha_ndbcluster.cc
@@ -34,6 +34,7 @@
// options from from mysqld.cc
extern my_bool opt_ndb_optimized_node_selection;
+extern my_bool opt_ndb_linear_hash;
extern const char *opt_ndbcluster_connectstring;
// Default value for parallelism
@@ -99,6 +100,7 @@ static HASH ndbcluster_open_tables;
static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length,
my_bool not_used __attribute__((unused)));
+static void ndb_set_fragmentation(NDBTAB & tab, TABLE *table, uint pk_len);
static NDB_SHARE *get_share(const char *table_name);
static void free_share(NDB_SHARE *share);
@@ -861,11 +863,9 @@ bool ha_ndbcluster::uses_blob_value()
{
uint no_fields= table->s->fields;
int i;
- THD *thd= current_thd;
// They always put blobs at the end..
for (i= no_fields - 1; i >= 0; i--)
{
- Field *field= table->field[i];
if ((m_write_op && ha_get_bit_in_write_set(i+1)) ||
(!m_write_op && ha_get_bit_in_read_set(i+1)))
{
@@ -1292,8 +1292,6 @@ inline
int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op)
{
uint i;
- THD *thd= current_thd;
-
DBUG_ENTER("define_read_attrs");
// Define attributes to read
@@ -1333,7 +1331,8 @@ int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op)
Read one record from NDB using primary key
*/
-int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf)
+int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf,
+ uint32 part_id)
{
uint no_fields= table->s->fields;
NdbConnection *trans= m_active_trans;
@@ -1351,6 +1350,8 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf)
op->readTuple(lm) != 0)
ERR_RETURN(trans->getNdbError());
+ if (m_use_partition_function)
+ op->setPartitionId(part_id);
if (table->s->primary_key == MAX_KEY)
{
// This table has no primary key, use "hidden" primary key
@@ -1388,12 +1389,12 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf)
Read one complementing record from NDB using primary key from old_data
*/
-int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data)
+int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data,
+ uint32 old_part_id)
{
uint no_fields= table->s->fields, i;
NdbTransaction *trans= m_active_trans;
NdbOperation *op;
- THD *thd= current_thd;
DBUG_ENTER("complemented_pk_read");
m_write_op= FALSE;
@@ -1411,6 +1412,10 @@ int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data)
int res;
if ((res= set_primary_key_from_record(op, old_data)))
ERR_RETURN(trans->getNdbError());
+
+ if (m_use_partition_function)
+ op->setPartitionId(old_part_id);
+
// Read all unreferenced non-key field(s)
for (i= 0; i < no_fields; i++)
{
@@ -1469,6 +1474,17 @@ int ha_ndbcluster::peek_row(const byte *record)
if ((res= set_primary_key_from_record(op, record)))
ERR_RETURN(trans->getNdbError());
+ if (m_use_partition_function)
+ {
+ uint32 part_id;
+ int error;
+ if ((error= m_part_info->get_partition_id(m_part_info, &part_id)))
+ {
+ DBUG_RETURN(error);
+ }
+ op->setPartitionId(part_id);
+ }
+
if (execute_no_commit_ie(this,trans) != 0)
{
table->status= STATUS_NOT_FOUND;
@@ -1807,7 +1823,8 @@ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op,
int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
const key_range *end_key,
- bool sorted, bool descending, byte* buf)
+ bool sorted, bool descending,
+ byte* buf, part_id_range *part_spec)
{
int res;
bool restart;
@@ -1833,11 +1850,17 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
(const NDBTAB *) m_table)) ||
op->readTuples(lm, 0, parallelism, sorted, descending))
ERR_RETURN(trans->getNdbError());
+ if (m_use_partition_function && part_spec != NULL &&
+ part_spec->start_part == part_spec->end_part)
+ op->setPartitionId(part_spec->start_part);
m_active_cursor= op;
} else {
restart= TRUE;
op= (NdbIndexScanOperation*)m_active_cursor;
+ if (m_use_partition_function && part_spec != NULL &&
+ part_spec->start_part == part_spec->end_part)
+ op->setPartitionId(part_spec->start_part);
DBUG_ASSERT(op->getSorted() == sorted);
DBUG_ASSERT(op->getLockMode() ==
(NdbOperation::LockMode)get_ndb_lock_type(m_lock.type));
@@ -1937,6 +1960,17 @@ int ha_ndbcluster::write_row(byte *record)
if (res != 0)
ERR_RETURN(trans->getNdbError());
+ if (m_use_partition_function)
+ {
+ uint32 part_id;
+ int error;
+ if ((error= m_part_info->get_partition_id(m_part_info, &part_id)))
+ {
+ DBUG_RETURN(error);
+ }
+ op->setPartitionId(part_id);
+ }
+
if (table->s->primary_key == MAX_KEY)
{
// Table has hidden primary key
@@ -2094,6 +2128,8 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data)
NdbScanOperation* cursor= m_active_cursor;
NdbOperation *op;
uint i;
+ uint32 old_part_id= 0, new_part_id= 0;
+ int error;
DBUG_ENTER("update_row");
m_write_op= TRUE;
@@ -2104,15 +2140,23 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data)
ha_set_bit_in_write_set(table->timestamp_field->fieldnr);
}
+ if (m_use_partition_function &&
+ (error= get_parts_for_update(old_data, new_data, table->record[0],
+ m_part_info, &old_part_id, &new_part_id)))
+ {
+ DBUG_RETURN(error);
+ }
+
/* Check for update of primary key for special handling */
if ((table->s->primary_key != MAX_KEY) &&
- (key_cmp(table->s->primary_key, old_data, new_data)))
+ (key_cmp(table->s->primary_key, old_data, new_data)) ||
+ (old_part_id != new_part_id))
{
int read_res, insert_res, delete_res, undo_res;
DBUG_PRINT("info", ("primary key update, doing pk read+delete+insert"));
// Get all old fields, since we optimize away fields not in query
- read_res= complemented_pk_read(old_data, new_data);
+ read_res= complemented_pk_read(old_data, new_data, old_part_id);
if (read_res)
{
DBUG_PRINT("info", ("pk read failed"));
@@ -2168,6 +2212,8 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data)
m_ops_pending++;
if (uses_blob_value())
m_blobs_pending= TRUE;
+ if (m_use_partition_function)
+ cursor->setPartitionId(new_part_id);
}
else
{
@@ -2175,6 +2221,8 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data)
op->updateTuple() != 0)
ERR_RETURN(trans->getNdbError());
+ if (m_use_partition_function)
+ op->setPartitionId(new_part_id);
if (table->s->primary_key == MAX_KEY)
{
// This table has no primary key, use "hidden" primary key
@@ -2230,12 +2278,21 @@ int ha_ndbcluster::delete_row(const byte *record)
NdbTransaction *trans= m_active_trans;
NdbScanOperation* cursor= m_active_cursor;
NdbOperation *op;
+ uint32 part_id;
+ int error;
DBUG_ENTER("delete_row");
m_write_op= TRUE;
statistic_increment(thd->status_var.ha_delete_count,&LOCK_status);
m_rows_changed++;
+ if (m_use_partition_function &&
+ (error= get_part_for_delete(record, table->record[0], m_part_info,
+ &part_id)))
+ {
+ DBUG_RETURN(error);
+ }
+
if (cursor)
{
/*
@@ -2250,6 +2307,9 @@ int ha_ndbcluster::delete_row(const byte *record)
ERR_RETURN(trans->getNdbError());
m_ops_pending++;
+ if (m_use_partition_function)
+ cursor->setPartitionId(part_id);
+
no_uncommitted_rows_update(-1);
if (!m_primary_key_update)
@@ -2263,6 +2323,9 @@ int ha_ndbcluster::delete_row(const byte *record)
op->deleteTuple() != 0)
ERR_RETURN(trans->getNdbError());
+ if (m_use_partition_function)
+ op->setPartitionId(part_id);
+
no_uncommitted_rows_update(-1);
if (table->s->primary_key == MAX_KEY)
@@ -2388,8 +2451,6 @@ void ha_ndbcluster::print_results()
DBUG_ENTER("print_results");
#ifndef DBUG_OFF
- const NDBTAB *tab= (const NDBTAB*) m_table;
-
if (!_db_on_)
DBUG_VOID_RETURN;
@@ -2444,11 +2505,13 @@ print_value:
}
-int ha_ndbcluster::index_init(uint index)
+int ha_ndbcluster::index_init(uint index, bool sorted)
{
DBUG_ENTER("ha_ndbcluster::index_init");
- DBUG_PRINT("enter", ("index: %u", index));
- DBUG_RETURN(handler::index_init(index));
+ DBUG_PRINT("enter", ("index: %u sorted: %d", index, sorted));
+ active_index= index;
+ m_sorted= sorted;
+ DBUG_RETURN(0);
}
@@ -2485,56 +2548,16 @@ int ha_ndbcluster::index_read(byte *buf,
const byte *key, uint key_len,
enum ha_rkey_function find_flag)
{
+ key_range start_key;
+ bool descending= FALSE;
DBUG_ENTER("ha_ndbcluster::index_read");
DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d",
active_index, key_len, find_flag));
- int error;
- ndb_index_type type= get_index_type(active_index);
- const KEY* key_info= table->key_info+active_index;
- m_write_op= FALSE;
- switch (type){
- case PRIMARY_KEY_ORDERED_INDEX:
- case PRIMARY_KEY_INDEX:
- if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len)
- {
- if (m_active_cursor && (error= close_scan()))
- DBUG_RETURN(error);
- DBUG_RETURN(pk_read(key, key_len, buf));
- }
- else if (type == PRIMARY_KEY_INDEX)
- {
- DBUG_RETURN(1);
- }
- break;
- case UNIQUE_ORDERED_INDEX:
- case UNIQUE_INDEX:
- if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len &&
- !check_null_in_key(key_info, key, key_len))
- {
- if (m_active_cursor && (error= close_scan()))
- DBUG_RETURN(error);
- DBUG_RETURN(unique_index_read(key, key_len, buf));
- }
- else if (type == UNIQUE_INDEX)
- {
- DBUG_RETURN(1);
- }
- break;
- case ORDERED_INDEX:
- break;
- default:
- case UNDEFINED_INDEX:
- DBUG_ASSERT(FALSE);
- DBUG_RETURN(1);
- break;
- }
-
- key_range start_key;
start_key.key= key;
start_key.length= key_len;
start_key.flag= find_flag;
- bool descending= FALSE;
+ descending= FALSE;
switch (find_flag) {
case HA_READ_KEY_OR_PREV:
case HA_READ_BEFORE_KEY:
@@ -2545,8 +2568,8 @@ int ha_ndbcluster::index_read(byte *buf,
default:
break;
}
- error= ordered_index_scan(&start_key, 0, TRUE, descending, buf);
- DBUG_RETURN(error == HA_ERR_END_OF_FILE ? HA_ERR_KEY_NOT_FOUND : error);
+ DBUG_RETURN(read_range_first_to_buf(&start_key, 0, descending,
+ m_sorted, buf));
}
@@ -2557,7 +2580,7 @@ int ha_ndbcluster::index_read_idx(byte *buf, uint index_no,
statistic_increment(current_thd->status_var.ha_read_key_count, &LOCK_status);
DBUG_ENTER("ha_ndbcluster::index_read_idx");
DBUG_PRINT("enter", ("index_no: %u, key_len: %u", index_no, key_len));
- index_init(index_no);
+ index_init(index_no, 0);
DBUG_RETURN(index_read(buf, key, key_len, find_flag));
}
@@ -2588,7 +2611,7 @@ int ha_ndbcluster::index_first(byte *buf)
// Start the ordered index scan and fetch the first row
// Only HA_READ_ORDER indexes get called by index_first
- DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf));
+ DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf, NULL));
}
@@ -2596,7 +2619,7 @@ int ha_ndbcluster::index_last(byte *buf)
{
DBUG_ENTER("ha_ndbcluster::index_last");
statistic_increment(current_thd->status_var.ha_read_last_count,&LOCK_status);
- DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf));
+ DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf, NULL));
}
int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len)
@@ -2605,67 +2628,76 @@ int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len)
DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST));
}
-inline
int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key,
const key_range *end_key,
- bool eq_r, bool sorted,
+ bool desc, bool sorted,
byte* buf)
{
- KEY* key_info;
- int error= 1;
+ part_id_range part_spec;
+ ndb_index_type type= get_index_type(active_index);
+ const KEY* key_info= table->key_info+active_index;
+ int error;
DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf");
- DBUG_PRINT("info", ("eq_r: %d, sorted: %d", eq_r, sorted));
+ DBUG_PRINT("info", ("desc: %d, sorted: %d", desc, sorted));
- switch (get_index_type(active_index)){
+ if (m_use_partition_function)
+ {
+ get_partition_set(table, buf, active_index, start_key, &part_spec);
+ if (part_spec.start_part > part_spec.end_part)
+ {
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+ }
+ else if (part_spec.start_part == part_spec.end_part)
+ {
+ /*
+ Only one partition is required to scan, if sorted is required we
+ don't need it any more since output from one ordered partitioned
+ index is always sorted.
+ */
+ sorted= FALSE;
+ }
+ }
+ m_write_op= FALSE;
+ switch (type){
case PRIMARY_KEY_ORDERED_INDEX:
case PRIMARY_KEY_INDEX:
- key_info= table->key_info + active_index;
if (start_key &&
start_key->length == key_info->key_length &&
start_key->flag == HA_READ_KEY_EXACT)
{
if (m_active_cursor && (error= close_scan()))
DBUG_RETURN(error);
- error= pk_read(start_key->key, start_key->length, buf);
- DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
+ DBUG_RETURN(pk_read(start_key->key, start_key->length, buf,
+ part_spec.start_part));
}
break;
case UNIQUE_ORDERED_INDEX:
case UNIQUE_INDEX:
- key_info= table->key_info + active_index;
if (start_key && start_key->length == key_info->key_length &&
start_key->flag == HA_READ_KEY_EXACT &&
!check_null_in_key(key_info, start_key->key, start_key->length))
{
if (m_active_cursor && (error= close_scan()))
DBUG_RETURN(error);
- error= unique_index_read(start_key->key, start_key->length, buf);
- DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
+ DBUG_RETURN(unique_index_read(start_key->key, start_key->length, buf));
}
break;
default:
break;
}
-
// Start the ordered index scan and fetch the first row
- error= ordered_index_scan(start_key, end_key, sorted, FALSE, buf);
- DBUG_RETURN(error);
+ DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf,
+ &part_spec));
}
-
int ha_ndbcluster::read_range_first(const key_range *start_key,
const key_range *end_key,
bool eq_r, bool sorted)
{
byte* buf= table->record[0];
DBUG_ENTER("ha_ndbcluster::read_range_first");
- m_write_op= FALSE;
-
- DBUG_RETURN(read_range_first_to_buf(start_key,
- end_key,
- eq_r,
- sorted,
- buf));
+ DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE,
+ sorted, buf));
}
int ha_ndbcluster::read_range_next()
@@ -2691,7 +2723,7 @@ int ha_ndbcluster::rnd_init(bool scan)
DBUG_RETURN(-1);
}
}
- index_init(table->s->primary_key);
+ index_init(table->s->primary_key, 0);
DBUG_RETURN(0);
}
@@ -2758,7 +2790,20 @@ int ha_ndbcluster::rnd_pos(byte *buf, byte *pos)
&LOCK_status);
// The primary key for the record is stored in pos
// Perform a pk_read using primary key "index"
- DBUG_RETURN(pk_read(pos, ref_length, buf));
+ {
+ part_id_range part_spec;
+ if (m_use_partition_function)
+ {
+ key_range key_spec;
+ KEY *key_info= table->key_info + active_index;
+ key_spec.key= pos;
+ key_spec.length= ref_length;
+ key_spec.flag= HA_READ_KEY_EXACT;
+ get_full_part_id_from_key(table, buf, key_info, &key_spec, &part_spec);
+ DBUG_ASSERT(part_spec.start_part == part_spec.end_part);
+ }
+ DBUG_RETURN(pk_read(pos, ref_length, buf, part_spec.start_part));
+ }
}
@@ -2904,6 +2949,8 @@ int ha_ndbcluster::extra(enum ha_extra_function operation)
m_use_write= FALSE;
m_ignore_dup_key= FALSE;
break;
+ default:
+ break;
}
DBUG_RETURN(0);
@@ -3691,56 +3738,6 @@ static int create_ndb_column(NDBCOL &col,
return 0;
}
-/*
- Create a table in NDB Cluster
- */
-
-static void ndb_set_fragmentation(NDBTAB &tab, TABLE *form, uint pk_length)
-{
- if (form->s->max_rows == (ha_rows) 0) /* default setting, don't set fragmentation */
- return;
- /**
- * get the number of fragments right
- */
- uint no_fragments;
- {
-#if MYSQL_VERSION_ID >= 50000
- uint acc_row_size= 25 + /*safety margin*/ 2;
-#else
- uint acc_row_size= pk_length*4;
- /* add acc overhead */
- if (pk_length <= 8) /* main page will set the limit */
- acc_row_size+= 25 + /*safety margin*/ 2;
- else /* overflow page will set the limit */
- acc_row_size+= 4 + /*safety margin*/ 4;
-#endif
- ulonglong acc_fragment_size= 512*1024*1024;
- ulonglong max_rows= form->s->max_rows;
-#if MYSQL_VERSION_ID >= 50100
- no_fragments= (max_rows*acc_row_size)/acc_fragment_size+1;
-#else
- no_fragments= ((max_rows*acc_row_size)/acc_fragment_size+1
- +1/*correct rounding*/)/2;
-#endif
- }
- {
- uint no_nodes= g_ndb_cluster_connection->no_db_nodes();
- NDBTAB::FragmentType ftype;
- if (no_fragments > 2*no_nodes)
- {
- ftype= NDBTAB::FragAllLarge;
- if (no_fragments > 4*no_nodes)
- push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
- "Ndb might have problems storing the max amount of rows specified");
- }
- else if (no_fragments > no_nodes)
- ftype= NDBTAB::FragAllMedium;
- else
- ftype= NDBTAB::FragAllSmall;
- tab.setFragmentType(ftype);
- }
-}
-
int ha_ndbcluster::create(const char *name,
TABLE *form,
HA_CREATE_INFO *info)
@@ -3843,7 +3840,22 @@ int ha_ndbcluster::create(const char *name,
}
}
- ndb_set_fragmentation(tab, form, pk_length);
+ // Check partition info
+ partition_info *part_info= form->s->part_info;
+ if (part_info)
+ {
+ int error;
+ if ((error= set_up_partition_info(part_info, form, (void*)&tab)))
+ {
+ DBUG_RETURN(error);
+ }
+ }
+ else
+ {
+ ndb_set_fragmentation(tab, form, pk_length);
+ }
+
+
if ((my_errno= check_ndb_connection()))
DBUG_RETURN(my_errno);
@@ -4092,6 +4104,9 @@ ha_ndbcluster::ha_ndbcluster(TABLE *table_arg):
HA_NEED_READ_RANGE_BUFFER |
HA_CAN_BIT_FIELD),
m_share(0),
+ m_part_info(NULL),
+ m_use_partition_function(FALSE),
+ m_sorted(FALSE),
m_use_write(FALSE),
m_ignore_dup_key(FALSE),
m_primary_key_update(FALSE),
@@ -4206,6 +4221,15 @@ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked)
if (!res)
info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
+ if (table->s->part_info)
+ {
+ m_part_info= table->s->part_info;
+ if (!(m_part_info->part_type == HASH_PARTITION &&
+ m_part_info->list_of_part_fields &&
+ !is_sub_partitioned(m_part_info)))
+ m_use_partition_function= TRUE;
+ }
+
DBUG_RETURN(res);
}
@@ -5478,12 +5502,29 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
for (; multi_range_curr<multi_range_end && curr+reclength <= end_of_buffer;
multi_range_curr++)
{
- switch (index_type){
+ part_id_range part_spec;
+ if (m_use_partition_function)
+ {
+ get_partition_set(table, curr, active_index,
+ &multi_range_curr->start_key,
+ &part_spec);
+ if (part_spec.start_part > part_spec.end_part)
+ {
+ /*
+ We can skip this partition since the key won't fit into any
+ partition
+ */
+ curr += reclength;
+ multi_range_curr->range_flag |= SKIP_RANGE;
+ continue;
+ }
+ }
+ switch(index_type){
case PRIMARY_KEY_ORDERED_INDEX:
if (!(multi_range_curr->start_key.length == key_info->key_length &&
- multi_range_curr->start_key.flag == HA_READ_KEY_EXACT))
- goto range;
- /* fall through */
+ multi_range_curr->start_key.flag == HA_READ_KEY_EXACT))
+ goto range;
+ // else fall through
case PRIMARY_KEY_INDEX:
{
multi_range_curr->range_flag |= UNIQUE_RANGE;
@@ -5491,7 +5532,9 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
!op->readTuple(lm) &&
!set_primary_key(op, multi_range_curr->start_key.key) &&
!define_read_attrs(curr, op) &&
- (op->setAbortOption(AO_IgnoreError), TRUE))
+ (op->setAbortOption(AO_IgnoreError), TRUE) &&
+ (!m_use_partition_function ||
+ (op->setPartitionId(part_spec.start_part), true)))
curr += reclength;
else
ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
@@ -5500,11 +5543,11 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
break;
case UNIQUE_ORDERED_INDEX:
if (!(multi_range_curr->start_key.length == key_info->key_length &&
- multi_range_curr->start_key.flag == HA_READ_KEY_EXACT &&
- !check_null_in_key(key_info, multi_range_curr->start_key.key,
- multi_range_curr->start_key.length)))
- goto range;
- /* fall through */
+ multi_range_curr->start_key.flag == HA_READ_KEY_EXACT &&
+ !check_null_in_key(key_info, multi_range_curr->start_key.key,
+ multi_range_curr->start_key.length)))
+ goto range;
+ // else fall through
case UNIQUE_INDEX:
{
multi_range_curr->range_flag |= UNIQUE_RANGE;
@@ -5518,8 +5561,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
break;
}
- case ORDERED_INDEX:
- {
+ case ORDERED_INDEX: {
range:
multi_range_curr->range_flag &= ~(uint)UNIQUE_RANGE;
if (scanOp == 0)
@@ -5594,7 +5636,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
}
#if 0
-#define DBUG_MULTI_RANGE(x) printf("read_multi_range_next: case %d\n", x);
+#define DBUG_MULTI_RANGE(x) DBUG_PRINT("info", ("read_multi_range_next: case %d\n", x));
#else
#define DBUG_MULTI_RANGE(x)
#endif
@@ -5605,6 +5647,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
DBUG_ENTER("ha_ndbcluster::read_multi_range_next");
if (m_disable_multi_read)
{
+ DBUG_MULTI_RANGE(11);
DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p));
}
@@ -5614,10 +5657,16 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
const NdbOperation* op= m_current_multi_operation;
for (;multi_range_curr < m_multi_range_defined; multi_range_curr++)
{
+ DBUG_MULTI_RANGE(12);
+ if (multi_range_curr->range_flag & SKIP_RANGE)
+ continue;
if (multi_range_curr->range_flag & UNIQUE_RANGE)
{
if (op->getNdbError().code == 0)
+ {
+ DBUG_MULTI_RANGE(13);
goto found_next;
+ }
op= m_active_trans->getNextCompletedOperation(op);
m_multi_range_result_ptr += reclength;
@@ -5634,6 +5683,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
}
else
{
+ DBUG_MULTI_RANGE(14);
goto close_scan;
}
}
@@ -5667,6 +5717,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
DBUG_ASSERT(range_no == -1);
if ((res= m_multi_cursor->nextResult(true)))
{
+ DBUG_MULTI_RANGE(15);
goto close_scan;
}
multi_range_curr--; // Will be increased in for-loop
@@ -5694,12 +5745,16 @@ close_scan:
}
else
{
+ DBUG_MULTI_RANGE(9);
DBUG_RETURN(ndb_err(m_active_trans));
}
}
if (multi_range_curr == multi_range_end)
+ {
+ DBUG_MULTI_RANGE(16);
DBUG_RETURN(HA_ERR_END_OF_FILE);
+ }
/**
* Read remaining ranges
@@ -6916,6 +6971,8 @@ ha_ndbcluster::build_scan_filter_predicate(Ndb_cond * &cond,
: NULL;
break;
default:
+ field= NULL; //Keep compiler happy
+ DBUG_ASSERT(0);
break;
}
switch ((negated) ?
@@ -7263,4 +7320,178 @@ ha_ndbcluster::generate_scan_filter(Ndb_cond_stack *ndb_cond_stack,
DBUG_RETURN(0);
}
+
+/*
+ Create a table in NDB Cluster
+ */
+static uint get_no_fragments(ulonglong max_rows)
+{
+#if MYSQL_VERSION_ID >= 50000
+ uint acc_row_size= 25 + /*safety margin*/ 2;
+#else
+ uint acc_row_size= pk_length*4;
+ /* add acc overhead */
+ if (pk_length <= 8) /* main page will set the limit */
+ acc_row_size+= 25 + /*safety margin*/ 2;
+ else /* overflow page will set the limit */
+ acc_row_size+= 4 + /*safety margin*/ 4;
+#endif
+ ulonglong acc_fragment_size= 512*1024*1024;
+#if MYSQL_VERSION_ID >= 50100
+ return (max_rows*acc_row_size)/acc_fragment_size+1;
+#else
+ return ((max_rows*acc_row_size)/acc_fragment_size+1
+ +1/*correct rounding*/)/2;
+#endif
+}
+
+
+/*
+ Routine to adjust default number of partitions to always be a multiple
+ of number of nodes and never more than 4 times the number of nodes.
+
+*/
+static bool adjusted_frag_count(uint no_fragments, uint no_nodes,
+ uint &reported_frags)
+{
+ uint i= 0;
+ reported_frags= no_nodes;
+ while (reported_frags < no_fragments && ++i < 4 &&
+ (reported_frags + no_nodes) < MAX_PARTITIONS)
+ reported_frags+= no_nodes;
+ return (reported_frags < no_fragments);
+}
+
+int ha_ndbcluster::get_default_no_partitions(ulonglong max_rows)
+{
+ uint reported_frags;
+ uint no_fragments= get_no_fragments(max_rows);
+ uint no_nodes= g_ndb_cluster_connection->no_db_nodes();
+ adjusted_frag_count(no_fragments, no_nodes, reported_frags);
+ return (int)reported_frags;
+}
+
+
+/*
+ User defined partitioning set-up. We need to check how many fragments the
+ user wants defined and which node groups to put those into. Later we also
+ want to attach those partitions to a tablespace.
+
+ All the functionality of the partition function, partition limits and so
+ forth are entirely handled by the MySQL Server. There is one exception to
+ this rule for PARTITION BY KEY where NDB handles the hash function and
+ this type can thus be handled transparently also by NDB API program.
+ For RANGE, HASH and LIST and subpartitioning the NDB API programs must
+ implement the function to map to a partition.
+*/
+
+uint ha_ndbcluster::set_up_partition_info(partition_info *part_info,
+ TABLE *table,
+ void *tab_par)
+{
+ DBUG_ENTER("ha_ndbcluster::set_up_partition_info");
+ ushort node_group[MAX_PARTITIONS];
+ ulong ng_index= 0, i, j;
+ NDBTAB *tab= (NDBTAB*)tab_par;
+ NDBTAB::FragmentType ftype= NDBTAB::UserDefined;
+ partition_element *part_elem;
+
+ if (part_info->part_type == HASH_PARTITION &&
+ part_info->list_of_part_fields == TRUE)
+ {
+ Field **fields= part_info->part_field_array;
+
+ if (part_info->linear_hash_ind)
+ ftype= NDBTAB::DistrKeyLin;
+ else
+ ftype= NDBTAB::DistrKeyHash;
+
+ for (i= 0; i < part_info->part_field_list.elements; i++)
+ {
+ NDBCOL *col= tab->getColumn(fields[i]->fieldnr - 1);
+ DBUG_PRINT("info",("setting dist key on %s", col->getName()));
+ col->setPartitionKey(TRUE);
+ }
+ }
+ List_iterator<partition_element> part_it(part_info->partitions);
+ for (i= 0; i < part_info->no_parts; i++)
+ {
+ part_elem= part_it++;
+ if (!is_sub_partitioned(part_info))
+ {
+ node_group[ng_index++]= part_elem->nodegroup_id;
+ //Here we should insert tablespace id based on tablespace name
+ }
+ else
+ {
+ List_iterator<partition_element> sub_it(part_elem->subpartitions);
+ for (j= 0; j < part_info->no_subparts; j++)
+ {
+ part_elem= sub_it++;
+ node_group[ng_index++]= part_elem->nodegroup_id;
+ //Here we should insert tablespace id based on tablespace name
+ }
+ }
+ }
+ {
+ uint no_nodes= g_ndb_cluster_connection->no_db_nodes();
+ if (ng_index > 4 * no_nodes)
+ {
+ DBUG_RETURN(1300);
+ }
+ }
+ tab->setNodeGroupIds(&node_group, ng_index);
+ tab->setFragmentType(ftype);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ This routine is used to set-up fragmentation when the user has only specified
+ ENGINE = NDB and no user defined partitioning what so ever. Thus all values
+ will be based on default values. We will choose Linear Hash or Hash with
+ perfect spread dependent on a session variable defined in MySQL.
+*/
+
+static void ndb_set_fragmentation(NDBTAB &tab, TABLE *form, uint pk_length)
+{
+ NDBTAB::FragmentType ftype;
+ ushort node_group[MAX_PARTITIONS];
+ uint no_nodes= g_ndb_cluster_connection->no_db_nodes(), no_fragments, i;
+ DBUG_ENTER("ndb_set_fragmentation");
+
+ if (form->s->max_rows == (ha_rows) 0)
+ {
+ no_fragments= no_nodes;
+ }
+ else
+ {
+ /*
+ Ensure that we get enough fragments to handle all rows and ensure that
+ the table is fully distributed by keeping the number of fragments a
+ multiple of the number of nodes.
+ */
+ uint fragments= get_no_fragments(form->s->max_rows);
+ if (adjusted_frag_count(fragments, no_nodes, no_fragments))
+ {
+ push_warning(current_thd,
+ MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
+ "Ndb might have problems storing the max amount of rows specified");
+ }
+ }
+ /*
+ Always start with node group 0 and continue with next node group from
+ there
+ */
+ node_group[0]= 0;
+ for (i= 1; i < no_fragments; i++)
+ node_group[i]= UNDEF_NODEGROUP;
+ if (opt_ndb_linear_hash)
+ ftype= NDBTAB::DistrKeyLin;
+ else
+ ftype= NDBTAB::DistrKeyHash;
+ tab.setFragmentType(ftype);
+ tab.setNodeGroupIds(&node_group, no_fragments);
+ DBUG_VOID_RETURN;
+}
#endif /* HAVE_NDBCLUSTER_DB */
diff --git a/sql/ha_ndbcluster.h b/sql/ha_ndbcluster.h
index 6efc18f2f6a..f85b0fa8a04 100644
--- a/sql/ha_ndbcluster.h
+++ b/sql/ha_ndbcluster.h
@@ -420,7 +420,7 @@ class ha_ndbcluster: public handler
int write_row(byte *buf);
int update_row(const byte *old_data, byte *new_data);
int delete_row(const byte *buf);
- int index_init(uint index);
+ int index_init(uint index, bool sorted);
int index_end();
int index_read(byte *buf, const byte *key, uint key_len,
enum ha_rkey_function find_flag);
@@ -462,6 +462,11 @@ class ha_ndbcluster: public handler
const char * table_type() const;
const char ** bas_ext() const;
ulong table_flags(void) const;
+ ulong partition_flags(void) const
+ {
+ return (HA_CAN_PARTITION | HA_CAN_UPDATE_PARTITION_KEY |
+ HA_CAN_PARTITION_UNIQUE);
+ }
ulong index_flags(uint idx, uint part, bool all_parts) const;
uint max_supported_record_length() const;
uint max_supported_keys() const;
@@ -471,6 +476,7 @@ class ha_ndbcluster: public handler
int rename_table(const char *from, const char *to);
int delete_table(const char *name);
int create(const char *name, TABLE *form, HA_CREATE_INFO *info);
+ int get_default_no_partitions(ulonglong max_rows);
THR_LOCK_DATA **store_lock(THD *thd,
THR_LOCK_DATA **to,
enum thr_lock_type lock_type);
@@ -549,15 +555,21 @@ private:
NDB_INDEX_TYPE get_index_type_from_table(uint index_no) const;
int check_index_fields_not_null(uint index_no);
- int pk_read(const byte *key, uint key_len, byte *buf);
- int complemented_pk_read(const byte *old_data, byte *new_data);
- int peek_row(const byte *record);
- int unique_index_read(const byte *key, uint key_len,
- byte *buf);
+ uint set_up_partition_info(partition_info *part_info,
+ TABLE *table,
+ void *tab);
+ int complemented_pk_read(const byte *old_data, byte *new_data,
+ uint32 old_part_id);
+ int pk_read(const byte *key, uint key_len, byte *buf, uint32 part_id);
int ordered_index_scan(const key_range *start_key,
const key_range *end_key,
- bool sorted, bool descending, byte* buf);
+ bool sorted, bool descending, byte* buf,
+ part_id_range *part_spec);
int full_table_scan(byte * buf);
+
+ int peek_row(const byte *record);
+ int unique_index_read(const byte *key, uint key_len,
+ byte *buf);
int fetch_next(NdbScanOperation* op);
int next_result(byte *buf);
int define_read_attrs(byte* buf, NdbOperation* op);
@@ -637,6 +649,11 @@ private:
// NdbRecAttr has no reference to blob
typedef union { const NdbRecAttr *rec; NdbBlob *blob; void *ptr; } NdbValue;
NdbValue m_value[NDB_MAX_ATTRIBUTES_IN_TABLE];
+ partition_info *m_part_info;
+ byte *m_rec0;
+ Field **m_part_field_array;
+ bool m_use_partition_function;
+ bool m_sorted;
bool m_use_write;
bool m_ignore_dup_key;
bool m_primary_key_update;
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
new file mode 100644
index 00000000000..30dd79551b4
--- /dev/null
+++ b/sql/ha_partition.cc
@@ -0,0 +1,3162 @@
+/* Copyright (C) 2005 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ This handler was developed by Mikael Ronström for version 5.1 of MySQL.
+ It is an abstraction layer on top of other handlers such as MyISAM,
+ InnoDB, Federated, Berkeley DB and so forth. Partitioned tables can also
+ be handled by a storage engine. The current example of this is NDB
+ Cluster that has internally handled partitioning. This have benefits in
+ that many loops needed in the partition handler can be avoided.
+
+ Partitioning has an inherent feature which in some cases is positive and
+ in some cases is negative. It splits the data into chunks. This makes
+ the data more manageable, queries can easily be parallelised towards the
+ parts and indexes are split such that there are less levels in the
+ index trees. The inherent disadvantage is that to use a split index
+ one has to scan all index parts which is ok for large queries but for
+ small queries it can be a disadvantage.
+
+ Partitioning lays the foundation for more manageable databases that are
+ extremely large. It does also lay the foundation for more parallelism
+ in the execution of queries. This functionality will grow with later
+ versions of MySQL.
+
+ You can enable it in your buld by doing the following during your build
+ process:
+ ./configure --with-partition
+
+ The partition is setup to use table locks. It implements an partition "SHARE"
+ that is inserted into a hash by table name. You can use this to store
+ information of state that any partition handler object will be able to see
+ if it is using the same table.
+
+ Please read the object definition in ha_partition.h before reading the rest
+ if this file.
+*/
+
+#ifdef __GNUC__
+#pragma implementation // gcc: Class implementation
+#endif
+
+#include <mysql_priv.h>
+
+#ifdef HAVE_PARTITION_DB
+#include "ha_partition.h"
+
+static const char *ha_par_ext= ".par";
+#ifdef NOT_USED
+static int free_share(PARTITION_SHARE * share);
+static PARTITION_SHARE *get_share(const char *table_name, TABLE * table);
+#endif
+
+/****************************************************************************
+ MODULE create/delete handler object
+****************************************************************************/
+
+ha_partition::ha_partition(TABLE *table)
+ :handler(table), m_part_info(NULL), m_create_handler(FALSE),
+ m_is_sub_partitioned(0)
+{
+ DBUG_ENTER("ha_partition::ha_partition(table)");
+ init_handler_variables();
+ if (table)
+ {
+ if (table->s->part_info)
+ {
+ m_part_info= table->s->part_info;
+ m_is_sub_partitioned= is_sub_partitioned(m_part_info);
+ }
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+ha_partition::ha_partition(partition_info *part_info)
+ :handler(NULL), m_part_info(part_info), m_create_handler(TRUE),
+ m_is_sub_partitioned(is_sub_partitioned(m_part_info))
+
+{
+ DBUG_ENTER("ha_partition::ha_partition(part_info)");
+ init_handler_variables();
+ DBUG_ASSERT(m_part_info);
+ DBUG_VOID_RETURN;
+}
+
+
+void ha_partition::init_handler_variables()
+{
+ active_index= MAX_KEY;
+ m_file_buffer= NULL;
+ m_name_buffer_ptr= NULL;
+ m_engine_array= NULL;
+ m_file= NULL;
+ m_tot_parts= 0;
+ m_has_transactions= 0;
+ m_pkey_is_clustered= 0;
+ m_lock_type= F_UNLCK;
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+ m_scan_value= 2;
+ m_ref_length= 0;
+ m_part_spec.end_part= NO_CURRENT_PART_ID;
+ m_index_scan_type= partition_no_index_scan;
+ m_start_key.key= NULL;
+ m_start_key.length= 0;
+ m_myisam= FALSE;
+ m_innodb= FALSE;
+ m_extra_cache= FALSE;
+ m_extra_cache_size= 0;
+ m_table_flags= HA_FILE_BASED | HA_REC_NOT_IN_SEQ;
+ m_low_byte_first= 1;
+ m_part_field_array= NULL;
+ m_ordered_rec_buffer= NULL;
+ m_top_entry= NO_CURRENT_PART_ID;
+ m_rec_length= 0;
+ m_last_part= 0;
+ m_rec0= 0;
+ m_curr_key_info= 0;
+
+#ifdef DONT_HAVE_TO_BE_INITALIZED
+ m_start_key.flag= 0;
+ m_ordered= TRUE;
+#endif
+}
+
+
+ha_partition::~ha_partition()
+{
+ DBUG_ENTER("ha_partition::~ha_partition()");
+ if (m_file != NULL)
+ {
+ uint i;
+ for (i= 0; i < m_tot_parts; i++)
+ delete m_file[i];
+ }
+ my_free((char*) m_ordered_rec_buffer, MYF(MY_ALLOW_ZERO_PTR));
+
+ clear_handler_file();
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ The partition handler is only a layer on top of other engines. Thus it
+ can't really perform anything without the underlying handlers. Thus we
+ add this method as part of the allocation of a handler object.
+
+ 1) Allocation of underlying handlers
+ If we have access to the partition info we will allocate one handler
+ instance for each partition.
+ 2) Allocation without partition info
+ The cases where we don't have access to this information is when called
+ in preparation for delete_table and rename_table and in that case we
+ only need to set HA_FILE_BASED. In that case we will use the .par file
+ that contains information about the partitions and their engines and
+ the names of each partition.
+ 3) Table flags initialisation
+ We need also to set table flags for the partition handler. This is not
+ static since it depends on what storage engines are used as underlying
+ handlers.
+ The table flags is set in this routine to simulate the behaviour of a
+ normal storage engine
+ The flag HA_FILE_BASED will be set independent of the underlying handlers
+ 4) Index flags initialisation
+ When knowledge exists on the indexes it is also possible to initialise the
+ index flags. Again the index flags must be initialised by using the under-
+ lying handlers since this is storage engine dependent.
+ The flag HA_READ_ORDER will be reset for the time being to indicate no
+ ordered output is available from partition handler indexes. Later a merge
+ sort will be performed using the underlying handlers.
+ 5) primary_key_is_clustered, has_transactions and low_byte_first is
+ calculated here.
+*/
+
+int ha_partition::ha_initialise()
+{
+ handler **file_array, *file;
+ DBUG_ENTER("ha_partition::set_up_constants");
+
+ if (m_part_info)
+ {
+ m_tot_parts= get_tot_partitions(m_part_info);
+ DBUG_ASSERT(m_tot_parts > 0);
+ if (m_create_handler)
+ {
+ if (new_handlers_from_part_info())
+ DBUG_RETURN(1);
+ }
+ else if (get_from_handler_file(table->s->path))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), 129); //Temporary fix TODO print_error
+ DBUG_RETURN(1);
+ }
+ /*
+ We create all underlying table handlers here. We only do it if we have
+ access to the partition info. We do it in this special method to be
+ able to report allocation errors.
+ */
+ /*
+ Set up table_flags, low_byte_first, primary_key_is_clustered and
+ has_transactions since they are called often in all kinds of places,
+ other parameters are calculated on demand.
+ HA_FILE_BASED is always set for partition handler since we use a
+ special file for handling names of partitions, engine types.
+ HA_CAN_GEOMETRY, HA_CAN_FULLTEXT, HA_CAN_SQL_HANDLER,
+ HA_CAN_INSERT_DELAYED is disabled until further investigated.
+ */
+ m_table_flags= m_file[0]->table_flags();
+ m_low_byte_first= m_file[0]->low_byte_first();
+ m_has_transactions= TRUE;
+ m_pkey_is_clustered= TRUE;
+ file_array= m_file;
+ do
+ {
+ file= *file_array;
+ if (m_low_byte_first != file->low_byte_first())
+ {
+ // Cannot have handlers with different endian
+ my_error(ER_MIX_HANDLER_ERROR, MYF(0));
+ DBUG_RETURN(1);
+ }
+ if (!file->has_transactions())
+ m_has_transactions= FALSE;
+ if (!file->primary_key_is_clustered())
+ m_pkey_is_clustered= FALSE;
+ m_table_flags&= file->table_flags();
+ } while (*(++file_array));
+ m_table_flags&= ~(HA_CAN_GEOMETRY & HA_CAN_FULLTEXT &
+ HA_CAN_SQL_HANDLER & HA_CAN_INSERT_DELAYED);
+ /*
+ TODO RONM:
+ Make sure that the tree works without partition defined, compiles
+ and goes through mysql-test-run.
+ */
+ }
+ m_table_flags|= HA_FILE_BASED | HA_REC_NOT_IN_SEQ;
+ DBUG_RETURN(0);
+}
+
+/****************************************************************************
+ MODULE meta data changes
+****************************************************************************/
+/*
+ Used to delete a table. By the time delete_table() has been called all
+ opened references to this table will have been closed (and your globally
+ shared references released. The variable name will just be the name of
+ the table. You will need to remove any files you have created at this
+ point.
+
+ If you do not implement this, the default delete_table() is called from
+ handler.cc and it will delete all files with the file extentions returned
+ by bas_ext().
+
+ Called from handler.cc by delete_table and ha_create_table(). Only used
+ during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
+ the storage engine.
+*/
+
+int ha_partition::delete_table(const char *name)
+{
+ int error;
+ DBUG_ENTER("ha_partition::delete_table");
+ if ((error= del_ren_cre_table(name, NULL, NULL, NULL)))
+ DBUG_RETURN(error);
+ DBUG_RETURN(handler::delete_table(name));
+}
+
+
+/*
+ Renames a table from one name to another from alter table call.
+
+ If you do not implement this, the default rename_table() is called from
+ handler.cc and it will delete all files with the file extentions returned
+ by bas_ext().
+
+ Called from sql_table.cc by mysql_rename_table().
+*/
+
+int ha_partition::rename_table(const char *from, const char *to)
+{
+ int error;
+ DBUG_ENTER("ha_partition::rename_table");
+ if ((error= del_ren_cre_table(from, to, NULL, NULL)))
+ DBUG_RETURN(error);
+ DBUG_RETURN(handler::rename_table(from, to));
+}
+
+
+/*
+ create_handler_files is called to create any handler specific files
+ before opening the file with openfrm to later call ::create on the
+ file object.
+ In the partition handler this is used to store the names of partitions
+ and types of engines in the partitions.
+*/
+
+int ha_partition::create_handler_files(const char *name)
+{
+ DBUG_ENTER("ha_partition::create_handler_files()");
+ if (create_handler_file(name))
+ {
+ my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0));
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ create() is called to create a table. The variable name will have the name
+ of the table. When create() is called you do not need to worry about
+ opening the table. Also, the FRM file will have already been created so
+ adjusting create_info will not do you any good. You can overwrite the frm
+ file at this point if you wish to change the table definition, but there
+ are no methods currently provided for doing that.
+
+ Called from handle.cc by ha_create_table().
+*/
+
+int ha_partition::create(const char *name, TABLE *table_arg,
+ HA_CREATE_INFO *create_info)
+{
+ char t_name[FN_REFLEN];
+ DBUG_ENTER("ha_partition::create");
+
+ strmov(t_name, name);
+ *fn_ext(t_name)= 0;
+ if (del_ren_cre_table(t_name, NULL, table_arg, create_info))
+ {
+ handler::delete_table(t_name);
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
+
+
+void ha_partition::update_create_info(HA_CREATE_INFO *create_info)
+{
+ return;
+}
+
+
+char *ha_partition::update_table_comment(const char *comment)
+{
+ return (char*) comment; // Nothing to change
+}
+
+
+/*
+ This method is used to calculate the partition name, service routine to
+ the del_ren_cre_table method.
+*/
+
+static void create_partition_name(char *out, const char *in1, const char *in2)
+{
+ strxmov(out, in1, "_", in2, NullS);
+}
+
+
+/*
+ Common routine to handle delete_table and rename_table.
+ The routine uses the partition handler file to get the
+ names of the partition instances. Both these routines
+ are called after creating the handler without table
+ object and thus the file is needed to discover the
+ names of the partitions and the underlying storage engines.
+*/
+
+uint ha_partition::del_ren_cre_table(const char *from,
+ const char *to,
+ TABLE *table_arg,
+ HA_CREATE_INFO *create_info)
+{
+ int save_error= 0, error;
+ char from_buff[FN_REFLEN], to_buff[FN_REFLEN];
+ char *name_buffer_ptr;
+ uint i;
+ handler **file;
+ DBUG_ENTER("del_ren_cre_table()");
+
+ if (get_from_handler_file(from))
+ DBUG_RETURN(TRUE);
+ DBUG_ASSERT(m_file_buffer);
+ name_buffer_ptr= m_name_buffer_ptr;
+ file= m_file;
+ i= 0;
+ do
+ {
+ create_partition_name(from_buff, from, name_buffer_ptr);
+ if (to != NULL)
+ { // Rename branch
+ create_partition_name(to_buff, to, name_buffer_ptr);
+ error= (*file)->rename_table((const char*) from_buff,
+ (const char*) to_buff);
+ }
+ else if (table_arg == NULL) // delete branch
+ error= (*file)->delete_table((const char*) from_buff);
+ else
+ {
+ set_up_table_before_create(table_arg, create_info, i);
+ error= (*file)->create(from_buff, table_arg, create_info);
+ }
+ name_buffer_ptr= strend(name_buffer_ptr) + 1;
+ if (error)
+ save_error= error;
+ i++;
+ } while (*(++file));
+ DBUG_RETURN(save_error);
+}
+
+
+partition_element *ha_partition::find_partition_element(uint part_id)
+{
+ uint i;
+ uint curr_part_id= 0;
+ List_iterator_fast < partition_element > part_it(m_part_info->partitions);
+
+ for (i= 0; i < m_part_info->no_parts; i++)
+ {
+ partition_element *part_elem;
+ part_elem= part_it++;
+ if (m_is_sub_partitioned)
+ {
+ uint j;
+ List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
+ for (j= 0; j < m_part_info->no_subparts; j++)
+ {
+ part_elem= sub_it++;
+ if (part_id == curr_part_id++)
+ return part_elem;
+ }
+ }
+ else if (part_id == curr_part_id++)
+ return part_elem;
+ }
+ DBUG_ASSERT(0);
+ current_thd->fatal_error(); // Abort
+ return NULL;
+}
+
+
+void ha_partition::set_up_table_before_create(TABLE *table,
+ HA_CREATE_INFO *info,
+ uint part_id)
+{
+ /*
+ Set up
+ 1) Comment on partition
+ 2) MAX_ROWS, MIN_ROWS on partition
+ 3) Index file name on partition
+ 4) Data file name on partition
+ */
+ partition_element *part_elem= find_partition_element(part_id);
+ if (!part_elem)
+ return; // Fatal error
+ table->s->max_rows= part_elem->part_max_rows;
+ table->s->min_rows= part_elem->part_min_rows;
+ info->index_file_name= part_elem->index_file_name;
+ info->data_file_name= part_elem->data_file_name;
+}
+
+
+/*
+ Routine used to add two names with '_' in between then. Service routine
+ to create_handler_file
+ Include the NULL in the count of characters since it is needed as separator
+ between the partition names.
+*/
+
+static uint name_add(char *dest, const char *first_name, const char *sec_name)
+{
+ return (uint) (strxmov(dest, first_name, "_", sec_name, NullS) -dest) + 1;
+}
+
+
+/*
+ Method used to create handler file with names of partitions, their
+ engine types and the number of partitions.
+*/
+
+bool ha_partition::create_handler_file(const char *name)
+{
+ partition_element *part_elem, *subpart_elem;
+ uint i, j, part_name_len, subpart_name_len;
+ uint tot_partition_words, tot_name_len;
+ uint tot_len_words, tot_len_byte, chksum, tot_name_words;
+ char *name_buffer_ptr;
+ uchar *file_buffer, *engine_array;
+ bool result= TRUE;
+ char file_name[FN_REFLEN];
+ File file;
+ List_iterator_fast < partition_element > part_it(m_part_info->partitions);
+ DBUG_ENTER("create_handler_file");
+
+ DBUG_PRINT("info", ("table name = %s", name));
+ tot_name_len= 0;
+ for (i= 0; i < m_part_info->no_parts; i++)
+ {
+ part_elem= part_it++;
+ part_name_len= strlen(part_elem->partition_name);
+ if (!m_is_sub_partitioned)
+ tot_name_len+= part_name_len + 1;
+ else
+ {
+ List_iterator_fast<partition_element> sub_it(part_elem->subpartitions);
+ for (j= 0; j < m_part_info->no_subparts; j++)
+ {
+ subpart_elem= sub_it++;
+ subpart_name_len= strlen(subpart_elem->partition_name);
+ tot_name_len+= part_name_len + subpart_name_len + 2;
+ }
+ }
+ }
+ /*
+ File format:
+ Length in words 4 byte
+ Checksum 4 byte
+ Total number of partitions 4 byte
+ Array of engine types n * 4 bytes where
+ n = (m_tot_parts + 3)/4
+ Length of name part in bytes 4 bytes
+ Name part m * 4 bytes where
+ m = ((length_name_part + 3)/4)*4
+
+ All padding bytes are zeroed
+ */
+ tot_partition_words= (m_tot_parts + 3) / 4;
+ tot_name_words= (tot_name_len + 3) / 4;
+ tot_len_words= 4 + tot_partition_words + tot_name_words;
+ tot_len_byte= 4 * tot_len_words;
+ if (!(file_buffer= (uchar *) my_malloc(tot_len_byte, MYF(MY_ZEROFILL))))
+ DBUG_RETURN(TRUE);
+ engine_array= (file_buffer + 12);
+ name_buffer_ptr= (char*) (file_buffer + ((4 + tot_partition_words) * 4));
+ part_it.rewind();
+ for (i= 0; i < m_part_info->no_parts; i++)
+ {
+ part_elem= part_it++;
+ if (!m_is_sub_partitioned)
+ {
+ name_buffer_ptr= strmov(name_buffer_ptr, part_elem->partition_name)+1;
+ *engine_array= (uchar) part_elem->engine_type;
+ DBUG_PRINT("info", ("engine: %u", *engine_array));
+ engine_array++;
+ }
+ else
+ {
+ List_iterator_fast<partition_element> sub_it(part_elem->subpartitions);
+ for (j= 0; j < m_part_info->no_subparts; j++)
+ {
+ subpart_elem= sub_it++;
+ name_buffer_ptr+= name_add(name_buffer_ptr,
+ part_elem->partition_name,
+ subpart_elem->partition_name);
+ *engine_array= (uchar) part_elem->engine_type;
+ engine_array++;
+ }
+ }
+ }
+ chksum= 0;
+ int4store(file_buffer, tot_len_words);
+ int4store(file_buffer + 8, m_tot_parts);
+ int4store(file_buffer + 12 + (tot_partition_words * 4), tot_name_len);
+ for (i= 0; i < tot_len_words; i++)
+ chksum^= uint4korr(file_buffer + 4 * i);
+ int4store(file_buffer + 4, chksum);
+ /*
+ Remove .frm extension and replace with .par
+ Create and write and close file
+ to be used at open, delete_table and rename_table
+ */
+ fn_format(file_name, name, "", ".par", MYF(MY_REPLACE_EXT));
+ if ((file= my_create(file_name, CREATE_MODE, O_RDWR | O_TRUNC,
+ MYF(MY_WME))) >= 0)
+ {
+ result= my_write(file, (byte *) file_buffer, tot_len_byte,
+ MYF(MY_WME | MY_NABP));
+ VOID(my_close(file, MYF(0)));
+ }
+ else
+ result= TRUE;
+ my_free((char*) file_buffer, MYF(0));
+ DBUG_RETURN(result);
+}
+
+
+void ha_partition::clear_handler_file()
+{
+ my_free((char*) m_file_buffer, MYF(MY_ALLOW_ZERO_PTR));
+ m_file_buffer= NULL;
+ m_name_buffer_ptr= NULL;
+ m_engine_array= NULL;
+}
+
+
+bool ha_partition::create_handlers()
+{
+ uint i;
+ uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
+ DBUG_ENTER("create_handlers");
+
+ if (!(m_file= (handler **) sql_alloc(alloc_len)))
+ DBUG_RETURN(TRUE);
+ bzero(m_file, alloc_len);
+ for (i= 0; i < m_tot_parts; i++)
+ {
+ if (!(m_file[i]= get_new_handler(table, (enum db_type) m_engine_array[i])))
+ DBUG_RETURN(TRUE);
+ DBUG_PRINT("info", ("engine_type: %u", m_engine_array[i]));
+ }
+ m_file[m_tot_parts]= 0;
+ /* For the moment we only support partition over the same table engine */
+ if (m_engine_array[0] == (uchar) DB_TYPE_MYISAM)
+ {
+ DBUG_PRINT("info", ("MyISAM"));
+ m_myisam= TRUE;
+ }
+ else if (m_engine_array[0] == (uchar) DB_TYPE_INNODB)
+ {
+ DBUG_PRINT("info", ("InnoDB"));
+ m_innodb= TRUE;
+ }
+ DBUG_RETURN(FALSE);
+}
+
+
+bool ha_partition::new_handlers_from_part_info()
+{
+ uint i, j;
+ partition_element *part_elem;
+ uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
+ List_iterator_fast <partition_element> part_it(m_part_info->partitions);
+ DBUG_ENTER("ha_partition::new_handlers_from_part_info");
+
+ if (!(m_file= (handler **) sql_alloc(alloc_len)))
+ goto error;
+ bzero(m_file, alloc_len);
+ DBUG_ASSERT(m_part_info->no_parts > 0);
+
+ i= 0;
+ /*
+ Don't know the size of the underlying storage engine, invent a number of
+ bytes allocated for error message if allocation fails
+ */
+ alloc_len= 128;
+ do
+ {
+ part_elem= part_it++;
+ if (!(m_file[i]= get_new_handler(table, part_elem->engine_type)))
+ goto error;
+ DBUG_PRINT("info", ("engine_type: %u", (uint) part_elem->engine_type));
+ if (m_is_sub_partitioned)
+ {
+ for (j= 0; j < m_part_info->no_subparts; j++)
+ {
+ if (!(m_file[i]= get_new_handler(table, part_elem->engine_type)))
+ goto error;
+ DBUG_PRINT("info", ("engine_type: %u", (uint) part_elem->engine_type));
+ }
+ }
+ } while (++i < m_part_info->no_parts);
+ if (part_elem->engine_type == DB_TYPE_MYISAM)
+ {
+ DBUG_PRINT("info", ("MyISAM"));
+ m_myisam= TRUE;
+ }
+ DBUG_RETURN(FALSE);
+error:
+ my_error(ER_OUTOFMEMORY, MYF(0), alloc_len);
+ DBUG_RETURN(TRUE);
+}
+
+
+/*
+ Open handler file to get partition names, engine types and number of
+ partitions.
+*/
+
+bool ha_partition::get_from_handler_file(const char *name)
+{
+ char buff[FN_REFLEN], *address_tot_name_len;
+ File file;
+ char *file_buffer, *name_buffer_ptr;
+ uchar *engine_array;
+ uint i, len_bytes, len_words, tot_partition_words, tot_name_words, chksum;
+ DBUG_ENTER("ha_partition::get_from_handler_file");
+ DBUG_PRINT("enter", ("table name: '%s'", name));
+
+ if (m_file_buffer)
+ DBUG_RETURN(FALSE);
+ fn_format(buff, name, "", ha_par_ext, MYF(0));
+
+ /* Following could be done with my_stat to read in whole file */
+ if ((file= my_open(buff, O_RDONLY | O_SHARE, MYF(0))) < 0)
+ DBUG_RETURN(TRUE);
+ if (my_read(file, (byte *) & buff[0], 8, MYF(MY_NABP)))
+ goto err1;
+ len_words= uint4korr(buff);
+ len_bytes= 4 * len_words;
+ if (!(file_buffer= my_malloc(len_bytes, MYF(0))))
+ goto err1;
+ VOID(my_seek(file, 0, MY_SEEK_SET, MYF(0)));
+ if (my_read(file, (byte *) file_buffer, len_bytes, MYF(MY_NABP)))
+ goto err2;
+
+ chksum= 0;
+ for (i= 0; i < len_words; i++)
+ chksum ^= uint4korr((file_buffer) + 4 * i);
+ if (chksum)
+ goto err2;
+ m_tot_parts= uint4korr((file_buffer) + 8);
+ tot_partition_words= (m_tot_parts + 3) / 4;
+ engine_array= (uchar *) ((file_buffer) + 12);
+ address_tot_name_len= file_buffer + 12 + 4 * tot_partition_words;
+ tot_name_words= (uint4korr(address_tot_name_len) + 3) / 4;
+ if (len_words != (tot_partition_words + tot_name_words + 4))
+ goto err2;
+ name_buffer_ptr= file_buffer + 16 + 4 * tot_partition_words;
+ VOID(my_close(file, MYF(0)));
+ m_file_buffer= file_buffer; // Will be freed in clear_handler_file()
+ m_name_buffer_ptr= name_buffer_ptr;
+ m_engine_array= engine_array;
+ if (!m_file && create_handlers())
+ {
+ clear_handler_file();
+ DBUG_RETURN(TRUE);
+ }
+ DBUG_RETURN(FALSE);
+
+err2:
+ my_free(file_buffer, MYF(0));
+err1:
+ VOID(my_close(file, MYF(0)));
+ DBUG_RETURN(TRUE);
+}
+
+/****************************************************************************
+ MODULE open/close object
+****************************************************************************/
+/*
+ Used for opening tables. The name will be the name of the file.
+ A table is opened when it needs to be opened. For instance
+ when a request comes in for a select on the table (tables are not
+ open and closed for each request, they are cached).
+
+ Called from handler.cc by handler::ha_open(). The server opens all tables
+ by calling ha_open() which then calls the handler specific open().
+*/
+
+int ha_partition::open(const char *name, int mode, uint test_if_locked)
+{
+ int error;
+ char name_buff[FN_REFLEN];
+ char *name_buffer_ptr= m_name_buffer_ptr;
+ handler **file;
+ uint alloc_len;
+ DBUG_ENTER("ha_partition::open");
+
+ ref_length= 0;
+ m_part_field_array= m_part_info->full_part_field_array;
+ if (get_from_handler_file(name))
+ DBUG_RETURN(1);
+ m_start_key.length= 0;
+ m_rec0= table->record[0];
+ m_rec_length= table->s->reclength;
+ alloc_len= m_tot_parts * (m_rec_length + PARTITION_BYTES_IN_POS);
+ alloc_len+= table->s->max_key_length;
+ if (!m_ordered_rec_buffer)
+ {
+ if (!(m_ordered_rec_buffer= my_malloc(alloc_len, MYF(MY_WME))))
+ {
+ DBUG_RETURN(1);
+ }
+ {
+ /*
+ We set-up one record per partition and each record has 2 bytes in
+ front where the partition id is written. This is used by ordered
+ index_read.
+ We also set-up a reference to the first record for temporary use in
+ setting up the scan.
+ */
+ char *ptr= m_ordered_rec_buffer;
+ uint i= 0;
+ do
+ {
+ int2store(ptr, i);
+ ptr+= m_rec_length + PARTITION_BYTES_IN_POS;
+ } while (++i < m_tot_parts);
+ m_start_key.key= ptr;
+ }
+ }
+ file= m_file;
+ do
+ {
+ create_partition_name(name_buff, name, name_buffer_ptr);
+ if ((error= (*file)->ha_open((const char*) name_buff, mode,
+ test_if_locked)))
+ goto err_handler;
+ name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
+ set_if_bigger(ref_length, ((*file)->ref_length));
+ } while (*(++file));
+ /*
+ Add 2 bytes for partition id in position ref length.
+ ref_length=max_in_all_partitions(ref_length) + PARTITION_BYTES_IN_POS
+ */
+ ref_length+= PARTITION_BYTES_IN_POS;
+ m_ref_length= ref_length;
+ /*
+ Release buffer read from .par file. It will not be reused again after
+ being opened once.
+ */
+ clear_handler_file();
+ /*
+ Initialise priority queue, initialised to reading forward.
+ */
+ if ((error= init_queue(&queue, m_tot_parts, (uint) PARTITION_BYTES_IN_POS,
+ 0, key_rec_cmp, (void*)this)))
+ goto err_handler;
+ /*
+ Some handlers update statistics as part of the open call. This will in
+ some cases corrupt the statistics of the partition handler and thus
+ to ensure we have correct statistics we call info from open after
+ calling open on all individual handlers.
+ */
+ info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
+ DBUG_RETURN(0);
+
+err_handler:
+ while (file-- != m_file)
+ (*file)->close();
+ DBUG_RETURN(error);
+}
+
+/*
+ Closes a table. We call the free_share() function to free any resources
+ that we have allocated in the "shared" structure.
+
+ Called from sql_base.cc, sql_select.cc, and table.cc.
+ In sql_select.cc it is only used to close up temporary tables or during
+ the process where a temporary table is converted over to being a
+ myisam table.
+ For sql_base.cc look at close_data_tables().
+*/
+
+int ha_partition::close(void)
+{
+ handler **file;
+ DBUG_ENTER("ha_partition::close");
+ file= m_file;
+ do
+ {
+ (*file)->close();
+ } while (*(++file));
+ DBUG_RETURN(0);
+}
+
+
+/****************************************************************************
+ MODULE start/end statement
+****************************************************************************/
+/*
+ A number of methods to define various constants for the handler. In
+ the case of the partition handler we need to use some max and min
+ of the underlying handlers in most cases.
+*/
+
+/*
+ First you should go read the section "locking functions for mysql" in
+ lock.cc to understand this.
+ This create a lock on the table. If you are implementing a storage engine
+ that can handle transactions look at ha_berkely.cc to see how you will
+ want to goo about doing this. Otherwise you should consider calling
+ flock() here.
+ Originally this method was used to set locks on file level to enable
+ several MySQL Servers to work on the same data. For transactional
+ engines it has been "abused" to also mean start and end of statements
+ to enable proper rollback of statements and transactions. When LOCK
+ TABLES has been issued the start_stmt method takes over the role of
+ indicating start of statement but in this case there is no end of
+ statement indicator(?).
+
+ Called from lock.cc by lock_external() and unlock_external(). Also called
+ from sql_table.cc by copy_data_between_tables().
+*/
+
+int ha_partition::external_lock(THD *thd, int lock_type)
+{
+ uint error;
+ handler **file;
+ DBUG_ENTER("ha_partition::external_lock");
+ file= m_file;
+ do
+ {
+ if ((error= (*file)->external_lock(thd, lock_type)))
+ {
+ if (lock_type != F_UNLCK)
+ goto err_handler;
+ }
+ } while (*(++file));
+ m_lock_type= lock_type; // For the future (2009?)
+ DBUG_RETURN(0);
+
+err_handler:
+ while (file-- != m_file)
+ (*file)->external_lock(thd, F_UNLCK);
+ DBUG_RETURN(error);
+}
+
+
+/*
+ The idea with handler::store_lock() is the following:
+
+ The statement decided which locks we should need for the table
+ for updates/deletes/inserts we get WRITE locks, for SELECT... we get
+ read locks.
+
+ Before adding the lock into the table lock handler (see thr_lock.c)
+ mysqld calls store lock with the requested locks. Store lock can now
+ modify a write lock to a read lock (or some other lock), ignore the
+ lock (if we don't want to use MySQL table locks at all) or add locks
+ for many tables (like we do when we are using a MERGE handler).
+
+ Berkeley DB for partition changes all WRITE locks to TL_WRITE_ALLOW_WRITE
+ (which signals that we are doing WRITES, but we are still allowing other
+ reader's and writer's.
+
+ When releasing locks, store_lock() are also called. In this case one
+ usually doesn't have to do anything.
+
+ store_lock is called when holding a global mutex to ensure that only
+ one thread at a time changes the locking information of tables.
+
+ In some exceptional cases MySQL may send a request for a TL_IGNORE;
+ This means that we are requesting the same lock as last time and this
+ should also be ignored. (This may happen when someone does a flush
+ table when we have opened a part of the tables, in which case mysqld
+ closes and reopens the tables and tries to get the same locks at last
+ time). In the future we will probably try to remove this.
+
+ Called from lock.cc by get_lock_data().
+*/
+
+THR_LOCK_DATA **ha_partition::store_lock(THD *thd,
+ THR_LOCK_DATA **to,
+ enum thr_lock_type lock_type)
+{
+ handler **file;
+ DBUG_ENTER("ha_partition::store_lock");
+ file= m_file;
+ do
+ {
+ to= (*file)->store_lock(thd, to, lock_type);
+ } while (*(++file));
+ DBUG_RETURN(to);
+}
+
+
+int ha_partition::start_stmt(THD *thd)
+{
+ int error= 0;
+ handler **file;
+ DBUG_ENTER("ha_partition::start_stmt");
+ file= m_file;
+ do
+ {
+ if ((error= (*file)->start_stmt(thd)))
+ break;
+ } while (*(++file));
+ DBUG_RETURN(error);
+}
+
+
+/*
+ Returns the number of store locks needed in call to store lock.
+ We return number of partitions since we call store_lock on each
+ underlying handler. Assists the above functions in allocating
+ sufficient space for lock structures.
+*/
+
+uint ha_partition::lock_count() const
+{
+ DBUG_ENTER("ha_partition::lock_count");
+ DBUG_RETURN(m_tot_parts);
+}
+
+
+/*
+ Record currently processed was not in the result set of the statement
+ and is thus unlocked. Used for UPDATE and DELETE queries.
+*/
+
+void ha_partition::unlock_row()
+{
+ m_file[m_last_part]->unlock_row();
+ return;
+}
+
+
+/****************************************************************************
+ MODULE change record
+****************************************************************************/
+
+/*
+ write_row() inserts a row. buf() is a byte array of data, normally record[0].
+
+ You can use the field information to extract the data from the native byte
+ array type.
+
+ Example of this would be:
+ for (Field **field=table->field ; *field ; field++)
+ {
+ ...
+ }
+
+ See ha_tina.cc for an partition of extracting all of the data as strings.
+ ha_berekly.cc has an partition of how to store it intact by "packing" it
+ for ha_berkeley's own native storage type.
+
+ See the note for update_row() on auto_increments and timestamps. This
+ case also applied to write_row().
+
+ Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
+ sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.
+
+ ADDITIONAL INFO:
+
+ Most handlers set timestamp when calling write row if any such fields
+ exists. Since we are calling an underlying handler we assume the´
+ underlying handler will assume this responsibility.
+
+ Underlying handlers will also call update_auto_increment to calculate
+ the new auto increment value. We will catch the call to
+ get_auto_increment and ensure this increment value is maintained by
+ only one of the underlying handlers.
+*/
+
+int ha_partition::write_row(byte * buf)
+{
+ uint32 part_id;
+ int error;
+#ifdef NOT_NEEDED
+ byte *rec0= m_rec0;
+#endif
+ DBUG_ENTER("ha_partition::write_row");
+ DBUG_ASSERT(buf == m_rec0);
+
+#ifdef NOT_NEEDED
+ if (likely(buf == rec0))
+#endif
+ error= m_part_info->get_partition_id(m_part_info, &part_id);
+#ifdef NOT_NEEDED
+ else
+ {
+ set_field_ptr(m_part_field_array, buf, rec0);
+ error= m_part_info->get_partition_id(m_part_info, &part_id);
+ set_field_ptr(m_part_field_array, rec0, buf);
+ }
+#endif
+ if (unlikely(error))
+ DBUG_RETURN(error);
+ m_last_part= part_id;
+ DBUG_PRINT("info", ("Insert in partition %d", part_id));
+ DBUG_RETURN(m_file[part_id]->write_row(buf));
+}
+
+
+/*
+ Yes, update_row() does what you expect, it updates a row. old_data will
+ have the previous row record in it, while new_data will have the newest
+ data in it.
+ Keep in mind that the server can do updates based on ordering if an
+ ORDER BY clause was used. Consecutive ordering is not guarenteed.
+
+ Currently new_data will not have an updated auto_increament record, or
+ and updated timestamp field. You can do these for partition by doing these:
+ if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
+ table->timestamp_field->set_time();
+ if (table->next_number_field && record == table->record[0])
+ update_auto_increment();
+
+ Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
+ new_data is always record[0]
+ old_data is normally record[1] but may be anything
+
+*/
+
+int ha_partition::update_row(const byte *old_data, byte *new_data)
+{
+ uint32 new_part_id, old_part_id;
+ int error;
+ DBUG_ENTER("ha_partition::update_row");
+
+ if ((error= get_parts_for_update(old_data, new_data, table->record[0],
+ m_part_info, &old_part_id, &new_part_id)))
+ {
+ DBUG_RETURN(error);
+ }
+
+ /*
+ TODO:
+ set_internal_auto_increment=
+ max(set_internal_auto_increment, new_data->auto_increment)
+ */
+ m_last_part= new_part_id;
+ if (new_part_id == old_part_id)
+ {
+ DBUG_PRINT("info", ("Update in partition %d", new_part_id));
+ DBUG_RETURN(m_file[new_part_id]->update_row(old_data, new_data));
+ }
+ else
+ {
+ DBUG_PRINT("info", ("Update from partition %d to partition %d",
+ old_part_id, new_part_id));
+ if ((error= m_file[new_part_id]->write_row(new_data)))
+ DBUG_RETURN(error);
+ if ((error= m_file[old_part_id]->delete_row(old_data)))
+ {
+#ifdef IN_THE_FUTURE
+ (void) m_file[new_part_id]->delete_last_inserted_row(new_data);
+#endif
+ DBUG_RETURN(error);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ This will delete a row. buf will contain a copy of the row to be deleted.
+ The server will call this right after the current row has been read
+ (from either a previous rnd_xxx() or index_xxx() call).
+ If you keep a pointer to the last row or can access a primary key it will
+ make doing the deletion quite a bit easier.
+ Keep in mind that the server does no guarentee consecutive deletions.
+ ORDER BY clauses can be used.
+
+ Called in sql_acl.cc and sql_udf.cc to manage internal table information.
+ Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select
+ it is used for removing duplicates while in insert it is used for REPLACE
+ calls.
+
+ buf is either record[0] or record[1]
+
+*/
+
+int ha_partition::delete_row(const byte *buf)
+{
+ uint32 part_id;
+ int error;
+ DBUG_ENTER("ha_partition::delete_row");
+
+ if ((error= get_part_for_delete(buf, m_rec0, m_part_info, &part_id)))
+ {
+ DBUG_RETURN(error);
+ }
+ m_last_part= part_id;
+ DBUG_RETURN(m_file[part_id]->delete_row(buf));
+}
+
+
+/*
+ Used to delete all rows in a table. Both for cases of truncate and
+ for cases where the optimizer realizes that all rows will be
+ removed as a result of a SQL statement.
+
+ Called from item_sum.cc by Item_func_group_concat::clear(),
+ Item_sum_count_distinct::clear(), and Item_func_group_concat::clear().
+ Called from sql_delete.cc by mysql_delete().
+ Called from sql_select.cc by JOIN::reinit().
+ Called from sql_union.cc by st_select_lex_unit::exec().
+*/
+
+int ha_partition::delete_all_rows()
+{
+ int error;
+ handler **file;
+ DBUG_ENTER("ha_partition::delete_all_rows");
+ file= m_file;
+ do
+ {
+ if ((error= (*file)->delete_all_rows()))
+ DBUG_RETURN(error);
+ } while (*(++file));
+ DBUG_RETURN(0);
+}
+
+/*
+ rows == 0 means we will probably insert many rows
+*/
+
+void ha_partition::start_bulk_insert(ha_rows rows)
+{
+ handler **file;
+ DBUG_ENTER("ha_partition::start_bulk_insert");
+ if (!rows)
+ {
+ /* Avoid allocation big caches in all underlaying handlers */
+ DBUG_VOID_RETURN;
+ }
+ rows= rows/m_tot_parts + 1;
+ file= m_file;
+ do
+ {
+ (*file)->start_bulk_insert(rows);
+ } while (*(++file));
+ DBUG_VOID_RETURN;
+}
+
+
+int ha_partition::end_bulk_insert()
+{
+ int error= 0;
+ handler **file;
+ DBUG_ENTER("ha_partition::end_bulk_insert");
+
+ file= m_file;
+ do
+ {
+ int tmp;
+ /* We want to execute end_bulk_insert() on all handlers */
+ if ((tmp= (*file)->end_bulk_insert()))
+ error= tmp;
+ } while (*(++file));
+ DBUG_RETURN(error);
+}
+
+/****************************************************************************
+ MODULE full table scan
+****************************************************************************/
+/*
+ Initialize engine for random reads
+
+ SYNOPSIS
+ ha_partition::rnd_init()
+ scan 0 Initialize for random reads through rnd_pos()
+ 1 Initialize for random scan through rnd_next()
+
+ NOTES
+ rnd_init() is called when the server wants the storage engine to do a
+ table scan or when the server wants to access data through rnd_pos.
+
+ When scan is used we will scan one handler partition at a time.
+ When preparing for rnd_pos we will init all handler partitions.
+ No extra cache handling is needed when scannning is not performed.
+
+ Before initialising we will call rnd_end to ensure that we clean up from
+ any previous incarnation of a table scan.
+ Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
+ sql_table.cc, and sql_update.cc.
+*/
+
+int ha_partition::rnd_init(bool scan)
+{
+ int error;
+ handler **file;
+ DBUG_ENTER("ha_partition::rnd_init");
+
+ include_partition_fields_in_used_fields();
+ if (scan)
+ {
+ /*
+ rnd_end() is needed for partitioning to reset internal data if scan
+ is already in use
+ */
+
+ rnd_end();
+ if (partition_scan_set_up(rec_buf(0), FALSE))
+ {
+ /*
+ The set of partitions to scan is empty. We return success and return
+ end of file on first rnd_next.
+ */
+ DBUG_RETURN(0);
+ }
+ /*
+ We will use the partition set in our scan, using the start and stop
+ partition and checking each scan before start dependent on bittfields.
+ */
+ late_extra_cache(m_part_spec.start_part);
+ DBUG_PRINT("info", ("rnd_init on partition %d",m_part_spec.start_part));
+ error= m_file[m_part_spec.start_part]->ha_rnd_init(1);
+ m_scan_value= 1; // Scan active
+ if (error)
+ m_scan_value= 2; // No scan active
+ DBUG_RETURN(error);
+ }
+ file= m_file;
+ do
+ {
+ if ((error= (*file)->ha_rnd_init(0)))
+ goto err;
+ } while (*(++file));
+ m_scan_value= 0;
+ DBUG_RETURN(0);
+
+err:
+ while (file--)
+ (*file)->ha_rnd_end();
+ DBUG_RETURN(error);
+}
+
+
+int ha_partition::rnd_end()
+{
+ handler **file;
+ DBUG_ENTER("ha_partition::rnd_end");
+ switch (m_scan_value) {
+ case 2: // Error
+ break;
+ case 1: // Table scan
+ if (m_part_spec.start_part != NO_CURRENT_PART_ID)
+ {
+ late_extra_no_cache(m_part_spec.start_part);
+ m_file[m_part_spec.start_part]->ha_rnd_end();
+ }
+ break;
+ case 0:
+ file= m_file;
+ do
+ {
+ (*file)->ha_rnd_end();
+ } while (*(++file));
+ break;
+ }
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+ m_scan_value= 2;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ read next row during full table scan (scan in random row order)
+
+ SYNOPSIS
+ rnd_next()
+ buf buffer that should be filled with data
+
+ This is called for each row of the table scan. When you run out of records
+ you should return HA_ERR_END_OF_FILE.
+ The Field structure for the table is the key to getting data into buf
+ in a manner that will allow the server to understand it.
+
+ Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
+ sql_table.cc, and sql_update.cc.
+*/
+
+int ha_partition::rnd_next(byte *buf)
+{
+ DBUG_ASSERT(m_scan_value);
+ uint part_id= m_part_spec.start_part; // Cache of this variable
+ handler *file= m_file[part_id];
+ int result= HA_ERR_END_OF_FILE;
+ DBUG_ENTER("ha_partition::rnd_next");
+
+ DBUG_ASSERT(m_scan_value == 1);
+
+ if (part_id > m_part_spec.end_part)
+ {
+ /*
+ The original set of partitions to scan was empty and thus we report
+ the result here.
+ */
+ goto end;
+ }
+ while (TRUE)
+ {
+ if ((result= file->rnd_next(buf)))
+ {
+ if (result == HA_ERR_RECORD_DELETED)
+ continue; // Probably MyISAM
+
+ if (result != HA_ERR_END_OF_FILE)
+ break; // Return error
+
+ /* End current partition */
+ late_extra_no_cache(part_id);
+ DBUG_PRINT("info", ("rnd_end on partition %d", part_id));
+ if ((result= file->ha_rnd_end()))
+ break;
+ /* Shift to next partition */
+ if (++part_id > m_part_spec.end_part)
+ {
+ result= HA_ERR_END_OF_FILE;
+ break;
+ }
+ file= m_file[part_id];
+ DBUG_PRINT("info", ("rnd_init on partition %d", part_id));
+ if ((result= file->ha_rnd_init(1)))
+ break;
+ late_extra_cache(part_id);
+ }
+ else
+ {
+ m_part_spec.start_part= part_id;
+ m_last_part= part_id;
+ table->status= 0;
+ DBUG_RETURN(0);
+ }
+ }
+
+end:
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+ table->status= STATUS_NOT_FOUND;
+ DBUG_RETURN(result);
+}
+
+
+inline void store_part_id_in_pos(byte *pos, uint part_id)
+{
+ int2store(pos, part_id);
+}
+
+inline uint get_part_id_from_pos(const byte *pos)
+{
+ return uint2korr(pos);
+}
+
+/*
+ position() is called after each call to rnd_next() if the data needs
+ to be ordered. You can do something like the following to store
+ the position:
+ ha_store_ptr(ref, ref_length, current_position);
+
+ The server uses ref to store data. ref_length in the above case is
+ the size needed to store current_position. ref is just a byte array
+ that the server will maintain. If you are using offsets to mark rows, then
+ current_position should be the offset. If it is a primary key like in
+ BDB, then it needs to be a primary key.
+
+ Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
+*/
+
+void ha_partition::position(const byte *record)
+{
+ handler *file= m_file[m_last_part];
+ DBUG_ENTER("ha_partition::position");
+ file->position(record);
+ store_part_id_in_pos(ref, m_last_part);
+ memcpy((ref + PARTITION_BYTES_IN_POS), file->ref,
+ (ref_length - PARTITION_BYTES_IN_POS));
+
+#ifdef SUPPORTING_PARTITION_OVER_DIFFERENT_ENGINES
+#ifdef HAVE_purify
+ bzero(ref + PARTITION_BYTES_IN_POS + ref_length, max_ref_length-ref_length);
+#endif /* HAVE_purify */
+#endif
+ DBUG_VOID_RETURN;
+}
+
+/*
+ This is like rnd_next, but you are given a position to use
+ to determine the row. The position will be of the type that you stored in
+ ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key
+ or position you saved when position() was called.
+ Called from filesort.cc records.cc sql_insert.cc sql_select.cc
+ sql_update.cc.
+*/
+
+int ha_partition::rnd_pos(byte * buf, byte *pos)
+{
+ uint part_id;
+ handler *file;
+ DBUG_ENTER("ha_partition::rnd_pos");
+
+ part_id= get_part_id_from_pos((const byte *) pos);
+ DBUG_ASSERT(part_id < m_tot_parts);
+ file= m_file[part_id];
+ m_last_part= part_id;
+ DBUG_RETURN(file->rnd_pos(buf, (pos + PARTITION_BYTES_IN_POS)));
+}
+
+
+/****************************************************************************
+ MODULE index scan
+****************************************************************************/
+/*
+ Positions an index cursor to the index specified in the handle. Fetches the
+ row if available. If the key value is null, begin at the first key of the
+ index.
+
+ There are loads of optimisations possible here for the partition handler.
+ The same optimisations can also be checked for full table scan although
+ only through conditions and not from index ranges.
+ Phase one optimisations:
+ Check if the fields of the partition function are bound. If so only use
+ the single partition it becomes bound to.
+ Phase two optimisations:
+ If it can be deducted through range or list partitioning that only a
+ subset of the partitions are used, then only use those partitions.
+*/
+
+/*
+ index_init is always called before starting index scans (except when
+ starting through index_read_idx and using read_range variants).
+*/
+
+int ha_partition::index_init(uint inx, bool sorted)
+{
+ int error= 0;
+ handler **file;
+ DBUG_ENTER("ha_partition::index_init");
+
+ active_index= inx;
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+ m_start_key.length= 0;
+ m_ordered= sorted;
+ m_curr_key_info= table->key_info+inx;
+ include_partition_fields_in_used_fields();
+
+ file= m_file;
+ do
+ {
+ /* TODO RONM: Change to index_init() when code is stable */
+ if ((error= (*file)->ha_index_init(inx, sorted)))
+ {
+ DBUG_ASSERT(0); // Should never happen
+ break;
+ }
+ } while (*(++file));
+ DBUG_RETURN(error);
+}
+
+
+/*
+ index_end is called at the end of an index scan to clean up any
+ things needed to clean up.
+*/
+
+int ha_partition::index_end()
+{
+ int error= 0;
+ handler **file;
+ DBUG_ENTER("ha_partition::index_end");
+
+ active_index= MAX_KEY;
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+ file= m_file;
+ do
+ {
+ int tmp;
+ /* We want to execute index_end() on all handlers */
+ /* TODO RONM: Change to index_end() when code is stable */
+ if ((tmp= (*file)->ha_index_end()))
+ error= tmp;
+ } while (*(++file));
+ DBUG_RETURN(error);
+}
+
+
+/*
+ index_read starts a new index scan using a start key. The MySQL Server
+ will check the end key on its own. Thus to function properly the
+ partitioned handler need to ensure that it delivers records in the sort
+ order of the MySQL Server.
+ index_read can be restarted without calling index_end on the previous
+ index scan and without calling index_init. In this case the index_read
+ is on the same index as the previous index_scan. This is particularly
+ used in conjuntion with multi read ranges.
+*/
+
+int ha_partition::index_read(byte * buf, const byte * key,
+ uint key_len, enum ha_rkey_function find_flag)
+{
+ DBUG_ENTER("ha_partition::index_read");
+ end_range= 0;
+ DBUG_RETURN(common_index_read(buf, key, key_len, find_flag));
+}
+
+
+int ha_partition::common_index_read(byte *buf, const byte *key, uint key_len,
+ enum ha_rkey_function find_flag)
+{
+ int error;
+ DBUG_ENTER("ha_partition::common_index_read");
+
+ memcpy((void*)m_start_key.key, key, key_len);
+ m_start_key.length= key_len;
+ m_start_key.flag= find_flag;
+ m_index_scan_type= partition_index_read;
+
+ if ((error= partition_scan_set_up(buf, TRUE)))
+ {
+ DBUG_RETURN(error);
+ }
+
+ if (!m_ordered_scan_ongoing ||
+ (find_flag == HA_READ_KEY_EXACT &&
+ (key_len >= m_curr_key_info->key_length ||
+ key_len == 0)))
+ {
+ /*
+ We use unordered index scan either when read_range is used and flag
+ is set to not use ordered or when an exact key is used and in this
+ case all records will be sorted equal and thus the sort order of the
+ resulting records doesn't matter.
+ We also use an unordered index scan when the number of partitions to
+ scan is only one.
+ The unordered index scan will use the partition set created.
+ Need to set unordered scan ongoing since we can come here even when
+ it isn't set.
+ */
+ m_ordered_scan_ongoing= FALSE;
+ error= handle_unordered_scan_next_partition(buf);
+ }
+ else
+ {
+ /*
+ In all other cases we will use the ordered index scan. This will use
+ the partition set created by the get_partition_set method.
+ */
+ error= handle_ordered_index_scan(buf);
+ }
+ DBUG_RETURN(error);
+}
+
+
+/*
+ index_first() asks for the first key in the index.
+ This is similar to index_read except that there is no start key since
+ the scan starts from the leftmost entry and proceeds forward with
+ index_next.
+
+ Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
+ and sql_select.cc.
+*/
+
+int ha_partition::index_first(byte * buf)
+{
+ DBUG_ENTER("ha_partition::index_first");
+ end_range= 0;
+ m_index_scan_type= partition_index_first;
+ DBUG_RETURN(common_first_last(buf));
+}
+
+
+/*
+ index_last() asks for the last key in the index.
+ This is similar to index_read except that there is no start key since
+ the scan starts from the rightmost entry and proceeds forward with
+ index_prev.
+
+ Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
+ and sql_select.cc.
+*/
+
+int ha_partition::index_last(byte * buf)
+{
+ DBUG_ENTER("ha_partition::index_last");
+ m_index_scan_type= partition_index_last;
+ DBUG_RETURN(common_first_last(buf));
+}
+
+int ha_partition::common_first_last(byte *buf)
+{
+ int error;
+ if ((error= partition_scan_set_up(buf, FALSE)))
+ return error;
+ if (!m_ordered_scan_ongoing)
+ return handle_unordered_scan_next_partition(buf);
+ return handle_ordered_index_scan(buf);
+}
+
+/*
+ Positions an index cursor to the index specified in key. Fetches the
+ row if any. This is only used to read whole keys.
+ TODO: Optimise this code to avoid index_init and index_end
+*/
+
+int ha_partition::index_read_idx(byte * buf, uint index, const byte * key,
+ uint key_len,
+ enum ha_rkey_function find_flag)
+{
+ int res;
+ DBUG_ENTER("ha_partition::index_read_idx");
+ index_init(index, 0);
+ res= index_read(buf, key, key_len, find_flag);
+ index_end();
+ DBUG_RETURN(res);
+}
+
+/*
+ This is used in join_read_last_key to optimise away an ORDER BY.
+ Can only be used on indexes supporting HA_READ_ORDER
+*/
+
+int ha_partition::index_read_last(byte *buf, const byte *key, uint keylen)
+{
+ DBUG_ENTER("ha_partition::index_read_last");
+ m_ordered= TRUE; // Safety measure
+ DBUG_RETURN(index_read(buf, key, keylen, HA_READ_PREFIX_LAST));
+}
+
+
+/*
+ Used to read forward through the index.
+*/
+
+int ha_partition::index_next(byte * buf)
+{
+ DBUG_ENTER("ha_partition::index_next");
+ /*
+ TODO(low priority):
+ If we want partition to work with the HANDLER commands, we
+ must be able to do index_last() -> index_prev() -> index_next()
+ */
+ DBUG_ASSERT(m_index_scan_type != partition_index_last);
+ if (!m_ordered_scan_ongoing)
+ {
+ DBUG_RETURN(handle_unordered_next(buf, FALSE));
+ }
+ DBUG_RETURN(handle_ordered_next(buf, FALSE));
+}
+
+
+/*
+ This routine is used to read the next but only if the key is the same
+ as supplied in the call.
+*/
+
+int ha_partition::index_next_same(byte *buf, const byte *key, uint keylen)
+{
+ DBUG_ENTER("ha_partition::index_next_same");
+ DBUG_ASSERT(keylen == m_start_key.length);
+ DBUG_ASSERT(m_index_scan_type != partition_index_last);
+ if (!m_ordered_scan_ongoing)
+ DBUG_RETURN(handle_unordered_next(buf, TRUE));
+ DBUG_RETURN(handle_ordered_next(buf, TRUE));
+}
+
+/*
+ Used to read backwards through the index.
+*/
+
+int ha_partition::index_prev(byte * buf)
+{
+ DBUG_ENTER("ha_partition::index_prev");
+ /* TODO: read comment in index_next */
+ DBUG_ASSERT(m_index_scan_type != partition_index_first);
+ DBUG_RETURN(handle_ordered_prev(buf));
+}
+
+
+/*
+ We reimplement read_range_first since we don't want the compare_key
+ check at the end. This is already performed in the partition handler.
+ read_range_next is very much different due to that we need to scan
+ all underlying handlers.
+*/
+
+int ha_partition::read_range_first(const key_range *start_key,
+ const key_range *end_key,
+ bool eq_range_arg, bool sorted)
+{
+ int error;
+ DBUG_ENTER("ha_partition::read_range_first");
+ m_ordered= sorted;
+ eq_range= eq_range_arg;
+ end_range= 0;
+ if (end_key)
+ {
+ end_range= &save_end_range;
+ save_end_range= *end_key;
+ key_compare_result_on_equal=
+ ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
+ (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
+ }
+ range_key_part= m_curr_key_info->key_part;
+
+ if (!start_key) // Read first record
+ {
+ m_index_scan_type= partition_index_first;
+ error= common_first_last(m_rec0);
+ }
+ else
+ {
+ error= common_index_read(m_rec0,
+ start_key->key,
+ start_key->length, start_key->flag);
+ }
+ DBUG_RETURN(error);
+}
+
+
+int ha_partition::read_range_next()
+{
+ DBUG_ENTER("ha_partition::read_range_next");
+ if (m_ordered)
+ {
+ DBUG_RETURN(handler::read_range_next());
+ }
+ DBUG_RETURN(handle_unordered_next(m_rec0, eq_range));
+}
+
+
+int ha_partition::partition_scan_set_up(byte * buf, bool idx_read_flag)
+{
+ DBUG_ENTER("ha_partition::partition_scan_set_up");
+
+ if (idx_read_flag)
+ get_partition_set(table,buf,active_index,&m_start_key,&m_part_spec);
+ else
+ get_partition_set(table, buf, MAX_KEY, 0, &m_part_spec);
+ if (m_part_spec.start_part > m_part_spec.end_part)
+ {
+ /*
+ We discovered a partition set but the set was empty so we report
+ key not found.
+ */
+ DBUG_PRINT("info", ("scan with no partition to scan"));
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+ }
+ if (m_part_spec.start_part == m_part_spec.end_part)
+ {
+ /*
+ We discovered a single partition to scan, this never needs to be
+ performed using the ordered index scan.
+ */
+ DBUG_PRINT("info", ("index scan using the single partition %d",
+ m_part_spec.start_part));
+ m_ordered_scan_ongoing= FALSE;
+ }
+ else
+ {
+ /*
+ Set m_ordered_scan_ongoing according how the scan should be done
+ */
+ m_ordered_scan_ongoing= m_ordered;
+ }
+ DBUG_ASSERT(m_part_spec.start_part < m_tot_parts &&
+ m_part_spec.end_part < m_tot_parts);
+ DBUG_RETURN(0);
+}
+
+
+/****************************************************************************
+ Unordered Index Scan Routines
+****************************************************************************/
+/*
+ These routines are used to scan partitions without considering order.
+ This is performed in two situations.
+ 1) In read_multi_range this is the normal case
+ 2) When performing any type of index_read, index_first, index_last where
+ all fields in the partition function is bound. In this case the index
+ scan is performed on only one partition and thus it isn't necessary to
+ perform any sort.
+*/
+
+int ha_partition::handle_unordered_next(byte *buf, bool next_same)
+{
+ handler *file= file= m_file[m_part_spec.start_part];
+ int error;
+ DBUG_ENTER("ha_partition::handle_unordered_next");
+
+ /*
+ We should consider if this should be split into two functions as
+ next_same is alwas a local constant
+ */
+ if (next_same)
+ {
+ if (!(error= file->index_next_same(buf, m_start_key.key,
+ m_start_key.length)))
+ {
+ m_last_part= m_part_spec.start_part;
+ DBUG_RETURN(0);
+ }
+ }
+ else if (!(error= file->index_next(buf)))
+ {
+ if (compare_key(end_range) <= 0)
+ {
+ m_last_part= m_part_spec.start_part;
+ DBUG_RETURN(0); // Row was in range
+ }
+ error= HA_ERR_END_OF_FILE;
+ }
+
+ if (error == HA_ERR_END_OF_FILE)
+ {
+ m_part_spec.start_part++; // Start using next part
+ error= handle_unordered_scan_next_partition(buf);
+ }
+ DBUG_RETURN(error);
+}
+
+
+/*
+ This routine is used to start the index scan on the next partition.
+ Both initial start and after completing scan on one partition.
+*/
+
+int ha_partition::handle_unordered_scan_next_partition(byte * buf)
+{
+ uint i;
+ DBUG_ENTER("ha_partition::handle_unordered_scan_next_partition");
+
+ for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
+ {
+ int error;
+ handler *file= m_file[i];
+
+ m_part_spec.start_part= i;
+ switch (m_index_scan_type) {
+ case partition_index_read:
+ DBUG_PRINT("info", ("index_read on partition %d", i));
+ error= file->index_read(buf, m_start_key.key,
+ m_start_key.length,
+ m_start_key.flag);
+ break;
+ case partition_index_first:
+ DBUG_PRINT("info", ("index_first on partition %d", i));
+ error= file->index_first(buf);
+ break;
+ default:
+ DBUG_ASSERT(FALSE);
+ DBUG_RETURN(1);
+ }
+ if (!error)
+ {
+ if (compare_key(end_range) <= 0)
+ {
+ m_last_part= i;
+ DBUG_RETURN(0);
+ }
+ error= HA_ERR_END_OF_FILE;
+ }
+ if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND))
+ DBUG_RETURN(error);
+ DBUG_PRINT("info", ("HA_ERR_END_OF_FILE on partition %d", i));
+ }
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+}
+
+
+/*
+ This part contains the logic to handle index scans that require ordered
+ output. This includes all except those started by read_range_first with
+ the flag ordered set to FALSE. Thus most direct index_read and all
+ index_first and index_last.
+
+ We implement ordering by keeping one record plus a key buffer for each
+ partition. Every time a new entry is requested we will fetch a new
+ entry from the partition that is currently not filled with an entry.
+ Then the entry is put into its proper sort position.
+
+ Returning a record is done by getting the top record, copying the
+ record to the request buffer and setting the partition as empty on
+ entries.
+*/
+
+int ha_partition::handle_ordered_index_scan(byte *buf)
+{
+ uint i, j= 0;
+ bool found= FALSE;
+ bool reverse_order= FALSE;
+ DBUG_ENTER("ha_partition::handle_ordered_index_scan");
+
+ m_top_entry= NO_CURRENT_PART_ID;
+ queue_remove_all(&queue);
+ for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
+ {
+ int error;
+ byte *rec_buf_ptr= rec_buf(i);
+ handler *file= m_file[i];
+
+ switch (m_index_scan_type) {
+ case partition_index_read:
+ error= file->index_read(rec_buf_ptr,
+ m_start_key.key,
+ m_start_key.length,
+ m_start_key.flag);
+ reverse_order= FALSE;
+ break;
+ case partition_index_first:
+ error= file->index_first(rec_buf_ptr);
+ reverse_order= FALSE;
+ break;
+ case partition_index_last:
+ error= file->index_last(rec_buf_ptr);
+ reverse_order= TRUE;
+ break;
+ default:
+ DBUG_ASSERT(FALSE);
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+ }
+ if (!error)
+ {
+ found= TRUE;
+ /*
+ Initialise queue without order first, simply insert
+ */
+ queue_element(&queue, j++)= (byte*)queue_buf(i);
+ }
+ else if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
+ {
+ DBUG_RETURN(error);
+ }
+ }
+ if (found)
+ {
+ /*
+ We found at least one partition with data, now sort all entries and
+ after that read the first entry and copy it to the buffer to return in.
+ */
+ queue_set_max_at_top(&queue, reverse_order);
+ queue_set_cmp_arg(&queue, (void*)m_curr_key_info);
+ queue.elements= j;
+ queue_fix(&queue);
+ return_top_record(buf);
+ DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry));
+ DBUG_RETURN(0);
+ }
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+}
+
+
+void ha_partition::return_top_record(byte *buf)
+{
+ uint part_id;
+ byte *key_buffer= queue_top(&queue);
+ byte *rec_buffer= key_buffer + PARTITION_BYTES_IN_POS;
+ part_id= uint2korr(key_buffer);
+ memcpy(buf, rec_buffer, m_rec_length);
+ m_last_part= part_id;
+ m_top_entry= part_id;
+}
+
+
+int ha_partition::handle_ordered_next(byte *buf, bool next_same)
+{
+ int error;
+ uint part_id= m_top_entry;
+ handler *file= m_file[part_id];
+ DBUG_ENTER("ha_partition::handle_ordered_next");
+
+ if (!next_same)
+ error= file->index_next(rec_buf(part_id));
+ else
+ error= file->index_next_same(rec_buf(part_id), m_start_key.key,
+ m_start_key.length);
+ if (error)
+ {
+ if (error == HA_ERR_END_OF_FILE)
+ {
+ /* Return next buffered row */
+ queue_remove(&queue, (uint) 0);
+ if (queue.elements)
+ {
+ DBUG_PRINT("info", ("Record returned from partition %u (2)",
+ m_top_entry));
+ return_top_record(buf);
+ error= 0;
+ }
+ }
+ DBUG_RETURN(error);
+ }
+ queue_replaced(&queue);
+ return_top_record(buf);
+ DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
+ DBUG_RETURN(0);
+}
+
+
+int ha_partition::handle_ordered_prev(byte *buf)
+{
+ int error;
+ uint part_id= m_top_entry;
+ handler *file= m_file[part_id];
+ DBUG_ENTER("ha_partition::handle_ordered_prev");
+ if ((error= file->index_prev(rec_buf(part_id))))
+ {
+ if (error == HA_ERR_END_OF_FILE)
+ {
+ queue_remove(&queue, (uint) 0);
+ if (queue.elements)
+ {
+ return_top_record(buf);
+ DBUG_PRINT("info", ("Record returned from partition %d (2)",
+ m_top_entry));
+ error= 0;
+ }
+ }
+ DBUG_RETURN(error);
+ }
+ queue_replaced(&queue);
+ return_top_record(buf);
+ DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry));
+ DBUG_RETURN(0);
+}
+
+
+void ha_partition::include_partition_fields_in_used_fields()
+{
+ DBUG_ENTER("ha_partition::include_partition_fields_in_used_fields");
+ Field **ptr= m_part_field_array;
+ do
+ {
+ ha_set_bit_in_read_set((*ptr)->fieldnr);
+ } while (*(++ptr));
+ DBUG_VOID_RETURN;
+}
+
+
+/****************************************************************************
+ MODULE information calls
+****************************************************************************/
+
+/*
+ These are all first approximations of the extra, info, scan_time
+ and read_time calls
+*/
+
+/*
+ ::info() is used to return information to the optimizer.
+ Currently this table handler doesn't implement most of the fields
+ really needed. SHOW also makes use of this data
+ Another note, if your handler doesn't proved exact record count,
+ you will probably want to have the following in your code:
+ if (records < 2)
+ records = 2;
+ The reason is that the server will optimize for cases of only a single
+ record. If in a table scan you don't know the number of records
+ it will probably be better to set records to two so you can return
+ as many records as you need.
+
+ Along with records a few more variables you may wish to set are:
+ records
+ deleted
+ data_file_length
+ index_file_length
+ delete_length
+ check_time
+ Take a look at the public variables in handler.h for more information.
+
+ Called in:
+ filesort.cc
+ ha_heap.cc
+ item_sum.cc
+ opt_sum.cc
+ sql_delete.cc
+ sql_delete.cc
+ sql_derived.cc
+ sql_select.cc
+ sql_select.cc
+ sql_select.cc
+ sql_select.cc
+ sql_select.cc
+ sql_show.cc
+ sql_show.cc
+ sql_show.cc
+ sql_show.cc
+ sql_table.cc
+ sql_union.cc
+ sql_update.cc
+
+ Some flags that are not implemented
+ HA_STATUS_POS:
+ This parameter is never used from the MySQL Server. It is checked in a
+ place in MyISAM so could potentially be used by MyISAM specific programs.
+ HA_STATUS_NO_LOCK:
+ This is declared and often used. It's only used by MyISAM.
+ It means that MySQL doesn't need the absolute latest statistics
+ information. This may save the handler from doing internal locks while
+ retrieving statistics data.
+*/
+
+void ha_partition::info(uint flag)
+{
+ handler *file, **file_array;
+ DBUG_ENTER("ha_partition:info");
+
+ if (flag & HA_STATUS_AUTO)
+ {
+ DBUG_PRINT("info", ("HA_STATUS_AUTO"));
+ /*
+ The auto increment value is only maintained by the first handler
+ so we will only call this.
+ */
+ m_file[0]->info(HA_STATUS_AUTO);
+ }
+ if (flag & HA_STATUS_VARIABLE)
+ {
+ DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
+ /*
+ Calculates statistical variables
+ records: Estimate of number records in table
+ We report sum (always at least 2)
+ deleted: Estimate of number holes in the table due to
+ deletes
+ We report sum
+ data_file_length: Length of data file, in principle bytes in table
+ We report sum
+ index_file_length: Length of index file, in principle bytes in
+ indexes in the table
+ We report sum
+ mean_record_length:Mean record length in the table
+ We calculate this
+ check_time: Time of last check (only applicable to MyISAM)
+ We report last time of all underlying handlers
+ */
+ records= 0;
+ deleted= 0;
+ data_file_length= 0;
+ index_file_length= 0;
+ check_time= 0;
+ file_array= m_file;
+ do
+ {
+ file= *file_array;
+ file->info(HA_STATUS_VARIABLE);
+ records+= file->records;
+ deleted+= file->deleted;
+ data_file_length+= file->data_file_length;
+ index_file_length+= file->index_file_length;
+ if (file->check_time > check_time)
+ check_time= file->check_time;
+ } while (*(++file_array));
+ if (records < 2)
+ records= 2;
+ mean_rec_length= (ulong) (data_file_length / records);
+ }
+ if (flag & HA_STATUS_CONST)
+ {
+ DBUG_PRINT("info", ("HA_STATUS_CONST"));
+ /*
+ Recalculate loads of constant variables. MyISAM also sets things
+ directly on the table share object.
+
+ Check whether this should be fixed since handlers should not
+ change things directly on the table object.
+
+ Monty comment: This should NOT be changed! It's the handlers
+ responsibility to correct table->s->keys_xxxx information if keys
+ have been disabled.
+
+ The most important parameters set here is records per key on
+ all indexes. block_size and primar key ref_length.
+
+ For each index there is an array of rec_per_key.
+ As an example if we have an index with three attributes a,b and c
+ we will have an array of 3 rec_per_key.
+ rec_per_key[0] is an estimate of number of records divided by
+ number of unique values of the field a.
+ rec_per_key[1] is an estimate of the number of records divided
+ by the number of unique combinations of the fields a and b.
+ rec_per_key[2] is an estimate of the number of records divided
+ by the number of unique combinations of the fields a,b and c.
+
+ Many handlers only set the value of rec_per_key when all fields
+ are bound (rec_per_key[2] in the example above).
+
+ If the handler doesn't support statistics, it should set all of the
+ above to 0.
+
+ We will allow the first handler to set the rec_per_key and use
+ this as an estimate on the total table.
+
+ max_data_file_length: Maximum data file length
+ We ignore it, is only used in
+ SHOW TABLE STATUS
+ max_index_file_length: Maximum index file length
+ We ignore it since it is never used
+ block_size: Block size used
+ We set it to the value of the first handler
+ sortkey: Never used at any place so ignored
+ ref_length: We set this to the value calculated
+ and stored in local object
+ raid_type: Set by first handler (MyISAM)
+ raid_chunks: Set by first handler (MyISAM)
+ raid_chunksize: Set by first handler (MyISAM)
+ create_time: Creation time of table
+ Set by first handler
+
+ So we calculate these constants by using the variables on the first
+ handler.
+ */
+
+ file= m_file[0];
+ file->info(HA_STATUS_CONST);
+ create_time= file->create_time;
+ raid_type= file->raid_type;
+ raid_chunks= file->raid_chunks;
+ raid_chunksize= file->raid_chunksize;
+ ref_length= m_ref_length;
+ }
+ if (flag & HA_STATUS_ERRKEY)
+ {
+ handler *file= m_file[m_last_part];
+ DBUG_PRINT("info", ("info: HA_STATUS_ERRKEY"));
+ /*
+ This flag is used to get index number of the unique index that
+ reported duplicate key
+ We will report the errkey on the last handler used and ignore the rest
+ */
+ file->info(HA_STATUS_ERRKEY);
+ if (file->errkey != (uint) -1)
+ errkey= file->errkey;
+ }
+ if (flag & HA_STATUS_TIME)
+ {
+ DBUG_PRINT("info", ("info: HA_STATUS_TIME"));
+ /*
+ This flag is used to set the latest update time of the table.
+ Used by SHOW commands
+ We will report the maximum of these times
+ */
+ update_time= 0;
+ file_array= m_file;
+ do
+ {
+ file= *file_array;
+ file->info(HA_STATUS_TIME);
+ if (file->update_time > update_time)
+ update_time= file->update_time;
+ } while (*(++file_array));
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ extra() is called whenever the server wishes to send a hint to
+ the storage engine. The MyISAM engine implements the most hints.
+
+ We divide the parameters into the following categories:
+ 1) Parameters used by most handlers
+ 2) Parameters used by some non-MyISAM handlers
+ 3) Parameters used only by MyISAM
+ 4) Parameters only used by temporary tables for query processing
+ 5) Parameters only used by MyISAM internally
+ 6) Parameters not used at all
+
+ The partition handler need to handle category 1), 2) and 3).
+
+ 1) Parameters used by most handlers
+ -----------------------------------
+ HA_EXTRA_RESET:
+ This option is used by most handlers and it resets the handler state
+ to the same state as after an open call. This includes releasing
+ any READ CACHE or WRITE CACHE or other internal buffer used.
+
+ It is called from the reset method in the handler interface. There are
+ three instances where this is called.
+ 1) After completing a INSERT ... SELECT ... query the handler for the
+ table inserted into is reset
+ 2) It is called from close_thread_table which in turn is called from
+ close_thread_tables except in the case where the tables are locked
+ in which case ha_commit_stmt is called instead.
+ It is only called from here if flush_version hasn't changed and the
+ table is not an old table when calling close_thread_table.
+ close_thread_tables is called from many places as a general clean up
+ function after completing a query.
+ 3) It is called when deleting the QUICK_RANGE_SELECT object if the
+ QUICK_RANGE_SELECT object had its own handler object. It is called
+ immediatley before close of this local handler object.
+ HA_EXTRA_KEYREAD:
+ HA_EXTRA_NO_KEYREAD:
+ These parameters are used to provide an optimisation hint to the handler.
+ If HA_EXTRA_KEYREAD is set it is enough to read the index fields, for
+ many handlers this means that the index-only scans can be used and it
+ is not necessary to use the real records to satisfy this part of the
+ query. Index-only scans is a very important optimisation for disk-based
+ indexes. For main-memory indexes most indexes contain a reference to the
+ record and thus KEYREAD only says that it is enough to read key fields.
+ HA_EXTRA_NO_KEYREAD disables this for the handler, also HA_EXTRA_RESET
+ will disable this option.
+ The handler will set HA_KEYREAD_ONLY in its table flags to indicate this
+ feature is supported.
+ HA_EXTRA_FLUSH:
+ Indication to flush tables to disk, called at close_thread_table to
+ ensure disk based tables are flushed at end of query execution.
+
+ 2) Parameters used by some non-MyISAM handlers
+ ----------------------------------------------
+ HA_EXTRA_RETRIEVE_ALL_COLS:
+ Many handlers have implemented optimisations to avoid fetching all
+ fields when retrieving data. In certain situations all fields need
+ to be retrieved even though the query_id is not set on all field
+ objects.
+
+ It is called from copy_data_between_tables where all fields are
+ copied without setting query_id before calling the handlers.
+ It is called from UPDATE statements when the fields of the index
+ used is updated or ORDER BY is used with UPDATE.
+ And finally when calculating checksum of a table using the CHECKSUM
+ command.
+ HA_EXTRA_RETRIEVE_PRIMARY_KEY:
+ In some situations it is mandatory to retrieve primary key fields
+ independent of the query id's. This extra flag specifies that fetch
+ of primary key fields is mandatory.
+ HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
+ This is a strictly InnoDB feature that is more or less undocumented.
+ When it is activated InnoDB copies field by field from its fetch
+ cache instead of all fields in one memcpy. Have no idea what the
+ purpose of this is.
+ Cut from include/my_base.h:
+ When using HA_EXTRA_KEYREAD, overwrite only key member fields and keep
+ other fields intact. When this is off (by default) InnoDB will use memcpy
+ to overwrite entire row.
+ HA_EXTRA_IGNORE_DUP_KEY:
+ HA_EXTRA_NO_IGNORE_DUP_KEY:
+ Informs the handler to we will not stop the transaction if we get an
+ duplicate key errors during insert/upate.
+ Always called in pair, triggered by INSERT IGNORE and other similar
+ SQL constructs.
+ Not used by MyISAM.
+
+ 3) Parameters used only by MyISAM
+ ---------------------------------
+ HA_EXTRA_NORMAL:
+ Only used in MyISAM to reset quick mode, not implemented by any other
+ handler. Quick mode is also reset in MyISAM by HA_EXTRA_RESET.
+
+ It is called after completing a successful DELETE query if the QUICK
+ option is set.
+
+ HA_EXTRA_QUICK:
+ When the user does DELETE QUICK FROM table where-clause; this extra
+ option is called before the delete query is performed and
+ HA_EXTRA_NORMAL is called after the delete query is completed.
+ Temporary tables used internally in MySQL always set this option
+
+ The meaning of quick mode is that when deleting in a B-tree no merging
+ of leafs is performed. This is a common method and many large DBMS's
+ actually only support this quick mode since it is very difficult to
+ merge leaves in a tree used by many threads concurrently.
+
+ HA_EXTRA_CACHE:
+ This flag is usually set with extra_opt along with a cache size.
+ The size of this buffer is set by the user variable
+ record_buffer_size. The value of this cache size is the amount of
+ data read from disk in each fetch when performing a table scan.
+ This means that before scanning a table it is normal to call
+ extra with HA_EXTRA_CACHE and when the scan is completed to call
+ HA_EXTRA_NO_CACHE to release the cache memory.
+
+ Some special care is taken when using this extra parameter since there
+ could be a write ongoing on the table in the same statement. In this
+ one has to take special care since there might be a WRITE CACHE as
+ well. HA_EXTRA_CACHE specifies using a READ CACHE and using
+ READ CACHE and WRITE CACHE at the same time is not possible.
+
+ Only MyISAM currently use this option.
+
+ It is set when doing full table scans using rr_sequential and
+ reset when completing such a scan with end_read_record
+ (resetting means calling extra with HA_EXTRA_NO_CACHE).
+
+ It is set in filesort.cc for MyISAM internal tables and it is set in
+ a multi-update where HA_EXTRA_CACHE is called on a temporary result
+ table and after that ha_rnd_init(0) on table to be updated
+ and immediately after that HA_EXTRA_NO_CACHE on table to be updated.
+
+ Apart from that it is always used from init_read_record but not when
+ used from UPDATE statements. It is not used from DELETE statements
+ with ORDER BY and LIMIT but it is used in normal scan loop in DELETE
+ statements. The reason here is that DELETE's in MyISAM doesn't move
+ existings data rows.
+
+ It is also set in copy_data_between_tables when scanning the old table
+ to copy over to the new table.
+ And it is set in join_init_read_record where quick objects are used
+ to perform a scan on the table. In this case the full table scan can
+ even be performed multiple times as part of the nested loop join.
+
+ For purposes of the partition handler it is obviously necessary to have
+ special treatment of this extra call. If we would simply pass this
+ extra call down to each handler we would allocate
+ cache size * no of partitions amount of memory and this is not
+ necessary since we will only scan one partition at a time when doing
+ full table scans.
+
+ Thus we treat it by first checking whether we have MyISAM handlers in
+ the table, if not we simply ignore the call and if we have we will
+ record the call but will not call any underlying handler yet. Then
+ when performing the sequential scan we will check this recorded value
+ and call extra_opt whenever we start scanning a new partition.
+
+ monty: Neads to be fixed so that it's passed to all handlers when we
+ move to another partition during table scan.
+
+ HA_EXTRA_NO_CACHE:
+ When performing a UNION SELECT HA_EXTRA_NO_CACHE is called from the
+ flush method in the select_union class.
+ It is used to some extent when insert delayed inserts.
+ See HA_EXTRA_RESET_STATE for use in conjunction with delete_all_rows().
+
+ It should be ok to call HA_EXTRA_NO_CACHE on all underlying handlers
+ if they are MyISAM handlers. Other handlers we can ignore the call
+ for. If no cache is in use they will quickly return after finding
+ this out. And we also ensure that all caches are disabled and no one
+ is left by mistake.
+ In the future this call will probably be deleted an we will instead call
+ ::reset();
+
+ HA_EXTRA_WRITE_CACHE:
+ See above, called from various places. It is mostly used when we
+ do INSERT ... SELECT
+ No special handling to save cache space is developed currently.
+
+ HA_EXTRA_PREPARE_FOR_UPDATE:
+ This is called as part of a multi-table update. When the table to be
+ updated is also scanned then this informs MyISAM handler to drop any
+ caches if dynamic records are used (fixed size records do not care
+ about this call). We pass this along to all underlying MyISAM handlers
+ and ignore it for the rest.
+
+ HA_EXTRA_PREPARE_FOR_DELETE:
+ Only used by MyISAM, called in preparation for a DROP TABLE.
+ It's used mostly by Windows that cannot handle dropping an open file.
+ On other platforms it has the same effect as HA_EXTRA_FORCE_REOPEN.
+
+ HA_EXTRA_READCHECK:
+ HA_EXTRA_NO_READCHECK:
+ Only one call to HA_EXTRA_NO_READCHECK from ha_open where it says that
+ this is not needed in SQL. The reason for this call is that MyISAM sets
+ the READ_CHECK_USED in the open call so the call is needed for MyISAM
+ to reset this feature.
+ The idea with this parameter was to inform of doing/not doing a read
+ check before applying an update. Since SQL always performs a read before
+ applying the update No Read Check is needed in MyISAM as well.
+
+ This is a cut from Docs/myisam.txt
+ Sometimes you might want to force an update without checking whether
+ another user has changed the record since you last read it. This is
+ somewhat dangerous, so it should ideally not be used. That can be
+ accomplished by wrapping the mi_update() call in two calls to mi_extra(),
+ using these functions:
+ HA_EXTRA_NO_READCHECK=5 No readcheck on update
+ HA_EXTRA_READCHECK=6 Use readcheck (def)
+
+ HA_EXTRA_FORCE_REOPEN:
+ Only used by MyISAM, called when altering table, closing tables to
+ enforce a reopen of the table files.
+
+ 4) Parameters only used by temporary tables for query processing
+ ----------------------------------------------------------------
+ HA_EXTRA_RESET_STATE:
+ Same as HA_EXTRA_RESET except that buffers are not released. If there is
+ a READ CACHE it is reinit'ed. A cache is reinit'ed to restart reading
+ or to change type of cache between READ CACHE and WRITE CACHE.
+
+ This extra function is always called immediately before calling
+ delete_all_rows on the handler for temporary tables.
+ There are cases however when HA_EXTRA_RESET_STATE isn't called in
+ a similar case for a temporary table in sql_union.cc and in two other
+ cases HA_EXTRA_NO_CACHE is called before and HA_EXTRA_WRITE_CACHE
+ called afterwards.
+ The case with HA_EXTRA_NO_CACHE and HA_EXTRA_WRITE_CACHE means
+ disable caching, delete all rows and enable WRITE CACHE. This is
+ used for temporary tables containing distinct sums and a
+ functional group.
+
+ The only case that delete_all_rows is called on non-temporary tables
+ is in sql_delete.cc when DELETE FROM table; is called by a user.
+ In this case no special extra calls are performed before or after this
+ call.
+
+ The partition handler should not need to bother about this one. It
+ should never be called.
+
+ HA_EXTRA_NO_ROWS:
+ Don't insert rows indication to HEAP and MyISAM, only used by temporary
+ tables used in query processing.
+ Not handled by partition handler.
+
+ 5) Parameters only used by MyISAM internally
+ --------------------------------------------
+ HA_EXTRA_REINIT_CACHE:
+ This call reinitialises the READ CACHE described above if there is one
+ and otherwise the call is ignored.
+
+ We can thus safely call it on all underlying handlers if they are
+ MyISAM handlers. It is however never called so we don't handle it at all.
+ HA_EXTRA_FLUSH_CACHE:
+ Flush WRITE CACHE in MyISAM. It is only from one place in the code.
+ This is in sql_insert.cc where it is called if the table_flags doesn't
+ contain HA_DUPP_POS. The only handler having the HA_DUPP_POS set is the
+ MyISAM handler and so the only handler not receiving this call is MyISAM.
+ Thus in effect this call is called but never used. Could be removed
+ from sql_insert.cc
+ HA_EXTRA_NO_USER_CHANGE:
+ Only used by MyISAM, never called.
+ Simulates lock_type as locked.
+ HA_EXTRA_WAIT_LOCK:
+ HA_EXTRA_WAIT_NOLOCK:
+ Only used by MyISAM, called from MyISAM handler but never from server
+ code on top of the handler.
+ Sets lock_wait on/off
+ HA_EXTRA_NO_KEYS:
+ Only used MyISAM, only used internally in MyISAM handler, never called
+ from server level.
+ HA_EXTRA_KEYREAD_CHANGE_POS:
+ HA_EXTRA_REMEMBER_POS:
+ HA_EXTRA_RESTORE_POS:
+ HA_EXTRA_PRELOAD_BUFFER_SIZE:
+ HA_EXTRA_CHANGE_KEY_TO_DUP:
+ HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
+ Only used by MyISAM, never called.
+
+ 6) Parameters not used at all
+ -----------------------------
+ HA_EXTRA_KEY_CACHE:
+ HA_EXTRA_NO_KEY_CACHE:
+ This parameters are no longer used and could be removed.
+*/
+
+int ha_partition::extra(enum ha_extra_function operation)
+{
+ DBUG_ENTER("ha_partition:extra");
+ DBUG_PRINT("info", ("operation: %d", (int) operation));
+
+ switch (operation) {
+ /* Category 1), used by most handlers */
+ case HA_EXTRA_KEYREAD:
+ case HA_EXTRA_NO_KEYREAD:
+ case HA_EXTRA_FLUSH:
+ DBUG_RETURN(loop_extra(operation));
+
+ /* Category 2), used by non-MyISAM handlers */
+ case HA_EXTRA_IGNORE_DUP_KEY:
+ case HA_EXTRA_NO_IGNORE_DUP_KEY:
+ case HA_EXTRA_RETRIEVE_ALL_COLS:
+ case HA_EXTRA_RETRIEVE_PRIMARY_KEY:
+ case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
+ {
+ if (!m_myisam)
+ DBUG_RETURN(loop_extra(operation));
+ break;
+ }
+
+ /* Category 3), used by MyISAM handlers */
+ case HA_EXTRA_NORMAL:
+ case HA_EXTRA_QUICK:
+ case HA_EXTRA_NO_READCHECK:
+ case HA_EXTRA_PREPARE_FOR_UPDATE:
+ case HA_EXTRA_PREPARE_FOR_DELETE:
+ case HA_EXTRA_FORCE_REOPEN:
+ {
+ if (m_myisam)
+ DBUG_RETURN(loop_extra(operation));
+ break;
+ }
+ case HA_EXTRA_CACHE:
+ {
+ prepare_extra_cache(0);
+ break;
+ }
+ case HA_EXTRA_NO_CACHE:
+ {
+ m_extra_cache= FALSE;
+ m_extra_cache_size= 0;
+ DBUG_RETURN(loop_extra(operation));
+ }
+ default:
+ {
+ /* Temporary crash to discover what is wrong */
+ DBUG_ASSERT(0);
+ break;
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ This will in the future be called instead of extra(HA_EXTRA_RESET) as this
+ is such a common call
+*/
+
+int ha_partition::reset(void)
+{
+ int result= 0, tmp;
+ handler **file;
+ DBUG_ENTER("ha_partition::reset");
+ file= m_file;
+ do
+ {
+ if ((tmp= (*file)->reset()))
+ result= tmp;
+ } while (*(++file));
+ DBUG_RETURN(result);
+}
+
+
+int ha_partition::extra_opt(enum ha_extra_function operation, ulong cachesize)
+{
+ DBUG_ENTER("ha_partition::extra_opt()");
+ DBUG_ASSERT(HA_EXTRA_CACHE == operation);
+ prepare_extra_cache(cachesize);
+ DBUG_RETURN(0);
+}
+
+
+void ha_partition::prepare_extra_cache(uint cachesize)
+{
+ DBUG_ENTER("ha_partition::prepare_extra_cache()");
+
+ m_extra_cache= TRUE;
+ m_extra_cache_size= cachesize;
+ if (m_part_spec.start_part != NO_CURRENT_PART_ID)
+ {
+ DBUG_ASSERT(m_part_spec.start_part == 0);
+ late_extra_cache(0);
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+int ha_partition::loop_extra(enum ha_extra_function operation)
+{
+ int result= 0, tmp;
+ handler **file;
+ DBUG_ENTER("ha_partition::loop_extra()");
+ for (file= m_file; *file; file++)
+ {
+ if ((tmp= (*file)->extra(operation)))
+ result= tmp;
+ }
+ DBUG_RETURN(result);
+}
+
+
+void ha_partition::late_extra_cache(uint partition_id)
+{
+ handler *file;
+ DBUG_ENTER("ha_partition::late_extra_cache");
+ if (!m_extra_cache)
+ DBUG_VOID_RETURN;
+ file= m_file[partition_id];
+ if (m_extra_cache_size == 0)
+ VOID(file->extra(HA_EXTRA_CACHE));
+ else
+ VOID(file->extra_opt(HA_EXTRA_CACHE, m_extra_cache_size));
+ DBUG_VOID_RETURN;
+}
+
+
+void ha_partition::late_extra_no_cache(uint partition_id)
+{
+ handler *file;
+ DBUG_ENTER("ha_partition::late_extra_no_cache");
+ if (!m_extra_cache)
+ DBUG_VOID_RETURN;
+ file= m_file[partition_id];
+ VOID(file->extra(HA_EXTRA_NO_CACHE));
+ DBUG_VOID_RETURN;
+}
+
+
+/****************************************************************************
+ MODULE optimiser support
+****************************************************************************/
+
+const key_map *ha_partition::keys_to_use_for_scanning()
+{
+ DBUG_ENTER("ha_partition::keys_to_use_for_scanning");
+ DBUG_RETURN(m_file[0]->keys_to_use_for_scanning());
+}
+
+double ha_partition::scan_time()
+{
+ double scan_time= 0;
+ handler **file;
+ DBUG_ENTER("ha_partition::scan_time");
+
+ for (file= m_file; *file; file++)
+ scan_time+= (*file)->scan_time();
+ DBUG_RETURN(scan_time);
+}
+
+
+/*
+ This will be optimised later to include whether or not the index can
+ be used with partitioning. To achieve we need to add another parameter
+ that specifies how many of the index fields that are bound in the ranges.
+ Possibly added as a new call to handlers.
+*/
+
+double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
+{
+ DBUG_ENTER("ha_partition::read_time");
+ DBUG_RETURN(m_file[0]->read_time(index, ranges, rows));
+}
+
+/*
+ Given a starting key, and an ending key estimate the number of rows that
+ will exist between the two. end_key may be empty which in case determine
+ if start_key matches any rows.
+
+ Called from opt_range.cc by check_quick_keys().
+
+ monty: MUST be called for each range and added.
+ Note that MySQL will assume that if this returns 0 there is no
+ matching rows for the range!
+*/
+
+ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,
+ key_range *max_key)
+{
+ ha_rows in_range= 0;
+ handler **file;
+ DBUG_ENTER("ha_partition::records_in_range");
+
+ file= m_file;
+ do
+ {
+ in_range+= (*file)->records_in_range(inx, min_key, max_key);
+ } while (*(++file));
+ DBUG_RETURN(in_range);
+}
+
+
+ha_rows ha_partition::estimate_rows_upper_bound()
+{
+ ha_rows rows, tot_rows= 0;
+ handler **file;
+ DBUG_ENTER("ha_partition::estimate_rows_upper_bound");
+
+ file= m_file;
+ do
+ {
+ rows= (*file)->estimate_rows_upper_bound();
+ if (rows == HA_POS_ERROR)
+ DBUG_RETURN(HA_POS_ERROR);
+ tot_rows+= rows;
+ } while (*(++file));
+ DBUG_RETURN(tot_rows);
+}
+
+
+uint8 ha_partition::table_cache_type()
+{
+ DBUG_ENTER("ha_partition::table_cache_type");
+ DBUG_RETURN(m_file[0]->table_cache_type());
+}
+
+
+/****************************************************************************
+ MODULE print messages
+****************************************************************************/
+
+const char *ha_partition::index_type(uint inx)
+{
+ DBUG_ENTER("ha_partition::index_type");
+ DBUG_RETURN(m_file[0]->index_type(inx));
+}
+
+
+void ha_partition::print_error(int error, myf errflag)
+{
+ DBUG_ENTER("ha_partition::print_error");
+ /* Should probably look for my own errors first */
+ /* monty: needs to be called for the last used partition ! */
+ m_file[0]->print_error(error, errflag);
+ DBUG_VOID_RETURN;
+}
+
+
+bool ha_partition::get_error_message(int error, String *buf)
+{
+ DBUG_ENTER("ha_partition::get_error_message");
+ /* Should probably look for my own errors first */
+ /* monty: needs to be called for the last used partition ! */
+ DBUG_RETURN(m_file[0]->get_error_message(error, buf));
+}
+
+
+/****************************************************************************
+ MODULE handler characteristics
+****************************************************************************/
+/*
+ If frm_error() is called then we will use this to to find out what file
+ extensions exist for the storage engine. This is also used by the default
+ rename_table and delete_table method in handler.cc.
+*/
+
+static const char *ha_partition_ext[]=
+{
+ ha_par_ext, NullS
+};
+
+const char **ha_partition::bas_ext() const
+{ return ha_partition_ext; }
+
+
+uint ha_partition::min_of_the_max_uint(uint (handler::*operator_func)(void) const) const
+{
+ handler **file;
+ uint min_of_the_max= ((*m_file)->*operator_func)();
+
+ for (file= m_file+1; *file; file++)
+ {
+ uint tmp= ((*file)->*operator_func)();
+ set_if_smaller(min_of_the_max, tmp);
+ }
+ return min_of_the_max;
+}
+
+
+uint ha_partition::max_supported_key_parts() const
+{
+ return min_of_the_max_uint(&handler::max_supported_key_parts);
+}
+
+
+uint ha_partition::max_supported_key_length() const
+{
+ return min_of_the_max_uint(&handler::max_supported_key_length);
+}
+
+
+uint ha_partition::max_supported_key_part_length() const
+{
+ return min_of_the_max_uint(&handler::max_supported_key_part_length);
+}
+
+
+uint ha_partition::max_supported_record_length() const
+{
+ return min_of_the_max_uint(&handler::max_supported_record_length);
+}
+
+
+uint ha_partition::max_supported_keys() const
+{
+ return min_of_the_max_uint(&handler::max_supported_keys);
+}
+
+
+uint ha_partition::extra_rec_buf_length() const
+{
+ handler **file;
+ uint max= (*m_file)->extra_rec_buf_length();
+ for (file= m_file, file++; *file; file++)
+ if (max < (*file)->extra_rec_buf_length())
+ max= (*file)->extra_rec_buf_length();
+ return max;
+}
+
+
+uint ha_partition::min_record_length(uint options) const
+{
+ handler **file;
+ uint max= (*m_file)->min_record_length(options);
+ for (file= m_file, file++; *file; file++)
+ if (max < (*file)->min_record_length(options))
+ max= (*file)->min_record_length(options);
+ return max;
+}
+
+
+/****************************************************************************
+ MODULE compare records
+****************************************************************************/
+/*
+ We get two references and need to check if those records are the same.
+ If they belong to different partitions we decide that they are not
+ the same record. Otherwise we use the particular handler to decide if
+ they are the same. Sort in partition id order if not equal.
+*/
+
+int ha_partition::cmp_ref(const byte *ref1, const byte *ref2)
+{
+ uint part_id;
+ my_ptrdiff_t diff1, diff2;
+ handler *file;
+ DBUG_ENTER("ha_partition::cmp_ref");
+ if ((ref1[0] == ref2[0]) && (ref1[1] == ref2[1]))
+ {
+ part_id= get_part_id_from_pos(ref1);
+ file= m_file[part_id];
+ DBUG_ASSERT(part_id < m_tot_parts);
+ DBUG_RETURN(file->cmp_ref((ref1 + PARTITION_BYTES_IN_POS),
+ (ref2 + PARTITION_BYTES_IN_POS)));
+ }
+ diff1= ref2[1] - ref1[1];
+ diff2= ref2[0] - ref1[0];
+ if (diff1 > 0)
+ {
+ DBUG_RETURN(-1);
+ }
+ if (diff1 < 0)
+ {
+ DBUG_RETURN(+1);
+ }
+ if (diff2 > 0)
+ {
+ DBUG_RETURN(-1);
+ }
+ DBUG_RETURN(+1);
+}
+
+
+/****************************************************************************
+ MODULE auto increment
+****************************************************************************/
+
+void ha_partition::restore_auto_increment()
+{
+ DBUG_ENTER("ha_partition::restore_auto_increment");
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ This method is called by update_auto_increment which in turn is called
+ by the individual handlers as part of write_row. We will always let
+ the first handler keep track of the auto increment value for all
+ partitions.
+*/
+
+ulonglong ha_partition::get_auto_increment()
+{
+ DBUG_ENTER("ha_partition::get_auto_increment");
+ DBUG_RETURN(m_file[0]->get_auto_increment());
+}
+
+
+/****************************************************************************
+ MODULE initialise handler for HANDLER call
+****************************************************************************/
+
+void ha_partition::init_table_handle_for_HANDLER()
+{
+ return;
+}
+
+
+/****************************************************************************
+ MODULE Partition Share
+****************************************************************************/
+/*
+ Service routines for ... methods.
+-------------------------------------------------------------------------
+ Variables for partition share methods. A hash used to track open tables.
+ A mutex for the hash table and an init variable to check if hash table
+ is initialised.
+ There is also a constant ending of the partition handler file name.
+*/
+
+#ifdef NOT_USED
+static HASH partition_open_tables;
+static pthread_mutex_t partition_mutex;
+static int partition_init= 0;
+
+
+/*
+ Function we use in the creation of our hash to get key.
+*/
+static byte *partition_get_key(PARTITION_SHARE *share, uint *length,
+ my_bool not_used __attribute__ ((unused)))
+{
+ *length= share->table_name_length;
+ return (byte *) share->table_name;
+}
+
+/*
+ Example of simple lock controls. The "share" it creates is structure we
+ will pass to each partition handler. Do you have to have one of these?
+ Well, you have pieces that are used for locking, and they are needed to
+ function.
+*/
+
+
+static PARTITION_SHARE *get_share(const char *table_name, TABLE *table)
+{
+ PARTITION_SHARE *share;
+ uint length;
+ char *tmp_name;
+
+ /*
+ So why does this exist? There is no way currently to init a storage
+ engine.
+ Innodb and BDB both have modifications to the server to allow them to
+ do this. Since you will not want to do this, this is probably the next
+ best method.
+ */
+ if (!partition_init)
+ {
+ /* Hijack a mutex for init'ing the storage engine */
+ pthread_mutex_lock(&LOCK_mysql_create_db);
+ if (!partition_init)
+ {
+ partition_init++;
+ VOID(pthread_mutex_init(&partition_mutex, MY_MUTEX_INIT_FAST));
+ (void) hash_init(&partition_open_tables, system_charset_info, 32, 0, 0,
+ (hash_get_key) partition_get_key, 0, 0);
+ }
+ pthread_mutex_unlock(&LOCK_mysql_create_db);
+ }
+ pthread_mutex_lock(&partition_mutex);
+ length= (uint) strlen(table_name);
+
+ if (!(share= (PARTITION_SHARE *) hash_search(&partition_open_tables,
+ (byte *) table_name, length)))
+ {
+ if (!(share= (PARTITION_SHARE *)
+ my_multi_malloc(MYF(MY_WME | MY_ZEROFILL),
+ &share, sizeof(*share),
+ &tmp_name, length + 1, NullS)))
+ {
+ pthread_mutex_unlock(&partition_mutex);
+ return NULL;
+ }
+
+ share->use_count= 0;
+ share->table_name_length= length;
+ share->table_name= tmp_name;
+ strmov(share->table_name, table_name);
+ if (my_hash_insert(&partition_open_tables, (byte *) share))
+ goto error;
+ thr_lock_init(&share->lock);
+ pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
+ }
+ share->use_count++;
+ pthread_mutex_unlock(&partition_mutex);
+
+ return share;
+
+error:
+ pthread_mutex_unlock(&partition_mutex);
+ my_free((gptr) share, MYF(0));
+
+ return NULL;
+}
+
+
+/*
+ Free lock controls. We call this whenever we close a table. If the table
+ had the last reference to the share then we free memory associated with
+ it.
+*/
+
+static int free_share(PARTITION_SHARE *share)
+{
+ pthread_mutex_lock(&partition_mutex);
+ if (!--share->use_count)
+ {
+ hash_delete(&partition_open_tables, (byte *) share);
+ thr_lock_delete(&share->lock);
+ pthread_mutex_destroy(&share->mutex);
+ my_free((gptr) share, MYF(0));
+ }
+ pthread_mutex_unlock(&partition_mutex);
+
+ return 0;
+}
+#endif /* NOT_USED */
+#endif /* HAVE_PARTITION_DB */
diff --git a/sql/ha_partition.h b/sql/ha_partition.h
new file mode 100644
index 00000000000..e78cff4cdbb
--- /dev/null
+++ b/sql/ha_partition.h
@@ -0,0 +1,916 @@
+/* Copyright (C) 2005 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifdef __GNUC__
+#pragma interface /* gcc class implementation */
+#endif
+
+/*
+ PARTITION_SHARE is a structure that will be shared amoung all open handlers
+ The partition implements the minimum of what you will probably need.
+*/
+
+typedef struct st_partition_share
+{
+ char *table_name;
+ uint table_name_length, use_count;
+ pthread_mutex_t mutex;
+ THR_LOCK lock;
+} PARTITION_SHARE;
+
+
+#define PARTITION_BYTES_IN_POS 2
+class ha_partition :public handler
+{
+private:
+ enum partition_index_scan_type
+ {
+ partition_index_read= 0,
+ partition_index_first= 1,
+ partition_index_last= 2,
+ partition_no_index_scan= 3
+ };
+ /* Data for the partition handler */
+ char *m_file_buffer; // Buffer with names
+ char *m_name_buffer_ptr; // Pointer to first partition name
+ uchar *m_engine_array; // Array of types of the handlers
+ handler **m_file; // Array of references to handler inst.
+ partition_info *m_part_info; // local reference to partition
+ byte *m_start_key_ref; // Reference of start key in current
+ // index scan info
+ Field **m_part_field_array; // Part field array locally to save acc
+ byte *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan
+ KEY *m_curr_key_info; // Current index
+ byte *m_rec0; // table->record[0]
+ QUEUE queue; // Prio queue used by sorted read
+ /*
+ Since the partition handler is a handler on top of other handlers, it
+ is necessary to keep information about what the underlying handler
+ characteristics is. It is not possible to keep any handler instances
+ for this since the MySQL Server sometimes allocating the handler object
+ without freeing them.
+ */
+ u_long m_table_flags;
+ u_long m_low_byte_first;
+
+ uint m_tot_parts; // Total number of partitions;
+ uint m_last_part; // Last file that we update,write
+ int m_lock_type; // Remembers type of last
+ // external_lock
+ part_id_range m_part_spec; // Which parts to scan
+ uint m_scan_value; // Value passed in rnd_init
+ // call
+ uint m_ref_length; // Length of position in this
+ // handler object
+ key_range m_start_key; // index read key range
+ enum partition_index_scan_type m_index_scan_type;// What type of index
+ // scan
+ uint m_top_entry; // Which partition is to
+ // deliver next result
+ uint m_rec_length; // Local copy of record length
+
+ bool m_ordered; // Ordered/Unordered index scan
+ bool m_has_transactions; // Can we support transactions
+ bool m_pkey_is_clustered; // Is primary key clustered
+ bool m_create_handler; // Handler used to create table
+ bool m_is_sub_partitioned; // Is subpartitioned
+ bool m_ordered_scan_ongoing;
+ bool m_use_bit_array;
+
+ /*
+ We keep track if all underlying handlers are MyISAM since MyISAM has a
+ great number of extra flags not needed by other handlers.
+ */
+ bool m_myisam; // Are all underlying handlers
+ // MyISAM
+ /*
+ We keep track of InnoDB handlers below since it requires proper setting
+ of query_id in fields at index_init and index_read calls.
+ */
+ bool m_innodb; // Are all underlying handlers
+ // InnoDB
+ /*
+ When calling extra(HA_EXTRA_CACHE) we do not pass this to the underlying
+ handlers immediately. Instead we cache it and call the underlying
+ immediately before starting the scan on the partition. This is to
+ prevent allocating a READ CACHE for each partition in parallel when
+ performing a full table scan on MyISAM partitioned table.
+ This state is cleared by extra(HA_EXTRA_NO_CACHE).
+ */
+ bool m_extra_cache;
+ uint m_extra_cache_size;
+
+ void init_handler_variables();
+ /*
+ Variables for lock structures.
+ */
+ THR_LOCK_DATA lock; /* MySQL lock */
+ PARTITION_SHARE *share; /* Shared lock info */
+
+public:
+ /*
+ -------------------------------------------------------------------------
+ MODULE create/delete handler object
+ -------------------------------------------------------------------------
+ Object create/delete methode. The normal called when a table object
+ exists. There is also a method to create the handler object with only
+ partition information. This is used from mysql_create_table when the
+ table is to be created and the engine type is deduced to be the
+ partition handler.
+ -------------------------------------------------------------------------
+ */
+ ha_partition(TABLE * table);
+ ha_partition(partition_info * part_info);
+ ~ha_partition();
+ /*
+ A partition handler has no characteristics in itself. It only inherits
+ those from the underlying handlers. Here we set-up those constants to
+ enable later calls of the methods to retrieve constants from the under-
+ lying handlers. Returns false if not successful.
+ */
+ int ha_initialise();
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE meta data changes
+ -------------------------------------------------------------------------
+ Meta data routines to CREATE, DROP, RENAME table and often used at
+ ALTER TABLE (update_create_info used from ALTER TABLE and SHOW ..).
+
+ update_table_comment is used in SHOW TABLE commands to provide a
+ chance for the handler to add any interesting comments to the table
+ comments not provided by the users comment.
+
+ create_handler_files is called before opening a new handler object
+ with openfrm to call create. It is used to create any local handler
+ object needed in opening the object in openfrm
+ -------------------------------------------------------------------------
+ */
+ virtual int delete_table(const char *from);
+ virtual int rename_table(const char *from, const char *to);
+ virtual int create(const char *name, TABLE * form,
+ HA_CREATE_INFO * create_info);
+ virtual int create_handler_files(const char *name);
+ virtual void update_create_info(HA_CREATE_INFO * create_info);
+ virtual char *update_table_comment(const char *comment);
+private:
+ /*
+ delete_table, rename_table and create uses very similar logic which
+ is packed into this routine.
+ */
+ uint del_ren_cre_table(const char *from,
+ const char *to= NULL,
+ TABLE * table_arg= NULL,
+ HA_CREATE_INFO * create_info= NULL);
+ /*
+ One method to create the table_name.par file containing the names of the
+ underlying partitions, their engine and the number of partitions.
+ And one method to read it in.
+ */
+ bool create_handler_file(const char *name);
+ bool get_from_handler_file(const char *name);
+ bool new_handlers_from_part_info();
+ bool create_handlers();
+ void clear_handler_file();
+ void set_up_table_before_create(TABLE * table_arg, HA_CREATE_INFO * info,
+ uint part_id);
+ partition_element *find_partition_element(uint part_id);
+public:
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE open/close object
+ -------------------------------------------------------------------------
+ Open and close handler object to ensure all underlying files and
+ objects allocated and deallocated for query handling is handled
+ properly.
+ -------------------------------------------------------------------------
+
+ A handler object is opened as part of its initialisation and before
+ being used for normal queries (not before meta-data changes always.
+ If the object was opened it will also be closed before being deleted.
+ */
+ virtual int open(const char *name, int mode, uint test_if_locked);
+ virtual int close(void);
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE start/end statement
+ -------------------------------------------------------------------------
+ This module contains methods that are used to understand start/end of
+ statements, transaction boundaries, and aid for proper concurrency
+ control.
+ The partition handler need not implement abort and commit since this
+ will be handled by any underlying handlers implementing transactions.
+ There is only one call to each handler type involved per transaction
+ and these go directly to the handlers supporting transactions
+ currently InnoDB, BDB and NDB).
+ -------------------------------------------------------------------------
+ */
+ virtual THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to,
+ enum thr_lock_type lock_type);
+ virtual int external_lock(THD * thd, int lock_type);
+ /*
+ When table is locked a statement is started by calling start_stmt
+ instead of external_lock
+ */
+ virtual int start_stmt(THD * thd);
+ /*
+ Lock count is number of locked underlying handlers (I assume)
+ */
+ virtual uint lock_count(void) const;
+ /*
+ Call to unlock rows not to be updated in transaction
+ */
+ virtual void unlock_row();
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE change record
+ -------------------------------------------------------------------------
+ This part of the handler interface is used to change the records
+ after INSERT, DELETE, UPDATE, REPLACE method calls but also other
+ special meta-data operations as ALTER TABLE, LOAD DATA, TRUNCATE.
+ -------------------------------------------------------------------------
+
+ These methods are used for insert (write_row), update (update_row)
+ and delete (delete_row). All methods to change data always work on
+ one row at a time. update_row and delete_row also contains the old
+ row.
+ delete_all_rows will delete all rows in the table in one call as a
+ special optimisation for DELETE from table;
+
+ Bulk inserts are supported if all underlying handlers support it.
+ start_bulk_insert and end_bulk_insert is called before and after a
+ number of calls to write_row.
+ Not yet though.
+ */
+ virtual int write_row(byte * buf);
+ virtual int update_row(const byte * old_data, byte * new_data);
+ virtual int delete_row(const byte * buf);
+ virtual int delete_all_rows(void);
+ virtual void start_bulk_insert(ha_rows rows);
+ virtual int end_bulk_insert();
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE full table scan
+ -------------------------------------------------------------------------
+ This module is used for the most basic access method for any table
+ handler. This is to fetch all data through a full table scan. No
+ indexes are needed to implement this part.
+ It contains one method to start the scan (rnd_init) that can also be
+ called multiple times (typical in a nested loop join). Then proceeding
+ to the next record (rnd_next) and closing the scan (rnd_end).
+ To remember a record for later access there is a method (position)
+ and there is a method used to retrieve the record based on the stored
+ position.
+ The position can be a file position, a primary key, a ROWID dependent
+ on the handler below.
+ -------------------------------------------------------------------------
+ */
+ /*
+ unlike index_init(), rnd_init() can be called two times
+ without rnd_end() in between (it only makes sense if scan=1).
+ then the second call should prepare for the new table scan
+ (e.g if rnd_init allocates the cursor, second call should
+ position it to the start of the table, no need to deallocate
+ and allocate it again
+ */
+ virtual int rnd_init(bool scan);
+ virtual int rnd_end();
+ virtual int rnd_next(byte * buf);
+ virtual int rnd_pos(byte * buf, byte * pos);
+ virtual void position(const byte * record);
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE index scan
+ -------------------------------------------------------------------------
+ This part of the handler interface is used to perform access through
+ indexes. The interface is defined as a scan interface but the handler
+ can also use key lookup if the index is a unique index or a primary
+ key index.
+ Index scans are mostly useful for SELECT queries but are an important
+ part also of UPDATE, DELETE, REPLACE and CREATE TABLE table AS SELECT
+ and so forth.
+ Naturally an index is needed for an index scan and indexes can either
+ be ordered, hash based. Some ordered indexes can return data in order
+ but not necessarily all of them.
+ There are many flags that define the behavior of indexes in the
+ various handlers. These methods are found in the optimizer module.
+ -------------------------------------------------------------------------
+
+ index_read is called to start a scan of an index. The find_flag defines
+ the semantics of the scan. These flags are defined in
+ include/my_base.h
+ index_read_idx is the same but also initializes index before calling doing
+ the same thing as index_read. Thus it is similar to index_init followed
+ by index_read. This is also how we implement it.
+
+ index_read/index_read_idx does also return the first row. Thus for
+ key lookups, the index_read will be the only call to the handler in
+ the index scan.
+
+ index_init initializes an index before using it and index_end does
+ any end processing needed.
+ */
+ virtual int index_read(byte * buf, const byte * key,
+ uint key_len, enum ha_rkey_function find_flag);
+ virtual int index_read_idx(byte * buf, uint idx, const byte * key,
+ uint key_len, enum ha_rkey_function find_flag);
+ virtual int index_init(uint idx, bool sorted);
+ virtual int index_end();
+
+ /*
+ These methods are used to jump to next or previous entry in the index
+ scan. There are also methods to jump to first and last entry.
+ */
+ virtual int index_next(byte * buf);
+ virtual int index_prev(byte * buf);
+ virtual int index_first(byte * buf);
+ virtual int index_last(byte * buf);
+ virtual int index_next_same(byte * buf, const byte * key, uint keylen);
+ virtual int index_read_last(byte * buf, const byte * key, uint keylen);
+
+ /*
+ read_first_row is virtual method but is only implemented by
+ handler.cc, no storage engine has implemented it so neither
+ will the partition handler.
+
+ virtual int read_first_row(byte *buf, uint primary_key);
+ */
+
+ /*
+ We don't implement multi read range yet, will do later.
+ virtual int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
+ KEY_MULTI_RANGE *ranges, uint range_count,
+ bool sorted, HANDLER_BUFFER *buffer);
+ virtual int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
+ */
+
+
+ virtual int read_range_first(const key_range * start_key,
+ const key_range * end_key,
+ bool eq_range, bool sorted);
+ virtual int read_range_next();
+
+private:
+ int common_index_read(byte * buf, const byte * key,
+ uint key_len, enum ha_rkey_function find_flag);
+ int common_first_last(byte * buf);
+ int partition_scan_set_up(byte * buf, bool idx_read_flag);
+ int handle_unordered_next(byte * buf, bool next_same);
+ int handle_unordered_scan_next_partition(byte * buf);
+ byte *queue_buf(uint part_id)
+ {
+ return (m_ordered_rec_buffer +
+ (part_id * (m_rec_length + PARTITION_BYTES_IN_POS)));
+ }
+ byte *rec_buf(uint part_id)
+ {
+ return (queue_buf(part_id) +
+ PARTITION_BYTES_IN_POS);
+ }
+ int handle_ordered_index_scan(byte * buf);
+ int handle_ordered_next(byte * buf, bool next_same);
+ int handle_ordered_prev(byte * buf);
+ void return_top_record(byte * buf);
+ void include_partition_fields_in_used_fields();
+public:
+ /*
+ -------------------------------------------------------------------------
+ MODULE information calls
+ -------------------------------------------------------------------------
+ This calls are used to inform the handler of specifics of the ongoing
+ scans and other actions. Most of these are used for optimisation
+ purposes.
+ -------------------------------------------------------------------------
+ */
+ virtual void info(uint);
+ virtual int extra(enum ha_extra_function operation);
+ virtual int extra_opt(enum ha_extra_function operation, ulong cachesize);
+ virtual int reset(void);
+
+private:
+ static const uint NO_CURRENT_PART_ID= 0xFFFFFFFF;
+ int loop_extra(enum ha_extra_function operation);
+ void late_extra_cache(uint partition_id);
+ void late_extra_no_cache(uint partition_id);
+ void prepare_extra_cache(uint cachesize);
+public:
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE optimiser support
+ -------------------------------------------------------------------------
+ -------------------------------------------------------------------------
+ */
+
+ /*
+ NOTE !!!!!!
+ -------------------------------------------------------------------------
+ -------------------------------------------------------------------------
+ One important part of the public handler interface that is not depicted in
+ the methods is the attribute records
+
+ which is defined in the base class. This is looked upon directly and is
+ set by calling info(HA_STATUS_INFO) ?
+ -------------------------------------------------------------------------
+ */
+
+ /*
+ keys_to_use_for_scanning can probably be implemented as the
+ intersection of all underlying handlers if mixed handlers are used.
+ This method is used to derive whether an index can be used for
+ index-only scanning when performing an ORDER BY query.
+ Only called from one place in sql_select.cc
+ */
+ virtual const key_map *keys_to_use_for_scanning();
+
+ /*
+ Called in test_quick_select to determine if indexes should be used.
+ */
+ virtual double scan_time();
+
+ /*
+ The next method will never be called if you do not implement indexes.
+ */
+ virtual double read_time(uint index, uint ranges, ha_rows rows);
+ /*
+ For the given range how many records are estimated to be in this range.
+ Used by optimiser to calculate cost of using a particular index.
+ */
+ virtual ha_rows records_in_range(uint inx, key_range * min_key,
+ key_range * max_key);
+
+ /*
+ Upper bound of number records returned in scan is sum of all
+ underlying handlers.
+ */
+ virtual ha_rows estimate_rows_upper_bound();
+
+ /*
+ table_cache_type is implemented by the underlying handler but all
+ underlying handlers must have the same implementation for it to work.
+ */
+ virtual uint8 table_cache_type();
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE print messages
+ -------------------------------------------------------------------------
+ This module contains various methods that returns text messages for
+ table types, index type and error messages.
+ -------------------------------------------------------------------------
+ */
+ /*
+ The name of the index type that will be used for display
+ Here we must ensure that all handlers use the same index type
+ for each index created.
+ */
+ virtual const char *index_type(uint inx);
+
+ /* The name of the table type that will be used for display purposes */
+ virtual const char *table_type() const
+ { return "PARTITION"; }
+
+ /*
+ Handler specific error messages
+ */
+ virtual void print_error(int error, myf errflag);
+ virtual bool get_error_message(int error, String * buf);
+ /*
+ -------------------------------------------------------------------------
+ MODULE handler characteristics
+ -------------------------------------------------------------------------
+ This module contains a number of methods defining limitations and
+ characteristics of the handler. The partition handler will calculate
+ this characteristics based on underlying handler characteristics.
+ -------------------------------------------------------------------------
+
+ This is a list of flags that says what the storage engine
+ implements. The current table flags are documented in handler.h
+ The partition handler will support whatever the underlying handlers
+ support except when specifically mentioned below about exceptions
+ to this rule.
+
+ HA_READ_RND_SAME:
+ Not currently used. (Means that the handler supports the rnd_same() call)
+ (MyISAM, HEAP)
+
+ HA_TABLE_SCAN_ON_INDEX:
+ Used to avoid scanning full tables on an index. If this flag is set then
+ the handler always has a primary key (hidden if not defined) and this
+ index is used for scanning rather than a full table scan in all
+ situations.
+ (InnoDB, BDB, Federated)
+
+ HA_REC_NOT_IN_SEQ:
+ This flag is set for handlers that cannot guarantee that the rows are
+ returned accroding to incremental positions (0, 1, 2, 3...).
+ This also means that rnd_next() should return HA_ERR_RECORD_DELETED
+ if it finds a deleted row.
+ (MyISAM (not fixed length row), BDB, HEAP, NDB, InooDB)
+
+ HA_CAN_GEOMETRY:
+ Can the storage engine handle spatial data.
+ Used to check that no spatial attributes are declared unless
+ the storage engine is capable of handling it.
+ (MyISAM)
+
+ HA_FAST_KEY_READ:
+ Setting this flag indicates that the handler is equally fast in
+ finding a row by key as by position.
+ This flag is used in a very special situation in conjunction with
+ filesort's. For further explanation see intro to init_read_record.
+ (BDB, HEAP, InnoDB)
+
+ HA_NULL_IN_KEY:
+ Is NULL values allowed in indexes.
+ If this is not allowed then it is not possible to use an index on a
+ NULLable field.
+ (BDB, HEAP, MyISAM, NDB, InnoDB)
+
+ HA_DUPP_POS:
+ Tells that we can the position for the conflicting duplicate key
+ record is stored in table->file->dupp_ref. (insert uses rnd_pos() on
+ this to find the duplicated row)
+ (MyISAM)
+
+ HA_CAN_INDEX_BLOBS:
+ Is the storage engine capable of defining an index of a prefix on
+ a BLOB attribute.
+ (BDB, Federated, MyISAM, InnoDB)
+
+ HA_AUTO_PART_KEY:
+ Auto increment fields can be part of a multi-part key. For second part
+ auto-increment keys, the auto_incrementing is done in handler.cc
+ (BDB, Federated, MyISAM, NDB)
+
+ HA_REQUIRE_PRIMARY_KEY:
+ Can't define a table without primary key (and cannot handle a table
+ with hidden primary key)
+ (No handler has this limitation currently)
+
+ HA_NOT_EXACT_COUNT:
+ Does the counter of records after the info call specify an exact
+ value or not. If it doesn't this flag is set.
+ Only MyISAM and HEAP uses exact count.
+ (MyISAM, HEAP, BDB, InnoDB, NDB, Federated)
+
+ HA_CAN_INSERT_DELAYED:
+ Can the storage engine support delayed inserts.
+ To start with the partition handler will not support delayed inserts.
+ Further investigation needed.
+ (HEAP, MyISAM)
+
+ HA_PRIMARY_KEY_IN_READ_INDEX:
+ This parameter is set when the handler will also return the primary key
+ when doing read-only-key on another index.
+
+ HA_NOT_DELETE_WITH_CACHE:
+ Seems to be an old MyISAM feature that is no longer used. No handler
+ has it defined but it is checked in init_read_record.
+ Further investigation needed.
+ (No handler defines it)
+
+ HA_NO_PREFIX_CHAR_KEYS:
+ Indexes on prefixes of character fields is not allowed.
+ (NDB)
+
+ HA_CAN_FULLTEXT:
+ Does the storage engine support fulltext indexes
+ The partition handler will start by not supporting fulltext indexes.
+ (MyISAM)
+
+ HA_CAN_SQL_HANDLER:
+ Can the HANDLER interface in the MySQL API be used towards this
+ storage engine.
+ (MyISAM, InnoDB)
+
+ HA_NO_AUTO_INCREMENT:
+ Set if the storage engine does not support auto increment fields.
+ (Currently not set by any handler)
+
+ HA_HAS_CHECKSUM:
+ Special MyISAM feature. Has special SQL support in CREATE TABLE.
+ No special handling needed by partition handler.
+ (MyISAM)
+
+ HA_FILE_BASED:
+ Should file names always be in lower case (used by engines
+ that map table names to file names.
+ Since partition handler has a local file this flag is set.
+ (BDB, Federated, MyISAM)
+
+ HA_CAN_BIT_FIELD:
+ Is the storage engine capable of handling bit fields?
+ (MyISAM, NDB)
+
+ HA_NEED_READ_RANGE_BUFFER:
+ Is Read Multi-Range supported => need multi read range buffer
+ This parameter specifies whether a buffer for read multi range
+ is needed by the handler. Whether the handler supports this
+ feature or not is dependent of whether the handler implements
+ read_multi_range* calls or not. The only handler currently
+ supporting this feature is NDB so the partition handler need
+ not handle this call. There are methods in handler.cc that will
+ transfer those calls into index_read and other calls in the
+ index scan module.
+ (NDB)
+ */
+ virtual ulong table_flags() const
+ { return m_table_flags; }
+ /*
+ HA_CAN_PARTITION:
+ Used by storage engines that can handle partitioning without this
+ partition handler
+ (Partition, NDB)
+
+ HA_CAN_UPDATE_PARTITION_KEY:
+ Set if the handler can update fields that are part of the partition
+ function.
+
+ HA_CAN_PARTITION_UNIQUE:
+ Set if the handler can handle unique indexes where the fields of the
+ unique key are not part of the fields of the partition function. Thus
+ a unique key can be set on all fields.
+ */
+ virtual ulong partition_flags() const
+ { return HA_CAN_PARTITION; }
+
+ /*
+ This is a bitmap of flags that says how the storage engine
+ implements indexes. The current index flags are documented in
+ handler.h. If you do not implement indexes, just return zero
+ here.
+
+ part is the key part to check. First key part is 0
+ If all_parts it's set, MySQL want to know the flags for the combined
+ index up to and including 'part'.
+
+ HA_READ_NEXT:
+ Does the index support read next, this is assumed in the server
+ code and never checked so all indexes must support this.
+ Note that the handler can be used even if it doesn't have any index.
+ (BDB, HEAP, MyISAM, Federated, NDB, InnoDB)
+
+ HA_READ_PREV:
+ Can the index be used to scan backwards.
+ (BDB, HEAP, MyISAM, NDB, InnoDB)
+
+ HA_READ_ORDER:
+ Can the index deliver its record in index order. Typically true for
+ all ordered indexes and not true for hash indexes.
+ In first step this is not true for partition handler until a merge
+ sort has been implemented in partition handler.
+ Used to set keymap part_of_sortkey
+ This keymap is only used to find indexes usable for resolving an ORDER BY
+ in the query. Thus in most cases index_read will work just fine without
+ order in result production. When this flag is set it is however safe to
+ order all output started by index_read since most engines do this. With
+ read_multi_range calls there is a specific flag setting order or not
+ order so in those cases ordering of index output can be avoided.
+ (BDB, InnoDB, HEAP, MyISAM, NDB)
+
+ HA_READ_RANGE:
+ Specify whether index can handle ranges, typically true for all
+ ordered indexes and not true for hash indexes.
+ Used by optimiser to check if ranges (as key >= 5) can be optimised
+ by index.
+ (BDB, InnoDB, NDB, MyISAM, HEAP)
+
+ HA_ONLY_WHOLE_INDEX:
+ Can't use part key searches. This is typically true for hash indexes
+ and typically not true for ordered indexes.
+ (Federated, NDB, HEAP)
+
+ HA_KEYREAD_ONLY:
+ Does the storage engine support index-only scans on this index.
+ Enables use of HA_EXTRA_KEYREAD and HA_EXTRA_NO_KEYREAD
+ Used to set key_map keys_for_keyread and to check in optimiser for
+ index-only scans. When doing a read under HA_EXTRA_KEYREAD the handler
+ only have to fill in the columns the key covers. If
+ HA_PRIMARY_KEY_IN_READ_INDEX is set then also the PRIMARY KEY columns
+ must be updated in the row.
+ (BDB, InnoDB, MyISAM)
+ */
+ virtual ulong index_flags(uint inx, uint part, bool all_parts) const
+ {
+ return m_file[0]->index_flags(inx, part, all_parts);
+ }
+
+ /*
+ extensions of table handler files
+ */
+ virtual const char **bas_ext() const;
+ /*
+ unireg.cc will call the following to make sure that the storage engine
+ can handle the data it is about to send.
+
+ The maximum supported values is the minimum of all handlers in the table
+ */
+ uint min_of_the_max_uint(uint (handler::*operator_func)(void) const) const;
+ virtual uint max_supported_record_length() const;
+ virtual uint max_supported_keys() const;
+ virtual uint max_supported_key_parts() const;
+ virtual uint max_supported_key_length() const;
+ virtual uint max_supported_key_part_length() const;
+
+ /*
+ All handlers in a partitioned table must have the same low_byte_first
+ */
+ virtual bool low_byte_first() const
+ { return m_low_byte_first; }
+
+ /*
+ The extra record buffer length is the maximum needed by all handlers.
+ The minimum record length is the maximum of all involved handlers.
+ */
+ virtual uint extra_rec_buf_length() const;
+ virtual uint min_record_length(uint options) const;
+
+ /*
+ Transactions on the table is supported if all handlers below support
+ transactions.
+ */
+ virtual bool has_transactions()
+ { return m_has_transactions; }
+
+ /*
+ Primary key is clustered can only be true if all underlying handlers have
+ this feature.
+ */
+ virtual bool primary_key_is_clustered()
+ { return m_pkey_is_clustered; }
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE compare records
+ -------------------------------------------------------------------------
+ cmp_ref checks if two references are the same. For most handlers this is
+ a simple memcmp of the reference. However some handlers use primary key
+ as reference and this can be the same even if memcmp says they are
+ different. This is due to character sets and end spaces and so forth.
+ For the partition handler the reference is first two bytes providing the
+ partition identity of the referred record and then the reference of the
+ underlying handler.
+ Thus cmp_ref for the partition handler always returns FALSE for records
+ not in the same partition and uses cmp_ref on the underlying handler
+ to check whether the rest of the reference part is also the same.
+ -------------------------------------------------------------------------
+ */
+ virtual int cmp_ref(const byte * ref1, const byte * ref2);
+ /*
+ -------------------------------------------------------------------------
+ MODULE auto increment
+ -------------------------------------------------------------------------
+ This module is used to handle the support of auto increments.
+
+ This variable in the handler is used as part of the handler interface
+ It is maintained by the parent handler object and should not be
+ touched by child handler objects (see handler.cc for its use).
+
+ auto_increment_column_changed
+ -------------------------------------------------------------------------
+ */
+ virtual void restore_auto_increment();
+ virtual ulonglong get_auto_increment();
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE initialise handler for HANDLER call
+ -------------------------------------------------------------------------
+ This method is a special InnoDB method called before a HANDLER query.
+ -------------------------------------------------------------------------
+ */
+ virtual void init_table_handle_for_HANDLER();
+
+ /*
+ The remainder of this file defines the handler methods not implemented
+ by the partition handler
+ */
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE foreign key support
+ -------------------------------------------------------------------------
+ The following methods are used to implement foreign keys as supported by
+ InnoDB. Implement this ??
+ get_foreign_key_create_info is used by SHOW CREATE TABLE to get a textual
+ description of how the CREATE TABLE part to define FOREIGN KEY's is done.
+ free_foreign_key_create_info is used to free the memory area that provided
+ this description.
+ -------------------------------------------------------------------------
+
+ virtual char* get_foreign_key_create_info()
+ virtual void free_foreign_key_create_info(char* str)
+
+ virtual int get_foreign_key_list(THD *thd,
+ List<FOREIGN_KEY_INFO> *f_key_list)
+ virtual uint referenced_by_foreign_key()
+ */
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE fulltext index
+ -------------------------------------------------------------------------
+ Fulltext stuff not yet.
+ -------------------------------------------------------------------------
+ virtual int ft_init() { return HA_ERR_WRONG_COMMAND; }
+ virtual FT_INFO *ft_init_ext(uint flags,uint inx,const byte *key,
+ uint keylen)
+ { return NULL; }
+ virtual int ft_read(byte *buf) { return HA_ERR_WRONG_COMMAND; }
+ */
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE restart full table scan at position (MyISAM)
+ -------------------------------------------------------------------------
+ The following method is only used by MyISAM when used as
+ temporary tables in a join.
+ virtual int restart_rnd_next(byte *buf, byte *pos);
+ */
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE on-line ALTER TABLE
+ -------------------------------------------------------------------------
+ These methods are in the handler interface but never used (yet)
+ They are to be used by on-line alter table add/drop index:
+ -------------------------------------------------------------------------
+ virtual ulong index_ddl_flags(KEY *wanted_index) const
+ virtual int add_index(TABLE *table_arg,KEY *key_info,uint num_of_keys);
+ virtual int drop_index(TABLE *table_arg,uint *key_num,uint num_of_keys);
+ */
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE tablespace support
+ -------------------------------------------------------------------------
+ Admin of table spaces is not applicable to the partition handler (InnoDB)
+ This means that the following method is not implemented:
+ -------------------------------------------------------------------------
+ virtual int discard_or_import_tablespace(my_bool discard)
+ */
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE admin MyISAM
+ -------------------------------------------------------------------------
+ Admin commands not supported currently (almost purely MyISAM routines)
+ This means that the following methods are not implemented:
+ -------------------------------------------------------------------------
+
+ virtual int check(THD* thd, HA_CHECK_OPT *check_opt);
+ virtual int backup(TD* thd, HA_CHECK_OPT *check_opt);
+ virtual int restore(THD* thd, HA_CHECK_OPT *check_opt);
+ virtual int repair(THD* thd, HA_CHECK_OPT *check_opt);
+ virtual int optimize(THD* thd, HA_CHECK_OPT *check_opt);
+ virtual int analyze(THD* thd, HA_CHECK_OPT *check_opt);
+ virtual int assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt);
+ virtual int preload_keys(THD *thd, HA_CHECK_OPT *check_opt);
+ virtual bool check_and_repair(THD *thd);
+ virtual int dump(THD* thd, int fd = -1);
+ virtual int net_read_dump(NET* net);
+ virtual uint checksum() const;
+ virtual bool is_crashed() const;
+ virtual bool auto_repair() const;
+
+ -------------------------------------------------------------------------
+ MODULE enable/disable indexes
+ -------------------------------------------------------------------------
+ Enable/Disable Indexes are not supported currently (Heap, MyISAM)
+ This means that the following methods are not implemented:
+ -------------------------------------------------------------------------
+ virtual int disable_indexes(uint mode);
+ virtual int enable_indexes(uint mode);
+ virtual int indexes_are_disabled(void);
+ */
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE append_create_info
+ -------------------------------------------------------------------------
+ append_create_info is only used by MyISAM MERGE tables and the partition
+ handler will not support this handler as underlying handler.
+ Implement this??
+ -------------------------------------------------------------------------
+ virtual void append_create_info(String *packet)
+ */
+};
diff --git a/sql/handler.cc b/sql/handler.cc
index bf2ce0dad4e..512363f71d7 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -34,6 +34,9 @@
#ifdef HAVE_EXAMPLE_DB
#include "examples/ha_example.h"
#endif
+#ifdef HAVE_PARTITION_DB
+#include "ha_partition.h"
+#endif
#ifdef HAVE_ARCHIVE_DB
#include "examples/ha_archive.h"
#endif
@@ -170,7 +173,13 @@ enum db_type ha_checktype(THD *thd, enum db_type database_type,
{
if (ha_storage_engine_is_enabled(database_type))
return database_type;
-
+#ifdef HAVE_PARTITION_DB
+ /*
+ Partition handler is not in the list of handlers shown since it is an internal handler
+ */
+ if (database_type == DB_TYPE_PARTITION_DB)
+ return database_type;
+#endif
if (no_substitute)
{
if (report_error)
@@ -236,6 +245,13 @@ handler *get_new_handler(TABLE *table, enum db_type db_type)
file= new ha_example(table);
break;
#endif
+#ifdef HAVE_PARTITION_DB
+ case DB_TYPE_PARTITION_DB:
+ {
+ file= new ha_partition(table);
+ break;
+ }
+#endif
#ifdef HAVE_ARCHIVE_DB
case DB_TYPE_ARCHIVE_DB:
file= new ha_archive(table);
@@ -290,6 +306,29 @@ handler *get_new_handler(TABLE *table, enum db_type db_type)
return file;
}
+
+#ifdef HAVE_PARTITION_DB
+handler *get_ha_partition(partition_info *part_info)
+{
+ ha_partition *partition;
+ DBUG_ENTER("get_ha_partition");
+ if ((partition= new ha_partition(part_info)))
+ {
+ if (partition->ha_initialise())
+ {
+ delete partition;
+ partition= 0;
+ }
+ }
+ else
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(ha_partition));
+ }
+ DBUG_RETURN(((handler*) partition));
+}
+#endif
+
+
/*
Register handler error messages for use with my_error().
@@ -1406,27 +1445,41 @@ int handler::ha_allocate_read_write_set(ulong no_fields)
my_bool r;
#endif
DBUG_ENTER("ha_allocate_read_write_set");
- DBUG_PRINT("info", ("no_fields = %d", no_fields));
- read_set= (MY_BITMAP*)sql_alloc(sizeof(MY_BITMAP));
- write_set= (MY_BITMAP*)sql_alloc(sizeof(MY_BITMAP));
- read_buf= (uint32*)sql_alloc(bitmap_size);
- write_buf= (uint32*)sql_alloc(bitmap_size);
- if (!read_set || !write_set || !read_buf || !write_buf)
- {
- ha_deallocate_read_write_set();
- DBUG_RETURN(TRUE);
- }
+ DBUG_PRINT("enter", ("no_fields = %d", no_fields));
+
+ if (table)
+ {
+ if (table->read_set == NULL)
+ {
+ read_set= (MY_BITMAP*)sql_alloc(sizeof(MY_BITMAP));
+ write_set= (MY_BITMAP*)sql_alloc(sizeof(MY_BITMAP));
+ read_buf= (uint32*)sql_alloc(bitmap_size);
+ write_buf= (uint32*)sql_alloc(bitmap_size);
+ if (!read_set || !write_set || !read_buf || !write_buf)
+ {
+ ha_deallocate_read_write_set();
+ DBUG_RETURN(TRUE);
+ }
#ifndef DEBUG_OFF
- r =
+ r =
#endif
- bitmap_init(read_set, read_buf, no_fields+1, FALSE);
- DBUG_ASSERT(!r /*bitmap_init(read_set...)*/);
+ bitmap_init(read_set, read_buf, no_fields+1, FALSE);
+ DBUG_ASSERT(!r /*bitmap_init(read_set...)*/);
#ifndef DEBUG_OFF
- r =
+ r =
#endif
- bitmap_init(write_set, write_buf, no_fields+1, FALSE);
- DBUG_ASSERT(!r /*bitmap_init(write_set...)*/);
- ha_clear_all_set();
+ bitmap_init(write_set, write_buf, no_fields+1, FALSE);
+ DBUG_ASSERT(!r /*bitmap_init(write_set...)*/);
+ table->read_set= read_set;
+ table->write_set= write_set;
+ ha_clear_all_set();
+ }
+ else
+ {
+ read_set= table->read_set;
+ write_set= table->write_set;
+ }
+ }
DBUG_RETURN(FALSE);
}
@@ -1476,6 +1529,8 @@ void handler::ha_set_primary_key_in_read_set()
}
DBUG_VOID_RETURN;
}
+
+
/*
Read first row (only) from a table
This is never called for InnoDB or BDB tables, as these table types
@@ -1504,7 +1559,7 @@ int handler::read_first_row(byte * buf, uint primary_key)
else
{
/* Find the first row through the primary key */
- (void) ha_index_init(primary_key);
+ (void) ha_index_init(primary_key, 0);
error=index_first(buf);
(void) ha_index_end();
}
@@ -1688,7 +1743,7 @@ ulonglong handler::get_auto_increment()
int error;
(void) extra(HA_EXTRA_KEYREAD);
- index_init(table->s->next_number_index);
+ index_init(table->s->next_number_index, 1);
if (!table->s->next_number_key_offset)
{ // Autoincrement at key-start
error=index_last(table->record[1]);
@@ -2512,7 +2567,7 @@ int handler::compare_key(key_range *range)
int handler::index_read_idx(byte * buf, uint index, const byte * key,
uint key_len, enum ha_rkey_function find_flag)
{
- int error= ha_index_init(index);
+ int error= ha_index_init(index, 0);
if (!error)
error= index_read(buf, key, key_len, find_flag);
if (!error)
diff --git a/sql/handler.h b/sql/handler.h
index d7fe19ad884..d06ae062fb3 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -89,6 +89,11 @@
#define HA_NEED_READ_RANGE_BUFFER (1 << 29) /* for read_multi_range */
#define HA_ANY_INDEX_MAY_BE_UNIQUE (1 << 30)
+/* Flags for partition handlers */
+#define HA_CAN_PARTITION (1 << 0) /* Partition support */
+#define HA_CAN_UPDATE_PARTITION_KEY (1 << 1)
+#define HA_CAN_PARTITION_UNIQUE (1 << 2)
+
/* bits in index_flags(index_number) for what you can do with index */
#define HA_READ_NEXT 1 /* TODO really use this flag */
@@ -172,6 +177,7 @@ enum db_type
DB_TYPE_EXAMPLE_DB, DB_TYPE_ARCHIVE_DB, DB_TYPE_CSV_DB,
DB_TYPE_FEDERATED_DB,
DB_TYPE_BLACKHOLE_DB,
+ DB_TYPE_PARTITION_DB,
DB_TYPE_DEFAULT // Must be last
};
@@ -364,6 +370,208 @@ typedef struct st_thd_trans
enum enum_tx_isolation { ISO_READ_UNCOMMITTED, ISO_READ_COMMITTED,
ISO_REPEATABLE_READ, ISO_SERIALIZABLE};
+
+typedef struct {
+ uint32 start_part;
+ uint32 end_part;
+ bool use_bit_array;
+} part_id_range;
+/**
+ * An enum and a struct to handle partitioning and subpartitioning.
+ */
+enum partition_type {
+ NOT_A_PARTITION= 0,
+ RANGE_PARTITION,
+ HASH_PARTITION,
+ LIST_PARTITION
+};
+
+#define UNDEF_NODEGROUP 65535
+class Item;
+
+class partition_element :public Sql_alloc {
+public:
+ List<partition_element> subpartitions;
+ List<Item> list_expr_list;
+ ulonglong part_max_rows;
+ ulonglong part_min_rows;
+ char *partition_name;
+ char *tablespace_name;
+ Item* range_expr;
+ char* part_comment;
+ char* data_file_name;
+ char* index_file_name;
+ enum db_type engine_type;
+ uint16 nodegroup_id;
+
+ partition_element()
+ : part_max_rows(0), part_min_rows(0), partition_name(NULL),
+ tablespace_name(NULL), range_expr(NULL), part_comment(NULL),
+ data_file_name(NULL), index_file_name(NULL),
+ engine_type(DB_TYPE_UNKNOWN), nodegroup_id(UNDEF_NODEGROUP)
+ {
+ subpartitions.empty();
+ list_expr_list.empty();
+ }
+ ~partition_element() {}
+};
+
+typedef struct {
+ longlong list_value;
+ uint partition_id;
+} LIST_PART_ENTRY;
+enum Item_result;
+
+class partition_info;
+
+typedef bool (*get_part_id_func)(partition_info *part_info,
+ uint32 *part_id);
+typedef uint32 (*get_subpart_id_func)(partition_info *part_info);
+
+class partition_info :public Sql_alloc {
+public:
+ /*
+ * Here comes a set of definitions needed for partitioned table handlers.
+ */
+ List<partition_element> partitions;
+
+ List<char> part_field_list;
+ List<char> subpart_field_list;
+
+ get_part_id_func get_partition_id;
+ get_part_id_func get_part_partition_id;
+ get_subpart_id_func get_subpartition_id;
+
+ Field **part_field_array;
+ Field **subpart_field_array;
+ Field **full_part_field_array;
+
+ Item *part_expr;
+ Item *subpart_expr;
+
+ Item *item_free_list;
+
+ union {
+ longlong *range_int_array;
+ LIST_PART_ENTRY *list_array;
+ };
+ char* part_info_string;
+
+ char *part_func_string;
+ char *subpart_func_string;
+
+ partition_element *curr_part_elem;
+ partition_element *current_partition;
+ /*
+ These key_map's are used for Partitioning to enable quick decisions
+ on whether we can derive more information about which partition to
+ scan just by looking at what index is used.
+ */
+ key_map all_fields_in_PF, all_fields_in_PPF, all_fields_in_SPF;
+ key_map some_fields_in_PF;
+
+ enum db_type default_engine_type;
+ Item_result part_result_type;
+ partition_type part_type;
+ partition_type subpart_type;
+
+ uint part_info_len;
+ uint part_func_len;
+ uint subpart_func_len;
+
+ uint no_full_parts;
+ uint no_parts;
+ uint no_subparts;
+ uint count_curr_parts;
+ uint count_curr_subparts;
+
+ uint part_error_code;
+
+ uint no_list_values;
+
+ uint no_part_fields;
+ uint no_subpart_fields;
+ uint no_full_part_fields;
+
+ uint16 linear_hash_mask;
+
+ bool use_default_partitions;
+ bool use_default_subpartitions;
+ bool defined_max_value;
+ bool list_of_part_fields;
+ bool list_of_subpart_fields;
+ bool linear_hash_ind;
+
+ partition_info()
+ : get_partition_id(NULL), get_part_partition_id(NULL),
+ get_subpartition_id(NULL),
+ part_field_array(NULL), subpart_field_array(NULL),
+ full_part_field_array(NULL),
+ part_expr(NULL), subpart_expr(NULL), item_free_list(NULL),
+ list_array(NULL),
+ part_info_string(NULL),
+ part_func_string(NULL), subpart_func_string(NULL),
+ curr_part_elem(NULL), current_partition(NULL),
+ default_engine_type(DB_TYPE_UNKNOWN),
+ part_result_type(INT_RESULT),
+ part_type(NOT_A_PARTITION), subpart_type(NOT_A_PARTITION),
+ part_info_len(0), part_func_len(0), subpart_func_len(0),
+ no_full_parts(0), no_parts(0), no_subparts(0),
+ count_curr_parts(0), count_curr_subparts(0), part_error_code(0),
+ no_list_values(0), no_part_fields(0), no_subpart_fields(0),
+ no_full_part_fields(0), linear_hash_mask(0),
+ use_default_partitions(TRUE),
+ use_default_subpartitions(TRUE), defined_max_value(FALSE),
+ list_of_part_fields(FALSE), list_of_subpart_fields(FALSE),
+ linear_hash_ind(FALSE)
+ {
+ all_fields_in_PF.clear_all();
+ all_fields_in_PPF.clear_all();
+ all_fields_in_SPF.clear_all();
+ some_fields_in_PF.clear_all();
+ partitions.empty();
+ part_field_list.empty();
+ subpart_field_list.empty();
+ }
+ ~partition_info() {}
+};
+
+
+#ifdef HAVE_PARTITION_DB
+/*
+ Answers the question if subpartitioning is used for a certain table
+ SYNOPSIS
+ is_sub_partitioned()
+ part_info A reference to the partition_info struct
+ RETURN VALUE
+ Returns true if subpartitioning used and false otherwise
+ DESCRIPTION
+ A routine to check for subpartitioning for improved readability of code
+*/
+inline
+bool is_sub_partitioned(partition_info *part_info)
+{ return (part_info->subpart_type == NOT_A_PARTITION ? FALSE : TRUE); }
+
+
+/*
+ Returns the total number of partitions on the leaf level.
+ SYNOPSIS
+ get_tot_partitions()
+ part_info A reference to the partition_info struct
+ RETURN VALUE
+ Returns the number of partitions
+ DESCRIPTION
+ A routine to check for number of partitions for improved readability
+ of code
+*/
+inline
+uint get_tot_partitions(partition_info *part_info)
+{
+ return part_info->no_parts *
+ (is_sub_partitioned(part_info) ? part_info->no_subparts : 1);
+}
+#endif
+
typedef struct st_ha_create_information
{
CHARSET_INFO *table_charset, *default_table_charset;
@@ -412,6 +620,31 @@ typedef struct st_ha_check_opt
} HA_CHECK_OPT;
+#ifdef HAVE_PARTITION_DB
+handler *get_ha_partition(partition_info *part_info);
+int get_parts_for_update(const byte *old_data, byte *new_data,
+ const byte *rec0, partition_info *part_info,
+ uint32 *old_part_id, uint32 *new_part_id);
+int get_part_for_delete(const byte *buf, const byte *rec0,
+ partition_info *part_info, uint32 *part_id);
+bool check_partition_info(partition_info *part_info,enum db_type eng_type,
+ handler *file, ulonglong max_rows);
+bool fix_partition_func(THD *thd, const char *name, TABLE *table);
+char *generate_partition_syntax(partition_info *part_info,
+ uint *buf_length, bool use_sql_alloc);
+bool partition_key_modified(TABLE *table, List<Item> &fields);
+void get_partition_set(const TABLE *table, byte *buf, const uint index,
+ const key_range *key_spec,
+ part_id_range *part_spec);
+void get_full_part_id_from_key(const TABLE *table, byte *buf,
+ KEY *key_info,
+ const key_range *key_spec,
+ part_id_range *part_spec);
+bool mysql_unpack_partition(File file, THD *thd, uint part_info_len,
+ TABLE *table);
+#endif
+
+
/*
This is a buffer area that the handler can use to store rows.
'end_of_used_area' should be kept updated after calls to
@@ -429,10 +662,13 @@ typedef struct st_handler_buffer
class handler :public Sql_alloc
{
+#ifdef HAVE_PARTITION_DB
+ friend class ha_partition;
+#endif
protected:
struct st_table *table; /* The table definition */
- virtual int index_init(uint idx) { active_index=idx; return 0; }
+ virtual int index_init(uint idx, bool sorted) { active_index=idx; return 0; }
virtual int index_end() { active_index=MAX_KEY; return 0; }
/*
rnd_init() can be called two times without rnd_end() in between
@@ -518,7 +754,7 @@ public:
{ return rows2double(ranges+rows); }
virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
virtual bool has_transactions(){ return 0;}
- virtual uint extra_rec_buf_length() { return 0; }
+ virtual uint extra_rec_buf_length() const { return 0; }
/*
Return upper bound of current number of records in the table
@@ -537,12 +773,12 @@ public:
virtual const char *index_type(uint key_number) { DBUG_ASSERT(0); return "";}
- int ha_index_init(uint idx)
+ int ha_index_init(uint idx, bool sorted)
{
DBUG_ENTER("ha_index_init");
DBUG_ASSERT(inited==NONE);
inited=INDEX;
- DBUG_RETURN(index_init(idx));
+ DBUG_RETURN(index_init(idx, sorted));
}
int ha_index_end()
{
@@ -902,6 +1138,10 @@ public:
virtual const char *table_type() const =0;
virtual const char **bas_ext() const =0;
virtual ulong table_flags(void) const =0;
+#ifdef HAVE_PARTITION_DB
+ virtual ulong partition_flags(void) const { return 0;}
+ virtual int get_default_no_partitions(ulonglong max_rows) { return 1;}
+#endif
virtual ulong index_flags(uint idx, uint part, bool all_parts) const =0;
virtual ulong index_ddl_flags(KEY *wanted_index) const
{ return (HA_DDL_SUPPORT); }
@@ -941,6 +1181,7 @@ public:
virtual int delete_table(const char *name);
virtual int create(const char *name, TABLE *form, HA_CREATE_INFO *info)=0;
+ virtual int create_handler_files(const char *name) { return FALSE;}
/* lock_count() can be more than one if the table is a MERGE */
virtual uint lock_count(void) const { return 1; }
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index ad1c9977e5b..903f4c953a2 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -1492,7 +1492,7 @@ int subselect_uniquesubquery_engine::exec()
}
if (!table->file->inited)
- table->file->ha_index_init(tab->ref.key);
+ table->file->ha_index_init(tab->ref.key, 0);
error= table->file->index_read(table->record[0],
tab->ref.key_buff,
tab->ref.key_length,HA_READ_KEY_EXACT);
@@ -1545,7 +1545,7 @@ int subselect_indexsubquery_engine::exec()
}
if (!table->file->inited)
- table->file->ha_index_init(tab->ref.key);
+ table->file->ha_index_init(tab->ref.key, 1);
error= table->file->index_read(table->record[0],
tab->ref.key_buff,
tab->ref.key_length,HA_READ_KEY_EXACT);
diff --git a/sql/key.cc b/sql/key.cc
index 4bd71d2fa47..f1e073a4775 100644
--- a/sql/key.cc
+++ b/sql/key.cc
@@ -429,3 +429,86 @@ int key_cmp(KEY_PART_INFO *key_part, const byte *key, uint key_length)
}
return 0; // Keys are equal
}
+
+
+/*
+ Compare two records in index order
+ SYNOPSIS
+ key_rec_cmp()
+ key Index information
+ rec0 Pointer to table->record[0]
+ first_rec Pointer to record compare with
+ second_rec Pointer to record compare against first_rec
+ DESCRIPTION
+ This method is set-up such that it can be called directly from the
+ priority queue and it is attempted to be optimised as much as possible
+ since this will be called O(N * log N) times while performing a merge
+ sort in various places in the code.
+
+ We retrieve the pointer to table->record[0] using the fact that key_parts
+ have an offset making it possible to calculate the start of the record.
+ We need to get the diff to the compared record since none of the records
+ being compared are stored in table->record[0].
+
+ We first check for NULL values, if there are no NULL values we use
+ a compare method that gets two field pointers and a max length
+ and return the result of the comparison.
+*/
+
+int key_rec_cmp(void *key, byte *first_rec, byte *second_rec)
+{
+ KEY *key_info= (KEY*)key;
+ uint key_parts= key_info->key_parts, i= 0;
+ KEY_PART_INFO *key_part= key_info->key_part;
+ char *rec0= key_part->field->ptr - key_part->offset;
+ my_ptrdiff_t first_diff= first_rec - rec0, sec_diff= second_rec - rec0;
+ int result= 0;
+ DBUG_ENTER("key_rec_cmp");
+
+ do
+ {
+ Field *field= key_part->field;
+ uint length;
+
+ if (key_part->null_bit)
+ {
+ /* The key_part can contain NULL values */
+ bool first_is_null= field->is_null(first_diff);
+ bool sec_is_null= field->is_null(sec_diff);
+ /*
+ NULL is smaller then everything so if first is NULL and the other
+ not then we know that we should return -1 and for the opposite
+ we should return +1. If both are NULL then we call it equality
+ although it is a strange form of equality, we have equally little
+ information of the real value.
+ */
+ if (!first_is_null)
+ {
+ if (!sec_is_null)
+ ; /* Fall through, no NULL fields */
+ else
+ {
+ DBUG_RETURN(+1);
+ }
+ }
+ else if (!sec_is_null)
+ {
+ DBUG_RETURN(-1);
+ }
+ else
+ goto next_loop; /* Both were NULL */
+ }
+ /*
+ No null values in the fields
+ We use the virtual method cmp_max with a max length parameter.
+ For most field types this translates into a cmp without
+ max length. The exceptions are the BLOB and VARCHAR field types
+ that take the max length into account.
+ */
+ result= field->cmp_max(field->ptr+first_diff, field->ptr+sec_diff,
+ key_part->length);
+next_loop:
+ key_part++;
+ } while (!result && ++i < key_parts);
+ DBUG_RETURN(result);
+}
diff --git a/sql/lex.h b/sql/lex.h
index aa10328ced0..59ba6a8e15b 100644
--- a/sql/lex.h
+++ b/sql/lex.h
@@ -274,11 +274,14 @@ static SYMBOL symbols[] = {
{ "LEAVE", SYM(LEAVE_SYM)},
{ "LEAVES", SYM(LEAVES)},
{ "LEFT", SYM(LEFT)},
+ { "LESS", SYM(LESS_SYM)},
{ "LEVEL", SYM(LEVEL_SYM)},
{ "LIKE", SYM(LIKE)},
{ "LIMIT", SYM(LIMIT)},
+ { "LINEAR", SYM(LINEAR_SYM)},
{ "LINES", SYM(LINES)},
{ "LINESTRING", SYM(LINESTRING)},
+ { "LIST", SYM(LIST_SYM)},
{ "LOAD", SYM(LOAD)},
{ "LOCAL", SYM(LOCAL_SYM)},
{ "LOCALTIME", SYM(NOW_SYM)},
@@ -312,6 +315,7 @@ static SYMBOL symbols[] = {
{ "MAX_ROWS", SYM(MAX_ROWS)},
{ "MAX_UPDATES_PER_HOUR", SYM(MAX_UPDATES_PER_HOUR)},
{ "MAX_USER_CONNECTIONS", SYM(MAX_USER_CONNECTIONS_SYM)},
+ { "MAXVALUE", SYM(MAX_VALUE_SYM)},
{ "MEDIUM", SYM(MEDIUM_SYM)},
{ "MEDIUMBLOB", SYM(MEDIUMBLOB)},
{ "MEDIUMINT", SYM(MEDIUMINT)},
@@ -343,6 +347,7 @@ static SYMBOL symbols[] = {
{ "NEW", SYM(NEW_SYM)},
{ "NEXT", SYM(NEXT_SYM)},
{ "NO", SYM(NO_SYM)},
+ { "NODEGROUP", SYM(NODEGROUP_SYM)},
{ "NONE", SYM(NONE_SYM)},
{ "NOT", SYM(NOT_SYM)},
{ "NO_WRITE_TO_BINLOG", SYM(NO_WRITE_TO_BINLOG)},
@@ -365,6 +370,10 @@ static SYMBOL symbols[] = {
{ "OUTFILE", SYM(OUTFILE)},
{ "PACK_KEYS", SYM(PACK_KEYS_SYM)},
{ "PARTIAL", SYM(PARTIAL)},
+#ifdef HAVE_PARTITION_DB
+ { "PARTITION", SYM(PARTITION_SYM)},
+#endif
+ { "PARTITIONS", SYM(PARTITIONS_SYM)},
{ "PASSWORD", SYM(PASSWORD)},
{ "PHASE", SYM(PHASE_SYM)},
{ "POINT", SYM(POINT_SYM)},
@@ -385,6 +394,7 @@ static SYMBOL symbols[] = {
{ "RAID_CHUNKS", SYM(RAID_CHUNKS)},
{ "RAID_CHUNKSIZE", SYM(RAID_CHUNKSIZE)},
{ "RAID_TYPE", SYM(RAID_TYPE)},
+ { "RANGE", SYM(RANGE_SYM)},
{ "READ", SYM(READ_SYM)},
{ "READS", SYM(READS_SYM)},
{ "REAL", SYM(REAL)},
@@ -476,6 +486,8 @@ static SYMBOL symbols[] = {
{ "STRING", SYM(STRING_SYM)},
{ "STRIPED", SYM(RAID_STRIPED_SYM)},
{ "SUBJECT", SYM(SUBJECT_SYM)},
+ { "SUBPARTITION", SYM(SUBPARTITION_SYM)},
+ { "SUBPARTITIONS", SYM(SUBPARTITIONS_SYM)},
{ "SUPER", SYM(SUPER_SYM)},
{ "SUSPEND", SYM(SUSPEND_SYM)},
{ "TABLE", SYM(TABLE_SYM)},
@@ -485,6 +497,7 @@ static SYMBOL symbols[] = {
{ "TEMPTABLE", SYM(TEMPTABLE_SYM)},
{ "TERMINATED", SYM(TERMINATED)},
{ "TEXT", SYM(TEXT_SYM)},
+ { "THAN", SYM(THAN_SYM)},
{ "THEN", SYM(THEN_SYM)},
{ "TIME", SYM(TIME_SYM)},
{ "TIMESTAMP", SYM(TIMESTAMP)},
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index 83f2903c8fc..91287a8bd40 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -614,6 +614,18 @@ bool check_table_access(THD *thd, ulong want_access, TABLE_LIST *tables,
bool no_errors);
bool check_global_access(THD *thd, ulong want_access);
+/*
+ General routine to change field->ptr of a NULL-terminated array of Field
+ objects. Useful when needed to call val_int, val_str or similar and the
+ field data is not in table->record[0] but in some other structure.
+ set_key_field_ptr changes all fields of an index using a key_info object.
+ All methods presume that there is at least one field to change.
+*/
+
+void set_field_ptr(Field **ptr, const byte *new_buf, const byte *old_buf);
+void set_key_field_ptr(KEY *key_info, const byte *new_buf,
+ const byte *old_buf);
+
bool mysql_backup_table(THD* thd, TABLE_LIST* table_list);
bool mysql_restore_table(THD* thd, TABLE_LIST* table_list);
@@ -772,6 +784,9 @@ Field *
find_field_in_real_table(THD *thd, TABLE *table, const char *name,
uint length, bool check_grants, bool allow_rowid,
uint *cached_field_index_ptr);
+Field *
+find_field_in_table_sef(TABLE *table, const char *name);
+
#ifdef HAVE_OPENSSL
#include <openssl/des.h>
struct st_des_keyblock
@@ -1020,6 +1035,7 @@ bool key_cmp_if_same(TABLE *form,const byte *key,uint index,uint key_length);
void key_unpack(String *to,TABLE *form,uint index);
bool check_if_key_used(TABLE *table, uint idx, List<Item> &fields);
int key_cmp(KEY_PART_INFO *key_part, const byte *key, uint key_length);
+int key_rec_cmp(void *key_info, byte *a, byte *b);
bool init_errmessage(void);
void sql_perror(const char *message);
@@ -1188,6 +1204,7 @@ extern SHOW_COMP_OPTION have_query_cache;
extern SHOW_COMP_OPTION have_geometry, have_rtree_keys;
extern SHOW_COMP_OPTION have_crypt;
extern SHOW_COMP_OPTION have_compress;
+extern SHOW_COMP_OPTION have_partition_db;
#ifndef __WIN__
extern pthread_t signal_thread;
@@ -1238,7 +1255,7 @@ bool mysql_create_frm(THD *thd, my_string file_name,
uint key_count,KEY *key_info,handler *db_type);
int rea_create_table(THD *thd, my_string file_name,HA_CREATE_INFO *create_info,
List<create_field> &create_field,
- uint key_count,KEY *key_info);
+ uint key_count,KEY *key_info, handler *file);
int format_number(uint inputflag,uint max_length,my_string pos,uint length,
my_string *errpos);
int openfrm(THD *thd, const char *name,const char *alias,uint filestat,
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 30bb84a3cb4..80670f2a445 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -325,6 +325,7 @@ my_bool opt_ndb_shm, opt_ndb_optimized_node_selection;
ulong opt_ndb_cache_check_time;
const char *opt_ndb_mgmd;
ulong opt_ndb_nodeid;
+bool opt_ndb_linear_hash;
#endif
my_bool opt_readonly, use_temp_pool, relay_log_purge;
my_bool opt_sync_frm, opt_allow_suspicious_udfs;
@@ -430,6 +431,7 @@ CHARSET_INFO *national_charset_info, *table_alias_charset;
SHOW_COMP_OPTION have_berkeley_db, have_innodb, have_isam, have_ndbcluster,
have_example_db, have_archive_db, have_csv_db;
SHOW_COMP_OPTION have_federated_db;
+SHOW_COMP_OPTION have_partition_db;
SHOW_COMP_OPTION have_raid, have_openssl, have_symlink, have_query_cache;
SHOW_COMP_OPTION have_geometry, have_rtree_keys;
SHOW_COMP_OPTION have_crypt, have_compress;
@@ -4235,6 +4237,7 @@ enum options_mysqld
OPT_NDB_FORCE_SEND, OPT_NDB_AUTOINCREMENT_PREFETCH_SZ,
OPT_NDB_SHM, OPT_NDB_OPTIMIZED_NODE_SELECTION, OPT_NDB_CACHE_CHECK_TIME,
OPT_NDB_MGMD, OPT_NDB_NODEID,
+ OPT_NDB_LINEAR_HASH,
OPT_SKIP_SAFEMALLOC,
OPT_TEMP_POOL, OPT_TX_ISOLATION, OPT_COMPLETION_TYPE,
OPT_SKIP_STACK_TRACE, OPT_SKIP_SYMLINKS,
@@ -4800,6 +4803,16 @@ Disable with --skip-ndbcluster (will save memory).",
(gptr*) &global_system_variables.ndb_autoincrement_prefetch_sz,
(gptr*) &global_system_variables.ndb_autoincrement_prefetch_sz,
0, GET_ULONG, REQUIRED_ARG, 32, 1, 256, 0, 0, 0},
+ {"ndb-use-linear-hash", OPT_NDB_LINEAR_HASH,
+ "Flag to indicate whether to use linear hash for default in new tables",
+ (gptr*) &opt_ndb_linear_hash,
+ (gptr*) &opt_ndb_linear_hash,
+ 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
+ {"ndb_use_linear_hash", OPT_NDB_LINEAR_HASH,
+ "Flag to indicate whether to use linear hash for default in new tables",
+ (gptr*) &opt_ndb_linear_hash,
+ (gptr*) &opt_ndb_linear_hash,
+ 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
{"ndb-force-send", OPT_NDB_FORCE_SEND,
"Force send of buffers to ndb immediately without waiting for "
"other threads.",
@@ -6055,6 +6068,11 @@ static void mysql_init_variables(void)
#else
have_example_db= SHOW_OPTION_NO;
#endif
+#ifdef HAVE_PARTITION_DB
+ have_partition_db= SHOW_OPTION_YES;
+#else
+ have_partition_db= SHOW_OPTION_NO;
+#endif
#ifdef HAVE_ARCHIVE_DB
have_archive_db= SHOW_OPTION_YES;
#else
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index e04e15788fa..c3e73632c6f 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -751,7 +751,7 @@ int QUICK_RANGE_SELECT::init()
DBUG_ENTER("QUICK_RANGE_SELECT::init");
if (file->inited == handler::NONE)
- DBUG_RETURN(error= file->ha_index_init(index));
+ DBUG_RETURN(error= file->ha_index_init(index, 1));
error= 0;
DBUG_RETURN(0);
}
@@ -6049,7 +6049,7 @@ int QUICK_RANGE_SELECT::reset()
range= NULL;
cur_range= (QUICK_RANGE**) ranges.buffer;
- if (file->inited == handler::NONE && (error= file->ha_index_init(index)))
+ if (file->inited == handler::NONE && (error= file->ha_index_init(index,1)))
DBUG_RETURN(error);
/* Do not allocate the buffers twice. */
@@ -6308,7 +6308,7 @@ int QUICK_RANGE_SELECT_GEOM::get_next()
(byte*) range->min_key,
range->min_length,
(ha_rkey_function)(range->flag ^ GEOM_FLAG));
- if (result != HA_ERR_KEY_NOT_FOUND)
+ if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
DBUG_RETURN(result);
range=0; // Not found, to next range
}
@@ -6451,7 +6451,7 @@ int QUICK_SELECT_DESC::get_next()
}
if (result)
{
- if (result != HA_ERR_KEY_NOT_FOUND)
+ if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
DBUG_RETURN(result);
range=0; // Not found, to next range
continue;
@@ -8083,7 +8083,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");
file->extra(HA_EXTRA_KEYREAD); /* We need only the key attributes */
- result= file->ha_index_init(index);
+ result= file->ha_index_init(index, 1);
result= file->index_last(record);
if (result == HA_ERR_END_OF_FILE)
DBUG_RETURN(0);
@@ -8159,7 +8159,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next()
DBUG_ASSERT(is_last_prefix <= 0);
if (result == HA_ERR_KEY_NOT_FOUND)
continue;
- else if (result)
+ if (result)
break;
if (have_min)
@@ -8189,10 +8189,11 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next()
HA_READ_KEY_EXACT);
result= have_min ? min_res : have_max ? max_res : result;
- }
- while (result == HA_ERR_KEY_NOT_FOUND && is_last_prefix != 0);
+ } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
+ is_last_prefix != 0);
if (result == 0)
+ {
/*
Partially mimic the behavior of end_select_send. Copy the
field data from Item_field::field into Item_field::result_field
@@ -8200,6 +8201,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next()
other fields in non-ANSI SQL mode).
*/
copy_fields(&join->tmp_table_param);
+ }
else if (result == HA_ERR_KEY_NOT_FOUND)
result= HA_ERR_END_OF_FILE;
@@ -8226,6 +8228,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next()
RETURN
0 on success
HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions.
+ HA_ERR_END_OF_FILE - "" -
other if some error occurred
*/
@@ -8279,7 +8282,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min()
if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
key_restore(record, tmp_record, index_info, 0);
}
- else if (result == HA_ERR_KEY_NOT_FOUND)
+ else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE)
result= 0; /* There is a result in any case. */
}
}
@@ -8304,6 +8307,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min()
RETURN
0 on success
HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions.
+ HA_ERR_END_OF_FILE - "" -
other if some error occurred
*/
@@ -8404,6 +8408,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
0 on success
HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
the ranges
+ HA_ERR_END_OF_FILE - "" -
other if some error
*/
@@ -8448,11 +8453,12 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
result= file->index_read(record, group_prefix, search_prefix_len,
find_flag);
- if ((result == HA_ERR_KEY_NOT_FOUND) &&
- (cur_range->flag & (EQ_RANGE | NULL_RANGE)))
- continue; /* Check the next range. */
- else if (result)
+ if (result)
{
+ if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
+ (cur_range->flag & (EQ_RANGE | NULL_RANGE)))
+ continue; /* Check the next range. */
+
/*
In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE,
HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this
@@ -8479,7 +8485,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
/* Check if record belongs to the current group. */
if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
{
- result = HA_ERR_KEY_NOT_FOUND;
+ result= HA_ERR_KEY_NOT_FOUND;
continue;
}
@@ -8497,7 +8503,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
if (!((cur_range->flag & NEAR_MAX) && (cmp_res == -1) ||
(cmp_res <= 0)))
{
- result = HA_ERR_KEY_NOT_FOUND;
+ result= HA_ERR_KEY_NOT_FOUND;
continue;
}
}
@@ -8536,6 +8542,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
0 on success
HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
the ranges
+ HA_ERR_END_OF_FILE - "" -
other if some error
*/
@@ -8581,10 +8588,12 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
result= file->index_read(record, group_prefix, search_prefix_len,
find_flag);
- if ((result == HA_ERR_KEY_NOT_FOUND) && (cur_range->flag & EQ_RANGE))
- continue; /* Check the next range. */
if (result)
{
+ if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
+ (cur_range->flag & EQ_RANGE))
+ continue; /* Check the next range. */
+
/*
In no key was found with this upper bound, there certainly are no keys
in the ranges to the left.
diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc
index 33c8eadc065..9802bbddde6 100644
--- a/sql/opt_sum.cc
+++ b/sql/opt_sum.cc
@@ -181,7 +181,7 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds)
const_result= 0;
break;
}
- error= table->file->ha_index_init((uint) ref.key);
+ error= table->file->ha_index_init((uint) ref.key, 1);
if (!ref.key_length)
error= table->file->index_first(table->record[0]);
@@ -253,7 +253,7 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds)
const_result= 0;
break;
}
- error= table->file->ha_index_init((uint) ref.key);
+ error= table->file->ha_index_init((uint) ref.key, 1);
if (!ref.key_length)
error= table->file->index_last(table->record[0]);
diff --git a/sql/records.cc b/sql/records.cc
index 9b05dc3e291..b3610cf1bbf 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -31,6 +31,74 @@ static int rr_cmp(uchar *a,uchar *b);
/* init struct for read with info->read_record */
+/*
+ init_read_record is used to scan by using a number of different methods.
+ Which method to use is set-up in this call so that later calls to
+ the info->read_record will call the appropriate method using a function
+ pointer.
+
+ There are five methods that relate completely to the sort function
+ filesort. The result of a filesort is retrieved using read_record
+ calls. The other two methods are used for normal table access.
+
+ The filesort will produce references to the records sorted, these
+ references can be stored in memory or in a temporary file.
+
+ The temporary file is normally used when the references doesn't fit into
+ a properly sized memory buffer. For most small queries the references
+ are stored in the memory buffer.
+
+ The temporary file is also used when performing an update where a key is
+ modified.
+
+ Methods used when ref's are in memory (using rr_from_pointers):
+ rr_unpack_from_buffer:
+ ----------------------
+ This method is used when table->sort.addon_field is allocated.
+ This is allocated for most SELECT queries not involving any BLOB's.
+ In this case the records are fetched from a memory buffer.
+ rr_from_pointers:
+ -----------------
+ Used when the above is not true, UPDATE, DELETE and so forth and
+ SELECT's involving BLOB's. It is also used when the addon_field
+ buffer is not allocated due to that its size was bigger than the
+ session variable max_length_for_sort_data.
+ In this case the record data is fetched from the handler using the
+ saved reference using the rnd_pos handler call.
+
+ Methods used when ref's are in a temporary file (using rr_from_tempfile)
+ rr_unpack_from_tempfile:
+ ------------------------
+ Same as rr_unpack_from_buffer except that references are fetched from
+ temporary file. Should obviously not really happen other than in
+ strange configurations.
+
+ rr_from_tempfile:
+ -----------------
+ Same as rr_from_pointers except that references are fetched from
+ temporary file instead of from
+ rr_from_cache:
+ --------------
+ This is a special variant of rr_from_tempfile that can be used for
+ handlers that is not using the HA_FAST_KEY_READ table flag. Instead
+ of reading the references one by one from the temporary file it reads
+ a set of them, sorts them and reads all of them into a buffer which
+ is then used for a number of subsequent calls to rr_from_cache.
+ It is only used for SELECT queries and a number of other conditions
+ on table size.
+
+ All other accesses use either index access methods (rr_quick) or a full
+ table scan (rr_sequential).
+ rr_quick:
+ ---------
+ rr_quick uses one of the QUICK_SELECT classes in opt_range.cc to
+ perform an index scan. There are loads of functionality hidden
+ in these quick classes. It handles all index scans of various kinds.
+ rr_sequential:
+ --------------
+ This is the most basic access method of a table using rnd_init,
+ rnd_next and rnd_end. No indexes are used.
+*/
void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
SQL_SELECT *select,
int use_record_cache, bool print_error)
diff --git a/sql/set_var.cc b/sql/set_var.cc
index ae7e4bd844b..eb6cdfa37d0 100644
--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -786,6 +786,7 @@ struct show_var_st init_vars[]= {
{"have_isam", (char*) &have_isam, SHOW_HAVE},
{"have_ndbcluster", (char*) &have_ndbcluster, SHOW_HAVE},
{"have_openssl", (char*) &have_openssl, SHOW_HAVE},
+ {"have_partition_engine", (char*) &have_partition_db, SHOW_HAVE},
{"have_query_cache", (char*) &have_query_cache, SHOW_HAVE},
{"have_raid", (char*) &have_raid, SHOW_HAVE},
{"have_rtree_keys", (char*) &have_rtree_keys, SHOW_HAVE},
diff --git a/sql/share/errmsg.txt b/sql/share/errmsg.txt
index f999f17aedf..466f1049dc5 100644
--- a/sql/share/errmsg.txt
+++ b/sql/share/errmsg.txt
@@ -5370,3 +5370,81 @@ ER_SCALE_BIGGER_THAN_PRECISION 42000 S1009
eng "Scale may not be larger than the precision (column '%-.64s')."
ER_WRONG_LOCK_OF_SYSTEM_TABLE
eng "You can't combine write-locking of system '%-.64s.%-.64s' table with other tables"
+ER_PARTITION_REQUIRES_VALUES_ERROR
+ eng "%s PARTITIONING requires definition of VALUES %s for each partition"
+ swe "%s PARTITIONering kräver definition av VALUES %s för varje partition"
+ER_PARTITION_WRONG_VALUES_ERROR
+ eng "Only %s PARTITIONING can use VALUES %s in partition definition"
+ swe "Endast %s partitionering kan använda VALUES %s i definition av partitionen"
+ER_PARTITION_MAXVALUE_ERROR
+ eng "MAXVALUE can only be used in last partition definition"
+ swe "MAXVALUE kan bara användas i definitionen av den sista partitionen"
+ER_PARTITION_SUBPARTITION_ERROR
+ eng "Subpartitions can only be hash partitions and by key"
+ swe "Subpartitioner kan bara vara hash och key partitioner"
+ER_PARTITION_WRONG_NO_PART_ERROR
+ eng "Wrong number of partitions defined, mismatch with previous setting"
+ swe "Antal partitioner definierade och antal partitioner är inte lika"
+ER_PARTITION_WRONG_NO_SUBPART_ERROR
+ eng "Wrong number of subpartitions defined, mismatch with previous setting"
+ swe "Antal subpartitioner definierade och antal subpartitioner är inte lika"
+ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR
+ eng "Constant/Random expression in (sub)partitioning function is not allowed"
+ swe "Konstanta uttryck eller slumpmässiga uttryck är inte tillåtna (sub)partitioneringsfunktioner"
+ER_NO_CONST_EXPR_IN_RANGE_OR_LIST_ERROR
+ eng "Expression in RANGE/LIST VALUES must be constant"
+ swe "Uttryck i RANGE/LIST VALUES måste vara ett konstant uttryck"
+ER_FIELD_NOT_FOUND_PART_ERROR
+ eng "Field in list of fields for partition function not found in table"
+ swe "Fält i listan av fält för partitionering med key inte funnen i tabellen"
+ER_LIST_OF_FIELDS_ONLY_IN_HASH_ERROR
+ eng "List of fields is only allowed in KEY partitions"
+ swe "En lista av fält är endast tillåtet för KEY partitioner"
+ER_INCONSISTENT_PARTITION_INFO_ERROR
+ eng "The partition info in the frm file is not consistent with what can be written into the frm file"
+ swe "Partitioneringsinformationen i frm-filen är inte konsistent med vad som kan skrivas i frm-filen"
+ER_PARTITION_FUNC_NOT_ALLOWED_ERROR
+ eng "The %s function returns the wrong type"
+ swe "%s-funktionen returnerar felaktig typ"
+ER_PARTITIONS_MUST_BE_DEFINED_ERROR
+ eng "For %s partitions each partition must be defined"
+ swe "För %s partitionering så måste varje partition definieras"
+ER_RANGE_NOT_INCREASING_ERROR
+ eng "VALUES LESS THAN value must be strictly increasing for each partition"
+ swe "Värden i VALUES LESS THAN måste vara strikt växande för varje partition"
+ER_INCONSISTENT_TYPE_OF_FUNCTIONS_ERROR
+ eng "VALUES %s value must be of same type as partition function"
+ swe "Värden i VALUES %s måste vara av samma typ som partitioneringsfunktionen"
+ER_MULTIPLE_DEF_CONST_IN_LIST_PART_ERROR
+ eng "Multiple definition of same constant in list partitioning"
+ swe "Multipel definition av samma konstant i list partitionering"
+ER_PARTITION_ENTRY_ERROR
+ eng "Partitioning can not be used stand-alone in query"
+ swe "Partitioneringssyntax kan inte användas på egen hand i en SQL-fråga"
+ER_MIX_HANDLER_ERROR
+ eng "The mix of handlers in the partitions is not allowed in this version in MySQL"
+ swe "Denna mix av lagringsmotorer är inte tillåten i denna version av MySQL"
+ER_PARTITION_NOT_DEFINED_ERROR
+ eng "For the partitioned engine it is necessary to define all %s"
+ swe "För partitioneringsmotorn så är det nödvändigt att definiera alla %s"
+ER_TOO_MANY_PARTITIONS_ERROR
+ eng "Too many partitions were defined"
+ swe "För många partitioner definierades"
+ER_SUBPARTITION_ERROR
+ eng "It is only possible to mix RANGE/LIST partitioning with HASH/KEY partitioning for subpartitioning"
+ swe "Det är endast möjligt att blanda RANGE/LIST partitionering med HASH/KEY partitionering för subpartitionering"
+ER_CANT_CREATE_HANDLER_FILE
+ eng "Failed to create specific handler file"
+ swe "Misslyckades med att skapa specifik fil i lagringsmotor"
+ER_BLOB_FIELD_IN_PART_FUNC_ERROR
+ eng "A BLOB field is not allowed in partition function"
+ swe "Ett BLOB-fält är inte tillåtet i partitioneringsfunktioner"
+ER_CHAR_SET_IN_PART_FIELD_ERROR
+ eng "VARCHAR only allowed if binary collation for partition functions"
+ swe "VARCHAR endast tillåten med binär collation för partitioneringsfunktion"
+ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF
+ eng "A %s need to include all fields in the partition function"
+ swe "En %s behöver inkludera alla fält i partitioneringsfunktionen för denna lagringsmotor"
+ER_NO_PARTS_ERROR
+ eng "Number of %s = 0 is not an allowed value"
+ swe "Antal %s = 0 är inte ett tillåten värde"
diff --git a/sql/sp.cc b/sql/sp.cc
index 55087f47f5e..cf381762bac 100644
--- a/sql/sp.cc
+++ b/sql/sp.cc
@@ -799,7 +799,7 @@ db_show_routine_status(THD *thd, int type, const char *wild)
}
}
- table->file->ha_index_init(0);
+ table->file->ha_index_init(0, 1);
if ((res= table->file->index_first(table->record[0])))
{
res= (res == HA_ERR_END_OF_FILE) ? 0 : SP_INTERNAL_ERROR;
@@ -849,7 +849,7 @@ sp_drop_db_routines(THD *thd, char *db)
goto err;
ret= SP_OK;
- table->file->ha_index_init(0);
+ table->file->ha_index_init(0, 1);
if (! table->file->index_read(table->record[0],
key, keylen, HA_READ_KEY_EXACT))
{
diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc
index 315103aa8b1..fdb7f7f069c 100644
--- a/sql/sql_acl.cc
+++ b/sql/sql_acl.cc
@@ -2048,7 +2048,7 @@ GRANT_TABLE::GRANT_TABLE(TABLE *form, TABLE *col_privs)
key_copy(key, col_privs->record[0], col_privs->key_info, key_prefix_len);
col_privs->field[4]->store("",0, &my_charset_latin1);
- col_privs->file->ha_index_init(0);
+ col_privs->file->ha_index_init(0, 1);
if (col_privs->file->index_read(col_privs->record[0],
(byte*) key,
key_prefix_len, HA_READ_KEY_EXACT))
@@ -2193,7 +2193,7 @@ static int replace_column_table(GRANT_TABLE *g_t,
List_iterator <LEX_COLUMN> iter(columns);
class LEX_COLUMN *column;
- table->file->ha_index_init(0);
+ table->file->ha_index_init(0, 1);
while ((column= iter++))
{
ulong privileges= column->rights;
@@ -3168,8 +3168,8 @@ my_bool grant_init(THD *org_thd)
t_table = tables[0].table; c_table = tables[1].table;
p_table= tables[2].table;
- t_table->file->ha_index_init(0);
- p_table->file->ha_index_init(0);
+ t_table->file->ha_index_init(0, 1);
+ p_table->file->ha_index_init(0, 1);
if (!t_table->file->index_first(t_table->record[0]))
{
/* Will be restored by org_thd->store_globals() */
@@ -4473,7 +4473,7 @@ static int handle_grant_table(TABLE_LIST *tables, uint table_no, bool drop,
user_key, key_prefix_length,
HA_READ_KEY_EXACT)))
{
- if (error != HA_ERR_KEY_NOT_FOUND)
+ if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
{
table->file->print_error(error, MYF(0));
result= -1;
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index 0013d478600..5d6dd29b998 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -2561,6 +2561,42 @@ find_field_in_table(THD *thd, TABLE_LIST *table_list,
/*
+ Find field in table, no side effects, only purpose is to check for field
+ in table object and get reference to the field if found.
+
+ SYNOPSIS
+ find_field_in_table_sef()
+
+ table table where to find
+ name Name of field searched for
+
+ RETURN
+ 0 field is not found
+ # pointer to field
+*/
+
+Field *find_field_in_table_sef(TABLE *table, const char *name)
+{
+ Field **field_ptr;
+ if (table->s->name_hash.records)
+ field_ptr= (Field**)hash_search(&table->s->name_hash,(byte*) name,
+ strlen(name));
+ else
+ {
+ if (!(field_ptr= table->field))
+ return (Field *)0;
+ for (; *field_ptr; ++field_ptr)
+ if (!my_strcasecmp(system_charset_info, (*field_ptr)->field_name, name))
+ break;
+ }
+ if (field_ptr)
+ return *field_ptr;
+ else
+ return (Field *)0;
+}
+
+
+/*
Find field in table
SYNOPSIS
@@ -2623,13 +2659,16 @@ Field *find_field_in_real_table(THD *thd, TABLE *table,
(bool)(thd->set_query_id-1));
if (field->query_id != thd->query_id)
{
+ if (table->get_fields_in_item_tree)
+ field->flags|= GET_FIXED_FIELDS_FLAG;
field->query_id=thd->query_id;
table->used_fields++;
table->used_keys.intersect(field->part_of_key);
}
else
thd->dupp_field=field;
- }
+ } else if (table->get_fields_in_item_tree)
+ field->flags|= GET_FIXED_FIELDS_FLAG;
#ifndef NO_EMBEDDED_ACCESS_CHECKS
if (check_grants && check_grant_column(thd, &table->grant,
table->s->db,
diff --git a/sql/sql_handler.cc b/sql/sql_handler.cc
index e109600bcd0..84087db9719 100644
--- a/sql/sql_handler.cc
+++ b/sql/sql_handler.cc
@@ -461,7 +461,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables,
if (keyname)
{
table->file->ha_index_or_rnd_end();
- table->file->ha_index_init(keyno);
+ table->file->ha_index_init(keyno, 1);
error= table->file->index_first(table->record[0]);
}
else
@@ -483,7 +483,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables,
case RLAST:
DBUG_ASSERT(keyname != 0);
table->file->ha_index_or_rnd_end();
- table->file->ha_index_init(keyno);
+ table->file->ha_index_init(keyno, 1);
error= table->file->index_last(table->record[0]);
mode=RPREV;
break;
@@ -522,7 +522,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables,
if (!(key= (byte*) thd->calloc(ALIGN_SIZE(key_len))))
goto err;
table->file->ha_index_or_rnd_end();
- table->file->ha_index_init(keyno);
+ table->file->ha_index_init(keyno, 1);
key_copy(key, table->record[0], table->key_info + keyno, key_len);
error= table->file->index_read(table->record[0],
key,key_len,ha_rkey_mode);
diff --git a/sql/sql_help.cc b/sql/sql_help.cc
index 6780beec258..11045529a51 100644
--- a/sql/sql_help.cc
+++ b/sql/sql_help.cc
@@ -286,8 +286,8 @@ int get_topics_for_keyword(THD *thd, TABLE *topics, TABLE *relations,
rtopic_id= find_fields[help_relation_help_topic_id].field;
rkey_id= find_fields[help_relation_help_keyword_id].field;
- topics->file->ha_index_init(iindex_topic);
- relations->file->ha_index_init(iindex_relations);
+ topics->file->ha_index_init(iindex_topic,1);
+ relations->file->ha_index_init(iindex_relations,1);
rkey_id->store((longlong) key_id);
rkey_id->get_key_image(buff, rkey_id->pack_length(), Field::itRAW);
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index 630a7e950f7..73aaecd39aa 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -155,6 +155,7 @@ void lex_start(THD *thd, uchar *buf,uint length)
lex->yylineno = 1;
lex->in_comment=0;
lex->length=0;
+ lex->part_info= 0;
lex->select_lex.in_sum_expr=0;
lex->select_lex.expr_list.empty();
lex->select_lex.ftfunc_list_alloc.empty();
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 45c8182a29c..edcf4db09a4 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -25,6 +25,7 @@ class sp_head;
class sp_name;
class sp_instr;
class sp_pcontext;
+class partition_info;
/*
The following hack is needed because mysql_yacc.cc does not define
@@ -721,6 +722,8 @@ typedef struct st_lex
TABLE_LIST **query_tables_last;
/* store original leaf_tables for INSERT SELECT and PS/SP */
TABLE_LIST *leaf_tables_insert;
+ /* Partition info structure filled in by PARTITION BY parse part */
+ partition_info *part_info;
List<key_part_spec> col_list;
List<key_part_spec> ref_list;
diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc
new file mode 100644
index 00000000000..ffdf53ed287
--- /dev/null
+++ b/sql/sql_partition.cc
@@ -0,0 +1,3117 @@
+/* Copyright (C) 2005 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ This file was introduced as a container for general functionality related
+ to partitioning introduced in MySQL version 5.1. It contains functionality
+ used by all handlers that support partitioning, which in the first version
+ is the partitioning handler itself and the NDB handler.
+
+ The first version was written by Mikael Ronström.
+
+ This version supports RANGE partitioning, LIST partitioning, HASH
+ partitioning and composite partitioning (hereafter called subpartitioning)
+ where each RANGE/LIST partitioning is HASH partitioned. The hash function
+ can either be supplied by the user or by only a list of fields (also
+ called KEY partitioning, where the MySQL server will use an internal
+ hash function.
+ There are quite a few defaults that can be used as well.
+*/
+
+/* Some general useful functions */
+
+#include "mysql_priv.h"
+#include <errno.h>
+#include <m_ctype.h>
+#include "md5.h"
+
+
+#ifdef HAVE_PARTITION_DB
+/*
+ Partition related functions declarations and some static constants;
+*/
+static char *hash_str= "HASH";
+static char *range_str= "RANGE";
+static char *list_str= "LIST";
+static char *part_str= "PARTITION";
+static char *sub_str= "SUB";
+static char *by_str= "BY";
+static char *key_str= "KEY";
+static char *space_str= " ";
+static char *equal_str= "=";
+static char *end_paren_str= ")";
+static char *begin_paren_str= "(";
+static char *comma_str= ",";
+static char buff[22];
+
+bool get_partition_id_list(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_range(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_hash_nosub(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_key_nosub(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_linear_hash_nosub(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_linear_key_nosub(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_range_sub_hash(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_range_sub_key(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_range_sub_linear_hash(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_range_sub_linear_key(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_list_sub_hash(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_list_sub_key(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_list_sub_linear_hash(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_list_sub_linear_key(partition_info *part_info,
+ uint32 *part_id);
+uint32 get_partition_id_hash_sub(partition_info *part_info);
+uint32 get_partition_id_key_sub(partition_info *part_info);
+uint32 get_partition_id_linear_hash_sub(partition_info *part_info);
+uint32 get_partition_id_linear_key_sub(partition_info *part_info);
+
+/*
+ A useful routine used by update_row for partition handlers to calculate
+ the partition ids of the old and the new record.
+ SYNOPSIS
+ get_part_for_update()
+ old_data Buffer of old record
+ new_data Buffer of new record
+ rec0 Reference to table->record[0]
+ part_info Reference to partition information
+ part_field_array A NULL-terminated array of fields for partition
+ function
+ old_part_id The returned partition id of old record
+ new_part_id The returned partition id of new record
+ RETURN VALUE
+ 0 Success
+ > 0 Error code
+ DESCRIPTION
+ Dependent on whether buf is not record[0] we need to prepare the
+ fields. Then we call the function pointer get_partition_id to
+ calculate the partition ids.
+*/
+
+int get_parts_for_update(const byte *old_data, byte *new_data,
+ const byte *rec0, partition_info *part_info,
+ uint32 *old_part_id, uint32 *new_part_id)
+{
+ Field **part_field_array= part_info->full_part_field_array;
+ int error;
+ DBUG_ENTER("get_parts_for_update");
+ DBUG_ASSERT(new_data == rec0);
+
+ set_field_ptr(part_field_array, old_data, rec0);
+ error= part_info->get_partition_id(part_info, old_part_id);
+ set_field_ptr(part_field_array, rec0, old_data);
+ if (unlikely(error)) // Should never happen
+ {
+ DBUG_ASSERT(0);
+ DBUG_RETURN(error);
+ }
+#ifdef NOT_NEEDED
+ if (new_data == rec0)
+#endif
+ {
+ if (unlikely(error= part_info->get_partition_id(part_info,new_part_id)))
+ {
+ DBUG_RETURN(error);
+ }
+ }
+#ifdef NOT_NEEDED
+ else
+ {
+ /*
+ This branch should never execute but it is written anyways for
+ future use. It will be tested by ensuring that the above
+ condition is false in one test situation before pushing the code.
+ */
+ set_field_ptr(part_field_array, new_data, rec0);
+ error= part_info->get_partition_id(part_info, new_part_id);
+ set_field_ptr(part_field_array, rec0, new_data);
+ if (unlikely(error))
+ {
+ DBUG_RETURN(error);
+ }
+ }
+#endif
+ DBUG_RETURN(0);
+}
+
+
+/*
+ A useful routine used by delete_row for partition handlers to calculate
+ the partition id.
+ SYNOPSIS
+ get_part_for_delete()
+ buf Buffer of old record
+ rec0 Reference to table->record[0]
+ part_info Reference to partition information
+ part_field_array A NULL-terminated array of fields for partition
+ function
+ part_id The returned partition id to delete from
+ RETURN VALUE
+ 0 Success
+ > 0 Error code
+ DESCRIPTION
+ Dependent on whether buf is not record[0] we need to prepare the
+ fields. Then we call the function pointer get_partition_id to
+ calculate the partition id.
+*/
+
+int get_part_for_delete(const byte *buf, const byte *rec0,
+ partition_info *part_info, uint32 *part_id)
+{
+ int error;
+ DBUG_ENTER("get_part_for_delete");
+
+ if (likely(buf == rec0))
+ {
+ if (unlikely((error= part_info->get_partition_id(part_info, part_id))))
+ {
+ DBUG_RETURN(error);
+ }
+ DBUG_PRINT("info", ("Delete from partition %d", *part_id));
+ }
+ else
+ {
+ Field **part_field_array= part_info->full_part_field_array;
+ set_field_ptr(part_field_array, buf, rec0);
+ error= part_info->get_partition_id(part_info, part_id);
+ set_field_ptr(part_field_array, rec0, buf);
+ if (unlikely(error))
+ {
+ DBUG_RETURN(error);
+ }
+ DBUG_PRINT("info", ("Delete from partition %d (path2)", *part_id));
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ This routine allocates an array for all range constants to achieve a fast
+ check what partition a certain value belongs to. At the same time it does
+ also check that the range constants are defined in increasing order and
+ that the expressions are constant integer expressions.
+ SYNOPSIS
+ check_range_constants()
+ part_info
+ RETURN VALUE
+ TRUE An error occurred during creation of range constants
+ FALSE Successful creation of range constant mapping
+ DESCRIPTION
+ This routine is called from check_partition_info to get a quick error
+ before we came too far into the CREATE TABLE process. It is also called
+ from fix_partition_func every time we open the .frm file. It is only
+ called for RANGE PARTITIONed tables.
+*/
+
+static bool check_range_constants(partition_info *part_info)
+{
+ partition_element* part_def;
+ longlong current_largest_int= LONGLONG_MIN, part_range_value_int;
+ uint no_parts= part_info->no_parts, i;
+ List_iterator<partition_element> it(part_info->partitions);
+ bool result= TRUE;
+ DBUG_ENTER("check_range_constants");
+ DBUG_PRINT("enter", ("INT_RESULT with %d parts", no_parts));
+
+ part_info->part_result_type= INT_RESULT;
+ part_info->range_int_array=
+ (longlong*)sql_alloc(no_parts * sizeof(longlong));
+ if (unlikely(part_info->range_int_array == NULL))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), no_parts*sizeof(longlong));
+ goto end;
+ }
+ i= 0;
+ do
+ {
+ part_def= it++;
+ if ((i != (no_parts - 1)) || !part_info->defined_max_value)
+ {
+ if (likely(part_def->range_expr->result_type() == INT_RESULT))
+ part_range_value_int= part_def->range_expr->val_int();
+ else
+ {
+ my_error(ER_INCONSISTENT_TYPE_OF_FUNCTIONS_ERROR, MYF(0),
+ "LESS THAN");
+ goto end;
+ }
+ }
+ else
+ part_range_value_int= LONGLONG_MAX;
+ if (likely(current_largest_int < part_range_value_int))
+ {
+ current_largest_int= part_range_value_int;
+ part_info->range_int_array[i]= part_range_value_int;
+ }
+ else
+ {
+ my_error(ER_RANGE_NOT_INCREASING_ERROR, MYF(0));
+ goto end;
+ }
+ } while (++i < no_parts);
+ result= FALSE;
+end:
+ DBUG_RETURN(result);
+}
+
+
+/*
+ A support routine for check_list_constants used by qsort to sort the
+ constant list expressions.
+ SYNOPSIS
+ list_part_cmp()
+ a First list constant to compare with
+ b Second list constant to compare with
+ RETURN VALUE
+ +1 a > b
+ 0 a == b
+ -1 a < b
+*/
+
+static int list_part_cmp(const void* a, const void* b)
+{
+ longlong a1, b1;
+ a1= ((LIST_PART_ENTRY*)a)->list_value;
+ b1= ((LIST_PART_ENTRY*)b)->list_value;
+ if (a1 < b1)
+ return -1;
+ else if (a1 > b1)
+ return +1;
+ else
+ return 0;
+}
+
+
+/*
+ This routine allocates an array for all list constants to achieve a fast
+ check what partition a certain value belongs to. At the same time it does
+ also check that there are no duplicates among the list constants and that
+ that the list expressions are constant integer expressions.
+ SYNOPSIS
+ check_list_constants()
+ part_info
+ RETURN VALUE
+ TRUE An error occurred during creation of list constants
+ FALSE Successful creation of list constant mapping
+ DESCRIPTION
+ This routine is called from check_partition_info to get a quick error
+ before we came too far into the CREATE TABLE process. It is also called
+ from fix_partition_func every time we open the .frm file. It is only
+ called for LIST PARTITIONed tables.
+*/
+
+static bool check_list_constants(partition_info *part_info)
+{
+ uint i, no_list_values= 0, no_parts, list_index= 0;
+ Item *list_expr;
+ bool not_first, result= TRUE;
+ longlong curr_value, prev_value;
+ partition_element* part_def;
+ List_iterator<partition_element> list_func_it(part_info->partitions);
+ DBUG_ENTER("check_list_constants");
+
+ part_info->part_result_type= INT_RESULT;
+
+ /*
+ We begin by calculating the number of list values that have been
+ defined in the first step.
+
+ We use this number to allocate a properly sized array of structs
+ to keep the partition id and the value to use in that partition.
+ In the second traversal we check that all Item trees are of the
+ same type (INT_RESULT) and assign them values in the struct array.
+
+ Finally we sort the array of structs in order of values to enable
+ a quick binary search for the proper value to discover the
+ partition id.
+ After sorting the array we check that there are no duplicates in the
+ list.
+ */
+
+ no_parts= part_info->no_parts;
+ i= 0;
+ do
+ {
+ part_def= list_func_it++;
+ List_iterator<Item> list_val_it1(part_def->list_expr_list);
+ while (list_val_it1++)
+ no_list_values++;
+ } while (++i < no_parts);
+ list_func_it.rewind();
+ part_info->no_list_values= no_list_values;
+ part_info->list_array=
+ (LIST_PART_ENTRY*)sql_alloc(no_list_values*sizeof(LIST_PART_ENTRY));
+ if (unlikely(part_info->list_array == NULL))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), no_list_values*sizeof(LIST_PART_ENTRY));
+ goto end;
+ }
+
+ i= 0;
+ do
+ {
+ part_def= list_func_it++;
+ List_iterator<Item> list_val_it2(part_def->list_expr_list);
+ while ((list_expr= list_val_it2++))
+ {
+ if (likely(list_expr->result_type() == INT_RESULT))
+ {
+ part_info->list_array[list_index].list_value= list_expr->val_int();
+ part_info->list_array[list_index++].partition_id= i;
+ }
+ else
+ {
+ my_error(ER_INCONSISTENT_TYPE_OF_FUNCTIONS_ERROR, MYF(0), "IN");
+ goto end;
+ }
+ }
+ } while (++i < no_parts);
+
+ qsort((void*)part_info->list_array, no_list_values,
+ sizeof(LIST_PART_ENTRY), &list_part_cmp);
+
+ not_first= FALSE;
+ i= prev_value= 0; //prev_value initialised to quiet compiler
+ do
+ {
+ curr_value= part_info->list_array[i].list_value;
+ if (likely(!not_first || prev_value != curr_value))
+ {
+ prev_value= curr_value;
+ not_first= TRUE;
+ }
+ else
+ {
+ my_error(ER_MULTIPLE_DEF_CONST_IN_LIST_PART_ERROR, MYF(0));
+ goto end;
+ }
+ } while (++i < no_list_values);
+ result= FALSE;
+end:
+ DBUG_RETURN(result);
+}
+
+
+/*
+ Create a memory area where default partition names are stored and fill it
+ up with the names.
+ SYNOPSIS
+ create_default_partition_names()
+ no_parts Number of partitions
+ subpart Is it subpartitions
+ RETURN VALUE
+ A pointer to the memory area of the default partition names
+ DESCRIPTION
+ A support routine for the partition code where default values are
+ generated.
+ The external routine needing this code is check_partition_info
+*/
+
+#define MAX_PART_NAME_SIZE 8
+
+static char *create_default_partition_names(uint no_parts, bool subpart)
+{
+ char *ptr= sql_calloc(no_parts*MAX_PART_NAME_SIZE);
+ char *move_ptr= ptr;
+ uint i= 0;
+ DBUG_ENTER("create_default_partition_names");
+ if (likely(ptr != 0))
+ {
+ do
+ {
+ if (subpart)
+ my_sprintf(move_ptr, (move_ptr,"sp%u", i));
+ else
+ my_sprintf(move_ptr, (move_ptr,"p%u", i));
+ move_ptr+=MAX_PART_NAME_SIZE;
+ } while (++i < no_parts);
+ }
+ else
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), no_parts*MAX_PART_NAME_SIZE);
+ }
+ DBUG_RETURN(ptr);
+}
+
+
+/*
+ Set up all the default partitions not set-up by the user in the SQL
+ statement. Also perform a number of checks that the user hasn't tried
+ to use default values where no defaults exists.
+ SYNOPSIS
+ set_up_default_partitions()
+ part_info The reference to all partition information
+ file A reference to a handler of the table
+ max_rows Maximum number of rows stored in the table
+ RETURN VALUE
+ TRUE Error, attempted default values not possible
+ FALSE Ok, default partitions set-up
+ DESCRIPTION
+ The routine uses the underlying handler of the partitioning to define
+ the default number of partitions. For some handlers this requires
+ knowledge of the maximum number of rows to be stored in the table.
+ This routine only accepts HASH and KEY partitioning and thus there is
+ no subpartitioning if this routine is successful.
+ The external routine needing this code is check_partition_info
+*/
+
+static bool set_up_default_partitions(partition_info *part_info,
+ handler *file, ulonglong max_rows)
+{
+ uint no_parts, i;
+ char *default_name;
+ bool result= TRUE;
+ DBUG_ENTER("set_up_default_partitions");
+
+ if (part_info->part_type != HASH_PARTITION)
+ {
+ char *error_string;
+ if (part_info->part_type == RANGE_PARTITION)
+ error_string= range_str;
+ else
+ error_string= list_str;
+ my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), error_string);
+ goto end;
+ }
+ if (part_info->no_parts == 0)
+ part_info->no_parts= file->get_default_no_partitions(max_rows);
+ no_parts= part_info->no_parts;
+ if (unlikely(no_parts > MAX_PARTITIONS))
+ {
+ my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0));
+ goto end;
+ }
+ if (unlikely((!(default_name= create_default_partition_names(no_parts,
+ FALSE)))))
+ goto end;
+ i= 0;
+ do
+ {
+ partition_element *part_elem= new partition_element();
+ if (likely(part_elem != 0))
+ {
+ part_elem->engine_type= DB_TYPE_UNKNOWN;
+ part_elem->partition_name= default_name;
+ default_name+=MAX_PART_NAME_SIZE;
+ part_info->partitions.push_back(part_elem);
+ }
+ else
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element));
+ goto end;
+ }
+ } while (++i < no_parts);
+ result= FALSE;
+end:
+ DBUG_RETURN(result);
+}
+
+
+/*
+ Set up all the default subpartitions not set-up by the user in the SQL
+ statement. Also perform a number of checks that the default partitioning
+ becomes an allowed partitioning scheme.
+ SYNOPSIS
+ set_up_default_subpartitions()
+ part_info The reference to all partition information
+ file A reference to a handler of the table
+ max_rows Maximum number of rows stored in the table
+ RETURN VALUE
+ TRUE Error, attempted default values not possible
+ FALSE Ok, default partitions set-up
+ DESCRIPTION
+ The routine uses the underlying handler of the partitioning to define
+ the default number of partitions. For some handlers this requires
+ knowledge of the maximum number of rows to be stored in the table.
+ This routine is only called for RANGE or LIST partitioning and those
+ need to be specified so only subpartitions are specified.
+ The external routine needing this code is check_partition_info
+*/
+
+static bool set_up_default_subpartitions(partition_info *part_info,
+ handler *file, ulonglong max_rows)
+{
+ uint i, j= 0, no_parts, no_subparts;
+ char *default_name;
+ bool result= TRUE;
+ partition_element *part_elem;
+ List_iterator<partition_element> part_it(part_info->partitions);
+ DBUG_ENTER("set_up_default_subpartitions");
+
+ if (part_info->no_subparts == 0)
+ part_info->no_subparts= file->get_default_no_partitions(max_rows);
+ no_parts= part_info->no_parts;
+ no_subparts= part_info->no_subparts;
+ if (unlikely((no_parts * no_subparts) > MAX_PARTITIONS))
+ {
+ my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0));
+ goto end;
+ }
+ if (unlikely((!(default_name=
+ create_default_partition_names(no_subparts, TRUE)))))
+ goto end;
+ i= 0;
+ do
+ {
+ part_elem= part_it++;
+ do
+ {
+ partition_element *subpart_elem= new partition_element();
+ if (likely(subpart_elem != 0))
+ {
+ subpart_elem->engine_type= DB_TYPE_UNKNOWN;
+ subpart_elem->partition_name= default_name;
+ default_name+= MAX_PART_NAME_SIZE;
+ part_elem->subpartitions.push_back(subpart_elem);
+ }
+ else
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element));
+ goto end;
+ }
+ } while (++j < no_subparts);
+ } while (++i < no_parts);
+ result= FALSE;
+end:
+ DBUG_RETURN(result);
+}
+
+
+/*
+ Set up defaults for partition or subpartition (cannot set-up for both,
+ this will return an error.
+ SYNOPSIS
+ set_up_defaults_for_partitioning()
+ part_info The reference to all partition information
+ file A reference to a handler of the table
+ max_rows Maximum number of rows stored in the table
+ RETURN VALUE
+ TRUE Error, attempted default values not possible
+ FALSE Ok, default partitions set-up
+ DESCRIPTION
+ Support routine for check_partition_info
+*/
+
+static bool set_up_defaults_for_partitioning(partition_info *part_info,
+ handler *file,
+ ulonglong max_rows)
+{
+ DBUG_ENTER("set_up_defaults_for_partitioning");
+
+ if (part_info->use_default_partitions)
+ DBUG_RETURN(set_up_default_partitions(part_info, file, max_rows));
+ if (is_sub_partitioned(part_info) && part_info->use_default_subpartitions)
+ DBUG_RETURN(set_up_default_subpartitions(part_info, file, max_rows));
+ DBUG_RETURN(FALSE);
+}
+
+
+/*
+ Check that all partitions use the same storage engine.
+ This is currently a limitation in this version.
+ SYNOPSIS
+ check_engine_mix()
+ engine_array An array of engine identifiers
+ no_parts Total number of partitions
+ RETURN VALUE
+ TRUE Error, mixed engines
+ FALSE Ok, no mixed engines
+*/
+
+static bool check_engine_mix(u_char *engine_array, uint no_parts)
+{
+ /*
+ Current check verifies only that all handlers are the same.
+ Later this check will be more sophisticated.
+ */
+ uint i= 0;
+ bool result= FALSE;
+ DBUG_ENTER("check_engine_mix");
+
+ do
+ {
+ if (engine_array[i] != engine_array[0])
+ {
+ result= TRUE;
+ break;
+ }
+ } while (++i < no_parts);
+ DBUG_RETURN(result);
+}
+
+
+/*
+ We will check that the partition info requested is possible to set-up in
+ this version. This routine is an extension of the parser one could say.
+ If defaults were used we will generate default data structures for all
+ partitions.
+ SYNOPSIS
+ check_partition_info()
+ part_info The reference to all partition information
+ db_type Default storage engine if no engine specified per
+ partition.
+ file A reference to a handler of the table
+ max_rows Maximum number of rows stored in the table
+ RETURN VALUE
+ TRUE Error, something went wrong
+ FALSE Ok, full partition data structures are now generated
+ DESCRIPTION
+ This code is used early in the CREATE TABLE and ALTER TABLE process.
+*/
+
+bool check_partition_info(partition_info *part_info,enum db_type eng_type,
+ handler *file, ulonglong max_rows)
+{
+ u_char *engine_array= NULL;
+ uint part_count= 0, i, no_parts, tot_partitions;
+ bool result= TRUE;
+ List_iterator<partition_element> part_it(part_info->partitions);
+ DBUG_ENTER("check_partition_info");
+
+ if (unlikely(is_sub_partitioned(part_info) &&
+ (!(part_info->part_type == RANGE_PARTITION ||
+ part_info->part_type == LIST_PARTITION))))
+ {
+ /* Only RANGE and LIST partitioning can be subpartitioned */
+ my_error(ER_SUBPARTITION_ERROR, MYF(0));
+ goto end;
+ }
+ if (unlikely(set_up_defaults_for_partitioning(part_info, file, max_rows)))
+ goto end;
+ tot_partitions= get_tot_partitions(part_info);
+ if (unlikely(tot_partitions > MAX_PARTITIONS))
+ {
+ my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0));
+ goto end;
+ }
+ engine_array= (u_char*)my_malloc(tot_partitions, MYF(MY_WME));
+ if (unlikely(!engine_array))
+ goto end;
+ i= 0;
+ no_parts= part_info->no_parts;
+ do
+ {
+ partition_element *part_elem= part_it++;
+ if (!is_sub_partitioned(part_info))
+ {
+ if (part_elem->engine_type == DB_TYPE_UNKNOWN)
+ part_elem->engine_type= eng_type;
+ DBUG_PRINT("info", ("engine = %u",(uint)part_elem->engine_type));
+ engine_array[part_count++]= (u_char)part_elem->engine_type;
+ }
+ else
+ {
+ uint j= 0, no_subparts= part_info->no_subparts;;
+ List_iterator<partition_element> sub_it(part_elem->subpartitions);
+ do
+ {
+ part_elem= sub_it++;
+ if (part_elem->engine_type == DB_TYPE_UNKNOWN)
+ part_elem->engine_type= eng_type;
+ DBUG_PRINT("info", ("engine = %u",(uint)part_elem->engine_type));
+ engine_array[part_count++]= (u_char)part_elem->engine_type;
+ } while (++j < no_subparts);
+ }
+ } while (++i < part_info->no_parts);
+ if (unlikely(check_engine_mix(engine_array, part_count)))
+ {
+ my_error(ER_MIX_HANDLER_ERROR, MYF(0));
+ goto end;
+ }
+
+ /*
+ We need to check all constant expressions that they are of the correct
+ type and that they are increasing for ranges and not overlapping for
+ list constants.
+ */
+
+ if (unlikely((part_info->part_type == RANGE_PARTITION &&
+ check_range_constants(part_info)) ||
+ (part_info->part_type == LIST_PARTITION &&
+ check_list_constants(part_info))))
+ goto end;
+ result= FALSE;
+end:
+ my_free((char*)engine_array,MYF(MY_ALLOW_ZERO_PTR));
+ DBUG_RETURN(result);
+}
+
+
+/*
+ A great number of functions below here is part of the fix_partition_func
+ method. It is used to set up the partition structures for execution from
+ openfrm. It is called at the end of the openfrm when the table struct has
+ been set-up apart from the partition information.
+ It involves:
+ 1) Setting arrays of fields for the partition functions.
+ 2) Setting up binary search array for LIST partitioning
+ 3) Setting up array for binary search for RANGE partitioning
+ 4) Setting up key_map's to assist in quick evaluation whether one
+ can deduce anything from a given index of what partition to use
+ 5) Checking whether a set of partitions can be derived from a range on
+ a field in the partition function.
+ As part of doing this there is also a great number of error controls.
+ This is actually the place where most of the things are checked for
+ partition information when creating a table.
+ Things that are checked includes
+ 1) No NULLable fields in partition function
+ 2) All fields of partition function in Primary keys and unique indexes
+ (if not supported)
+ 3) No fields in partition function that are BLOB's or VARCHAR with a
+ collation other than the binary collation.
+
+
+
+ Create an array of partition fields (NULL terminated). Before this method
+ is called fix_fields or find_table_in_sef has been called to set
+ GET_FIXED_FIELDS_FLAG on all fields that are part of the partition
+ function.
+ SYNOPSIS
+ set_up_field_array()
+ table TABLE object for which partition fields are set-up
+ sub_part Is the table subpartitioned as well
+ RETURN VALUE
+ TRUE Error, some field didn't meet requirements
+ FALSE Ok, partition field array set-up
+ DESCRIPTION
+ This method is used to set-up both partition and subpartitioning
+ field array and used for all types of partitioning.
+ It is part of the logic around fix_partition_func.
+*/
+static bool set_up_field_array(TABLE *table,
+ bool sub_part)
+{
+ Field **ptr, *field, **field_array;
+ uint no_fields= 0, size_field_array, i= 0;
+ partition_info *part_info= table->s->part_info;
+ int result= FALSE;
+ DBUG_ENTER("set_up_field_array");
+
+ ptr= table->field;
+ while ((field= *(ptr++)))
+ {
+ if (field->flags & GET_FIXED_FIELDS_FLAG)
+ no_fields++;
+ }
+ size_field_array= (no_fields+1)*sizeof(Field*);
+ field_array= (Field**)sql_alloc(size_field_array);
+ if (unlikely(!field_array))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), size_field_array);
+ result= TRUE;
+ }
+ ptr= table->field;
+ while ((field= *(ptr++)))
+ {
+ if (field->flags & GET_FIXED_FIELDS_FLAG)
+ {
+ field->flags&= ~GET_FIXED_FIELDS_FLAG;
+ field->flags|= FIELD_IN_PART_FUNC_FLAG;
+ if (likely(!result))
+ {
+ field_array[i++]= field;
+
+ /*
+ We check that the fields are proper. It is required for each
+ field in a partition function to:
+ 1) Not be a BLOB of any type
+ A BLOB takes too long time to evaluate so we don't want it for
+ performance reasons.
+ 2) Not be a VARCHAR other than VARCHAR with a binary collation
+ A VARCHAR with character sets can have several values being
+ equal with different number of spaces or NULL's. This is not a
+ good ground for a safe and exact partition function. Thus it is
+ not allowed in partition functions.
+ */
+
+ if (unlikely(field->flags & BLOB_FLAG))
+ {
+ my_error(ER_BLOB_FIELD_IN_PART_FUNC_ERROR, MYF(0));
+ result= TRUE;
+ }
+ else if (unlikely((!field->flags & BINARY_FLAG) &&
+ field->real_type() == MYSQL_TYPE_VARCHAR))
+ {
+ my_error(ER_CHAR_SET_IN_PART_FIELD_ERROR, MYF(0));
+ result= TRUE;
+ }
+ }
+ }
+ }
+ field_array[no_fields]= 0;
+ if (!sub_part)
+ {
+ part_info->part_field_array= field_array;
+ part_info->no_part_fields= no_fields;
+ }
+ else
+ {
+ part_info->subpart_field_array= field_array;
+ part_info->no_subpart_fields= no_fields;
+ }
+ DBUG_RETURN(result);
+}
+
+
+/*
+ Create a field array including all fields of both the partitioning and the
+ subpartitioning functions.
+ SYNOPSIS
+ create_full_part_field_array()
+ table TABLE object for which partition fields are set-up
+ part_info Reference to partitioning data structure
+ RETURN VALUE
+ TRUE Memory allocation of field array failed
+ FALSE Ok
+ DESCRIPTION
+ If there is no subpartitioning then the same array is used as for the
+ partitioning. Otherwise a new array is built up using the flag
+ FIELD_IN_PART_FUNC in the field object.
+ This function is called from fix_partition_func
+*/
+
+static bool create_full_part_field_array(TABLE *table,
+ partition_info *part_info)
+{
+ bool result= FALSE;
+ DBUG_ENTER("create_full_part_field_array");
+
+ if (!is_sub_partitioned(part_info))
+ {
+ part_info->full_part_field_array= part_info->part_field_array;
+ part_info->no_full_part_fields= part_info->no_part_fields;
+ }
+ else
+ {
+ Field **ptr, *field, **field_array;
+ uint no_part_fields=0, size_field_array;
+ ptr= table->field;
+ while ((field= *(ptr++)))
+ {
+ if (field->flags & FIELD_IN_PART_FUNC_FLAG)
+ no_part_fields++;
+ }
+ size_field_array= (no_part_fields+1)*sizeof(Field*);
+ field_array= (Field**)sql_alloc(size_field_array);
+ if (unlikely(!field_array))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), size_field_array);
+ result= TRUE;
+ goto end;
+ }
+ no_part_fields= 0;
+ ptr= table->field;
+ while ((field= *(ptr++)))
+ {
+ if (field->flags & FIELD_IN_PART_FUNC_FLAG)
+ field_array[no_part_fields++]= field;
+ }
+ field_array[no_part_fields]=0;
+ part_info->full_part_field_array= field_array;
+ part_info->no_full_part_fields= no_part_fields;
+ }
+end:
+ DBUG_RETURN(result);
+}
+
+
+/*
+ These support routines is used to set/reset an indicator of all fields
+ in a certain key. It is used in conjunction with another support routine
+ that traverse all fields in the PF to find if all or some fields in the
+ PF is part of the key. This is used to check primary keys and unique
+ keys involve all fields in PF (unless supported) and to derive the
+ key_map's used to quickly decide whether the index can be used to
+ derive which partitions are needed to scan.
+
+
+
+ Clear flag GET_FIXED_FIELDS_FLAG in all fields of a key previously set by
+ set_indicator_in_key_fields (always used in pairs).
+ SYNOPSIS
+ clear_indicator_in_key_fields()
+ key_info Reference to find the key fields
+*/
+
+static void clear_indicator_in_key_fields(KEY *key_info)
+{
+ KEY_PART_INFO *key_part;
+ uint key_parts= key_info->key_parts, i;
+ for (i= 0, key_part=key_info->key_part; i < key_parts; i++, key_part++)
+ key_part->field->flags&= (~GET_FIXED_FIELDS_FLAG);
+}
+
+
+/*
+ Set flag GET_FIXED_FIELDS_FLAG in all fields of a key.
+ SYNOPSIS
+ set_indicator_in_key_fields
+ key_info Reference to find the key fields
+*/
+
+static void set_indicator_in_key_fields(KEY *key_info)
+{
+ KEY_PART_INFO *key_part;
+ uint key_parts= key_info->key_parts, i;
+ for (i= 0, key_part=key_info->key_part; i < key_parts; i++, key_part++)
+ key_part->field->flags|= GET_FIXED_FIELDS_FLAG;
+}
+
+
+/*
+ Check if all or some fields in partition field array is part of a key
+ previously used to tag key fields.
+ SYNOPSIS
+ check_fields_in_PF()
+ ptr Partition field array
+ all_fields Is all fields of partition field array used in key
+ some_fields Is some fields of partition field array used in key
+ RETURN VALUE
+ all_fields, some_fields
+*/
+
+static void check_fields_in_PF(Field **ptr, bool *all_fields,
+ bool *some_fields)
+{
+ DBUG_ENTER("check_fields_in_PF");
+ *all_fields= TRUE;
+ *some_fields= FALSE;
+ do
+ {
+ /* Check if the field of the PF is part of the current key investigated */
+ if ((*ptr)->flags & GET_FIXED_FIELDS_FLAG)
+ *some_fields= TRUE;
+ else
+ *all_fields= FALSE;
+ } while (*(++ptr));
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Clear flag GET_FIXED_FIELDS_FLAG in all fields of the table.
+ This routine is used for error handling purposes.
+ SYNOPSIS
+ clear_field_flag()
+ table TABLE object for which partition fields are set-up
+*/
+
+static void clear_field_flag(TABLE *table)
+{
+ Field **ptr;
+ DBUG_ENTER("clear_field_flag");
+
+ for (ptr= table->field; *ptr; ptr++)
+ (*ptr)->flags&= (~GET_FIXED_FIELDS_FLAG);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ This routine sets-up the partition field array for KEY partitioning, it
+ also verifies that all fields in the list of fields is actually a part of
+ the table.
+ SYNOPSIS
+ handle_list_of_fields()
+ it A list of field names for the partition function
+ table TABLE object for which partition fields are set-up
+ part_info Reference to partitioning data structure
+ sub_part Is the table subpartitioned as well
+ RETURN VALUE
+ TRUE Fields in list of fields not part of table
+ FALSE All fields ok and array created
+ DESCRIPTION
+ find_field_in_table_sef finds the field given its name. All fields get
+ GET_FIXED_FIELDS_FLAG set.
+*/
+
+static bool handle_list_of_fields(List_iterator<char> it,
+ TABLE *table,
+ partition_info *part_info,
+ bool sub_part)
+{
+ Field *field;
+ bool result;
+ char *field_name;
+ DBUG_ENTER("handle_list_of_fields");
+
+ while ((field_name= it++))
+ {
+ field= find_field_in_table_sef(table, field_name);
+ if (likely(field != 0))
+ field->flags|= GET_FIXED_FIELDS_FLAG;
+ else
+ {
+ my_error(ER_FIELD_NOT_FOUND_PART_ERROR, MYF(0));
+ clear_field_flag(table);
+ result= TRUE;
+ goto end;
+ }
+ }
+ result= set_up_field_array(table, sub_part);
+end:
+ DBUG_RETURN(result);
+}
+
+
+/*
+ This function is used to build an array of partition fields for the
+ partitioning function and subpartitioning function. The partitioning
+ function is an item tree that must reference at least one field in the
+ table. This is checked first in the parser that the function doesn't
+ contain non-cacheable parts (like a random function) and by checking
+ here that the function isn't a constant function.
+ SYNOPSIS
+ fix_fields_part_func()
+ thd The thread object
+ tables A list of one table, the partitioned table
+ func_expr The item tree reference of the partition function
+ part_info Reference to partitioning data structure
+ sub_part Is the table subpartitioned as well
+ RETURN VALUE
+ TRUE An error occurred, something was wrong with the
+ partition function.
+ FALSE Ok, a partition field array was created
+ DESCRIPTION
+ The function uses a new feature in fix_fields where the flag
+ GET_FIXED_FIELDS_FLAG is set for all fields in the item tree.
+ This field must always be reset before returning from the function
+ since it is used for other purposes as well.
+*/
+
+static bool fix_fields_part_func(THD *thd, TABLE_LIST *tables,
+ Item* func_expr, partition_info *part_info,
+ bool sub_part)
+{
+ /*
+ Calculate the number of fields in the partition function.
+ Use it allocate memory for array of Field pointers.
+ Initialise array of field pointers. Use information set when
+ calling fix_fields and reset it immediately after.
+ The get_fields_in_item_tree activates setting of bit in flags
+ on the field object.
+ */
+
+ bool result= TRUE;
+ TABLE *table= tables->table;
+ TABLE_LIST *save_list;
+ int error;
+ Name_resolution_context *context= &thd->lex->current_select->context;
+ DBUG_ENTER("fix_fields_part_func");
+
+ table->map= 1; //To ensure correct calculation of const item
+ table->get_fields_in_item_tree= TRUE;
+ save_list= context->table_list;
+ context->table_list= tables;
+ thd->where= "partition function";
+ error= func_expr->fix_fields(thd, (Item**)0);
+ context->table_list= save_list;
+ if (unlikely(error))
+ {
+ DBUG_PRINT("info", ("Field in partition function not part of table"));
+ clear_field_flag(table);
+ goto end;
+ }
+ if (unlikely(func_expr->const_item()))
+ {
+ my_error(ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR, MYF(0));
+ clear_field_flag(table);
+ goto end;
+ }
+ result= set_up_field_array(table, sub_part);
+end:
+ table->get_fields_in_item_tree= FALSE;
+ table->map= 0; //Restore old value
+ DBUG_RETURN(result);
+}
+
+
+/*
+ This function verifies that if there is a primary key that it contains
+ all the fields of the partition function.
+ This is a temporary limitation that will hopefully be removed after a
+ while.
+ SYNOPSIS
+ check_primary_key()
+ table TABLE object for which partition fields are set-up
+ RETURN VALUES
+ TRUE Not all fields in partitioning function was part
+ of primary key
+ FALSE Ok, all fields of partitioning function were part
+ of primary key
+*/
+
+static bool check_primary_key(TABLE *table)
+{
+ uint primary_key= table->s->primary_key;
+ bool all_fields, some_fields, result= FALSE;
+ DBUG_ENTER("check_primary_key");
+
+ if (primary_key < MAX_KEY)
+ {
+ set_indicator_in_key_fields(table->key_info+primary_key);
+ check_fields_in_PF(table->s->part_info->full_part_field_array,
+ &all_fields, &some_fields);
+ clear_indicator_in_key_fields(table->key_info+primary_key);
+ if (unlikely(!all_fields))
+ {
+ my_error(ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF,MYF(0),"PRIMARY KEY");
+ result= TRUE;
+ }
+ }
+ DBUG_RETURN(result);
+}
+
+
+/*
+ This function verifies that if there is a unique index that it contains
+ all the fields of the partition function.
+ This is a temporary limitation that will hopefully be removed after a
+ while.
+ SYNOPSIS
+ check_unique_keys()
+ table TABLE object for which partition fields are set-up
+ RETURN VALUES
+ TRUE Not all fields in partitioning function was part
+ of all unique keys
+ FALSE Ok, all fields of partitioning function were part
+ of unique keys
+*/
+
+static bool check_unique_keys(TABLE *table)
+{
+ bool all_fields, some_fields, result= FALSE;
+ uint keys= table->s->keys, i;
+ DBUG_ENTER("check_unique_keys");
+ for (i= 0; i < keys; i++)
+ {
+ if (table->key_info[i].flags & HA_NOSAME) //Unique index
+ {
+ set_indicator_in_key_fields(table->key_info+i);
+ check_fields_in_PF(table->s->part_info->full_part_field_array,
+ &all_fields, &some_fields);
+ clear_indicator_in_key_fields(table->key_info+i);
+ if (unlikely(!all_fields))
+ {
+ my_error(ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF,MYF(0),"UNIQUE INDEX");
+ result= TRUE;
+ break;
+ }
+ }
+ }
+ DBUG_RETURN(result);
+}
+
+
+/*
+ An important optimisation is whether a range on a field can select a subset
+ of the partitions.
+ A prerequisite for this to happen is that the PF is a growing function OR
+ a shrinking function.
+ This can never happen for a multi-dimensional PF. Thus this can only happen
+ with PF with at most one field involved in the PF.
+ The idea is that if the function is a growing function and you know that
+ the field of the PF is 4 <= A <= 6 then we can convert this to a range
+ in the PF instead by setting the range to PF(4) <= PF(A) <= PF(6). In the
+ case of RANGE PARTITIONING and LIST PARTITIONING this can be used to
+ calculate a set of partitions rather than scanning all of them.
+ Thus the following prerequisites are there to check if sets of partitions
+ can be found.
+ 1) Only possible for RANGE and LIST partitioning (not for subpartitioning)
+ 2) Only possible if PF only contains 1 field
+ 3) Possible if PF is a growing function of the field
+ 4) Possible if PF is a shrinking function of the field
+ OBSERVATION:
+ 1) IF f1(A) is a growing function AND f2(A) is a growing function THEN
+ f1(A) + f2(A) is a growing function
+ f1(A) * f2(A) is a growing function if f1(A) >= 0 and f2(A) >= 0
+ 2) IF f1(A) is a growing function and f2(A) is a shrinking function THEN
+ f1(A) / f2(A) is a growing function if f1(A) >= 0 and f2(A) > 0
+ 3) IF A is a growing function then a function f(A) that removes the
+ least significant portion of A is a growing function
+ E.g. DATE(datetime) is a growing function
+ MONTH(datetime) is not a growing/shrinking function
+ 4) IF f1(A) is a growing function and f2(A) is a growing function THEN
+ f1(f2(A)) and f2(f1(A)) are also growing functions
+ 5) IF f1(A) is a shrinking function and f2(A) is a growing function THEN
+ f1(f2(A)) is a shrinking function and f2(f1(A)) is a shrinking function
+ 6) f1(A) = A is a growing function
+ 7) f1(A) = A*a + b (where a and b are constants) is a growing function
+
+ By analysing the item tree of the PF we can use these deducements and
+ derive whether the PF is a growing function or a shrinking function or
+ neither of it.
+
+ If the PF is range capable then a flag is set on the table object
+ indicating this to notify that we can use also ranges on the field
+ of the PF to deduce a set of partitions if the fields of the PF were
+ not all fully bound.
+ SYNOPSIS
+ check_range_capable_PF()
+ table TABLE object for which partition fields are set-up
+ DESCRIPTION
+ Support for this is not implemented yet.
+*/
+
+void check_range_capable_PF(TABLE *table)
+{
+ DBUG_ENTER("check_range_capable_PF");
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Set up partition key maps
+ SYNOPSIS
+ set_up_partition_key_maps()
+ table TABLE object for which partition fields are set-up
+ part_info Reference to partitioning data structure
+ RETURN VALUES
+ None
+ DESCRIPTION
+ This function sets up a couple of key maps to be able to quickly check
+ if an index ever can be used to deduce the partition fields or even
+ a part of the fields of the partition function.
+ We set up the following key_map's.
+ PF = Partition Function
+ 1) All fields of the PF is set even by equal on the first fields in the
+ key
+ 2) All fields of the PF is set if all fields of the key is set
+ 3) At least one field in the PF is set if all fields is set
+ 4) At least one field in the PF is part of the key
+*/
+
+static void set_up_partition_key_maps(TABLE *table,
+ partition_info *part_info)
+{
+ uint keys= table->s->keys, i;
+ bool all_fields, some_fields;
+ DBUG_ENTER("set_up_partition_key_maps");
+
+ part_info->all_fields_in_PF.clear_all();
+ part_info->all_fields_in_PPF.clear_all();
+ part_info->all_fields_in_SPF.clear_all();
+ part_info->some_fields_in_PF.clear_all();
+ for (i= 0; i < keys; i++)
+ {
+ set_indicator_in_key_fields(table->key_info+i);
+ check_fields_in_PF(part_info->full_part_field_array,
+ &all_fields, &some_fields);
+ if (all_fields)
+ part_info->all_fields_in_PF.set_bit(i);
+ if (some_fields)
+ part_info->some_fields_in_PF.set_bit(i);
+ if (is_sub_partitioned(part_info))
+ {
+ check_fields_in_PF(part_info->part_field_array,
+ &all_fields, &some_fields);
+ if (all_fields)
+ part_info->all_fields_in_PPF.set_bit(i);
+ check_fields_in_PF(part_info->subpart_field_array,
+ &all_fields, &some_fields);
+ if (all_fields)
+ part_info->all_fields_in_SPF.set_bit(i);
+ }
+ clear_indicator_in_key_fields(table->key_info+i);
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Set-up all function pointers for calculation of partition id,
+ subpartition id and the upper part in subpartitioning. This is to speed up
+ execution of get_partition_id which is executed once every record to be
+ written and deleted and twice for updates.
+ SYNOPSIS
+ set_up_partition_function_pointers()
+ part_info Reference to partitioning data structure
+*/
+
+static void set_up_partition_func_pointers(partition_info *part_info)
+{
+ if (is_sub_partitioned(part_info))
+ {
+ if (part_info->part_type == RANGE_PARTITION)
+ {
+ part_info->get_part_partition_id= get_partition_id_range;
+ if (part_info->list_of_subpart_fields)
+ {
+ if (part_info->linear_hash_ind)
+ {
+ part_info->get_partition_id= get_partition_id_range_sub_linear_key;
+ part_info->get_subpartition_id= get_partition_id_linear_key_sub;
+ }
+ else
+ {
+ part_info->get_partition_id= get_partition_id_range_sub_key;
+ part_info->get_subpartition_id= get_partition_id_key_sub;
+ }
+ }
+ else
+ {
+ if (part_info->linear_hash_ind)
+ {
+ part_info->get_partition_id= get_partition_id_range_sub_linear_hash;
+ part_info->get_subpartition_id= get_partition_id_linear_hash_sub;
+ }
+ else
+ {
+ part_info->get_partition_id= get_partition_id_range_sub_hash;
+ part_info->get_subpartition_id= get_partition_id_hash_sub;
+ }
+ }
+ }
+ else //LIST Partitioning
+ {
+ part_info->get_part_partition_id= get_partition_id_list;
+ if (part_info->list_of_subpart_fields)
+ {
+ if (part_info->linear_hash_ind)
+ {
+ part_info->get_partition_id= get_partition_id_list_sub_linear_key;
+ part_info->get_subpartition_id= get_partition_id_linear_key_sub;
+ }
+ else
+ {
+ part_info->get_partition_id= get_partition_id_list_sub_key;
+ part_info->get_subpartition_id= get_partition_id_key_sub;
+ }
+ }
+ else
+ {
+ if (part_info->linear_hash_ind)
+ {
+ part_info->get_partition_id= get_partition_id_list_sub_linear_hash;
+ part_info->get_subpartition_id= get_partition_id_linear_hash_sub;
+ }
+ else
+ {
+ part_info->get_partition_id= get_partition_id_list_sub_hash;
+ part_info->get_subpartition_id= get_partition_id_hash_sub;
+ }
+ }
+ }
+ }
+ else //No subpartitioning
+ {
+ part_info->get_part_partition_id= NULL;
+ part_info->get_subpartition_id= NULL;
+ if (part_info->part_type == RANGE_PARTITION)
+ part_info->get_partition_id= get_partition_id_range;
+ else if (part_info->part_type == LIST_PARTITION)
+ part_info->get_partition_id= get_partition_id_list;
+ else //HASH partitioning
+ {
+ if (part_info->list_of_part_fields)
+ {
+ if (part_info->linear_hash_ind)
+ part_info->get_partition_id= get_partition_id_linear_key_nosub;
+ else
+ part_info->get_partition_id= get_partition_id_key_nosub;
+ }
+ else
+ {
+ if (part_info->linear_hash_ind)
+ part_info->get_partition_id= get_partition_id_linear_hash_nosub;
+ else
+ part_info->get_partition_id= get_partition_id_hash_nosub;
+ }
+ }
+ }
+}
+
+
+/*
+ For linear hashing we need a mask which is on the form 2**n - 1 where
+ 2**n >= no_parts. Thus if no_parts is 6 then mask is 2**3 - 1 = 8 - 1 = 7.
+ SYNOPSIS
+ set_linear_hash_mask()
+ part_info Reference to partitioning data structure
+ no_parts Number of parts in linear hash partitioning
+*/
+
+static void set_linear_hash_mask(partition_info *part_info, uint no_parts)
+{
+ uint mask;
+ for (mask= 1; mask < no_parts; mask<<=1)
+ ;
+ part_info->linear_hash_mask= mask - 1;
+}
+
+
+/*
+ This function calculates the partition id provided the result of the hash
+ function using linear hashing parameters, mask and number of partitions.
+ SYNOPSIS
+ get_part_id_from_linear_hash()
+ hash_value Hash value calculated by HASH function or KEY function
+ mask Mask calculated previously by set_linear_hash_mask
+ no_parts Number of partitions in HASH partitioned part
+ RETURN VALUE
+ part_id The calculated partition identity (starting at 0)
+ DESCRIPTION
+ The partition is calculated according to the theory of linear hashing.
+ See e.g. Linear hashing: a new tool for file and table addressing,
+ Reprinted from VLDB-80 in Readings Database Systems, 2nd ed, M. Stonebraker
+ (ed.), Morgan Kaufmann 1994.
+*/
+
+static uint32 get_part_id_from_linear_hash(longlong hash_value, uint mask,
+ uint no_parts)
+{
+ uint32 part_id= (uint32)(hash_value & mask);
+ if (part_id >= no_parts)
+ {
+ uint new_mask= ((mask + 1) >> 1) - 1;
+ part_id= hash_value & new_mask;
+ }
+ return part_id;
+}
+
+/*
+ This function is called as part of opening the table by opening the .frm
+ file. It is a part of CREATE TABLE to do this so it is quite permissible
+ that errors due to erroneus syntax isn't found until we come here.
+ If the user has used a non-existing field in the table is one such example
+ of an error that is not discovered until here.
+ SYNOPSIS
+ fix_partition_func()
+ thd The thread object
+ name The name of the partitioned table
+ table TABLE object for which partition fields are set-up
+ RETURN VALUE
+ TRUE
+ FALSE
+ DESCRIPTION
+ The name parameter contains the full table name and is used to get the
+ database name of the table which is used to set-up a correct
+ TABLE_LIST object for use in fix_fields.
+*/
+
+bool fix_partition_func(THD *thd, const char* name, TABLE *table)
+{
+ bool result= TRUE;
+ uint dir_length, home_dir_length;
+ TABLE_LIST tables;
+ TABLE_SHARE *share= table->s;
+ char db_name_string[FN_REFLEN];
+ char* db_name;
+ partition_info *part_info= share->part_info;
+ ulong save_set_query_id= thd->set_query_id;
+ DBUG_ENTER("fix_partition_func");
+
+ thd->set_query_id= 0;
+ /*
+ Set-up the TABLE_LIST object to be a list with a single table
+ Set the object to zero to create NULL pointers and set alias
+ and real name to table name and get database name from file name.
+ */
+
+ bzero((void*)&tables, sizeof(TABLE_LIST));
+ tables.alias= tables.table_name= (char*)share->table_name;
+ tables.table= table;
+ strmov(db_name_string, name);
+ dir_length= dirname_length(db_name_string);
+ db_name_string[dir_length - 1]= 0;
+ home_dir_length= dirname_length(db_name_string);
+ db_name= &db_name_string[home_dir_length];
+ tables.db= db_name;
+
+ part_info->no_full_parts= part_info->no_parts;
+ if (is_sub_partitioned(part_info))
+ {
+ DBUG_ASSERT(part_info->subpart_type == HASH_PARTITION);
+ part_info->no_full_parts= part_info->no_parts*part_info->no_subparts;
+ /*
+ Subpartition is defined. We need to verify that subpartitioning
+ function is correct.
+ */
+ if (part_info->linear_hash_ind)
+ set_linear_hash_mask(part_info, part_info->no_subparts);
+ if (part_info->list_of_subpart_fields)
+ {
+ List_iterator<char> it(part_info->subpart_field_list);
+ if (unlikely(handle_list_of_fields(it, table, part_info, TRUE)))
+ goto end;
+ }
+ else
+ {
+ if (unlikely(fix_fields_part_func(thd, &tables,
+ part_info->subpart_expr, part_info, TRUE)))
+ goto end;
+ if (unlikely(part_info->subpart_expr->result_type() != INT_RESULT))
+ {
+ my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0),
+ "SUBPARTITION");
+ goto end;
+ }
+ }
+ }
+ DBUG_ASSERT(part_info->part_type != NOT_A_PARTITION);
+ /*
+ Partition is defined. We need to verify that partitioning
+ function is correct.
+ */
+ if (part_info->part_type == HASH_PARTITION)
+ {
+ if (part_info->linear_hash_ind)
+ set_linear_hash_mask(part_info, part_info->no_parts);
+ if (part_info->list_of_part_fields)
+ {
+ List_iterator<char> it(part_info->part_field_list);
+ if (unlikely(handle_list_of_fields(it, table, part_info, FALSE)))
+ goto end;
+ }
+ else
+ {
+ if (unlikely(fix_fields_part_func(thd, &tables, part_info->part_expr,
+ part_info, FALSE)))
+ goto end;
+ if (unlikely(part_info->part_expr->result_type() != INT_RESULT))
+ {
+ my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), part_str);
+ goto end;
+ }
+ part_info->part_result_type= INT_RESULT;
+ }
+ }
+ else
+ {
+ char *error_str;
+ if (part_info->part_type == RANGE_PARTITION)
+ {
+ error_str= range_str;
+ if (unlikely(check_range_constants(part_info)))
+ goto end;
+ }
+ else if (part_info->part_type == LIST_PARTITION)
+ {
+ error_str= list_str;
+ if (unlikely(check_list_constants(part_info)))
+ goto end;
+ }
+ else
+ {
+ DBUG_ASSERT(0);
+ my_error(ER_INCONSISTENT_PARTITION_INFO_ERROR, MYF(0));
+ goto end;
+ }
+ if (unlikely(part_info->no_parts < 1))
+ {
+ my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), error_str);
+ goto end;
+ }
+ if (unlikely(fix_fields_part_func(thd, &tables, part_info->part_expr,
+ part_info, FALSE)))
+ goto end;
+ if (unlikely(part_info->part_expr->result_type() != INT_RESULT))
+ {
+ my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), part_str);
+ goto end;
+ }
+ }
+ if (unlikely(create_full_part_field_array(table, part_info)))
+ goto end;
+ if (unlikely(check_primary_key(table)))
+ goto end;
+ if (unlikely((!table->file->partition_flags() & HA_CAN_PARTITION_UNIQUE) &&
+ check_unique_keys(table)))
+ goto end;
+ check_range_capable_PF(table);
+ set_up_partition_key_maps(table, part_info);
+ set_up_partition_func_pointers(part_info);
+ result= FALSE;
+end:
+ thd->set_query_id= save_set_query_id;
+ DBUG_RETURN(result);
+}
+
+
+/*
+ The code below is support routines for the reverse parsing of the
+ partitioning syntax. This feature is very useful to generate syntax for
+ all default values to avoid all default checking when opening the frm
+ file. It is also used when altering the partitioning by use of various
+ ALTER TABLE commands. Finally it is used for SHOW CREATE TABLES.
+*/
+
+static int add_write(File fptr, const char *buf, uint len)
+{
+ uint len_written= my_write(fptr, buf, len, MYF(0));
+ if (likely(len == len_written))
+ return 0;
+ else
+ return 1;
+}
+
+static int add_string(File fptr, const char *string)
+{
+ return add_write(fptr, string, strlen(string));
+}
+
+static int add_string_len(File fptr, const char *string, uint len)
+{
+ return add_write(fptr, string, len);
+}
+
+static int add_space(File fptr)
+{
+ return add_string(fptr, space_str);
+}
+
+static int add_comma(File fptr)
+{
+ return add_string(fptr, comma_str);
+}
+
+static int add_equal(File fptr)
+{
+ return add_string(fptr, equal_str);
+}
+
+static int add_end_parenthesis(File fptr)
+{
+ return add_string(fptr, end_paren_str);
+}
+
+static int add_begin_parenthesis(File fptr)
+{
+ return add_string(fptr, begin_paren_str);
+}
+
+static int add_part_key_word(File fptr, const char *key_string)
+{
+ int err= add_string(fptr, key_string);
+ err+= add_space(fptr);
+ return err + add_begin_parenthesis(fptr);
+}
+
+static int add_hash(File fptr)
+{
+ return add_part_key_word(fptr, hash_str);
+}
+
+static int add_partition(File fptr)
+{
+ strxmov(buff, part_str, space_str, NullS);
+ return add_string(fptr, buff);
+}
+
+static int add_subpartition(File fptr)
+{
+ int err= add_string(fptr, sub_str);
+ return err + add_partition(fptr);
+}
+
+static int add_partition_by(File fptr)
+{
+ strxmov(buff, part_str, space_str, by_str, space_str, NullS);
+ return add_string(fptr, buff);
+}
+
+static int add_subpartition_by(File fptr)
+{
+ int err= add_string(fptr, sub_str);
+ return err + add_partition_by(fptr);
+}
+
+static int add_key_partition(File fptr, List<char> field_list)
+{
+ uint i, no_fields;
+ int err;
+ List_iterator<char> part_it(field_list);
+ err= add_part_key_word(fptr, key_str);
+ no_fields= field_list.elements;
+ i= 0;
+ do
+ {
+ const char *field_str= part_it++;
+ err+= add_string(fptr, field_str);
+ if (i != (no_fields-1))
+ err+= add_comma(fptr);
+ } while (++i < no_fields);
+ return err;
+}
+
+static int add_int(File fptr, longlong number)
+{
+ llstr(number, buff);
+ return add_string(fptr, buff);
+}
+
+static int add_keyword_string(File fptr, const char *keyword,
+ const char *keystr)
+{
+ int err= add_string(fptr, keyword);
+ err+= add_space(fptr);
+ err+= add_equal(fptr);
+ err+= add_space(fptr);
+ err+= add_string(fptr, keystr);
+ return err + add_space(fptr);
+}
+
+static int add_keyword_int(File fptr, const char *keyword, longlong num)
+{
+ int err= add_string(fptr, keyword);
+ err+= add_space(fptr);
+ err+= add_equal(fptr);
+ err+= add_space(fptr);
+ err+= add_int(fptr, num);
+ return err + add_space(fptr);
+}
+
+static int add_engine(File fptr, enum db_type engine_type)
+{
+ const char *engine_str= ha_get_storage_engine(engine_type);
+ int err= add_string(fptr, "ENGINE = ");
+ return err + add_string(fptr, engine_str);
+ return err;
+}
+
+static int add_partition_options(File fptr, partition_element *p_elem)
+{
+ int err= 0;
+ if (p_elem->tablespace_name)
+ err+= add_keyword_string(fptr,"TABLESPACE",p_elem->tablespace_name);
+ if (p_elem->nodegroup_id != UNDEF_NODEGROUP)
+ err+= add_keyword_int(fptr,"NODEGROUP",(longlong)p_elem->nodegroup_id);
+ if (p_elem->part_max_rows)
+ err+= add_keyword_int(fptr,"MAX_ROWS",(longlong)p_elem->part_max_rows);
+ if (p_elem->part_min_rows)
+ err+= add_keyword_int(fptr,"MIN_ROWS",(longlong)p_elem->part_min_rows);
+ if (p_elem->data_file_name)
+ err+= add_keyword_string(fptr,"DATA DIRECTORY",p_elem->data_file_name);
+ if (p_elem->index_file_name)
+ err+= add_keyword_string(fptr,"INDEX DIRECTORY",p_elem->index_file_name);
+ if (p_elem->part_comment)
+ err+= add_keyword_string(fptr, "COMMENT",p_elem->part_comment);
+ return err + add_engine(fptr,p_elem->engine_type);
+}
+
+static int add_partition_values(File fptr, partition_info *part_info,
+ partition_element *p_elem)
+{
+ int err= 0;
+ if (part_info->part_type == RANGE_PARTITION)
+ {
+ err+= add_string(fptr, "VALUES LESS THAN ");
+ if (p_elem->range_expr)
+ {
+ err+= add_begin_parenthesis(fptr);
+ err+= add_int(fptr,p_elem->range_expr->val_int());
+ err+= add_end_parenthesis(fptr);
+ }
+ else
+ err+= add_string(fptr, "MAXVALUE");
+ }
+ else if (part_info->part_type == LIST_PARTITION)
+ {
+ uint i;
+ List_iterator<Item> list_expr_it(p_elem->list_expr_list);
+ err+= add_string(fptr, "VALUES IN ");
+ uint no_items= p_elem->list_expr_list.elements;
+ err+= add_begin_parenthesis(fptr);
+ i= 0;
+ do
+ {
+ Item *list_expr= list_expr_it++;
+ err+= add_int(fptr, list_expr->val_int());
+ if (i != (no_items-1))
+ err+= add_comma(fptr);
+ } while (++i < no_items);
+ err+= add_end_parenthesis(fptr);
+ }
+ return err + add_space(fptr);
+}
+
+/*
+ Generate the partition syntax from the partition data structure.
+ Useful for support of generating defaults, SHOW CREATE TABLES
+ and easy partition management.
+ SYNOPSIS
+ generate_partition_syntax()
+ part_info The partitioning data structure
+ buf_length A pointer to the returned buffer length
+ use_sql_alloc Allocate buffer from sql_alloc if true
+ otherwise use my_malloc
+ RETURN VALUES
+ NULL error
+ buf, buf_length Buffer and its length
+ DESCRIPTION
+ Here we will generate the full syntax for the given command where all
+ defaults have been expanded. By so doing the it is also possible to
+ make lots of checks of correctness while at it.
+ This could will also be reused for SHOW CREATE TABLES and also for all
+ type ALTER TABLE commands focusing on changing the PARTITION structure
+ in any fashion.
+
+ The implementation writes the syntax to a temporary file (essentially
+ an abstraction of a dynamic array) and if all writes goes well it
+ allocates a buffer and writes the syntax into this one and returns it.
+
+ As a security precaution the file is deleted before writing into it. This
+ means that no other processes on the machine can open and read the file
+ while this processing is ongoing.
+
+ The code is optimised for minimal code size since it is not used in any
+ common queries.
+*/
+
+char *generate_partition_syntax(partition_info *part_info,
+ uint *buf_length,
+ bool use_sql_alloc)
+{
+ uint i,j, no_parts, no_subparts;
+ partition_element *part_elem;
+ ulonglong buffer_length;
+ char path[FN_REFLEN];
+ int err= 0;
+ DBUG_ENTER("generate_partition_syntax");
+ File fptr;
+ char *buf= NULL; //Return buffer
+ const char *file_name;
+ sprintf(path, "%s_%lx_%lx", "part_syntax", current_pid,
+ current_thd->thread_id);
+ fn_format(path,path,mysql_tmpdir,".psy", MY_REPLACE_EXT);
+ file_name= &path[0];
+ DBUG_PRINT("info", ("File name = %s", file_name));
+ if (unlikely(((fptr= my_open(file_name,O_CREAT|O_RDWR, MYF(MY_WME))) == -1)))
+ DBUG_RETURN(NULL);
+#if defined(MSDOS) || defined(__WIN__) || defined(__EMX__) || defined(OS2)
+#else
+ my_delete(file_name, MYF(0));
+#endif
+ err+= add_space(fptr);
+ err+= add_partition_by(fptr);
+ switch (part_info->part_type)
+ {
+ case RANGE_PARTITION:
+ err+= add_part_key_word(fptr, range_str);
+ break;
+ case LIST_PARTITION:
+ err+= add_part_key_word(fptr, list_str);
+ break;
+ case HASH_PARTITION:
+ if (part_info->linear_hash_ind)
+ err+= add_string(fptr, "LINEAR ");
+ if (part_info->list_of_part_fields)
+ err+= add_key_partition(fptr, part_info->part_field_list);
+ else
+ err+= add_hash(fptr);
+ break;
+ default:
+ DBUG_ASSERT(0);
+ /* We really shouldn't get here, no use in continuing from here */
+ current_thd->fatal_error();
+ DBUG_RETURN(NULL);
+ }
+ if (part_info->part_expr)
+ err+= add_string_len(fptr, part_info->part_func_string,
+ part_info->part_func_len);
+ err+= add_end_parenthesis(fptr);
+ err+= add_space(fptr);
+ if (is_sub_partitioned(part_info))
+ {
+ err+= add_subpartition_by(fptr);
+ /* Must be hash partitioning for subpartitioning */
+ if (part_info->list_of_subpart_fields)
+ err+= add_key_partition(fptr, part_info->subpart_field_list);
+ else
+ err+= add_hash(fptr);
+ if (part_info->subpart_expr)
+ err+= add_string_len(fptr, part_info->subpart_func_string,
+ part_info->subpart_func_len);
+ err+= add_end_parenthesis(fptr);
+ err+= add_space(fptr);
+ }
+ err+= add_begin_parenthesis(fptr);
+ List_iterator<partition_element> part_it(part_info->partitions);
+ no_parts= part_info->no_parts;
+ no_subparts= part_info->no_subparts;
+ i= 0;
+ do
+ {
+ part_elem= part_it++;
+ err+= add_partition(fptr);
+ err+= add_string(fptr, part_elem->partition_name);
+ err+= add_space(fptr);
+ err+= add_partition_values(fptr, part_info, part_elem);
+ if (!is_sub_partitioned(part_info))
+ err+= add_partition_options(fptr, part_elem);
+ if (is_sub_partitioned(part_info))
+ {
+ err+= add_space(fptr);
+ err+= add_begin_parenthesis(fptr);
+ List_iterator<partition_element> sub_it(part_elem->subpartitions);
+ j= 0;
+ do
+ {
+ part_elem= sub_it++;
+ err+= add_subpartition(fptr);
+ err+= add_string(fptr, part_elem->partition_name);
+ err+= add_space(fptr);
+ err+= add_partition_options(fptr, part_elem);
+ if (j != (no_subparts-1))
+ {
+ err+= add_comma(fptr);
+ err+= add_space(fptr);
+ }
+ else
+ err+= add_end_parenthesis(fptr);
+ } while (++j < no_subparts);
+ }
+ if (i != (no_parts-1))
+ {
+ err+= add_comma(fptr);
+ err+= add_space(fptr);
+ }
+ else
+ err+= add_end_parenthesis(fptr);
+ } while (++i < no_parts);
+ if (err)
+ goto close_file;
+ buffer_length= my_seek(fptr, 0L,MY_SEEK_END,MYF(0));
+ if (unlikely(buffer_length == MY_FILEPOS_ERROR))
+ goto close_file;
+ if (unlikely(my_seek(fptr, 0L, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR))
+ goto close_file;
+ *buf_length= (uint)buffer_length;
+ if (use_sql_alloc)
+ buf= sql_alloc(*buf_length+1);
+ else
+ buf= my_malloc(*buf_length+1, MYF(MY_WME));
+ if (!buf)
+ goto close_file;
+
+ if (unlikely(my_read(fptr, buf, *buf_length, MYF(MY_FNABP))))
+ {
+ if (!use_sql_alloc)
+ my_free(buf, MYF(0));
+ else
+ buf= NULL;
+ }
+ else
+ buf[*buf_length]= 0;
+
+close_file:
+ /*
+ Delete the file before closing to ensure the file doesn't get synched
+ to disk unnecessary. We only used the file system as a dynamic array
+ implementation so we are not really interested in getting the file
+ present on disk.
+ This is not possible on Windows so here it has to be done after closing
+ the file. Also on Unix we delete immediately after opening to ensure no
+ other process can read the information written into the file.
+ */
+ my_close(fptr, MYF(0));
+#if defined(MSDOS) || defined(__WIN__) || defined(__EMX__) || defined(OS2)
+ my_delete(file_name, MYF(0));
+#endif
+ DBUG_RETURN(buf);
+}
+
+
+/*
+ Check if partition key fields are modified and if it can be handled by the
+ underlying storage engine.
+ SYNOPSIS
+ partition_key_modified
+ table TABLE object for which partition fields are set-up
+ fields A list of the to be modifed
+ RETURN VALUES
+ TRUE Need special handling of UPDATE
+ FALSE Normal UPDATE handling is ok
+*/
+
+bool partition_key_modified(TABLE *table, List<Item> &fields)
+{
+ List_iterator_fast<Item> f(fields);
+ partition_info *part_info= table->s->part_info;
+ Item_field *item_field;
+ DBUG_ENTER("partition_key_modified");
+ if (!part_info)
+ DBUG_RETURN(FALSE);
+ if (table->file->partition_flags() & HA_CAN_UPDATE_PARTITION_KEY)
+ DBUG_RETURN(FALSE);
+ f.rewind();
+ while ((item_field=(Item_field*) f++))
+ if (item_field->field->flags & FIELD_IN_PART_FUNC_FLAG)
+ DBUG_RETURN(TRUE);
+ DBUG_RETURN(FALSE);
+}
+
+
+/*
+ The next set of functions are used to calculate the partition identity.
+ A handler sets up a variable that corresponds to one of these functions
+ to be able to quickly call it whenever the partition id needs to calculated
+ based on the record in table->record[0] (or set up to fake that).
+ There are 4 functions for hash partitioning and 2 for RANGE/LIST partitions.
+ In addition there are 4 variants for RANGE subpartitioning and 4 variants
+ for LIST subpartitioning thus in total there are 14 variants of this
+ function.
+
+ We have a set of support functions for these 14 variants. There are 4
+ variants of hash functions and there is a function for each. The KEY
+ partitioning uses the function calculate_key_value to calculate the hash
+ value based on an array of fields. The linear hash variants uses the
+ method get_part_id_from_linear_hash to get the partition id using the
+ hash value and some parameters calculated from the number of partitions.
+*/
+
+/*
+ Calculate hash value for KEY partitioning using an array of fields.
+ SYNOPSIS
+ calculate_key_value()
+ field_array An array of the fields in KEY partitioning
+ RETURN VALUE
+ hash_value calculated
+ DESCRIPTION
+ Uses the hash function on the character set of the field. Integer and
+ floating point fields use the binary character set by default.
+*/
+
+static uint32 calculate_key_value(Field **field_array)
+{
+ uint32 hashnr= 0;
+ ulong nr2= 4;
+ do
+ {
+ Field *field= *field_array;
+ if (field->is_null())
+ {
+ hashnr^= (hashnr << 1) | 1;
+ }
+ else
+ {
+ uint len= field->pack_length();
+ ulong nr1= 1;
+ CHARSET_INFO *cs= field->charset();
+ cs->coll->hash_sort(cs, (uchar*)field->ptr, len, &nr1, &nr2);
+ hashnr^= (uint32)nr1;
+ }
+ } while (*(++field_array));
+ return hashnr;
+}
+
+
+/*
+ A simple support function to calculate part_id given local part and
+ sub part.
+ SYNOPSIS
+ get_part_id_for_sub()
+ loc_part_id Local partition id
+ sub_part_id Subpartition id
+ no_subparts Number of subparts
+*/
+
+inline
+static uint32 get_part_id_for_sub(uint32 loc_part_id, uint32 sub_part_id,
+ uint no_subparts)
+{
+ return (uint32)((loc_part_id * no_subparts) + sub_part_id);
+}
+
+
+/*
+ Calculate part_id for (SUB)PARTITION BY HASH
+ SYNOPSIS
+ get_part_id_hash()
+ no_parts Number of hash partitions
+ part_expr Item tree of hash function
+ RETURN VALUE
+ Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_hash(uint no_parts,
+ Item *part_expr)
+{
+ DBUG_ENTER("get_part_id_hash");
+ DBUG_RETURN((uint32)(part_expr->val_int() % no_parts));
+}
+
+
+/*
+ Calculate part_id for (SUB)PARTITION BY LINEAR HASH
+ SYNOPSIS
+ get_part_id_linear_hash()
+ part_info A reference to the partition_info struct where all the
+ desired information is given
+ no_parts Number of hash partitions
+ part_expr Item tree of hash function
+ RETURN VALUE
+ Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_linear_hash(partition_info *part_info,
+ uint no_parts,
+ Item *part_expr)
+{
+ DBUG_ENTER("get_part_id_linear_hash");
+ DBUG_RETURN(get_part_id_from_linear_hash(part_expr->val_int(),
+ part_info->linear_hash_mask,
+ no_parts));
+}
+
+
+/*
+ Calculate part_id for (SUB)PARTITION BY KEY
+ SYNOPSIS
+ get_part_id_key()
+ field_array Array of fields for PARTTION KEY
+ no_parts Number of KEY partitions
+ RETURN VALUE
+ Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_key(Field **field_array,
+ uint no_parts)
+{
+ DBUG_ENTER("get_part_id_key");
+ DBUG_RETURN(calculate_key_value(field_array) & no_parts);
+}
+
+
+/*
+ Calculate part_id for (SUB)PARTITION BY LINEAR KEY
+ SYNOPSIS
+ get_part_id_linear_key()
+ part_info A reference to the partition_info struct where all the
+ desired information is given
+ field_array Array of fields for PARTTION KEY
+ no_parts Number of KEY partitions
+ RETURN VALUE
+ Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_linear_key(partition_info *part_info,
+ Field **field_array,
+ uint no_parts)
+{
+ DBUG_ENTER("get_partition_id_linear_key");
+ DBUG_RETURN(get_part_id_from_linear_hash(calculate_key_value(field_array),
+ part_info->linear_hash_mask,
+ no_parts));
+}
+
+/*
+ This function is used to calculate the partition id where all partition
+ fields have been prepared to point to a record where the partition field
+ values are bound.
+ SYNOPSIS
+ get_partition_id()
+ part_info A reference to the partition_info struct where all the
+ desired information is given
+ part_id The partition id is returned through this pointer
+ RETURN VALUE
+ part_id
+ return TRUE means that the fields of the partition function didn't fit
+ into any partition and thus the values of the PF-fields are not allowed.
+ DESCRIPTION
+ A routine used from write_row, update_row and delete_row from any
+ handler supporting partitioning. It is also a support routine for
+ get_partition_set used to find the set of partitions needed to scan
+ for a certain index scan or full table scan.
+
+ It is actually 14 different variants of this function which are called
+ through a function pointer.
+
+ get_partition_id_list
+ get_partition_id_range
+ get_partition_id_hash_nosub
+ get_partition_id_key_nosub
+ get_partition_id_linear_hash_nosub
+ get_partition_id_linear_key_nosub
+ get_partition_id_range_sub_hash
+ get_partition_id_range_sub_key
+ get_partition_id_range_sub_linear_hash
+ get_partition_id_range_sub_linear_key
+ get_partition_id_list_sub_hash
+ get_partition_id_list_sub_key
+ get_partition_id_list_sub_linear_hash
+ get_partition_id_list_sub_linear_key
+*/
+
+/*
+ This function is used to calculate the main partition to use in the case of
+ subpartitioning and we don't know enough to get the partition identity in
+ total.
+ SYNOPSIS
+ get_part_partition_id()
+ part_info A reference to the partition_info struct where all the
+ desired information is given
+ part_id The partition id is returned through this pointer
+ RETURN VALUE
+ part_id
+ return TRUE means that the fields of the partition function didn't fit
+ into any partition and thus the values of the PF-fields are not allowed.
+ DESCRIPTION
+
+ It is actually 6 different variants of this function which are called
+ through a function pointer.
+
+ get_partition_id_list
+ get_partition_id_range
+ get_partition_id_hash_nosub
+ get_partition_id_key_nosub
+ get_partition_id_linear_hash_nosub
+ get_partition_id_linear_key_nosub
+*/
+
+
+bool get_partition_id_list(partition_info *part_info,
+ uint32 *part_id)
+{
+ DBUG_ENTER("get_partition_id_list");
+ LIST_PART_ENTRY *list_array= part_info->list_array;
+ uint list_index;
+ longlong list_value;
+ uint min_list_index= 0, max_list_index= part_info->no_list_values - 1;
+ longlong part_func_value= part_info->part_expr->val_int();
+ while (max_list_index >= min_list_index)
+ {
+ list_index= (max_list_index + min_list_index) >> 1;
+ list_value= list_array[list_index].list_value;
+ if (list_value < part_func_value)
+ min_list_index= list_index + 1;
+ else if (list_value > part_func_value)
+ max_list_index= list_index - 1;
+ else {
+ *part_id= (uint32)list_array[list_index].partition_id;
+ DBUG_RETURN(FALSE);
+ }
+ }
+ *part_id= 0;
+ DBUG_RETURN(TRUE);
+}
+
+
+bool get_partition_id_range(partition_info *part_info,
+ uint32 *part_id)
+{
+ DBUG_ENTER("get_partition_id_int_range");
+ longlong *range_array= part_info->range_int_array;
+ uint max_partition= part_info->no_parts - 1;
+ uint min_part_id= 0, max_part_id= max_partition, loc_part_id;
+ longlong part_func_value= part_info->part_expr->val_int();
+ while (max_part_id > min_part_id)
+ {
+ loc_part_id= (max_part_id + min_part_id + 1) >> 1;
+ if (range_array[loc_part_id] < part_func_value)
+ min_part_id= loc_part_id + 1;
+ else
+ max_part_id= loc_part_id - 1;
+ }
+ loc_part_id= max_part_id;
+ if (part_func_value >= range_array[loc_part_id])
+ if (loc_part_id != max_partition)
+ loc_part_id++;
+ *part_id= (uint32)loc_part_id;
+ if (loc_part_id == max_partition)
+ if (range_array[loc_part_id] != LONGLONG_MAX)
+ if (part_func_value >= range_array[loc_part_id])
+ DBUG_RETURN(TRUE);
+ DBUG_RETURN(FALSE);
+}
+
+bool get_partition_id_hash_nosub(partition_info *part_info,
+ uint32 *part_id)
+{
+ *part_id= get_part_id_hash(part_info->no_parts, part_info->part_expr);
+ return FALSE;
+}
+
+
+bool get_partition_id_linear_hash_nosub(partition_info *part_info,
+ uint32 *part_id)
+{
+ *part_id= get_part_id_linear_hash(part_info, part_info->no_parts,
+ part_info->part_expr);
+ return FALSE;
+}
+
+
+bool get_partition_id_key_nosub(partition_info *part_info,
+ uint32 *part_id)
+{
+ *part_id= get_part_id_key(part_info->part_field_array, part_info->no_parts);
+ return FALSE;
+}
+
+
+bool get_partition_id_linear_key_nosub(partition_info *part_info,
+ uint32 *part_id)
+{
+ *part_id= get_part_id_linear_key(part_info,
+ part_info->part_field_array,
+ part_info->no_parts);
+ return FALSE;
+}
+
+
+bool get_partition_id_range_sub_hash(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_range_sub_hash");
+ if (unlikely(get_partition_id_range(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_hash(no_subparts, part_info->subpart_expr);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_range_sub_linear_hash(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_range_sub_linear_hash");
+ if (unlikely(get_partition_id_range(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_linear_hash(part_info, no_subparts,
+ part_info->subpart_expr);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_range_sub_key(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_range_sub_key");
+ if (unlikely(get_partition_id_range(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_key(part_info->subpart_field_array, no_subparts);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_range_sub_linear_key(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_range_sub_linear_key");
+ if (unlikely(get_partition_id_range(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_linear_key(part_info,
+ part_info->subpart_field_array,
+ no_subparts);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_hash(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_list_sub_hash");
+ if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_hash(no_subparts, part_info->subpart_expr);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_linear_hash(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_list_sub_linear_hash");
+ if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_hash(no_subparts, part_info->subpart_expr);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_key(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_range_sub_key");
+ if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_key(part_info->subpart_field_array, no_subparts);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_linear_key(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_list_sub_linear_key");
+ if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_linear_key(part_info,
+ part_info->subpart_field_array,
+ no_subparts);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+/*
+ This function is used to calculate the subpartition id
+ SYNOPSIS
+ get_subpartition_id()
+ part_info A reference to the partition_info struct where all the
+ desired information is given
+ RETURN VALUE
+ part_id
+ The subpartition identity
+ DESCRIPTION
+ A routine used in some SELECT's when only partial knowledge of the
+ partitions is known.
+
+ It is actually 4 different variants of this function which are called
+ through a function pointer.
+
+ get_partition_id_hash_sub
+ get_partition_id_key_sub
+ get_partition_id_linear_hash_sub
+ get_partition_id_linear_key_sub
+*/
+
+uint32 get_partition_id_hash_sub(partition_info *part_info)
+{
+ return get_part_id_hash(part_info->no_subparts, part_info->subpart_expr);
+}
+
+
+uint32 get_partition_id_linear_hash_sub(partition_info *part_info)
+{
+ return get_part_id_linear_hash(part_info, part_info->no_subparts,
+ part_info->subpart_expr);
+}
+
+
+uint32 get_partition_id_key_sub(partition_info *part_info)
+{
+ return get_part_id_key(part_info->subpart_field_array,
+ part_info->no_subparts);
+}
+
+
+uint32 get_partition_id_linear_key_sub(partition_info *part_info)
+{
+ return get_part_id_linear_key(part_info,
+ part_info->subpart_field_array,
+ part_info->no_subparts);
+}
+
+
+/*
+ Set an indicator on all partition fields that are set by the key
+ SYNOPSIS
+ set_PF_fields_in_key()
+ key_info Information about the index
+ key_length Length of key
+ RETURN VALUE
+ TRUE Found partition field set by key
+ FALSE No partition field set by key
+*/
+
+static bool set_PF_fields_in_key(KEY *key_info, uint key_length)
+{
+ KEY_PART_INFO *key_part;
+ bool found_part_field= FALSE;
+ DBUG_ENTER("set_PF_fields_in_key");
+
+ for (key_part= key_info->key_part; (int)key_length > 0; key_part++)
+ {
+ if (key_part->null_bit)
+ key_length--;
+ if (key_part->type == HA_KEYTYPE_BIT)
+ {
+ if (((Field_bit*)key_part->field)->bit_len)
+ key_length--;
+ }
+ if (key_part->key_part_flag & (HA_BLOB_PART + HA_VAR_LENGTH_PART))
+ {
+ key_length-= HA_KEY_BLOB_LENGTH;
+ }
+ if (key_length < key_part->length)
+ break;
+ key_length-= key_part->length;
+ if (key_part->field->flags & FIELD_IN_PART_FUNC_FLAG)
+ {
+ found_part_field= TRUE;
+ key_part->field->flags|= GET_FIXED_FIELDS_FLAG;
+ }
+ }
+ DBUG_RETURN(found_part_field);
+}
+
+
+/*
+ We have found that at least one partition field was set by a key, now
+ check if a partition function has all its fields bound or not.
+ SYNOPSIS
+ check_part_func_bound()
+ ptr Array of fields NULL terminated (partition fields)
+ RETURN VALUE
+ TRUE All fields in partition function are set
+ FALSE Not all fields in partition function are set
+*/
+
+static bool check_part_func_bound(Field **ptr)
+{
+ bool result= TRUE;
+ DBUG_ENTER("check_part_func_bound");
+
+ for (; *ptr; ptr++)
+ {
+ if (!((*ptr)->flags & GET_FIXED_FIELDS_FLAG))
+ {
+ result= FALSE;
+ break;
+ }
+ }
+ DBUG_RETURN(result);
+}
+
+
+/*
+ Get the id of the subpartitioning part by using the key buffer of the
+ index scan.
+ SYNOPSIS
+ get_sub_part_id_from_key()
+ table The table object
+ buf A buffer that can be used to evaluate the partition function
+ key_info The index object
+ key_spec A key_range containing key and key length
+ RETURN VALUES
+ part_id Subpartition id to use
+ DESCRIPTION
+ Use key buffer to set-up record in buf, move field pointers and
+ get the partition identity and restore field pointers afterwards.
+*/
+
+static uint32 get_sub_part_id_from_key(const TABLE *table,byte *buf,
+ KEY *key_info,
+ const key_range *key_spec)
+{
+ byte *rec0= table->record[0];
+ partition_info *part_info= table->s->part_info;
+ uint32 part_id;
+ DBUG_ENTER("get_sub_part_id_from_key");
+
+ key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
+ if (likely(rec0 == buf))
+ part_id= part_info->get_subpartition_id(part_info);
+ else
+ {
+ Field **part_field_array= part_info->subpart_field_array;
+ set_field_ptr(part_field_array, buf, rec0);
+ part_id= part_info->get_subpartition_id(part_info);
+ set_field_ptr(part_field_array, rec0, buf);
+ }
+ DBUG_RETURN(part_id);
+}
+
+/*
+ Get the id of the partitioning part by using the key buffer of the
+ index scan.
+ SYNOPSIS
+ get_part_id_from_key()
+ table The table object
+ buf A buffer that can be used to evaluate the partition function
+ key_info The index object
+ key_spec A key_range containing key and key length
+ part_id Partition to use
+ RETURN VALUES
+ TRUE Partition to use not found
+ FALSE Ok, part_id indicates partition to use
+ DESCRIPTION
+ Use key buffer to set-up record in buf, move field pointers and
+ get the partition identity and restore field pointers afterwards.
+*/
+bool get_part_id_from_key(const TABLE *table, byte *buf, KEY *key_info,
+ const key_range *key_spec, uint32 *part_id)
+{
+ bool result;
+ byte *rec0= table->record[0];
+ partition_info *part_info= table->s->part_info;
+ DBUG_ENTER("get_part_id_from_key");
+
+ key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
+ if (likely(rec0 == buf))
+ result= part_info->get_part_partition_id(part_info, part_id);
+ else
+ {
+ Field **part_field_array= part_info->part_field_array;
+ set_field_ptr(part_field_array, buf, rec0);
+ result= part_info->get_part_partition_id(part_info, part_id);
+ set_field_ptr(part_field_array, rec0, buf);
+ }
+ DBUG_RETURN(result);
+}
+
+/*
+ Get the partitioning id of the full PF by using the key buffer of the
+ index scan.
+ SYNOPSIS
+ get_full_part_id_from_key()
+ table The table object
+ buf A buffer that is used to evaluate the partition function
+ key_info The index object
+ key_spec A key_range containing key and key length
+ part_spec A partition id containing start part and end part
+ RETURN VALUES
+ part_spec
+ No partitions to scan is indicated by end_part > start_part when returning
+ DESCRIPTION
+ Use key buffer to set-up record in buf, move field pointers if needed and
+ get the partition identity and restore field pointers afterwards.
+*/
+
+void get_full_part_id_from_key(const TABLE *table, byte *buf,
+ KEY *key_info,
+ const key_range *key_spec,
+ part_id_range *part_spec)
+{
+ bool result;
+ partition_info *part_info= table->s->part_info;
+ byte *rec0= table->record[0];
+ DBUG_ENTER("get_full_part_id_from_key");
+
+ key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
+ if (likely(rec0 == buf))
+ result= part_info->get_partition_id(part_info, &part_spec->start_part);
+ else
+ {
+ Field **part_field_array= part_info->full_part_field_array;
+ set_field_ptr(part_field_array, buf, rec0);
+ result= part_info->get_partition_id(part_info, &part_spec->start_part);
+ set_field_ptr(part_field_array, rec0, buf);
+ }
+ part_spec->end_part= part_spec->start_part;
+ if (unlikely(result))
+ part_spec->start_part++;
+ DBUG_VOID_RETURN;
+}
+
+/*
+ Get the set of partitions to use in query.
+ SYNOPSIS
+ get_partition_set()
+ table The table object
+ buf A buffer that can be used to evaluate the partition function
+ index The index of the key used, if MAX_KEY no index used
+ key_spec A key_range containing key and key length
+ part_spec Contains start part, end part and indicator if bitmap is
+ used for which partitions to scan
+ DESCRIPTION
+ This function is called to discover which partitions to use in an index
+ scan or a full table scan.
+ It returns a range of partitions to scan. If there are holes in this
+ range with partitions that are not needed to scan a bit array is used
+ to signal which partitions to use and which not to use.
+ If start_part > end_part at return it means no partition needs to be
+ scanned. If start_part == end_part it always means a single partition
+ needs to be scanned.
+ RETURN VALUE
+ part_spec
+*/
+void get_partition_set(const TABLE *table, byte *buf, const uint index,
+ const key_range *key_spec, part_id_range *part_spec)
+{
+ partition_info *part_info= table->s->part_info;
+ uint no_parts= part_info->no_full_parts, i, part_id;
+ uint sub_part= no_parts, part_part= no_parts;
+ KEY *key_info= NULL;
+ bool found_part_field= FALSE;
+ DBUG_ENTER("get_partition_set");
+
+ part_spec->use_bit_array= FALSE;
+ part_spec->start_part= 0;
+ part_spec->end_part= no_parts - 1;
+ if ((index < MAX_KEY) &&
+ key_spec->flag == (uint)HA_READ_KEY_EXACT &&
+ part_info->some_fields_in_PF.is_set(index))
+ {
+ key_info= table->key_info+index;
+ /*
+ The index can potentially provide at least one PF-field (field in the
+ partition function). Thus it is interesting to continue our probe.
+ */
+ if (key_spec->length == key_info->key_length)
+ {
+ /*
+ The entire key is set so we can check whether we can immediately
+ derive either the complete PF or if we can derive either
+ the top PF or the subpartitioning PF. This can be established by
+ checking precalculated bits on each index.
+ */
+ if (part_info->all_fields_in_PF.is_set(index))
+ {
+ /*
+ We can derive the exact partition to use, no more than this one
+ is needed.
+ */
+ get_full_part_id_from_key(table,buf,key_info,key_spec,part_spec);
+ DBUG_VOID_RETURN;
+ }
+ else if (is_sub_partitioned(part_info))
+ {
+ if (part_info->all_fields_in_SPF.is_set(index))
+ sub_part= get_sub_part_id_from_key(table, buf, key_info, key_spec);
+ else if (part_info->all_fields_in_PPF.is_set(index))
+ {
+ if (get_part_id_from_key(table,buf,key_info,key_spec,&part_part))
+ {
+ /*
+ The value of the RANGE or LIST partitioning was outside of
+ allowed values. Thus it is certain that the result of this
+ scan will be empty.
+ */
+ part_spec->start_part= no_parts;
+ DBUG_VOID_RETURN;
+ }
+ }
+ }
+ }
+ else
+ {
+ /*
+ Set an indicator on all partition fields that are bound.
+ If at least one PF-field was bound it pays off to check whether
+ the PF or PPF or SPF has been bound.
+ (PF = Partition Function, SPF = Subpartition Function and
+ PPF = Partition Function part of subpartitioning)
+ */
+ if ((found_part_field= set_PF_fields_in_key(key_info,
+ key_spec->length)))
+ {
+ if (check_part_func_bound(part_info->full_part_field_array))
+ {
+ /*
+ We were able to bind all fields in the partition function even
+ by using only a part of the key. Calculate the partition to use.
+ */
+ get_full_part_id_from_key(table,buf,key_info,key_spec,part_spec);
+ clear_indicator_in_key_fields(key_info);
+ DBUG_VOID_RETURN;
+ }
+ else if (check_part_func_bound(part_info->part_field_array))
+ sub_part= get_sub_part_id_from_key(table, buf, key_info, key_spec);
+ else if (check_part_func_bound(part_info->subpart_field_array))
+ {
+ if (get_part_id_from_key(table,buf,key_info,key_spec,&part_part))
+ {
+ part_spec->start_part= no_parts;
+ clear_indicator_in_key_fields(key_info);
+ DBUG_VOID_RETURN;
+ }
+ }
+ }
+ }
+ }
+ {
+ /*
+ The next step is to analyse the table condition to see whether any
+ information about which partitions to scan can be derived from there.
+ Currently not implemented.
+ */
+ }
+ /*
+ If we come here we have found a range of sorts we have either discovered
+ nothing or we have discovered a range of partitions with possible holes
+ in it. We need a bitvector to further the work here.
+ */
+ if (!(part_part == no_parts && sub_part == no_parts))
+ {
+ /*
+ We can only arrive here if we are using subpartitioning.
+ */
+ if (part_part != no_parts)
+ {
+ /*
+ We know the top partition and need to scan all underlying
+ subpartitions. This is a range without holes.
+ */
+ DBUG_ASSERT(sub_part == no_parts);
+ part_spec->start_part= part_part * part_info->no_parts;
+ part_spec->end_part= part_spec->start_part+part_info->no_subparts - 1;
+ }
+ else
+ {
+ DBUG_ASSERT(sub_part != no_parts);
+ part_spec->use_bit_array= TRUE;
+ part_spec->start_part= sub_part;
+ part_spec->end_part=sub_part+
+ (part_info->no_subparts*(part_info->no_parts-1));
+ for (i= 0, part_id= sub_part; i < part_info->no_parts;
+ i++, part_id+= part_info->no_subparts)
+ ; //Set bit part_id in bit array
+ }
+ }
+ if (found_part_field)
+ clear_indicator_in_key_fields(key_info);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ If the table is partitioned we will read the partition info into the
+ .frm file here.
+ -------------------------------
+ | Fileinfo 64 bytes |
+ -------------------------------
+ | Formnames 7 bytes |
+ -------------------------------
+ | Not used 4021 bytes |
+ -------------------------------
+ | Keyinfo + record |
+ -------------------------------
+ | Padded to next multiple |
+ | of IO_SIZE |
+ -------------------------------
+ | Forminfo 288 bytes |
+ -------------------------------
+ | Screen buffer, to make |
+ | field names readable |
+ -------------------------------
+ | Packed field info |
+ | 17 + 1 + strlen(field_name) |
+ | + 1 end of file character |
+ -------------------------------
+ | Partition info |
+ -------------------------------
+ We provide the length of partition length in Fileinfo[55-58].
+
+ Read the partition syntax from the frm file and parse it to get the
+ data structures of the partitioning.
+ SYNOPSIS
+ mysql_unpack_partition()
+ file File reference of frm file
+ thd Thread object
+ part_info_len Length of partition syntax
+ table Table object of partitioned table
+ RETURN VALUE
+ TRUE Error
+ FALSE Sucess
+ DESCRIPTION
+ Read the partition syntax from the current position in the frm file.
+ Initiate a LEX object, save the list of item tree objects to free after
+ the query is done. Set-up partition info object such that parser knows
+ it is called from internally. Call parser to create data structures
+ (best possible recreation of item trees and so forth since there is no
+ serialisation of these objects other than in parseable text format).
+ We need to save the text of the partition functions since it is not
+ possible to retrace this given an item tree.
+*/
+
+bool mysql_unpack_partition(File file, THD *thd, uint part_info_len,
+ TABLE* table)
+{
+ Item *thd_free_list= thd->free_list;
+ bool result= TRUE;
+ uchar* part_buf= NULL;
+ partition_info *part_info;
+ LEX *old_lex= thd->lex, lex;
+ DBUG_ENTER("mysql_unpack_partition");
+ if (read_string(file, (gptr*)&part_buf, part_info_len))
+ DBUG_RETURN(result);
+ thd->lex= &lex;
+ lex_start(thd, part_buf, part_info_len);
+ /*
+ We need to use the current SELECT_LEX since I need to keep the
+ Name_resolution_context object which is referenced from the
+ Item_field objects.
+ This is not a nice solution since if the parser uses current_select
+ for anything else it will corrupt the current LEX object.
+ */
+ thd->lex->current_select= old_lex->current_select;
+ /*
+ All Items created is put into a free list on the THD object. This list
+ is used to free all Item objects after completing a query. We don't
+ want that to happen with the Item tree created as part of the partition
+ info. This should be attached to the table object and remain so until
+ the table object is released.
+ Thus we move away the current list temporarily and start a new list that
+ we then save in the partition info structure.
+ */
+ thd->free_list= NULL;
+ lex.part_info= (partition_info*)1; //Indicate yyparse from this place
+ if (yyparse((void*)thd) || thd->is_fatal_error)
+ {
+ free_items(thd->free_list);
+ goto end;
+ }
+ part_info= lex.part_info;
+ table->s->part_info= part_info;
+ part_info->item_free_list= thd->free_list;
+
+ {
+ /*
+ This code part allocates memory for the serialised item information for
+ the partition functions. In most cases this is not needed but if the
+ table is used for SHOW CREATE TABLES or ALTER TABLE that modifies
+ partition information it is needed and the info is lost if we don't
+ save it here so unfortunately we have to do it here even if in most
+ cases it is not needed. This is a consequence of that item trees are
+ not serialisable.
+ */
+ uint part_func_len= part_info->part_func_len;
+ uint subpart_func_len= part_info->subpart_func_len;
+ char *part_func_string, *subpart_func_string= NULL;
+ if (!((part_func_string= sql_alloc(part_func_len))) ||
+ (subpart_func_len &&
+ !((subpart_func_string= sql_alloc(subpart_func_len)))))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), part_func_len);
+ free_items(thd->free_list);
+ part_info->item_free_list= 0;
+ goto end;
+ }
+ memcpy(part_func_string, part_info->part_func_string, part_func_len);
+ if (subpart_func_len)
+ memcpy(subpart_func_string, part_info->subpart_func_string,
+ subpart_func_len);
+ part_info->part_func_string= part_func_string;
+ part_info->subpart_func_string= subpart_func_string;
+ }
+
+ result= FALSE;
+end:
+ thd->free_list= thd_free_list;
+ x_free((gptr)part_buf);
+ thd->lex= old_lex;
+ DBUG_RETURN(result);
+}
+#endif
+
+/*
+ Prepare for calling val_int on partition function by setting fields to
+ point to the record where the values of the PF-fields are stored.
+ SYNOPSIS
+ set_field_ptr()
+ ptr Array of fields to change ptr
+ new_buf New record pointer
+ old_buf Old record pointer
+ DESCRIPTION
+ Set ptr in field objects of field array to refer to new_buf record
+ instead of previously old_buf. Used before calling val_int and after
+ it is used to restore pointers to table->record[0].
+ This routine is placed outside of partition code since it can be useful
+ also for other programs.
+*/
+
+void set_field_ptr(Field **ptr, const byte *new_buf,
+ const byte *old_buf)
+{
+ my_ptrdiff_t diff= (new_buf - old_buf);
+ DBUG_ENTER("set_nullable_field_ptr");
+
+ do
+ {
+ (*ptr)->move_field(diff);
+ } while (*(++ptr));
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Prepare for calling val_int on partition function by setting fields to
+ point to the record where the values of the PF-fields are stored.
+ This variant works on a key_part reference.
+ It is not required that all fields are NOT NULL fields.
+ SYNOPSIS
+ set_key_field_ptr()
+ key_part key part with a set of fields to change ptr
+ new_buf New record pointer
+ old_buf Old record pointer
+ DESCRIPTION
+ Set ptr in field objects of field array to refer to new_buf record
+ instead of previously old_buf. Used before calling val_int and after
+ it is used to restore pointers to table->record[0].
+ This routine is placed outside of partition code since it can be useful
+ also for other programs.
+*/
+
+void set_key_field_ptr(KEY *key_info, const byte *new_buf,
+ const byte *old_buf)
+{
+ KEY_PART_INFO *key_part= key_info->key_part;
+ uint key_parts= key_info->key_parts, i= 0;
+ my_ptrdiff_t diff= (new_buf - old_buf);
+ DBUG_ENTER("set_key_field_ptr");
+
+ do
+ {
+ key_part->field->move_field(diff);
+ key_part++;
+ } while (++i < key_parts);
+ DBUG_VOID_RETURN;
+}
+
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index bc45c5fa3be..c54efe531ad 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1277,6 +1277,9 @@ JOIN::exec()
/* Copy data to the temporary table */
thd->proc_info= "Copying to tmp table";
DBUG_PRINT("info", ("%s", thd->proc_info));
+ if (!curr_join->sort_and_group &&
+ curr_join->const_tables != curr_join->tables)
+ curr_join->join_tab[curr_join->const_tables].sorted= 0;
if ((tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0)))
{
error= tmp_error;
@@ -1423,6 +1426,9 @@ JOIN::exec()
1, TRUE))
DBUG_VOID_RETURN;
curr_join->group_list= 0;
+ if (!curr_join->sort_and_group &&
+ curr_join->const_tables != curr_join->tables)
+ curr_join->join_tab[curr_join->const_tables].sorted= 0;
if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
(tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table,
0)))
@@ -1608,6 +1614,16 @@ JOIN::exec()
(select_options & OPTION_FOUND_ROWS ?
HA_POS_ERROR : unit->select_limit_cnt)))
DBUG_VOID_RETURN;
+ if (curr_join->const_tables != curr_join->tables &&
+ !curr_join->join_tab[curr_join->const_tables].table->sort.io_cache)
+ {
+ /*
+ If no IO cache exists for the first table then we are using an
+ INDEX SCAN and no filesort. Thus we should not remove the sorted
+ attribute on the INDEX SCAN.
+ */
+ skip_sort_order= 1;
+ }
}
}
/* XXX: When can we have here thd->net.report_error not zero? */
@@ -5659,6 +5675,7 @@ make_join_readinfo(JOIN *join, uint options)
uint i;
bool statistics= test(!(join->select_options & SELECT_DESCRIBE));
+ bool sorted= 1;
DBUG_ENTER("make_join_readinfo");
for (i=join->const_tables ; i < join->tables ; i++)
@@ -5668,6 +5685,8 @@ make_join_readinfo(JOIN *join, uint options)
tab->read_record.table= table;
tab->read_record.file=table->file;
tab->next_select=sub_select; /* normal select */
+ tab->sorted= sorted;
+ sorted= 0; // only first must be sorted
switch (tab->type) {
case JT_SYSTEM: // Only happens with left join
table->status=STATUS_NO_RECORD;
@@ -8915,7 +8934,12 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
new_table.file->extra(HA_EXTRA_WRITE_CACHE);
#endif
- /* copy all old rows */
+ /*
+ copy all old rows from heap table to MyISAM table
+ This is the only code that uses record[1] to read/write but this
+ is safe as this is a temporary MyISAM table without timestamp/autoincrement
+ or partitioning.
+ */
while (!table->file->rnd_next(new_table.record[1]))
{
if ((write_err=new_table.file->write_row(new_table.record[1])))
@@ -9046,7 +9070,7 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
empty_record(table);
if (table->group && join->tmp_table_param.sum_func_count &&
table->s->keys && !table->file->inited)
- table->file->ha_index_init(0);
+ table->file->ha_index_init(0, 0);
}
/* Set up select_end */
join->join_tab[join->tables-1].next_select= setup_end_select_func(join);
@@ -9660,7 +9684,13 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
table->file->extra(HA_EXTRA_KEYREAD);
tab->index= tab->ref.key;
}
- if ((error=join_read_const(tab)))
+ error=join_read_const(tab);
+ if (table->key_read)
+ {
+ table->key_read=0;
+ table->file->extra(HA_EXTRA_NO_KEYREAD);
+ }
+ if (error)
{
tab->info="unique row not found";
/* Mark for EXPLAIN that the row was not found */
@@ -9668,11 +9698,6 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
if (!table->maybe_null || error > 0)
DBUG_RETURN(error);
}
- if (table->key_read)
- {
- table->key_read=0;
- table->file->extra(HA_EXTRA_NO_KEYREAD);
- }
}
if (*tab->on_expr_ref && !table->null_row)
{
@@ -9744,7 +9769,7 @@ join_read_const(JOIN_TAB *tab)
table->status= STATUS_NOT_FOUND;
mark_as_null_row(tab->table);
empty_record(table);
- if (error != HA_ERR_KEY_NOT_FOUND)
+ if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
return report_error(table, error);
return -1;
}
@@ -9767,7 +9792,9 @@ join_read_key(JOIN_TAB *tab)
TABLE *table= tab->table;
if (!table->file->inited)
- table->file->ha_index_init(tab->ref.key);
+ {
+ table->file->ha_index_init(tab->ref.key, tab->sorted);
+ }
if (cmp_buffer_with_ref(tab) ||
(table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW)))
{
@@ -9779,7 +9806,7 @@ join_read_key(JOIN_TAB *tab)
error=table->file->index_read(table->record[0],
tab->ref.key_buff,
tab->ref.key_length,HA_READ_KEY_EXACT);
- if (error && error != HA_ERR_KEY_NOT_FOUND)
+ if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
return report_error(table, error);
}
table->null_row=0;
@@ -9794,14 +9821,16 @@ join_read_always_key(JOIN_TAB *tab)
TABLE *table= tab->table;
if (!table->file->inited)
- table->file->ha_index_init(tab->ref.key);
+ {
+ table->file->ha_index_init(tab->ref.key, tab->sorted);
+ }
if (cp_buffer_from_ref(tab->join->thd, &tab->ref))
return -1;
if ((error=table->file->index_read(table->record[0],
tab->ref.key_buff,
tab->ref.key_length,HA_READ_KEY_EXACT)))
{
- if (error != HA_ERR_KEY_NOT_FOUND)
+ if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
return report_error(table, error);
return -1; /* purecov: inspected */
}
@@ -9821,14 +9850,14 @@ join_read_last_key(JOIN_TAB *tab)
TABLE *table= tab->table;
if (!table->file->inited)
- table->file->ha_index_init(tab->ref.key);
+ table->file->ha_index_init(tab->ref.key, tab->sorted);
if (cp_buffer_from_ref(tab->join->thd, &tab->ref))
return -1;
if ((error=table->file->index_read_last(table->record[0],
tab->ref.key_buff,
tab->ref.key_length)))
{
- if (error != HA_ERR_KEY_NOT_FOUND)
+ if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
return report_error(table, error);
return -1; /* purecov: inspected */
}
@@ -9931,7 +9960,7 @@ join_read_first(JOIN_TAB *tab)
tab->read_record.index=tab->index;
tab->read_record.record=table->record[0];
if (!table->file->inited)
- table->file->ha_index_init(tab->index);
+ table->file->ha_index_init(tab->index, tab->sorted);
if ((error=tab->table->file->index_first(tab->table->record[0])))
{
if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
@@ -9970,7 +9999,7 @@ join_read_last(JOIN_TAB *tab)
tab->read_record.index=tab->index;
tab->read_record.record=table->record[0];
if (!table->file->inited)
- table->file->ha_index_init(tab->index);
+ table->file->ha_index_init(tab->index, 1);
if ((error= tab->table->file->index_last(tab->table->record[0])))
return report_error(table, error);
return 0;
@@ -9994,7 +10023,7 @@ join_ft_read_first(JOIN_TAB *tab)
TABLE *table= tab->table;
if (!table->file->inited)
- table->file->ha_index_init(tab->ref.key);
+ table->file->ha_index_init(tab->ref.key, 1);
#if NOT_USED_YET
if (cp_buffer_from_ref(tab->join->thd, &tab->ref)) // as ft-key doesn't use store_key's
return -1; // see also FT_SELECT::init()
@@ -10380,7 +10409,7 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
error, 0))
DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error
/* Change method to update rows */
- table->file->ha_index_init(0);
+ table->file->ha_index_init(0, 0);
join->join_tab[join->tables-1].next_select=end_unique_update;
}
join->send_records++;
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 9285e33be33..ec424c48366 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -133,6 +133,7 @@ typedef struct st_join_table {
uint used_fields,used_fieldlength,used_blobs;
enum join_type type;
bool cached_eq_ref_table,eq_ref_table,not_used_in_distinct;
+ bool sorted;
TABLE_REF ref;
JOIN_CACHE cache;
JOIN *join;
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index 8343f9ec582..e786b70114d 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -963,11 +963,16 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet)
packet->append("\n)", 2);
if (!(thd->variables.sql_mode & MODE_NO_TABLE_OPTIONS) && !foreign_db_mode)
{
- if (thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40))
- packet->append(" TYPE=", 6);
- else
- packet->append(" ENGINE=", 8);
- packet->append(file->table_type());
+#ifdef HAVE_PARTITION_DB
+ if (!table->s->part_info)
+#endif
+ {
+ if (thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40))
+ packet->append(" TYPE=", 6);
+ else
+ packet->append(" ENGINE=", 8);
+ packet->append(file->table_type());
+ }
if (share->table_charset &&
!(thd->variables.sql_mode & MODE_MYSQL323) &&
@@ -1034,6 +1039,23 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet)
append_directory(thd, packet, "DATA", create_info.data_file_name);
append_directory(thd, packet, "INDEX", create_info.index_file_name);
}
+#ifdef HAVE_PARTITION_DB
+ {
+ /*
+ Partition syntax for CREATE TABLE is at the end of the syntax.
+ */
+ uint part_syntax_len;
+ char *part_syntax;
+ if (table->s->part_info &&
+ ((part_syntax= generate_partition_syntax(table->s->part_info,
+ &part_syntax_len,
+ FALSE))))
+ {
+ packet->append(part_syntax, part_syntax_len);
+ my_free(part_syntax, MYF(0));
+ }
+ }
+#endif
DBUG_RETURN(0);
}
@@ -2728,7 +2750,7 @@ int fill_schema_proc(THD *thd, TABLE_LIST *tables, COND *cond)
{
DBUG_RETURN(1);
}
- proc_table->file->ha_index_init(0);
+ proc_table->file->ha_index_init(0, 1);
if ((res= proc_table->file->index_first(proc_table->record[0])))
{
res= (res == HA_ERR_END_OF_FILE) ? 0 : 1;
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 1d7f3ca87be..21348908bf3 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -28,6 +28,7 @@
#include <io.h>
#endif
+
const char *primary_key_name="PRIMARY";
static bool check_if_keyname_exists(const char *name,KEY *start, KEY *end);
@@ -1513,7 +1514,66 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name,
if (create_info->row_type == ROW_TYPE_DYNAMIC)
db_options|=HA_OPTION_PACK_RECORD;
alias= table_case_name(create_info, table_name);
- file=get_new_handler((TABLE*) 0, create_info->db_type);
+ if (!(file=get_new_handler((TABLE*) 0, create_info->db_type)))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), 128);//128 bytes invented
+ DBUG_RETURN(TRUE);
+ }
+#ifdef HAVE_PARTITION_DB
+ partition_info *part_info= thd->lex->part_info;
+ if (part_info)
+ {
+ /*
+ The table has been specified as a partitioned table.
+ If this is part of an ALTER TABLE the handler will be the partition
+ handler but we need to specify the default handler to use for
+ partitions also in the call to check_partition_info. We transport
+ this information in the default_db_type variable, it is either
+ DB_TYPE_DEFAULT or the engine set in the ALTER TABLE command.
+ */
+ enum db_type part_engine_type= create_info->db_type;
+ char *part_syntax_buf;
+ uint syntax_len;
+ if (part_engine_type == DB_TYPE_PARTITION_DB)
+ {
+ /*
+ This only happens at ALTER TABLE.
+ default_engine_type was assigned from the engine set in the ALTER
+ TABLE command.
+ */
+ part_engine_type= ha_checktype(thd,
+ part_info->default_engine_type, 0, 0);
+ }
+ if (check_partition_info(part_info, part_engine_type,
+ file, create_info->max_rows))
+ DBUG_RETURN(TRUE);
+ /*
+ We reverse the partitioning parser and generate a standard format
+ for syntax stored in frm file.
+ */
+ if (!(part_syntax_buf= generate_partition_syntax(part_info,
+ &syntax_len,
+ TRUE)))
+ DBUG_RETURN(TRUE);
+ part_info->part_info_string= part_syntax_buf;
+ part_info->part_info_len= syntax_len;
+ if ((!(file->partition_flags() & HA_CAN_PARTITION)) ||
+ create_info->db_type == DB_TYPE_PARTITION_DB)
+ {
+ /*
+ The handler assigned to the table cannot handle partitioning.
+ Assign the partition handler as the handler of the table.
+ */
+ DBUG_PRINT("info", ("db_type= %d, part_flag= %d", create_info->db_type,file->partition_flags()));
+ delete file;
+ create_info->db_type= DB_TYPE_PARTITION_DB;
+ if (!(file= get_ha_partition(part_info)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ }
+ }
+#endif
#ifdef NOT_USED
/*
@@ -1527,7 +1587,7 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name,
(file->table_flags() & HA_NO_TEMP_TABLES))
{
my_error(ER_ILLEGAL_HA, MYF(0), table_name);
- DBUG_RETURN(TRUE);
+ goto err;
}
#endif
@@ -1550,7 +1610,7 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name,
&keys, internal_tmp_table, &db_options, file,
&key_info_buffer, &key_count,
select_field_count))
- DBUG_RETURN(TRUE);
+ goto err;
/* Check if table exists */
if (create_info->options & HA_LEX_CREATE_TMP_TABLE)
@@ -1572,13 +1632,13 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name,
if (create_info->options & HA_LEX_CREATE_IF_NOT_EXISTS)
{
create_info->table_existed= 1; // Mark that table existed
- DBUG_RETURN(FALSE);
+ goto no_err;
}
my_error(ER_TABLE_EXISTS_ERROR, MYF(0), alias);
- DBUG_RETURN(TRUE);
+ goto err;
}
if (wait_if_global_read_lock(thd, 0, 1))
- DBUG_RETURN(error);
+ goto err;
VOID(pthread_mutex_lock(&LOCK_open));
if (!internal_tmp_table && !(create_info->options & HA_LEX_CREATE_TMP_TABLE))
{
@@ -1631,7 +1691,7 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name,
create_info->table_options=db_options;
if (rea_create_table(thd, path, create_info, fields, key_count,
- key_info_buffer))
+ key_info_buffer, file))
{
/* my_error(ER_CANT_CREATE_TABLE,MYF(0),table_name,my_errno); */
goto end;
@@ -1660,6 +1720,13 @@ end:
delete file;
thd->proc_info="After create";
DBUG_RETURN(error);
+
+err:
+ delete file;
+ DBUG_RETURN(TRUE);
+no_err:
+ delete file;
+ DBUG_RETURN(FALSE);
}
/*
@@ -3138,6 +3205,59 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
old_db_type= table->s->db_type;
if (create_info->db_type == DB_TYPE_DEFAULT)
create_info->db_type= old_db_type;
+#ifdef HAVE_PARTITION_DB
+ /*
+ When thd->lex->part_info has a reference to a partition_info the
+ ALTER TABLE contained a definition of a partitioning.
+
+ Case I:
+ If there was a partition before and there is a new one defined.
+ We use the new partitioning. The new partitioning is already
+ defined in the correct variable so no work is needed to
+ accomplish this.
+
+ Case IIa:
+ There was a partitioning before and there is no new one defined.
+ Also the user has not specified an explicit engine to use.
+
+ We use the old partitioning also for the new table. We do this
+ by assigning the partition_info from the table loaded in
+ open_ltable to the partition_info struct used by mysql_create_table
+ later in this method.
+
+ Case IIb:
+ There was a partitioning before and there is no new one defined.
+ The user has specified an explicit engine to use.
+
+ Since the user has specified an explicit engine to use we override
+ the old partitioning info and create a new table using the specified
+ engine. This is the reason for the extra check if old and new engine
+ is equal.
+
+ Case III:
+ There was no partitioning before altering the table, there is
+ partitioning defined in the altered table. Use the new partitioning.
+ No work needed since the partitioning info is already in the
+ correct variable.
+
+ Case IV:
+ There was no partitioning before and no partitioning defined. Obviously
+ no work needed.
+ */
+ if (table->s->part_info)
+ if (!thd->lex->part_info &&
+ create_info->db_type == old_db_type)
+ thd->lex->part_info= table->s->part_info;
+ if (thd->lex->part_info)
+ {
+ /*
+ Need to cater for engine types that can handle partition without
+ using the partition handler.
+ */
+ thd->lex->part_info->default_engine_type= create_info->db_type;
+ create_info->db_type= DB_TYPE_PARTITION_DB;
+ }
+#endif
if (check_engine(thd, new_name, &create_info->db_type))
DBUG_RETURN(TRUE);
new_db_type= create_info->db_type;
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index 5c6324e15fd..95d0f500df8 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -148,7 +148,7 @@ int mysql_update(THD *thd,
/* pass counter value */
thd->lex->table_count= table_count;
/* convert to multiupdate */
- return 2;
+ DBUG_RETURN(2);
}
if (lock_tables(thd, table_list, table_count) ||
@@ -265,7 +265,12 @@ int mysql_update(THD *thd,
else
used_key_is_modified=0;
+#ifdef HAVE_PARTITION_DB
+ if (used_key_is_modified || order ||
+ partition_key_modified(table, fields))
+#else
if (used_key_is_modified || order)
+#endif
{
/*
We can't update table directly; We must first search after all
@@ -452,8 +457,8 @@ int mysql_update(THD *thd,
call then it should be included in the count of dup_key_found
and error should be set to 0 (only if these errors are ignored).
*/
- error= table->file->bulk_update_row(table->record[0],
- table->record[1],
+ error= table->file->bulk_update_row(table->record[1],
+ table->record[0],
&dup_key_found);
limit+= dup_key_found;
updated-= dup_key_found;
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 92680e48945..e4259197292 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -356,13 +356,16 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token LEAVES
%token LEAVE_SYM
%token LEFT
+%token LESS_SYM
%token LEVEL_SYM
%token LEX_HOSTNAME
%token LIKE
%token LIMIT
+%token LINEAR_SYM
%token LINEFROMTEXT
%token LINES
%token LINESTRING
+%token LIST_SYM
%token LOAD
%token LOCAL_SYM
%token LOCATE
@@ -402,6 +405,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token MAX_SYM
%token MAX_UPDATES_PER_HOUR
%token MAX_USER_CONNECTIONS_SYM
+%token MAX_VALUE_SYM
%token MEDIUMBLOB
%token MEDIUMINT
%token MEDIUMTEXT
@@ -436,6 +440,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token NE
%token NEW_SYM
%token NEXT_SYM
+%token NODEGROUP_SYM
%token NONE_SYM
%token NOT2_SYM
%token NOT_SYM
@@ -464,6 +469,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token OUT_SYM
%token PACK_KEYS_SYM
%token PARTIAL
+%token PARTITION_SYM
+%token PARTITIONS_SYM
%token PASSWORD
%token PARAM_MARKER
%token PHASE_SYM
@@ -490,6 +497,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token RAID_STRIPED_SYM
%token RAID_TYPE
%token RAND
+%token RANGE_SYM
%token READS_SYM
%token READ_SYM
%token REAL
@@ -575,6 +583,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token STRING_SYM
%token SUBDATE_SYM
%token SUBJECT_SYM
+%token SUBPARTITION_SYM
+%token SUBPARTITIONS_SYM
%token SUBSTRING
%token SUBSTRING_INDEX
%token SUM_SYM
@@ -595,6 +605,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token TINYBLOB
%token TINYINT
%token TINYTEXT
+%token THAN_SYM
%token TO_SYM
%token TRAILING
%token TRANSACTION_SYM
@@ -618,11 +629,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token UNIX_TIMESTAMP
%token UNKNOWN_SYM
%token UNLOCK_SYM
-%token UNLOCK_SYM
%token UNSIGNED
%token UNTIL_SYM
-%token UNTIL_SYM
-%token UPDATE_SYM
%token UPDATE_SYM
%token USAGE
%token USER
@@ -723,6 +731,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
sp_opt_default
simple_ident_nospvar simple_ident_q
field_or_var limit_option
+ part_bit_expr part_func_expr
%type <item_num>
NUM_literal
@@ -821,6 +830,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
statement sp_suid opt_view_list view_list or_replace algorithm
sp_c_chistics sp_a_chistics sp_chistic sp_c_chistic xa
load_data opt_field_or_var_spec fields_or_vars opt_load_data_set_spec
+ partition_entry
END_OF_INPUT
%type <NONE> call sp_proc_stmts sp_proc_stmts1 sp_proc_stmt
@@ -886,6 +896,7 @@ statement:
| lock
| optimize
| keycache
+ | partition_entry
| preload
| prepare
| purge
@@ -2538,7 +2549,9 @@ trg_event:
create2:
'(' create2a {}
- | opt_create_table_options create3 {}
+ | opt_create_table_options
+ opt_partitioning {}
+ create3 {}
| LIKE table_ident
{
LEX *lex=Lex;
@@ -2554,8 +2567,12 @@ create2:
;
create2a:
- field_list ')' opt_create_table_options create3 {}
- | create_select ')' { Select->set_braces(1);} union_opt {}
+ field_list ')' opt_create_table_options
+ opt_partitioning {}
+ create3 {}
+ | opt_partitioning {}
+ create_select ')'
+ { Select->set_braces(1);} union_opt {}
;
create3:
@@ -2566,6 +2583,411 @@ create3:
{ Select->set_braces(1);} union_opt {}
;
+/*
+ This part of the parser is about handling of the partition information.
+
+ It's first version was written by Mikael Ronström with lots of answers to
+ questions provided by Antony Curtis.
+
+ The partition grammar can be called from three places.
+ 1) CREATE TABLE ... PARTITION ..
+ 2) ALTER TABLE table_name PARTITION ...
+ 3) PARTITION ...
+
+ The first place is called when a new table is created from a MySQL client.
+ The second place is called when a table is altered with the ALTER TABLE
+ command from a MySQL client.
+ The third place is called when opening an frm file and finding partition
+ info in the .frm file. It is necessary to avoid allowing PARTITION to be
+ an allowed entry point for SQL client queries. This is arranged by setting
+ some state variables before arriving here.
+
+ To be able to handle errors we will only set error code in this code
+ and handle the error condition in the function calling the parser. This
+ is necessary to ensure we can also handle errors when calling the parser
+ from the openfrm function.
+*/
+opt_partitioning:
+ /* empty */ {}
+ | partitioning
+ ;
+
+partitioning:
+ PARTITION_SYM
+ { Lex->part_info= new partition_info(); }
+ partition
+ ;
+
+partition_entry:
+ PARTITION_SYM
+ {
+ LEX *lex= Lex;
+ if (lex->part_info)
+ {
+ /*
+ We enter here when opening the frm file to translate
+ partition info string into part_info data structure.
+ */
+ lex->part_info= new partition_info();
+ }
+ else
+ {
+ yyerror(ER(ER_PARTITION_ENTRY_ERROR));
+ YYABORT;
+ }
+ }
+ partition {};
+
+partition:
+ BY part_type_def opt_no_parts {} opt_sub_part {} part_defs;
+
+part_type_def:
+ opt_linear KEY_SYM '(' part_field_list ')'
+ {
+ LEX *lex= Lex;
+ lex->part_info->list_of_part_fields= TRUE;
+ lex->part_info->part_type= HASH_PARTITION;
+ }
+ | opt_linear HASH_SYM
+ { Lex->part_info->part_type= HASH_PARTITION; }
+ part_func {}
+ | RANGE_SYM
+ { Lex->part_info->part_type= RANGE_PARTITION; }
+ part_func {}
+ | LIST_SYM
+ { Lex->part_info->part_type= LIST_PARTITION; }
+ part_func {};
+
+opt_linear:
+ /* empty */ {}
+ | LINEAR_SYM
+ { Lex->part_info->linear_hash_ind= TRUE;};
+
+part_field_list:
+ part_field_item {}
+ | part_field_list ',' part_field_item {};
+
+part_field_item:
+ ident
+ {
+ Lex->part_info->part_field_list.push_back($1.str);
+ };
+
+part_func:
+ '(' remember_name part_func_expr remember_end ')'
+ {
+ LEX *lex= Lex;
+ uint expr_len= (uint)($4 - $2) - 1;
+ lex->part_info->list_of_part_fields= FALSE;
+ lex->part_info->part_expr= $3;
+ lex->part_info->part_func_string= $2+1;
+ lex->part_info->part_func_len= expr_len;
+ };
+
+sub_part_func:
+ '(' remember_name part_func_expr remember_end ')'
+ {
+ LEX *lex= Lex;
+ uint expr_len= (uint)($4 - $2) - 1;
+ lex->part_info->list_of_subpart_fields= FALSE;
+ lex->part_info->subpart_expr= $3;
+ lex->part_info->subpart_func_string= $2+1;
+ lex->part_info->subpart_func_len= expr_len;
+ };
+
+
+opt_no_parts:
+ /* empty */ {}
+ | PARTITIONS_SYM ulong_num
+ {
+ uint no_parts= $2;
+ if (no_parts == 0)
+ {
+ my_error(ER_NO_PARTS_ERROR, MYF(0), "partitions");
+ YYABORT;
+ }
+ Lex->part_info->no_parts= no_parts;
+ };
+
+opt_sub_part:
+ /* empty */ {}
+ | SUBPARTITION_SYM BY opt_linear HASH_SYM sub_part_func
+ { Lex->part_info->subpart_type= HASH_PARTITION; }
+ opt_no_subparts {}
+ | SUBPARTITION_SYM BY opt_linear KEY_SYM
+ '(' sub_part_field_list ')'
+ {
+ LEX *lex= Lex;
+ lex->part_info->subpart_type= HASH_PARTITION;
+ lex->part_info->list_of_subpart_fields= TRUE;
+ }
+ opt_no_subparts {};
+
+sub_part_field_list:
+ sub_part_field_item {}
+ | sub_part_field_list ',' sub_part_field_item {};
+
+sub_part_field_item:
+ ident
+ { Lex->part_info->subpart_field_list.push_back($1.str); };
+
+part_func_expr:
+ bit_expr
+ {
+ LEX *lex= Lex;
+ bool not_corr_func;
+ not_corr_func= !lex->safe_to_cache_query;
+ lex->safe_to_cache_query= 1;
+ if (not_corr_func)
+ {
+ yyerror(ER(ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR));
+ YYABORT;
+ }
+ $$=$1;
+ }
+
+opt_no_subparts:
+ /* empty */ {}
+ | SUBPARTITIONS_SYM ulong_num
+ {
+ uint no_parts= $2;
+ if (no_parts == 0)
+ {
+ my_error(ER_NO_PARTS_ERROR, MYF(0), "subpartitions");
+ YYABORT;
+ }
+ Lex->part_info->no_subparts= no_parts;
+ };
+
+part_defs:
+ /* empty */
+ {}
+ | '(' part_def_list ')'
+ {
+ LEX *lex= Lex;
+ partition_info *part_info= lex->part_info;
+ if (part_info->no_parts != 0)
+ {
+ if (part_info->no_parts !=
+ part_info->count_curr_parts)
+ {
+ yyerror(ER(ER_PARTITION_WRONG_NO_PART_ERROR));
+ YYABORT;
+ }
+ }
+ else if (part_info->count_curr_parts > 0)
+ {
+ part_info->no_parts= part_info->count_curr_parts;
+ }
+ part_info->count_curr_subparts= 0;
+ part_info->count_curr_parts= 0;
+ };
+
+part_def_list:
+ part_definition {}
+ | part_def_list ',' part_definition {};
+
+part_definition:
+ PARTITION_SYM
+ {
+ LEX *lex= Lex;
+ partition_info *part_info= lex->part_info;
+ partition_element *p_elem= new partition_element();
+ if (!p_elem)
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element));
+ YYABORT;
+ }
+ part_info->curr_part_elem= p_elem;
+ part_info->current_partition= p_elem;
+ part_info->use_default_partitions= FALSE;
+ part_info->partitions.push_back(p_elem);
+ p_elem->engine_type= DB_TYPE_UNKNOWN;
+ part_info->count_curr_parts++;
+ }
+ part_name {}
+ opt_part_values {}
+ opt_part_options {}
+ opt_sub_partition {};
+
+part_name:
+ ident_or_text
+ { Lex->part_info->curr_part_elem->partition_name= $1.str; };
+
+opt_part_values:
+ /* empty */
+ {
+ LEX *lex= Lex;
+ if (lex->part_info->part_type == RANGE_PARTITION)
+ {
+ my_error(ER_PARTITION_REQUIRES_VALUES_ERROR, MYF(0),
+ "RANGE", "LESS THAN");
+ YYABORT;
+ }
+ if (lex->part_info->part_type == LIST_PARTITION)
+ {
+ my_error(ER_PARTITION_REQUIRES_VALUES_ERROR, MYF(0),
+ "LIST", "IN");
+ YYABORT;
+ }
+ }
+ | VALUES LESS_SYM THAN_SYM part_func_max
+ {
+ if (Lex->part_info->part_type != RANGE_PARTITION)
+ {
+ my_error(ER_PARTITION_WRONG_VALUES_ERROR, MYF(0),
+ "RANGE", "LESS THAN");
+ YYABORT;
+ }
+ }
+ | VALUES IN_SYM '(' part_list_func ')'
+ {
+ if (Lex->part_info->part_type != LIST_PARTITION)
+ {
+ my_error(ER_PARTITION_WRONG_VALUES_ERROR, MYF(0),
+ "LIST", "IN");
+ YYABORT;
+ }
+ };
+
+part_func_max:
+ MAX_VALUE_SYM
+ {
+ LEX *lex= Lex;
+ if (lex->part_info->defined_max_value)
+ {
+ yyerror(ER(ER_PARTITION_MAXVALUE_ERROR));
+ YYABORT;
+ }
+ lex->part_info->defined_max_value= TRUE;
+ }
+ | part_range_func
+ {
+ if (Lex->part_info->defined_max_value)
+ {
+ yyerror(ER(ER_PARTITION_MAXVALUE_ERROR));
+ YYABORT;
+ }
+ };
+
+part_range_func:
+ '(' part_bit_expr ')'
+ {
+ Lex->part_info->curr_part_elem->range_expr= $2;
+ };
+
+part_list_func:
+ part_list_item {}
+ | part_list_func ',' part_list_item {};
+
+part_list_item:
+ part_bit_expr
+ {
+ Lex->part_info->curr_part_elem->list_expr_list.push_back($1);
+ };
+
+part_bit_expr:
+ bit_expr
+ {
+ Item *part_expr= $1;
+ bool not_corr_func;
+ LEX *lex= Lex;
+ Name_resolution_context *context= &lex->current_select->context;
+ TABLE_LIST *save_list= context->table_list;
+
+ context->table_list= 0;
+ part_expr->fix_fields(YYTHD, (Item**)0);
+ context->table_list= save_list;
+ not_corr_func= !part_expr->const_item() ||
+ !lex->safe_to_cache_query;
+ lex->safe_to_cache_query= 1;
+ if (not_corr_func)
+ {
+ yyerror(ER(ER_NO_CONST_EXPR_IN_RANGE_OR_LIST_ERROR));
+ YYABORT;
+ }
+ $$= part_expr;
+ }
+
+opt_sub_partition:
+ /* empty */ {}
+ | '(' sub_part_list ')'
+ {
+ LEX *lex= Lex;
+ partition_info *part_info= lex->part_info;
+ if (part_info->no_subparts != 0)
+ {
+ if (part_info->no_subparts !=
+ part_info->count_curr_subparts)
+ {
+ yyerror(ER(ER_PARTITION_WRONG_NO_SUBPART_ERROR));
+ YYABORT;
+ }
+ }
+ else if (part_info->count_curr_subparts > 0)
+ {
+ part_info->no_subparts= part_info->count_curr_subparts;
+ }
+ part_info->count_curr_subparts= 0;
+ };
+
+sub_part_list:
+ sub_part_definition {}
+ | sub_part_list ',' sub_part_definition {};
+
+sub_part_definition:
+ SUBPARTITION_SYM
+ {
+ LEX *lex= Lex;
+ partition_info *part_info= lex->part_info;
+ partition_element *p_elem= new partition_element();
+ if (!p_elem)
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element));
+ YYABORT;
+ }
+ part_info->curr_part_elem= p_elem;
+ part_info->current_partition->subpartitions.push_back(p_elem);
+ part_info->use_default_subpartitions= FALSE;
+ part_info->count_curr_subparts++;
+ p_elem->engine_type= DB_TYPE_UNKNOWN;
+ }
+ sub_name opt_part_options {};
+
+sub_name:
+ ident_or_text
+ { Lex->part_info->curr_part_elem->partition_name= $1.str; };
+
+opt_part_options:
+ /* empty */ {}
+ | opt_part_option_list {};
+
+opt_part_option_list:
+ opt_part_option_list opt_part_option {}
+ | opt_part_option {};
+
+opt_part_option:
+ TABLESPACE opt_equal ident_or_text
+ { Lex->part_info->curr_part_elem->tablespace_name= $3.str; }
+ | opt_storage ENGINE_SYM opt_equal storage_engines
+ { Lex->part_info->curr_part_elem->engine_type= $4; }
+ | NODEGROUP_SYM opt_equal ulong_num
+ { Lex->part_info->curr_part_elem->nodegroup_id= $3; }
+ | MAX_ROWS opt_equal ulonglong_num
+ { Lex->part_info->curr_part_elem->part_max_rows= $3; }
+ | MIN_ROWS opt_equal ulonglong_num
+ { Lex->part_info->curr_part_elem->part_min_rows= $3; }
+ | DATA_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys
+ { Lex->part_info->curr_part_elem->data_file_name= $4.str; }
+ | INDEX_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys
+ { Lex->part_info->curr_part_elem->index_file_name= $4.str; }
+ | COMMENT_SYM opt_equal TEXT_STRING_sys
+ { Lex->part_info->curr_part_elem->part_comment= $3.str; };
+
+/*
+ End of partition parser part
+*/
+
create_select:
SELECT_SYM
{
@@ -3338,7 +3760,7 @@ alter:
lex->alter_info.reset();
lex->alter_info.flags= 0;
}
- alter_list
+ alter_commands
{}
| ALTER DATABASE ident_or_empty
{
@@ -3404,11 +3826,18 @@ ident_or_empty:
/* empty */ { $$= 0; }
| ident { $$= $1.str; };
-alter_list:
+alter_commands:
| DISCARD TABLESPACE { Lex->alter_info.tablespace_op= DISCARD_TABLESPACE; }
| IMPORT TABLESPACE { Lex->alter_info.tablespace_op= IMPORT_TABLESPACE; }
- | alter_list_item
- | alter_list ',' alter_list_item;
+ | alter_list
+ opt_partitioning
+ | partitioning
+ ;
+
+alter_list:
+ alter_list_item
+ | alter_list ',' alter_list_item
+ ;
add_column:
ADD opt_column
@@ -7363,6 +7792,7 @@ keyword:
| LANGUAGE_SYM {}
| NO_SYM {}
| OPEN_SYM {}
+ | PARTITION_SYM {}
| PREPARE_SYM {}
| REPAIR {}
| RESET_SYM {}
@@ -7463,8 +7893,10 @@ keyword_sp:
| RELAY_THREAD {}
| LAST_SYM {}
| LEAVES {}
+ | LESS_SYM {}
| LEVEL_SYM {}
| LINESTRING {}
+ | LIST_SYM {}
| LOCAL_SYM {}
| LOCKS_SYM {}
| LOGS_SYM {}
@@ -7488,6 +7920,7 @@ keyword_sp:
| MAX_QUERIES_PER_HOUR {}
| MAX_UPDATES_PER_HOUR {}
| MAX_USER_CONNECTIONS_SYM {}
+ | MAX_VALUE_SYM {}
| MEDIUM_SYM {}
| MERGE_SYM {}
| MICROSECOND_SYM {}
@@ -7508,6 +7941,7 @@ keyword_sp:
| NDBCLUSTER_SYM {}
| NEXT_SYM {}
| NEW_SYM {}
+ | NODEGROUP_SYM {}
| NONE_SYM {}
| NVARCHAR_SYM {}
| OFFSET_SYM {}
@@ -7516,6 +7950,7 @@ keyword_sp:
| ONE_SYM {}
| PACK_KEYS_SYM {}
| PARTIAL {}
+ | PARTITIONS_SYM {}
| PASSWORD {}
| PHASE_SYM {}
| POINT_SYM {}
@@ -7566,6 +8001,8 @@ keyword_sp:
| STRING_SYM {}
| SUBDATE_SYM {}
| SUBJECT_SYM {}
+ | SUBPARTITION_SYM {}
+ | SUBPARTITIONS_SYM {}
| SUPER_SYM {}
| SUSPEND_SYM {}
| TABLES {}
@@ -7573,6 +8010,7 @@ keyword_sp:
| TEMPORARY {}
| TEMPTABLE_SYM {}
| TEXT_SYM {}
+ | THAN_SYM {}
| TRANSACTION_SYM {}
| TIMESTAMP {}
| TIMESTAMP_ADD {}
diff --git a/sql/table.cc b/sql/table.cc
index bbf34ae2c25..8852e1fa9dd 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -70,7 +70,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
int j,error, errarg= 0;
uint rec_buff_length,n_length,int_length,records,key_parts,keys,
interval_count,interval_parts,read_length,db_create_options;
- uint key_info_length, com_length;
+ uint key_info_length, com_length, part_info_len, extra_rec_buf_length;
ulong pos;
char index_file[FN_REFLEN], *names, *keynames, *comment_pos;
uchar head[288],*disk_buff,new_field_pack_flag;
@@ -153,6 +153,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
goto err; /* purecov: inspected */
*fn_ext(index_file)='\0'; // Remove .frm extension
+ part_info_len= uint4korr(head+55);
share->frm_version= head[2];
/*
Check if .frm file created by MySQL 5.0. In this case we want to
@@ -300,10 +301,6 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
}
#endif
- /* Allocate handler */
- if (!(outparam->file= get_new_handler(outparam, share->db_type)))
- goto err;
-
error=4;
outparam->reginfo.lock_type= TL_UNLOCK;
outparam->current_lock=F_UNLCK;
@@ -314,8 +311,9 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
if (prgflag & (READ_ALL+EXTRA_RECORD))
records++;
/* QQ: TODO, remove the +1 from below */
+ extra_rec_buf_length= uint2korr(head+59);
rec_buff_length= ALIGN_SIZE(share->reclength + 1 +
- outparam->file->extra_rec_buf_length());
+ extra_rec_buf_length);
share->rec_buff_length= rec_buff_length;
if (!(record= (char *) alloc_root(&outparam->mem_root,
rec_buff_length * records)))
@@ -435,9 +433,22 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
if (keynames)
fix_type_pointers(&int_array, &share->keynames, 1, &keynames);
+ if (part_info_len > 0)
+ {
+#ifdef HAVE_PARTITION_DB
+ if (mysql_unpack_partition(file, thd, part_info_len, outparam))
+ goto err;
+#else
+ goto err;
+#endif
+ }
VOID(my_close(file,MYF(MY_WME)));
file= -1;
+ /* Allocate handler */
+ if (!(outparam->file= get_new_handler(outparam, share->db_type)))
+ goto err;
+
record= (char*) outparam->record[0]-1; /* Fieldstart = 1 */
if (null_field_first)
{
@@ -859,6 +870,13 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
if (outparam->file->ha_allocate_read_write_set(share->fields))
goto err;
+ /* Fix the partition functions and ensure they are not constant functions*/
+ if (part_info_len > 0)
+#ifdef HAVE_PARTITION_DB
+ if (fix_partition_func(thd,name,outparam))
+#endif
+ goto err;
+
/* The table struct is now initialized; Open the table */
error=2;
if (db_stat)
@@ -916,6 +934,13 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
if (! error_reported)
frm_error(error,outparam,name,ME_ERROR+ME_WAITTANG, errarg);
delete outparam->file;
+#ifdef HAVE_PARTITION_DB
+ if (outparam->s->part_info)
+ {
+ free_items(outparam->s->part_info->item_free_list);
+ outparam->s->part_info->item_free_list= 0;
+ }
+#endif
outparam->file=0; // For easier errorchecking
outparam->db_stat=0;
hash_free(&share->name_hash);
@@ -942,6 +967,13 @@ int closefrm(register TABLE *table)
table->field= 0;
}
delete table->file;
+#ifdef HAVE_PARTITION_DB
+ if (table->s->part_info)
+ {
+ free_items(table->s->part_info->item_free_list);
+ table->s->part_info->item_free_list= 0;
+ }
+#endif
table->file= 0; /* For easier errorchecking */
hash_free(&table->s->name_hash);
free_root(&table->mem_root, MYF(0));
diff --git a/sql/table.h b/sql/table.h
index e5653a1f213..6ba9453b2f0 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -21,6 +21,7 @@ class Item; /* Needed by ORDER */
class GRANT_TABLE;
class st_select_lex_unit;
class st_select_lex;
+class partition_info;
class COND_EQUAL;
/* Order clause list element */
@@ -96,6 +97,9 @@ class Table_triggers_list;
typedef struct st_table_share
{
+#ifdef HAVE_PARTITION_DB
+ partition_info *part_info; /* Partition related information */
+#endif
/* hash of field names (contains pointers to elements of field array) */
HASH name_hash; /* hash of field names */
MEM_ROOT mem_root;
@@ -203,6 +207,8 @@ struct st_table {
ORDER *group;
const char *alias; /* alias or table name */
uchar *null_flags;
+ MY_BITMAP *read_set;
+ MY_BITMAP *write_set;
query_id_t query_id;
ha_rows quick_rows[MAX_KEY];
@@ -256,6 +262,7 @@ struct st_table {
my_bool auto_increment_field_not_null;
my_bool insert_or_update; /* Can be used by the handler */
my_bool alias_name_used; /* true if table_name is alias */
+ my_bool get_fields_in_item_tree; /* Signal to fix_field */
REGINFO reginfo; /* field connections */
MEM_ROOT mem_root;
diff --git a/sql/tztime.cc b/sql/tztime.cc
index f5111459da2..bb516731440 100644
--- a/sql/tztime.cc
+++ b/sql/tztime.cc
@@ -1623,7 +1623,7 @@ my_tz_init(THD *org_thd, const char *default_tzname, my_bool bootstrap)
mysql.time_zone* tables are MyISAM and these operations always succeed
for MyISAM.
*/
- (void)table->file->ha_index_init(0);
+ (void)table->file->ha_index_init(0, 1);
tz_leapcnt= 0;
res= table->file->index_first(table->record[0]);
@@ -1800,7 +1800,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
mysql.time_zone* tables are MyISAM and these operations always succeed
for MyISAM.
*/
- (void)table->file->ha_index_init(0);
+ (void)table->file->ha_index_init(0, 1);
if (table->file->index_read(table->record[0], (byte*)table->field[0]->ptr,
0, HA_READ_KEY_EXACT))
@@ -1827,7 +1827,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
table= tz_tables->table;
tz_tables= tz_tables->next_local;
table->field[0]->store((longlong)tzid);
- (void)table->file->ha_index_init(0);
+ (void)table->file->ha_index_init(0, 1);
if (table->file->index_read(table->record[0], (byte*)table->field[0]->ptr,
0, HA_READ_KEY_EXACT))
@@ -1854,7 +1854,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
table= tz_tables->table;
tz_tables= tz_tables->next_local;
table->field[0]->store((longlong)tzid);
- (void)table->file->ha_index_init(0);
+ (void)table->file->ha_index_init(0, 1);
// FIXME Is there any better approach than explicitly specifying 4 ???
res= table->file->index_read(table->record[0], (byte*)table->field[0]->ptr,
@@ -1926,7 +1926,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
*/
table= tz_tables->table;
table->field[0]->store((longlong)tzid);
- (void)table->file->ha_index_init(0);
+ (void)table->file->ha_index_init(0, 1);
// FIXME Is there any better approach than explicitly specifying 4 ???
res= table->file->index_read(table->record[0], (byte*)table->field[0]->ptr,
diff --git a/sql/unireg.cc b/sql/unireg.cc
index 2ea79d92e37..cdbae4f1eb9 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -46,7 +46,8 @@ static bool pack_fields(File file, List<create_field> &create_fields,
static bool make_empty_rec(THD *thd, int file, enum db_type table_type,
uint table_options,
List<create_field> &create_fields,
- uint reclength, ulong data_offset);
+ uint reclength, ulong data_offset,
+ handler *handler);
/*
Create a frm (table definition) file
@@ -79,13 +80,18 @@ bool mysql_create_frm(THD *thd, my_string file_name,
uchar fileinfo[64],forminfo[288],*keybuff;
TYPELIB formnames;
uchar *screen_buff;
+#ifdef HAVE_PARTITION_DB
+ partition_info *part_info= thd->lex->part_info;
+#endif
DBUG_ENTER("mysql_create_frm");
+#ifdef HAVE_PARTITION_DB
+ thd->lex->part_info= NULL;
+#endif
formnames.type_names=0;
if (!(screen_buff=pack_screens(create_fields,&info_length,&screens,0)))
DBUG_RETURN(1);
- if (db_file == NULL)
- db_file= get_new_handler((TABLE*) 0, create_info->db_type);
+ DBUG_ASSERT(db_file != NULL);
/* If fixed row records, we need one bit to check for deleted rows */
if (!(create_info->table_options & HA_OPTION_PACK_RECORD))
@@ -136,6 +142,13 @@ bool mysql_create_frm(THD *thd, my_string file_name,
60);
forminfo[46]=(uchar) strlen((char*)forminfo+47); // Length of comment
+#ifdef HAVE_PARTITION_DB
+ if (part_info)
+ {
+ int4store(fileinfo+55,part_info->part_info_len);
+ }
+#endif
+ int2store(fileinfo+59,db_file->extra_rec_buf_length());
if (my_pwrite(file,(byte*) fileinfo,64,0L,MYF_RW) ||
my_pwrite(file,(byte*) keybuff,key_info_length,
(ulong) uint2korr(fileinfo+6),MYF_RW))
@@ -144,7 +157,7 @@ bool mysql_create_frm(THD *thd, my_string file_name,
(ulong) uint2korr(fileinfo+6)+ (ulong) key_buff_length,
MY_SEEK_SET,MYF(0)));
if (make_empty_rec(thd,file,create_info->db_type,create_info->table_options,
- create_fields,reclength, data_offset))
+ create_fields,reclength, data_offset, db_file))
goto err;
VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0)));
@@ -153,6 +166,14 @@ bool mysql_create_frm(THD *thd, my_string file_name,
pack_fields(file, create_fields, data_offset))
goto err;
+#ifdef HAVE_PARTITION_DB
+ if (part_info)
+ {
+ if (my_write(file, (byte*) part_info->part_info_string,
+ part_info->part_info_len, MYF_RW))
+ goto err;
+ }
+#endif
#ifdef HAVE_CRYPTED_FRM
if (create_info->password)
{
@@ -211,15 +232,14 @@ err3:
Create a frm (table definition) file and the tables
SYNOPSIS
- mysql_create_frm()
+ rea_create_table()
thd Thread handler
file_name Name of file (including database and .frm)
create_info create info parameters
create_fields Fields to create
keys number of keys to create
key_info Keys to create
- db_file Handler to use. May be zero, in which case we use
- create_info->db_type
+ file Handler to use.
RETURN
0 ok
1 error
@@ -228,19 +248,21 @@ err3:
int rea_create_table(THD *thd, my_string file_name,
HA_CREATE_INFO *create_info,
List<create_field> &create_fields,
- uint keys, KEY *key_info)
+ uint keys, KEY *key_info, handler *file)
{
DBUG_ENTER("rea_create_table");
if (mysql_create_frm(thd, file_name, create_info,
- create_fields, keys, key_info, NULL))
+ create_fields, keys, key_info, file))
DBUG_RETURN(1);
+ if (file->create_handler_files(file_name))
+ goto err_handler;
if (!create_info->frm_only && ha_create_table(file_name,create_info,0))
- {
- my_delete(file_name,MYF(0));
- DBUG_RETURN(1);
- }
+ goto err_handler;
DBUG_RETURN(0);
+err_handler:
+ my_delete(file_name, MYF(0));
+ DBUG_RETURN(1);
} /* rea_create_table */
@@ -664,7 +686,8 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
uint table_options,
List<create_field> &create_fields,
uint reclength,
- ulong data_offset)
+ ulong data_offset,
+ handler *handler)
{
int error;
Field::utype type;
@@ -672,19 +695,15 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
uchar *buff,*null_pos;
TABLE table;
create_field *field;
- handler *handler;
enum_check_fields old_count_cuted_fields= thd->count_cuted_fields;
DBUG_ENTER("make_empty_rec");
/* We need a table to generate columns for default values */
bzero((char*) &table,sizeof(table));
table.s= &table.share_not_to_be_used;
- handler= get_new_handler((TABLE*) 0, table_type);
- if (!handler ||
- !(buff=(uchar*) my_malloc((uint) reclength,MYF(MY_WME | MY_ZEROFILL))))
+ if (!(buff=(uchar*) my_malloc((uint) reclength,MYF(MY_WME | MY_ZEROFILL))))
{
- delete handler;
DBUG_RETURN(1);
}
@@ -771,7 +790,6 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
err:
my_free((gptr) buff,MYF(MY_FAE));
- delete handler;
thd->count_cuted_fields= old_count_cuted_fields;
DBUG_RETURN(error);
} /* make_empty_rec */
diff --git a/sql/unireg.h b/sql/unireg.h
index 8d88683241b..aafb96ef7c3 100644
--- a/sql/unireg.h
+++ b/sql/unireg.h
@@ -80,6 +80,7 @@
#define PSEUDO_TABLE_BITS (PARAM_TABLE_BIT | OUTER_REF_TABLE_BIT | \
RAND_TABLE_BIT)
#define MAX_FIELDS 4096 /* Limit in the .frm file */
+#define MAX_PARTITIONS 1024
#define MAX_SORT_MEMORY (2048*1024-MALLOC_OVERHEAD)
#define MIN_SORT_MEMORY (32*1024-MALLOC_OVERHEAD)