summary refs log tree commit diff
path: root/storage
diff options
context:
space:
mode:
Diffstat (limited to 'storage')
-rw-r--r--storage/archive/ha_archive.cc6
-rw-r--r--storage/federatedx/ha_federatedx.cc3
-rw-r--r--storage/heap/ha_heap.cc46
-rw-r--r--storage/heap/ha_heap.h4
-rw-r--r--storage/heap/hp_clear.c2
-rw-r--r--storage/heap/hp_create.c18
-rw-r--r--storage/heap/hp_delete.c1
-rw-r--r--storage/heap/hp_hash.c58
-rw-r--r--storage/heap/hp_rfirst.c16
-rw-r--r--storage/heap/hp_rkey.c5
-rw-r--r--storage/heap/hp_rlast.c8
-rw-r--r--storage/heap/hp_rnext.c16
-rw-r--r--storage/heap/hp_rprev.c16
-rw-r--r--storage/heap/hp_rsame.c2
-rw-r--r--storage/heap/hp_scan.c2
-rw-r--r--storage/heap/hp_test2.c8
-rw-r--r--storage/heap/hp_update.c4
-rw-r--r--storage/heap/hp_write.c1
-rw-r--r--storage/innobase/handler/ha_innodb.cc139
-rw-r--r--storage/innobase/handler/ha_innodb.h4
-rw-r--r--storage/innodb_plugin/handler/ha_innodb.cc421
-rw-r--r--storage/innodb_plugin/handler/ha_innodb.h15
-rw-r--r--storage/innodb_plugin/handler/i_s.cc2
-rw-r--r--storage/innodb_plugin/include/trx0trx.h11
-rw-r--r--storage/innodb_plugin/trx/trx0trx.c2
-rw-r--r--storage/maria/CMakeLists.txt3
-rw-r--r--storage/maria/Makefile.am6
-rw-r--r--storage/maria/compat_aliases.cc250
-rw-r--r--storage/maria/compat_aliases.h27
-rw-r--r--storage/maria/ha_maria.cc247
-rw-r--r--storage/maria/ha_maria.h38
-rw-r--r--storage/maria/lockman.c6
-rw-r--r--storage/maria/ma_check.c69
-rw-r--r--storage/maria/ma_check_standalone.h7
-rw-r--r--storage/maria/ma_checkpoint.c88
-rw-r--r--storage/maria/ma_create.c12
-rw-r--r--storage/maria/ma_extra.c18
-rw-r--r--storage/maria/ma_ft_boolean_search.c13
-rw-r--r--storage/maria/ma_ft_nlq_search.c4
-rw-r--r--storage/maria/ma_key.c78
-rw-r--r--storage/maria/ma_loghandler.c29
-rw-r--r--storage/maria/ma_loghandler.h3
-rw-r--r--storage/maria/ma_norec.c66
-rw-r--r--storage/maria/ma_open.c27
-rw-r--r--storage/maria/ma_recovery.c14
-rw-r--r--storage/maria/ma_rkey.c83
-rw-r--r--storage/maria/ma_rnext.c24
-rw-r--r--storage/maria/ma_rnext_same.c20
-rw-r--r--storage/maria/ma_rprev.c26
-rw-r--r--storage/maria/ma_search.c8
-rw-r--r--storage/maria/ma_sort.c31
-rw-r--r--storage/maria/ma_static.c4
-rw-r--r--storage/maria/ma_test1.c19
-rw-r--r--storage/maria/ma_unique.c14
-rw-r--r--storage/maria/ma_write.c21
-rw-r--r--storage/maria/maria_chk.c4
-rw-r--r--storage/maria/maria_def.h33
-rw-r--r--storage/maria/maria_pack.c36
-rw-r--r--storage/maria/tablockman.c2
-rw-r--r--storage/maria/unittest/ma_loghandler_examples.c3
-rwxr-xr-xstorage/maria/unittest/ma_test_all-t1
-rw-r--r--storage/maria/unittest/trnman-t.c12
-rw-r--r--storage/myisam/ft_boolean_search.c14
-rw-r--r--storage/myisam/ft_nlq_search.c4
-rw-r--r--storage/myisam/ha_myisam.cc168
-rw-r--r--storage/myisam/ha_myisam.h31
-rw-r--r--storage/myisam/mi_check.c18
-rw-r--r--storage/myisam/mi_extra.c11
-rw-r--r--storage/myisam/mi_key.c68
-rw-r--r--storage/myisam/mi_locking.c8
-rw-r--r--storage/myisam/mi_open.c35
-rw-r--r--storage/myisam/mi_rkey.c178
-rw-r--r--storage/myisam/mi_rnext.c44
-rw-r--r--storage/myisam/mi_rnext_same.c27
-rw-r--r--storage/myisam/mi_rprev.c47
-rw-r--r--storage/myisam/mi_search.c5
-rw-r--r--storage/myisam/mi_static.c1
-rwxr-xr-xstorage/myisam/mi_test_all.sh1
-rw-r--r--storage/myisam/myisamdef.h18
-rw-r--r--storage/myisam/myisampack.c36
-rw-r--r--storage/myisam/sort.c6
-rw-r--r--storage/myisammrg/myrg_queue.c4
-rw-r--r--storage/myisammrg/myrg_rnext.c4
-rw-r--r--storage/myisammrg/myrg_rnext_same.c4
-rw-r--r--storage/myisammrg/myrg_rprev.c4
-rw-r--r--storage/pbxt/src/filesys_xt.cc3
-rw-r--r--storage/pbxt/src/ha_pbxt.cc39
-rwxr-xr-xstorage/pbxt/src/pthread_xt.cc43
-rw-r--r--storage/pbxt/src/thread_xt.cc16
-rw-r--r--storage/pbxt/src/thread_xt.h3
-rw-r--r--storage/pbxt/src/xaction_xt.cc149
-rw-r--r--storage/pbxt/src/xaction_xt.h2
-rw-r--r--storage/xtradb/CMakeLists.txt7
-rw-r--r--storage/xtradb/buf/buf0buf.c2
-rw-r--r--storage/xtradb/handler/ha_innodb.cc535
-rw-r--r--storage/xtradb/handler/ha_innodb.h37
-rw-r--r--storage/xtradb/handler/i_s.cc2
-rw-r--r--storage/xtradb/include/db0err.h3
-rw-r--r--storage/xtradb/include/log0log.h17
-rw-r--r--storage/xtradb/include/os0file.h8
-rw-r--r--storage/xtradb/include/os0sync.h71
-rw-r--r--storage/xtradb/include/os0sync.ic9
-rw-r--r--storage/xtradb/include/row0mysql.h19
-rw-r--r--storage/xtradb/include/srv0srv.h6
-rw-r--r--storage/xtradb/include/sync0sync.h2
-rw-r--r--storage/xtradb/include/trx0trx.h11
-rw-r--r--storage/xtradb/log/log0log.c97
-rw-r--r--storage/xtradb/os/os0file.c498
-rw-r--r--storage/xtradb/os/os0sync.c559
-rw-r--r--storage/xtradb/row/row0sel.c121
-rw-r--r--storage/xtradb/srv/srv0srv.c20
-rw-r--r--storage/xtradb/srv/srv0start.c31
-rw-r--r--storage/xtradb/trx/trx0trx.c2
113 files changed, 3404 insertions, 1800 deletions
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc
index 730d5b95abb..f7efaf4566f 100644
--- a/storage/archive/ha_archive.cc
+++ b/storage/archive/ha_archive.cc
@@ -684,11 +684,11 @@ int ha_archive::create(const char *name, TABLE *table_arg,
{
if (!my_fstat(frm_file, &file_stat, MYF(MY_WME)))
{
- frm_ptr= (uchar *)my_malloc(sizeof(uchar) * file_stat.st_size, MYF(0));
+ frm_ptr= (uchar *)my_malloc(sizeof(uchar) * (size_t)file_stat.st_size, MYF(0));
if (frm_ptr)
{
- my_read(frm_file, frm_ptr, file_stat.st_size, MYF(0));
- azwrite_frm(&create_stream, (char *)frm_ptr, file_stat.st_size);
+ my_read(frm_file, frm_ptr, (size_t)file_stat.st_size, MYF(0));
+ azwrite_frm(&create_stream, (char *)frm_ptr, (size_t)file_stat.st_size);
my_free((uchar*)frm_ptr, MYF(0));
}
}
diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc
index d97588dcc3d..be90a1d1a08 100644
--- a/storage/federatedx/ha_federatedx.cc
+++ b/storage/federatedx/ha_federatedx.cc
@@ -1453,7 +1453,8 @@ static void fill_server(MEM_ROOT *mem_root, FEDERATEDX_SERVER *server,
key.q_append('\0');
server->password= (const char *) (intptr) key.length();
key.append(password);
-
+ key.c_ptr_safe(); // Ensure we have end \0
+
server->key_length= key.length();
/* Copy and add end \0 */
server->key= (uchar *) strmake_root(mem_root, key.ptr(), key.length());
diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc
index 7757d0a6210..ae45e85c1b3 100644
--- a/storage/heap/ha_heap.cc
+++ b/storage/heap/ha_heap.cc
@@ -183,6 +183,19 @@ void ha_heap::set_keys_for_scanning(void)
}
+int ha_heap::can_continue_handler_scan()
+{
+ int error= 0;
+ if ((file->key_version != file->s->key_version && inited == INDEX) ||
+ (file->file_version != file->s->file_version && inited == RND))
+ {
+ /* Data changed, not safe to do index or rnd scan */
+ error= HA_ERR_RECORD_CHANGED;
+ }
+ return error;
+}
+
+
void ha_heap::update_key_stats()
{
for (uint i= 0; i < table->s->keys; i++)
@@ -213,7 +226,6 @@ void ha_heap::update_key_stats()
int ha_heap::write_row(uchar * buf)
{
int res;
- ha_statistic_increment(&SSV::ha_write_count);
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
table->timestamp_field->set_time();
if (table->next_number_field && buf == table->record[0])
@@ -237,7 +249,6 @@ int ha_heap::write_row(uchar * buf)
int ha_heap::update_row(const uchar * old_data, uchar * new_data)
{
int res;
- ha_statistic_increment(&SSV::ha_update_count);
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
table->timestamp_field->set_time();
res= heap_update(file,old_data,new_data);
@@ -256,7 +267,6 @@ int ha_heap::update_row(const uchar * old_data, uchar * new_data)
int ha_heap::delete_row(const uchar * buf)
{
int res;
- ha_statistic_increment(&SSV::ha_delete_count);
res= heap_delete(file,buf);
if (!res && table->s->tmp_table == NO_TMP_TABLE &&
++records_changed*HEAP_STATS_UPDATE_THRESHOLD > file->s->records)
@@ -275,7 +285,6 @@ int ha_heap::index_read_map(uchar *buf, const uchar *key,
enum ha_rkey_function find_flag)
{
DBUG_ASSERT(inited==INDEX);
- ha_statistic_increment(&SSV::ha_read_key_count);
int error = heap_rkey(file,buf,active_index, key, keypart_map, find_flag);
table->status = error ? STATUS_NOT_FOUND : 0;
return error;
@@ -285,7 +294,6 @@ int ha_heap::index_read_last_map(uchar *buf, const uchar *key,
key_part_map keypart_map)
{
DBUG_ASSERT(inited==INDEX);
- ha_statistic_increment(&SSV::ha_read_key_count);
int error= heap_rkey(file, buf, active_index, key, keypart_map,
HA_READ_PREFIX_LAST);
table->status= error ? STATUS_NOT_FOUND : 0;
@@ -296,7 +304,6 @@ int ha_heap::index_read_idx_map(uchar *buf, uint index, const uchar *key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{
- ha_statistic_increment(&SSV::ha_read_key_count);
int error = heap_rkey(file, buf, index, key, keypart_map, find_flag);
table->status = error ? STATUS_NOT_FOUND : 0;
return error;
@@ -305,7 +312,6 @@ int ha_heap::index_read_idx_map(uchar *buf, uint index, const uchar *key,
int ha_heap::index_next(uchar * buf)
{
DBUG_ASSERT(inited==INDEX);
- ha_statistic_increment(&SSV::ha_read_next_count);
int error=heap_rnext(file,buf);
table->status=error ? STATUS_NOT_FOUND: 0;
return error;
@@ -314,7 +320,6 @@ int ha_heap::index_next(uchar * buf)
int ha_heap::index_prev(uchar * buf)
{
DBUG_ASSERT(inited==INDEX);
- ha_statistic_increment(&SSV::ha_read_prev_count);
int error=heap_rprev(file,buf);
table->status=error ? STATUS_NOT_FOUND: 0;
return error;
@@ -323,7 +328,6 @@ int ha_heap::index_prev(uchar * buf)
int ha_heap::index_first(uchar * buf)
{
DBUG_ASSERT(inited==INDEX);
- ha_statistic_increment(&SSV::ha_read_first_count);
int error=heap_rfirst(file, buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
return error;
@@ -332,7 +336,6 @@ int ha_heap::index_first(uchar * buf)
int ha_heap::index_last(uchar * buf)
{
DBUG_ASSERT(inited==INDEX);
- ha_statistic_increment(&SSV::ha_read_last_count);
int error=heap_rlast(file, buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
return error;
@@ -345,7 +348,6 @@ int ha_heap::rnd_init(bool scan)
int ha_heap::rnd_next(uchar *buf)
{
- ha_statistic_increment(&SSV::ha_read_rnd_next_count);
int error=heap_scan(file, buf);
table->status=error ? STATUS_NOT_FOUND: 0;
return error;
@@ -355,7 +357,6 @@ int ha_heap::rnd_pos(uchar * buf, uchar *pos)
{
int error;
HEAP_PTR heap_position;
- ha_statistic_increment(&SSV::ha_read_rnd_count);
memcpy_fixed((char*) &heap_position, pos, sizeof(HEAP_PTR));
error=heap_rrnd(file, buf, heap_position);
table->status=error ? STATUS_NOT_FOUND: 0;
@@ -370,6 +371,10 @@ void ha_heap::position(const uchar *record)
int ha_heap::info(uint flag)
{
HEAPINFO hp_info;
+
+ if (!table)
+ return 1;
+
(void) heap_info(file,&hp_info,flag);
errkey= hp_info.errkey;
@@ -565,7 +570,7 @@ int ha_heap::delete_table(const char *name)
void ha_heap::drop_table(const char *name)
{
file->s->delete_on_close= 1;
- close();
+ ha_close();
}
@@ -654,7 +659,8 @@ int ha_heap::create(const char *name, TABLE *table_arg,
seg->type != HA_KEYTYPE_VARTEXT1 &&
seg->type != HA_KEYTYPE_VARTEXT2 &&
seg->type != HA_KEYTYPE_VARBINARY1 &&
- seg->type != HA_KEYTYPE_VARBINARY2)
+ seg->type != HA_KEYTYPE_VARBINARY2 &&
+ seg->type != HA_KEYTYPE_BIT)
seg->type= HA_KEYTYPE_BINARY;
}
seg->start= (uint) key_part->offset;
@@ -686,6 +692,18 @@ int ha_heap::create(const char *name, TABLE *table_arg,
auto_key= key+ 1;
auto_key_type= field->key_type();
}
+ if (seg->type == HA_KEYTYPE_BIT)
+ {
+ seg->bit_length= ((Field_bit *) field)->bit_len;
+ seg->bit_start= ((Field_bit *) field)->bit_ofs;
+ seg->bit_pos= (uint) (((Field_bit *) field)->bit_ptr -
+ (uchar*) table_arg->record[0]);
+ }
+ else
+ {
+ seg->bit_length= seg->bit_start= 0;
+ seg->bit_pos= 0;
+ }
}
}
mem_per_row+= MY_ALIGN(share->reclength + 1, sizeof(char*));
diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h
index 69751101645..799e52d903b 100644
--- a/storage/heap/ha_heap.h
+++ b/storage/heap/ha_heap.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000-2006 MySQL AB
+/* Copyright (C) 2000-2006 MySQL AB, 2009-2011 Monty Program Ab
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -52,6 +52,7 @@ public:
{
return (HA_FAST_KEY_READ | HA_NO_BLOBS | HA_NULL_IN_KEY |
HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
+ HA_CAN_SQL_HANDLER |
HA_REC_NOT_IN_SEQ | HA_CAN_INSERT_DELAYED | HA_NO_TRANSACTIONS |
HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT);
}
@@ -93,6 +94,7 @@ public:
int rnd_next(uchar *buf);
int rnd_pos(uchar * buf, uchar *pos);
void position(const uchar *record);
+ int can_continue_handler_scan();
int info(uint);
int extra(enum ha_extra_function operation);
int reset();
diff --git a/storage/heap/hp_clear.c b/storage/heap/hp_clear.c
index babfcbd6f41..b999f1ff5e8 100644
--- a/storage/heap/hp_clear.c
+++ b/storage/heap/hp_clear.c
@@ -40,6 +40,8 @@ void hp_clear(HP_SHARE *info)
info->blength=1;
info->changed=0;
info->del_link=0;
+ info->key_version++;
+ info->file_version++;
DBUG_VOID_RETURN;
}
diff --git a/storage/heap/hp_create.c b/storage/heap/hp_create.c
index bc8183bf777..f9d0a48324d 100644
--- a/storage/heap/hp_create.c
+++ b/storage/heap/hp_create.c
@@ -39,6 +39,10 @@ int heap_create(const char *name, uint keys, HP_KEYDEF *keydef,
share= 0;
}
}
+ else
+ {
+ DBUG_PRINT("info", ("Creating internal (no named) temporary table"));
+ }
if (!share)
{
@@ -104,6 +108,14 @@ int heap_create(const char *name, uint keys, HP_KEYDEF *keydef,
*/
keyinfo->seg[j].type= HA_KEYTYPE_VARTEXT1;
break;
+ case HA_KEYTYPE_BIT:
+ /*
+ The odd bits, which are stored separately (if they are present;
+ see bit_pos, bit_length), are already included in seg[j].length as
+ an additional byte.
+ See field.h, function key_length()
+ */
+ break;
default:
break;
}
@@ -247,10 +259,15 @@ static void init_block(HP_BLOCK *block, uint reclength, ulong min_records,
static inline void heap_try_free(HP_SHARE *share)
{
+ DBUG_ENTER("heap_try_free");
if (share->open_count == 0)
hp_free(share);
else
+ {
+ DBUG_PRINT("info", ("Table is still in use. Will be freed on close"));
share->delete_on_close= 1;
+ }
+ DBUG_VOID_RETURN;
}
@@ -269,6 +286,7 @@ int heap_delete_table(const char *name)
else
{
result= my_errno=ENOENT;
+ DBUG_PRINT("error", ("Could not find table '%s'", name));
}
pthread_mutex_unlock(&THR_LOCK_heap);
DBUG_RETURN(result);
diff --git a/storage/heap/hp_delete.c b/storage/heap/hp_delete.c
index ceba0fcf12e..db2c0df6128 100644
--- a/storage/heap/hp_delete.c
+++ b/storage/heap/hp_delete.c
@@ -47,6 +47,7 @@ int heap_delete(HP_INFO *info, const uchar *record)
share->del_link=pos;
pos[share->reclength]=0; /* Record deleted */
share->deleted++;
+ share->key_version++;
info->current_hash_ptr=0;
#if !defined(DBUG_OFF) && defined(EXTRA_HEAP_DEBUG)
DBUG_EXECUTE("check_heap",heap_check_heap(info, 0););
diff --git a/storage/heap/hp_hash.c b/storage/heap/hp_hash.c
index aaaa0fe833f..fb9ea44a424 100644
--- a/storage/heap/hp_hash.c
+++ b/storage/heap/hp_hash.c
@@ -349,6 +349,15 @@ ulong hp_rec_hashnr(register HP_KEYDEF *keydef, register const uchar *rec)
}
else
{
+ if (seg->type == HA_KEYTYPE_BIT && seg->bit_length)
+ {
+ uchar bits= get_rec_bits(rec + seg->bit_pos,
+ seg->bit_start, seg->bit_length);
+ nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) bits))+ (nr << 8);
+ nr2+=3;
+ end--;
+ }
+
for (; pos < end ; pos++)
{
nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8);
@@ -465,6 +474,14 @@ ulong hp_rec_hashnr(register HP_KEYDEF *keydef, register const uchar *rec)
else
{
uchar *end= pos+seg->length;
+ if (seg->type == HA_KEYTYPE_BIT && seg->bit_length)
+ {
+ uchar bits= get_rec_bits(rec + seg->bit_pos,
+ seg->bit_start, seg->bit_length);
+ nr *=16777619;
+ nr ^=(uint) bits;
+ end--;
+ }
for ( ; pos < end ; pos++)
{
nr *=16777619;
@@ -577,7 +594,18 @@ int hp_rec_key_cmp(HP_KEYDEF *keydef, const uchar *rec1, const uchar *rec2,
}
else
{
- if (bcmp(rec1+seg->start,rec2+seg->start,seg->length))
+ uint dec= 0;
+ if (seg->type == HA_KEYTYPE_BIT && seg->bit_length)
+ {
+ uchar bits1= get_rec_bits(rec1 + seg->bit_pos,
+ seg->bit_start, seg->bit_length);
+ uchar bits2= get_rec_bits(rec2 + seg->bit_pos,
+ seg->bit_start, seg->bit_length);
+ if (bits1 != bits2)
+ return 1;
+ dec= 1;
+ }
+ if (bcmp(rec1 + seg->start, rec2 + seg->start, seg->length - dec))
return 1;
}
}
@@ -660,7 +688,18 @@ int hp_key_cmp(HP_KEYDEF *keydef, const uchar *rec, const uchar *key)
}
else
{
- if (bcmp(rec+seg->start,key,seg->length))
+ uint dec= 0;
+ if (seg->type == HA_KEYTYPE_BIT && seg->bit_length)
+ {
+ uchar bits= get_rec_bits(rec + seg->bit_pos,
+ seg->bit_start, seg->bit_length);
+ if (bits != (*key))
+ return 1;
+ dec= 1;
+ key++;
+ }
+
+ if (bcmp(rec + seg->start, key, seg->length - dec))
return 1;
}
}
@@ -689,6 +728,12 @@ void hp_make_key(HP_KEYDEF *keydef, uchar *key, const uchar *rec)
}
if (seg->type == HA_KEYTYPE_VARTEXT1)
char_length+= seg->bit_start; /* Copy also length */
+ else if (seg->type == HA_KEYTYPE_BIT && seg->bit_length)
+ {
+ *key++= get_rec_bits(rec + seg->bit_pos,
+ seg->bit_start, seg->bit_length);
+ char_length--;
+ }
memcpy(key,rec+seg->start,(size_t) char_length);
key+= char_length;
}
@@ -720,7 +765,8 @@ uint hp_rb_make_key(HP_KEYDEF *keydef, uchar *key,
{
uint length= seg->length;
uchar *pos= (uchar*) rec + seg->start;
-
+ DBUG_ASSERT(seg->type != HA_KEYTYPE_BIT);
+
#ifdef HAVE_ISNAN
if (seg->type == HA_KEYTYPE_FLOAT)
{
@@ -784,6 +830,12 @@ uint hp_rb_make_key(HP_KEYDEF *keydef, uchar *key,
seg->charset->cset->fill(seg->charset, (char*) key + char_length,
seg->length - char_length, ' ');
}
+ if (seg->type == HA_KEYTYPE_BIT && seg->bit_length)
+ {
+ *key++= get_rec_bits(rec + seg->bit_pos,
+ seg->bit_start, seg->bit_length);
+ char_length--;
+ }
memcpy(key, rec + seg->start, (size_t) char_length);
key+= seg->length;
}
diff --git a/storage/heap/hp_rfirst.c b/storage/heap/hp_rfirst.c
index d0d2ec9b506..e45af4a219f 100644
--- a/storage/heap/hp_rfirst.c
+++ b/storage/heap/hp_rfirst.c
@@ -24,6 +24,8 @@ int heap_rfirst(HP_INFO *info, uchar *record, int inx)
DBUG_ENTER("heap_rfirst");
info->lastinx= inx;
+ info->key_version= info->s->key_version;
+
if (keyinfo->algorithm == HA_KEY_ALG_BTREE)
{
uchar *pos;
@@ -50,6 +52,7 @@ int heap_rfirst(HP_INFO *info, uchar *record, int inx)
}
else
{
+ info->update= HA_STATE_NO_KEY;
my_errno = HA_ERR_END_OF_FILE;
DBUG_RETURN(my_errno);
}
@@ -57,15 +60,8 @@ int heap_rfirst(HP_INFO *info, uchar *record, int inx)
}
else
{
- if (!(info->s->records))
- {
- my_errno=HA_ERR_END_OF_FILE;
- DBUG_RETURN(my_errno);
- }
- DBUG_ASSERT(0); /* TODO fix it */
- info->current_record=0;
- info->current_hash_ptr=0;
- info->update=HA_STATE_PREV_FOUND;
- DBUG_RETURN(heap_rnext(info,record));
+ /* We can't scan a non-existing key value with a hash index */
+ my_errno= HA_ERR_WRONG_COMMAND;
+ DBUG_RETURN(my_errno);
}
}
diff --git a/storage/heap/hp_rkey.c b/storage/heap/hp_rkey.c
index 27d1114770e..166ed28aed0 100644
--- a/storage/heap/hp_rkey.c
+++ b/storage/heap/hp_rkey.c
@@ -30,6 +30,7 @@ int heap_rkey(HP_INFO *info, uchar *record, int inx, const uchar *key,
}
info->lastinx= inx;
info->current_record= (ulong) ~0L; /* For heap_rrnd() */
+ info->key_version= info->s->key_version;
if (keyinfo->algorithm == HA_KEY_ALG_BTREE)
{
@@ -50,7 +51,7 @@ int heap_rkey(HP_INFO *info, uchar *record, int inx, const uchar *key,
if (!(pos= tree_search_key(&keyinfo->rb_tree, info->lastkey, info->parents,
&info->last_pos, find_flag, &custom_arg)))
{
- info->update= 0;
+ info->update= HA_STATE_NO_KEY;
DBUG_RETURN(my_errno= HA_ERR_KEY_NOT_FOUND);
}
memcpy(&pos, pos + (*keyinfo->get_key_length)(keyinfo, pos), sizeof(uchar*));
@@ -60,7 +61,7 @@ int heap_rkey(HP_INFO *info, uchar *record, int inx, const uchar *key,
{
if (!(pos= hp_search(info, share->keydef + inx, key, 0)))
{
- info->update= 0;
+ info->update= HA_STATE_NO_KEY;
DBUG_RETURN(my_errno);
}
if (!(keyinfo->flag & HA_NOSAME))
diff --git a/storage/heap/hp_rlast.c b/storage/heap/hp_rlast.c
index 45ad7c21f49..0710401e5a5 100644
--- a/storage/heap/hp_rlast.c
+++ b/storage/heap/hp_rlast.c
@@ -25,6 +25,7 @@ int heap_rlast(HP_INFO *info, uchar *record, int inx)
DBUG_ENTER("heap_rlast");
info->lastinx= inx;
+ info->key_version= info->s->key_version;
if (keyinfo->algorithm == HA_KEY_ALG_BTREE)
{
uchar *pos;
@@ -47,9 +48,8 @@ int heap_rlast(HP_INFO *info, uchar *record, int inx)
}
else
{
- info->current_ptr=0;
- info->current_hash_ptr=0;
- info->update=HA_STATE_NEXT_FOUND;
- DBUG_RETURN(heap_rprev(info,record));
+ /* We can't scan a non-existing key value with a hash index */
+ my_errno= HA_ERR_WRONG_COMMAND;
+ DBUG_RETURN(my_errno);
}
}
diff --git a/storage/heap/hp_rnext.c b/storage/heap/hp_rnext.c
index 3d715f4e6d3..7a759e70972 100644
--- a/storage/heap/hp_rnext.c
+++ b/storage/heap/hp_rnext.c
@@ -32,7 +32,20 @@ int heap_rnext(HP_INFO *info, uchar *record)
{
heap_rb_param custom_arg;
- if (info->last_pos)
+ /* If no active record and last was not deleted */
+ if (!(info->update & (HA_STATE_AKTIV | HA_STATE_NO_KEY |
+ HA_STATE_DELETED)))
+ {
+ if (info->update & HA_STATE_NEXT_FOUND)
+ pos= 0; /* Can't search after last row */
+ else
+ {
+ /* Last was 'prev' before first record; search after first record */
+ pos= tree_search_edge(&keyinfo->rb_tree, info->parents,
+ &info->last_pos, offsetof(TREE_ELEMENT, left));
+ }
+ }
+ else if (info->last_pos)
{
/*
We enter this branch for non-DELETE queries after heap_rkey()
@@ -70,6 +83,7 @@ int heap_rnext(HP_INFO *info, uchar *record)
custom_arg.keyseg = keyinfo->seg;
custom_arg.key_length = info->lastkey_len;
custom_arg.search_flag = SEARCH_SAME | SEARCH_FIND;
+ info->last_find_flag= HA_READ_KEY_OR_NEXT;
pos = tree_search_key(&keyinfo->rb_tree, info->lastkey, info->parents,
&info->last_pos, info->last_find_flag, &custom_arg);
}
diff --git a/storage/heap/hp_rprev.c b/storage/heap/hp_rprev.c
index 63bfffffba9..8a50444bb5f 100644
--- a/storage/heap/hp_rprev.c
+++ b/storage/heap/hp_rprev.c
@@ -32,7 +32,20 @@ int heap_rprev(HP_INFO *info, uchar *record)
{
heap_rb_param custom_arg;
- if (info->last_pos)
+ /* If no active record and last was not deleted */
+ if (!(info->update & (HA_STATE_AKTIV | HA_STATE_NO_KEY |
+ HA_STATE_DELETED)))
+ {
+ if (info->update & HA_STATE_PREV_FOUND)
+ pos= 0; /* Can't search before first row */
+ else
+ {
+ /* Last was 'next' after last record; search after last record */
+ pos= tree_search_edge(&keyinfo->rb_tree, info->parents,
+ &info->last_pos, offsetof(TREE_ELEMENT, right));
+ }
+ }
+ else if (info->last_pos)
pos = tree_search_next(&keyinfo->rb_tree, &info->last_pos,
offsetof(TREE_ELEMENT, right),
offsetof(TREE_ELEMENT, left));
@@ -41,6 +54,7 @@ int heap_rprev(HP_INFO *info, uchar *record)
custom_arg.keyseg = keyinfo->seg;
custom_arg.key_length = keyinfo->length;
custom_arg.search_flag = SEARCH_SAME;
+ info->last_find_flag= HA_READ_KEY_OR_PREV;
pos = tree_search_key(&keyinfo->rb_tree, info->lastkey, info->parents,
&info->last_pos, info->last_find_flag, &custom_arg);
}
diff --git a/storage/heap/hp_rsame.c b/storage/heap/hp_rsame.c
index 1a3724672b6..f93a443aa48 100644
--- a/storage/heap/hp_rsame.c
+++ b/storage/heap/hp_rsame.c
@@ -43,7 +43,7 @@ int heap_rsame(register HP_INFO *info, uchar *record, int inx)
hp_make_key(share->keydef + inx, info->lastkey, record);
if (!hp_search(info, share->keydef + inx, info->lastkey, 3))
{
- info->update=0;
+ info->update= 0;
DBUG_RETURN(my_errno);
}
}
diff --git a/storage/heap/hp_scan.c b/storage/heap/hp_scan.c
index e8913e92c86..397dd8b54d4 100644
--- a/storage/heap/hp_scan.c
+++ b/storage/heap/hp_scan.c
@@ -31,6 +31,8 @@ int heap_scan_init(register HP_INFO *info)
info->current_record= (ulong) ~0L; /* No current record */
info->update=0;
info->next_block=0;
+ info->key_version= info->s->key_version;
+ info->file_version= info->s->file_version;
DBUG_RETURN(0);
}
diff --git a/storage/heap/hp_test2.c b/storage/heap/hp_test2.c
index 67471e93a7a..ba593f31cb0 100644
--- a/storage/heap/hp_test2.c
+++ b/storage/heap/hp_test2.c
@@ -21,9 +21,6 @@
#ifdef DBUG_OFF
#undef DBUG_OFF
#endif
-#ifndef SAFEMALLOC
-#define SAFEMALLOC 1
-#endif
#include "heapdef.h" /* Because of hp_find_block */
#include <signal.h>
@@ -319,7 +316,8 @@ int main(int argc, char *argv[])
if (!silent)
printf("- Read last key - delete - prev - prev - opt_delete - prev -> first\n");
- if (heap_rlast(file,record3,0)) goto err;
+ if (heap_rprev(file,record))
+ goto err;
if (heap_delete(file,record3)) goto err;
key_check-=atoi((char*) record3);
key1[atoi((char*) record+keyinfo[0].seg[0].start)]--;
@@ -526,7 +524,7 @@ int main(int argc, char *argv[])
}
ant=0;
- for (error=heap_rlast(file,record,0) ;
+ for (error=heap_rprev(file,record) ;
! error ;
error=heap_rprev(file,record))
{
diff --git a/storage/heap/hp_update.c b/storage/heap/hp_update.c
index 7f469af3c96..ab831382325 100644
--- a/storage/heap/hp_update.c
+++ b/storage/heap/hp_update.c
@@ -21,7 +21,7 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new)
{
HP_KEYDEF *keydef, *end, *p_lastinx;
uchar *pos;
- my_bool auto_key_changed= 0;
+ my_bool auto_key_changed= 0, key_changed= 0;
HP_SHARE *share= info->s;
DBUG_ENTER("heap_update");
@@ -54,6 +54,8 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new)
#endif
if (auto_key_changed)
heap_update_auto_increment(info, heap_new);
+ if (key_changed)
+ share->key_version++;
DBUG_RETURN(0);
err:
diff --git a/storage/heap/hp_write.c b/storage/heap/hp_write.c
index 4e8fa7e3580..bf27503de9b 100644
--- a/storage/heap/hp_write.c
+++ b/storage/heap/hp_write.c
@@ -56,6 +56,7 @@ int heap_write(HP_INFO *info, const uchar *record)
pos[share->reclength]=1; /* Mark record as not deleted */
if (++share->records == share->blength)
share->blength+= share->blength;
+ info->s->key_version++;
info->current_ptr=pos;
info->current_hash_ptr=0;
info->update|=HA_STATE_AKTIV;
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 6f58fd70fbd..eaf5ec4bed5 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -1061,7 +1061,29 @@ innobase_mysql_tmpfile(void)
will be passed to fdopen(), it will be closed by invoking
fclose(), which in turn will invoke close() instead of
my_close(). */
+
+#ifdef _WIN32
+ /* Note that on Windows, the integer returned by mysql_tmpfile
+ has no relation to a C runtime file descriptor. Here, we need
+ to call my_get_osfhandle to get the HANDLE and then convert it
+ to a C runtime file descriptor. */
+ {
+ HANDLE hFile = my_get_osfhandle(fd);
+ HANDLE hDup;
+ BOOL bOK =
+ DuplicateHandle(GetCurrentProcess(), hFile, GetCurrentProcess(),
+ &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS);
+ if(bOK) {
+ fd2 = _open_osfhandle((intptr_t)hDup,0);
+ }
+ else {
+ my_osmaperr(GetLastError());
+ fd2 = -1;
+ }
+ }
+#else
fd2 = dup(fd);
+#endif
if (fd2 < 0) {
DBUG_PRINT("error",("Got error %d on dup",fd2));
my_errno=errno;
@@ -3203,90 +3225,64 @@ get_innobase_type_from_mysql_type(
8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
the type */
- DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_STRING < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_VAR_STRING < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_DOUBLE < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_FLOAT < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_DECIMAL < 256);
- if (field->flags & UNSIGNED_FLAG) {
+ *unsigned_flag = 0;
+ switch (field->key_type()) {
+ case HA_KEYTYPE_USHORT_INT:
+ case HA_KEYTYPE_ULONG_INT:
+ case HA_KEYTYPE_UINT24:
+ case HA_KEYTYPE_ULONGLONG:
*unsigned_flag = DATA_UNSIGNED;
- } else {
- *unsigned_flag = 0;
- }
-
- if (field->real_type() == MYSQL_TYPE_ENUM
- || field->real_type() == MYSQL_TYPE_SET) {
-
- /* MySQL has field->type() a string type for these, but the
- data is actually internally stored as an unsigned integer
- code! */
-
- *unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
- flag set to zero, even though
- internally this is an unsigned
- integer type */
+ /* fall through */
+ case HA_KEYTYPE_SHORT_INT:
+ case HA_KEYTYPE_LONG_INT:
+ case HA_KEYTYPE_INT24:
+ case HA_KEYTYPE_INT8:
+ case HA_KEYTYPE_LONGLONG:
return(DATA_INT);
- }
-
- switch (field->type()) {
- /* NOTE that we only allow string types in DATA_MYSQL and
- DATA_VARMYSQL */
- case MYSQL_TYPE_VAR_STRING: /* old <= 4.1 VARCHAR */
- case MYSQL_TYPE_VARCHAR: /* new >= 5.0.3 true VARCHAR */
- if (field->binary()) {
- return(DATA_BINARY);
- } else if (strcmp(
- field->charset()->name,
- "latin1_swedish_ci") == 0) {
+ case HA_KEYTYPE_FLOAT:
+ return(DATA_FLOAT);
+ case HA_KEYTYPE_DOUBLE:
+ return(DATA_DOUBLE);
+ case HA_KEYTYPE_BINARY:
+ if (field->type() == MYSQL_TYPE_TINY)
+ { // compatibility workaround
+ *unsigned_flag= DATA_UNSIGNED ;
+ return DATA_INT;
+ }
+ return(DATA_FIXBINARY);
+ case HA_KEYTYPE_VARBINARY2:
+ if (field->type() != MYSQL_TYPE_VARCHAR)
+ return(DATA_BLOB);
+ /* fall through */
+ case HA_KEYTYPE_VARBINARY1:
+ return(DATA_BINARY);
+ case HA_KEYTYPE_VARTEXT2:
+ if (field->type() != MYSQL_TYPE_VARCHAR)
+ return(DATA_BLOB);
+ /* fall through */
+ case HA_KEYTYPE_VARTEXT1:
+ if (field->charset() == &my_charset_latin1) {
return(DATA_VARCHAR);
} else {
return(DATA_VARMYSQL);
}
- case MYSQL_TYPE_BIT:
- case MYSQL_TYPE_STRING: if (field->binary()) {
-
- return(DATA_FIXBINARY);
- } else if (strcmp(
- field->charset()->name,
- "latin1_swedish_ci") == 0) {
+ case HA_KEYTYPE_TEXT:
+ if (field->charset() == &my_charset_latin1) {
return(DATA_CHAR);
} else {
return(DATA_MYSQL);
}
- case MYSQL_TYPE_NEWDECIMAL:
- return(DATA_FIXBINARY);
- case MYSQL_TYPE_LONG:
- case MYSQL_TYPE_LONGLONG:
- case MYSQL_TYPE_TINY:
- case MYSQL_TYPE_SHORT:
- case MYSQL_TYPE_INT24:
- case MYSQL_TYPE_DATE:
- case MYSQL_TYPE_DATETIME:
- case MYSQL_TYPE_YEAR:
- case MYSQL_TYPE_NEWDATE:
- case MYSQL_TYPE_TIME:
- case MYSQL_TYPE_TIMESTAMP:
- return(DATA_INT);
- case MYSQL_TYPE_FLOAT:
- return(DATA_FLOAT);
- case MYSQL_TYPE_DOUBLE:
- return(DATA_DOUBLE);
- case MYSQL_TYPE_DECIMAL:
+ case HA_KEYTYPE_NUM:
return(DATA_DECIMAL);
- case MYSQL_TYPE_GEOMETRY:
- case MYSQL_TYPE_TINY_BLOB:
- case MYSQL_TYPE_MEDIUM_BLOB:
- case MYSQL_TYPE_BLOB:
- case MYSQL_TYPE_LONG_BLOB:
- return(DATA_BLOB);
- case MYSQL_TYPE_NULL:
- /* MySQL currently accepts "NULL" datatype, but will
- reject such datatype in the next release. We will cope
- with it and not trigger assertion failure in 5.1 */
- break;
- default:
+ case HA_KEYTYPE_BIT:
+ case HA_KEYTYPE_END:
assert(0);
}
@@ -6599,6 +6595,7 @@ ha_innobase::info_low(
}
stats.check_time = 0;
+ stats.mrr_length_per_rec= ref_length + 8; // 8 = max(sizeof(void *));
if (stats.records == 0) {
stats.mean_rec_length = 0;
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index 8b91f7d4c51..06b30d4a5b0 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -98,10 +98,14 @@ class ha_innobase: public handler
Table_flags table_flags() const;
ulong index_flags(uint idx, uint part, bool all_parts) const
{
+ ulong extra_flag= 0;
+ if (table && idx == table->s->primary_key)
+ extra_flag= HA_CLUSTERED_INDEX;
return (HA_READ_NEXT |
HA_READ_PREV |
HA_READ_ORDER |
HA_READ_RANGE |
+ extra_flag |
HA_KEYREAD_ONLY);
}
uint max_supported_keys() const { return MAX_KEY; }
diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc
index a9b81116a90..e470616d169 100644
--- a/storage/innodb_plugin/handler/ha_innodb.cc
+++ b/storage/innodb_plugin/handler/ha_innodb.cc
@@ -32,7 +32,8 @@ Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/* TODO list for the InnoDB handler in 5.0:
- - Remove the flag trx->active_trans and look at trx->conc_state
+ - Remove the flag trx->active_flag & TRX_ACTIVE_IN_MYSQL and look
+ at trx->conc_state
- fix savepoint functions to use savepoint storage area
- Find out what kind of problems the OS X case-insensitivity causes to
table and database names; should we 'normalize' the names like we do
@@ -48,7 +49,9 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#include <m_ctype.h>
#include <mysys_err.h>
#include <mysql/plugin.h>
-
+#ifdef _WIN32
+#include <io.h>
+#endif
/** @file ha_innodb.cc */
/* Include necessary InnoDB headers */
@@ -102,8 +105,6 @@ bool check_global_access(THD *thd, ulong want_access);
/** to protect innobase_open_files */
static pthread_mutex_t innobase_share_mutex;
-/** to force correct commit order in binlog */
-static pthread_mutex_t prepare_commit_mutex;
static ulong commit_threads = 0;
static pthread_mutex_t commit_threads_m;
static pthread_cond_t commit_cond;
@@ -216,6 +217,7 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
+static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
@@ -1172,7 +1174,28 @@ innobase_mysql_tmpfile(void)
will be passed to fdopen(), it will be closed by invoking
fclose(), which in turn will invoke close() instead of
my_close(). */
+#ifdef _WIN32
+ /* Note that on Windows, the integer returned by mysql_tmpfile
+ has no relation to a C runtime file descriptor. Here, we need
+ to call my_get_osfhandle to get the HANDLE, duplicate it, and then
+ convert the duplicate to a C runtime file descriptor. */
+ {
+ HANDLE hFile = my_get_osfhandle(fd);
+ HANDLE hDup;
+ BOOL bOK =
+ DuplicateHandle(GetCurrentProcess(), hFile, GetCurrentProcess(),
+ &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS);
+ if(bOK) {
+ fd2 = _open_osfhandle((intptr_t)hDup,0);
+ }
+ else {
+ my_osmaperr(GetLastError());
+ fd2 = -1;
+ }
+ }
+#else
fd2 = dup(fd);
+#endif
if (fd2 < 0) {
DBUG_PRINT("error",("Got error %d on dup",fd2));
my_errno=errno;
@@ -1339,7 +1362,6 @@ innobase_trx_init(
trx_t* trx) /*!< in/out: InnoDB transaction handle */
{
DBUG_ENTER("innobase_trx_init");
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
DBUG_ASSERT(thd == trx->mysql_thd);
trx->check_foreigns = !thd_test_options(
@@ -1388,8 +1410,6 @@ check_trx_exists(
{
trx_t*& trx = thd_to_trx(thd);
- ut_ad(EQ_CURRENT_THD(thd));
-
if (trx == NULL) {
trx = innobase_trx_allocate(thd);
} else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
@@ -1655,10 +1675,10 @@ innobase_query_caching_of_table_permitted(
/* The call of row_search_.. will start a new transaction if it is
not yet started */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(innodb_hton_ptr, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
if (row_search_check_if_query_cache_permitted(trx, norm_name)) {
@@ -1928,11 +1948,11 @@ ha_innobase::init_table_handle_for_HANDLER(void)
/* Set the MySQL flag to mark that there is an active transaction */
- if (prebuilt->trx->active_trans == 0) {
+ if ((prebuilt->trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, user_thd);
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
/* We did the necessary inits in this function, no need to repeat them
@@ -1983,6 +2003,8 @@ innobase_init(
innobase_hton->savepoint_set=innobase_savepoint;
innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
innobase_hton->savepoint_release=innobase_release_savepoint;
+ innobase_hton->prepare_ordered=NULL;
+ innobase_hton->commit_ordered=innobase_commit_ordered;
innobase_hton->commit=innobase_commit;
innobase_hton->rollback=innobase_rollback;
innobase_hton->prepare=innobase_xa_prepare;
@@ -2287,7 +2309,6 @@ innobase_change_buffering_inited_ok:
innobase_open_tables = hash_create(200);
pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
- pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
pthread_cond_init(&commit_cond, NULL);
@@ -2342,7 +2363,6 @@ innobase_end(
my_free(internal_innobase_data_file_path,
MYF(MY_ALLOW_ZERO_PTR));
pthread_mutex_destroy(&innobase_share_mutex);
- pthread_mutex_destroy(&prepare_commit_mutex);
pthread_mutex_destroy(&commit_threads_m);
pthread_mutex_destroy(&commit_cond_m);
pthread_cond_destroy(&commit_cond);
@@ -2441,14 +2461,118 @@ innobase_start_trx_and_assign_read_view(
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(hton, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
DBUG_RETURN(0);
}
+/*****************************************************************//**
+Shared implementation of the fast, ordered part of an InnoDB commit.
+
+Waits for a commit slot when innobase_commit_concurrency > 0, stores the
+binlog file name and offset of this commit in trx, and calls
+innobase_commit_low() with trx->flush_log_later set, so that the log
+write + flush is not done here. Called from innobase_commit_ordered()
+and from innobase_commit() when the ordered part has not run yet. */
+static
+void
+innobase_commit_ordered_2(
+/*============*/
+ trx_t* trx, /*!< in: Innodb transaction */
+ THD* thd) /*!< in: MySQL thread handle */
+{
+ ulonglong tmp_pos;
+ /* Use this function's own name so that debug traces can tell it
+ apart from innobase_commit_ordered(). */
+ DBUG_ENTER("innobase_commit_ordered_2");
+
+ /* We need current binlog position for ibbackup to work.
+ Note, the position is current because commit_ordered is guaranteed
+ to be called in same sequence as writing to binlog. */
+
+retry:
+ if (innobase_commit_concurrency > 0) {
+ pthread_mutex_lock(&commit_cond_m);
+ commit_threads++;
+
+ if (commit_threads > innobase_commit_concurrency) {
+ /* No free slot: undo the increment, wait to be
+ signalled by a finishing committer, then re-check. */
+ commit_threads--;
+ pthread_cond_wait(&commit_cond,
+ &commit_cond_m);
+ pthread_mutex_unlock(&commit_cond_m);
+ goto retry;
+ }
+ else {
+ pthread_mutex_unlock(&commit_cond_m);
+ }
+ }
+
+ mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name));
+ trx->mysql_log_offset = (ib_int64_t) tmp_pos;
+
+ /* Don't do write + flush right now. For group commit
+ to work we want to do the flush in the innobase_commit()
+ method, which runs without holding any locks. */
+ trx->flush_log_later = TRUE;
+ innobase_commit_low(trx);
+ trx->flush_log_later = FALSE;
+
+ if (innobase_commit_concurrency > 0) {
+ /* Give the slot back and wake one waiter, if any. */
+ pthread_mutex_lock(&commit_cond_m);
+ commit_threads--;
+ pthread_cond_signal(&commit_cond);
+ pthread_mutex_unlock(&commit_cond_m);
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/*****************************************************************//**
+Perform the first, fast part of InnoDB commit.
+
+Doing it in this call ensures that we get the same commit order here
+as in binlog and any other participating transactional storage engines.
+
+Note that we want to do as little as really needed here, as we run
+under a global mutex. The expensive fsync() is done later, in
+innobase_commit(), without a lock so group commit can take place.
+
+Note also that this method can be called from a different thread than
+the one handling the rest of the transaction.
+
+On completion TRX_ACTIVE_COMMIT_ORDERED is set in trx->active_flag;
+innobase_commit() tests that flag so that it does not run the fast
+part a second time. */
+static
+void
+innobase_commit_ordered(
+/*============*/
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: MySQL thread handle of the user for whom
+ the transaction should be committed */
+ bool all) /*!< in: TRUE - commit transaction
+ FALSE - the current SQL statement ended */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_commit_ordered");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ trx = check_trx_exists(thd);
+
+ /* Since we will reserve the kernel mutex, we must not be holding the
+ search system latch, or we will disobey the latching order. But we
+ already released it in innobase_xa_prepare() (if not before), so just
+ have an assert here. */
+ ut_ad(!trx->has_search_latch);
+
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
+ && trx->conc_state != TRX_NOT_STARTED) {
+ /* We cannot throw error here; instead we will catch this error
+ again in innobase_commit() and report it from there. Returning
+ here leaves TRX_ACTIVE_COMMIT_ORDERED unset. */
+ DBUG_VOID_RETURN;
+ }
+
+ /* commit_ordered is only called when committing the whole transaction
+ (or an SQL statement when autocommit is on). */
+ DBUG_ASSERT(all ||
+ (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
+
+ innobase_commit_ordered_2(trx, thd);
+
+ trx->active_flag |= TRX_ACTIVE_COMMIT_ORDERED; /* fast part done */
+
+ DBUG_VOID_RETURN;
+}
+
/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
ended.
@@ -2474,11 +2598,12 @@ innobase_commit(
/* Since we will reserve the kernel mutex, we have to release
the search system latch first to obey the latching order. */
- if (trx->has_search_latch) {
+ if (trx->has_search_latch &&
+ (trx->active_flag & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
trx_search_latch_release_if_reserved(trx);
}
- /* The flag trx->active_trans is set to 1 in
+ /* The flag TRX_ACTIVE_IN_MYSQL in trx->active_flag is set in
1. ::external_lock(),
2. ::start_stmt(),
@@ -2488,81 +2613,33 @@ innobase_commit(
6. innobase_start_trx_and_assign_read_view(),
7. ::transactional_table_lock()
- and it is only set to 0 in a commit or a rollback. If it is 0 we know
+ and it is only cleared in a commit or a rollback. If it is unset we know
there cannot be resources to be freed and we could return immediately.
For the time being, we play safe and do the cleanup though there should
be nothing to clean up. */
- if (trx->active_trans == 0
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
&& trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but"
- " trx->conc_state != TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL== 0,"
+ " but trx->conc_state != TRX_NOT_STARTED");
}
+
if (all
|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
- /* We were instructed to commit the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
- /* We need current binlog position for ibbackup to work.
- Note, the position is current because of
- prepare_commit_mutex */
-retry:
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads++;
-
- if (commit_threads > innobase_commit_concurrency) {
- commit_threads--;
- pthread_cond_wait(&commit_cond,
- &commit_cond_m);
- pthread_mutex_unlock(&commit_cond_m);
- goto retry;
- }
- else {
- pthread_mutex_unlock(&commit_cond_m);
- }
- }
-
- /* The following calls to read the MySQL binary log
- file name and the position return consistent results:
- 1) Other InnoDB transactions cannot intervene between
- these calls as we are holding prepare_commit_mutex.
- 2) Binary logging of other engines is not relevant
- to InnoDB as all InnoDB requires is that committing
- InnoDB transactions appear in the same order in the
- MySQL binary log as they appear in InnoDB logs.
- 3) A MySQL log file rotation cannot happen because
- MySQL protects against this by having a counter of
- transactions in prepared state and it only allows
- a rotation when the counter drops to zero. See
- LOCK_prep_xids and COND_prep_xids in log.cc. */
- trx->mysql_log_file_name = mysql_bin_log_file_name();
- trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
-
- /* Don't do write + flush right now. For group commit
- to work we want to do the flush after releasing the
- prepare_commit_mutex. */
- trx->flush_log_later = TRUE;
- innobase_commit_low(trx);
- trx->flush_log_later = FALSE;
-
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads--;
- pthread_cond_signal(&commit_cond);
- pthread_mutex_unlock(&commit_cond_m);
+ /* Run the fast part of commit if we did not already. */
+ if ((trx->active_flag & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
+ innobase_commit_ordered_2(trx, thd);
}
- if (trx->active_trans == 2) {
-
- pthread_mutex_unlock(&prepare_commit_mutex);
- }
+ /* We were instructed to commit the whole transaction, or
+ this is an SQL statement end and autocommit is on */
- /* Now do a write + flush of logs. */
+ /* The fast part is done (in innobase_commit_ordered() or just above);
+ now finish by doing a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
- trx->active_trans = 0;
+ trx->active_flag = 0;
} else {
/* We just mark the SQL statement ended and do not do a
@@ -2635,7 +2712,7 @@ innobase_rollback(
|| !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
error = trx_rollback_for_mysql(trx);
- trx->active_trans = 0;
+ trx->active_flag = 0;
} else {
error = trx_rollback_last_sql_stat_for_mysql(trx);
}
@@ -2779,7 +2856,7 @@ innobase_savepoint(
innobase_release_stat_resources(trx);
/* cannot happen outside of transaction */
- DBUG_ASSERT(trx->active_trans);
+ DBUG_ASSERT(trx->active_flag & TRX_ACTIVE_IN_MYSQL);
/* TODO: use provided savepoint data area to store savepoint data */
char name[64];
@@ -2809,11 +2886,11 @@ innobase_close_connection(
ut_a(trx);
- if (trx->active_trans == 0
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
&& trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but"
- " trx->conc_state != TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL == 0,"
+ " but trx->conc_state != TRX_NOT_STARTED");
}
@@ -2941,12 +3018,15 @@ UNIV_INTERN
ulong
ha_innobase::index_flags(
/*=====================*/
- uint,
+ uint index,
uint,
bool)
const
{
- return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
+ ulong extra_flag= 0;
+ if (table && index == table->s->primary_key)
+ extra_flag= HA_CLUSTERED_INDEX;
+ return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | extra_flag
| HA_READ_RANGE | HA_KEYREAD_ONLY);
}
@@ -3912,90 +3992,64 @@ get_innobase_type_from_mysql_type(
8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
the type */
- DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_STRING < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_VAR_STRING < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_DOUBLE < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_FLOAT < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_DECIMAL < 256);
- if (field->flags & UNSIGNED_FLAG) {
+ *unsigned_flag = 0;
+ switch (field->key_type()) {
+ case HA_KEYTYPE_USHORT_INT:
+ case HA_KEYTYPE_ULONG_INT:
+ case HA_KEYTYPE_UINT24:
+ case HA_KEYTYPE_ULONGLONG:
*unsigned_flag = DATA_UNSIGNED;
- } else {
- *unsigned_flag = 0;
- }
-
- if (field->real_type() == MYSQL_TYPE_ENUM
- || field->real_type() == MYSQL_TYPE_SET) {
-
- /* MySQL has field->type() a string type for these, but the
- data is actually internally stored as an unsigned integer
- code! */
-
- *unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
- flag set to zero, even though
- internally this is an unsigned
- integer type */
+ /* fall through */
+ case HA_KEYTYPE_SHORT_INT:
+ case HA_KEYTYPE_LONG_INT:
+ case HA_KEYTYPE_INT24:
+ case HA_KEYTYPE_INT8:
+ case HA_KEYTYPE_LONGLONG:
return(DATA_INT);
- }
-
- switch (field->type()) {
- /* NOTE that we only allow string types in DATA_MYSQL and
- DATA_VARMYSQL */
- case MYSQL_TYPE_VAR_STRING: /* old <= 4.1 VARCHAR */
- case MYSQL_TYPE_VARCHAR: /* new >= 5.0.3 true VARCHAR */
- if (field->binary()) {
- return(DATA_BINARY);
- } else if (strcmp(
- field->charset()->name,
- "latin1_swedish_ci") == 0) {
+ case HA_KEYTYPE_FLOAT:
+ return(DATA_FLOAT);
+ case HA_KEYTYPE_DOUBLE:
+ return(DATA_DOUBLE);
+ case HA_KEYTYPE_BINARY:
+ if (field->type() == MYSQL_TYPE_TINY)
+ { // compatibility workaround
+ *unsigned_flag= DATA_UNSIGNED;
+ return DATA_INT;
+ }
+ return(DATA_FIXBINARY);
+ case HA_KEYTYPE_VARBINARY2:
+ if (field->type() != MYSQL_TYPE_VARCHAR)
+ return(DATA_BLOB);
+ /* fall through */
+ case HA_KEYTYPE_VARBINARY1:
+ return(DATA_BINARY);
+ case HA_KEYTYPE_VARTEXT2:
+ if (field->type() != MYSQL_TYPE_VARCHAR)
+ return(DATA_BLOB);
+ /* fall through */
+ case HA_KEYTYPE_VARTEXT1:
+ if (field->charset() == &my_charset_latin1) {
return(DATA_VARCHAR);
} else {
return(DATA_VARMYSQL);
}
- case MYSQL_TYPE_BIT:
- case MYSQL_TYPE_STRING: if (field->binary()) {
-
- return(DATA_FIXBINARY);
- } else if (strcmp(
- field->charset()->name,
- "latin1_swedish_ci") == 0) {
+ case HA_KEYTYPE_TEXT:
+ if (field->charset() == &my_charset_latin1) {
return(DATA_CHAR);
} else {
return(DATA_MYSQL);
}
- case MYSQL_TYPE_NEWDECIMAL:
- return(DATA_FIXBINARY);
- case MYSQL_TYPE_LONG:
- case MYSQL_TYPE_LONGLONG:
- case MYSQL_TYPE_TINY:
- case MYSQL_TYPE_SHORT:
- case MYSQL_TYPE_INT24:
- case MYSQL_TYPE_DATE:
- case MYSQL_TYPE_DATETIME:
- case MYSQL_TYPE_YEAR:
- case MYSQL_TYPE_NEWDATE:
- case MYSQL_TYPE_TIME:
- case MYSQL_TYPE_TIMESTAMP:
- return(DATA_INT);
- case MYSQL_TYPE_FLOAT:
- return(DATA_FLOAT);
- case MYSQL_TYPE_DOUBLE:
- return(DATA_DOUBLE);
- case MYSQL_TYPE_DECIMAL:
+ case HA_KEYTYPE_NUM:
return(DATA_DECIMAL);
- case MYSQL_TYPE_GEOMETRY:
- case MYSQL_TYPE_TINY_BLOB:
- case MYSQL_TYPE_MEDIUM_BLOB:
- case MYSQL_TYPE_BLOB:
- case MYSQL_TYPE_LONG_BLOB:
- return(DATA_BLOB);
- case MYSQL_TYPE_NULL:
- /* MySQL currently accepts "NULL" datatype, but will
- reject such datatype in the next release. We will cope
- with it and not trigger assertion failure in 5.1 */
- break;
- default:
+ case HA_KEYTYPE_BIT:
+ case HA_KEYTYPE_END:
ut_error;
}
@@ -4710,7 +4764,7 @@ no_commit:
/* Altering to InnoDB format */
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
/* We will need an IX lock on the destination table. */
prebuilt->sql_stat_start = TRUE;
} else {
@@ -4726,7 +4780,7 @@ no_commit:
locks, so they have to be acquired again. */
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
/* Re-acquire the table lock on the source table. */
row_lock_table_for_mysql(prebuilt, src_table, mode);
/* We will need an IX lock on the destination table. */
@@ -7762,6 +7816,8 @@ ha_innobase::info_low(
}
stats.check_time = 0;
+ stats.mrr_length_per_rec= ref_length + 8; // 8 = max(sizeof(void *));
+
if (stats.records == 0) {
stats.mean_rec_length = 0;
@@ -8571,10 +8627,10 @@ ha_innobase::start_stmt(
trx->detailed_error[0] = '\0';
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
} else {
innobase_register_stmt(ht, thd);
}
@@ -8672,10 +8728,10 @@ ha_innobase::external_lock(
/* Set the MySQL flag to mark that there is an active
transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
} else if (trx->n_mysql_tables_in_use == 0) {
innobase_register_stmt(ht, thd);
}
@@ -8754,7 +8810,7 @@ ha_innobase::external_lock(
prebuilt->used_in_HANDLER = FALSE;
if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
- if (trx->active_trans != 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) != 0) {
innobase_commit(ht, thd, TRUE);
}
} else {
@@ -8835,10 +8891,10 @@ ha_innobase::transactional_table_lock(
/* MySQL is setting a new transactional table lock */
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
@@ -9894,10 +9950,11 @@ innobase_xa_prepare(
innobase_release_stat_resources(trx);
- if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0 &&
+ trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but trx->conc_state != "
- "TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL == 0, but"
+ " trx->conc_state != TRX_NOT_STARTED");
}
if (all
@@ -9906,7 +9963,7 @@ innobase_xa_prepare(
/* We were instructed to prepare the whole transaction, or
this is an SQL statement end and autocommit is on */
- ut_ad(trx->active_trans);
+ ut_ad(trx->active_flag & TRX_ACTIVE_IN_MYSQL);
error = (int) trx_prepare_for_mysql(trx);
} else {
@@ -9930,32 +9987,6 @@ innobase_xa_prepare(
srv_active_wake_master_thread();
- if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
- (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
- {
-
- /* For ibbackup to work the order of transactions in binlog
- and InnoDB must be the same. Consider the situation
-
- thread1> prepare; write to binlog; ...
- <context switch>
- thread2> prepare; write to binlog; commit
- thread1> ... commit
-
- To ensure this will not happen we're taking the mutex on
- prepare, and releasing it on commit.
-
- Note: only do it for normal commits, done via ha_commit_trans.
- If 2pc protocol is executed by external transaction
- coordinator, it will be just a regular MySQL client
- executing XA PREPARE and XA COMMIT commands.
- In this case we cannot know how many minutes or hours
- will be between XA PREPARE and XA COMMIT, and we don't want
- to block for undefined period of time. */
- pthread_mutex_lock(&prepare_commit_mutex);
- trx->active_trans = 2;
- }
-
return(error);
}
diff --git a/storage/innodb_plugin/handler/ha_innodb.h b/storage/innodb_plugin/handler/ha_innodb.h
index 7a8f29853de..1dcd7ce203f 100644
--- a/storage/innodb_plugin/handler/ha_innodb.h
+++ b/storage/innodb_plugin/handler/ha_innodb.h
@@ -238,16 +238,6 @@ LEX_STRING *thd_query_string(MYSQL_THD thd);
char **thd_query(MYSQL_THD thd);
#endif
-/** Get the file name of the MySQL binlog.
- * @return the name of the binlog file
- */
-const char* mysql_bin_log_file_name(void);
-
-/** Get the current position of the MySQL binlog.
- * @return byte offset from the beginning of the binlog
- */
-ulonglong mysql_bin_log_file_pos(void);
-
/**
Check if a user thread is a replication slave thread
@param thd user thread
@@ -288,6 +278,11 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd);
#endif /* MYSQL_VERSION_ID > 50140 */
}
+/** Get the file name and position of the MySQL binlog corresponding to the
+ * current commit.
+ */
+extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
+
typedef struct trx_struct trx_t;
/********************************************************************//**
@file handler/ha_innodb.h
diff --git a/storage/innodb_plugin/handler/i_s.cc b/storage/innodb_plugin/handler/i_s.cc
index b0149967e9b..4fe8beafedc 100644
--- a/storage/innodb_plugin/handler/i_s.cc
+++ b/storage/innodb_plugin/handler/i_s.cc
@@ -161,7 +161,7 @@ field_store_time_t(
my_time.time_type = MYSQL_TIMESTAMP_DATETIME;
#endif
- return(field->store_time(&my_time, MYSQL_TIMESTAMP_DATETIME));
+ return(field->store_time(&my_time));
}
/*******************************************************************//**
diff --git a/storage/innodb_plugin/include/trx0trx.h b/storage/innodb_plugin/include/trx0trx.h
index 833bae4a4ff..53f9648d30a 100644
--- a/storage/innodb_plugin/include/trx0trx.h
+++ b/storage/innodb_plugin/include/trx0trx.h
@@ -510,9 +510,10 @@ struct trx_struct{
in that case we must flush the log
in trx_commit_complete_for_mysql() */
ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
- ulint active_trans; /*!< 1 - if a transaction in MySQL
- is active. 2 - if prepare_commit_mutex
- was taken */
+ ulint active_flag; /*!< TRX_ACTIVE_IN_MYSQL - set if a
+ transaction in MySQL is active.
+ TRX_ACTIVE_COMMIT_ORDERED - set if
+ innobase_commit_ordered has run */
ulint has_search_latch;
/* TRUE if this trx has latched the
search system latch in S-mode */
@@ -797,6 +798,10 @@ Multiple flags can be combined with bitwise OR. */
#define TRX_SIG_OTHER_SESS 1 /* sent by another session (which
must hold rights to this) */
+/* Flag bits for trx_struct.active_flag */
+#define TRX_ACTIVE_IN_MYSQL (1<<0)
+#define TRX_ACTIVE_COMMIT_ORDERED (1<<1)
+
/** Commit node states */
enum commit_node_state {
COMMIT_NODE_SEND = 1, /*!< about to send a commit signal to
diff --git a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0trx.c
index f0bbf220815..1400b11035f 100644
--- a/storage/innodb_plugin/trx/trx0trx.c
+++ b/storage/innodb_plugin/trx/trx0trx.c
@@ -119,7 +119,7 @@ trx_create(
trx->table_id = ut_dulint_zero;
trx->mysql_thd = NULL;
- trx->active_trans = 0;
+ trx->active_flag = 0;
trx->duplicates = 0;
trx->n_mysql_tables_in_use = 0;
diff --git a/storage/maria/CMakeLists.txt b/storage/maria/CMakeLists.txt
index 82a631d5086..77083c5839f 100644
--- a/storage/maria/CMakeLists.txt
+++ b/storage/maria/CMakeLists.txt
@@ -40,9 +40,10 @@ SET(ARIA_SOURCES ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c
ha_maria.cc trnman.c lockman.c tablockman.c
ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c
ma_sp_key.c ma_control_file.c ma_loghandler.c
- ma_pagecache.c ma_pagecaches.c compat_aliases.cc compat_aliases.h
+ ma_pagecache.c ma_pagecaches.c
ma_checkpoint.c ma_recovery.c ma_commit.c ma_pagecrc.c
ha_maria.h maria_def.h ma_recovery_util.c ma_servicethread.c
+ ma_norec.c
)
MYSQL_STORAGE_ENGINE(ARIA)
diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am
index c2bb61e77dd..fa01c2a602d 100644
--- a/storage/maria/Makefile.am
+++ b/storage/maria/Makefile.am
@@ -79,7 +79,7 @@ noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \
ma_checkpoint.h ma_recovery.h ma_commit.h ma_state.h \
trnman_public.h ma_check_standalone.h \
ma_key_recover.h ma_recovery_util.h \
- ma_servicethread.h compat_aliases.h
+ ma_servicethread.h
ma_test1_DEPENDENCIES= $(LIBRARIES)
ma_test1_LDADD= @CLIENT_EXTRA_LDFLAGS@ libaria.a \
$(top_builddir)/storage/myisam/libmyisam.a \
@@ -124,7 +124,7 @@ libaria_la_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \
ma_search.c ma_page.c ma_key_recover.c ma_key.c \
ma_locking.c ma_state.c \
ma_rrnd.c ma_scan.c ma_cache.c \
- ma_statrec.c ma_packrec.c ma_dynrec.c \
+ ma_statrec.c ma_packrec.c ma_dynrec.c ma_norec.c \
ma_blockrec.c ma_bitmap.c \
ma_update.c ma_write.c ma_unique.c \
ma_delete.c \
@@ -142,7 +142,7 @@ libaria_la_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \
ma_pagecache.c ma_pagecaches.c \
ma_checkpoint.c ma_recovery.c ma_commit.c \
ma_pagecrc.c ma_recovery_util.c \
- compat_aliases.cc ma_servicethread.c
+ ma_servicethread.c
libaria_s_la_SOURCES = ha_maria.cc
libaria_s_la_CXXFLAGS = $(AM_CXXFLAGS)
diff --git a/storage/maria/compat_aliases.cc b/storage/maria/compat_aliases.cc
deleted file mode 100644
index ce8838b2da2..00000000000
--- a/storage/maria/compat_aliases.cc
+++ /dev/null
@@ -1,250 +0,0 @@
-/* Copyright (C) 2010 Monty Program Ab
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-
-/*
- compatibility aliases for system and static variables
-*/
-#include <my_global.h>
-#include <maria.h>
-#include <mysql/plugin.h>
-#include "ma_loghandler.h"
-#include "compat_aliases.h"
-
-ulong block_size_alias;
-static MYSQL_SYSVAR_ULONG(block_size, block_size_alias,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated, use --aria-block-size instead", 0, 0,
- MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH,
- MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH);
-
-ulong checkpoint_interval_alias;
-static MYSQL_SYSVAR_ULONG(checkpoint_interval, checkpoint_interval_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-checkpoint-interval instead",
- NULL, NULL, 30, 0, UINT_MAX, 1);
-
-ulong force_start_after_recovery_failures_alias;
-static MYSQL_SYSVAR_ULONG(force_start_after_recovery_failures, force_start_after_recovery_failures_alias,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated, use --aria-force-start-after-recovery-failures instead",
- NULL, NULL, 0, 0, UINT_MAX8, 1);
-
-my_bool page_checksum_alias;
-static MYSQL_SYSVAR_BOOL(page_checksum, page_checksum_alias, 0,
- "Deprecated, use --aria-page-checksum instead", 0, 0, 1);
-
-char *log_dir_path_alias;
-static MYSQL_SYSVAR_STR(log_dir_path, log_dir_path_alias,
- PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated, use --aria-log-dir-path instead",
- NULL, NULL, mysql_real_data_home);
-
-ulong log_file_size_alias;
-static MYSQL_SYSVAR_ULONG(log_file_size, log_file_size_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-log-file-size instead",
- NULL, NULL, TRANSLOG_FILE_SIZE,
- TRANSLOG_MIN_FILE_SIZE, 0xffffffffL, TRANSLOG_PAGE_SIZE);
-
-ulong group_commit_alias;
-static MYSQL_SYSVAR_ENUM(group_commit, group_commit_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-group-commit instead",
- NULL, NULL,
- TRANSLOG_GCOMMIT_NONE, &maria_group_commit_typelib);
-
-ulong group_commit_interval_alias;
-static MYSQL_SYSVAR_ULONG(group_commit_interval, group_commit_interval_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-group-commit-interval instead",
- NULL, NULL, 0, 0, UINT_MAX, 1);
-
-ulong log_purge_type_alias;
-static MYSQL_SYSVAR_ENUM(log_purge_type, log_purge_type_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-log-purge-type instead",
- NULL, NULL, TRANSLOG_PURGE_IMMIDIATE,
- &maria_translog_purge_type_typelib);
-
-ulonglong max_sort_file_size_alias;
-static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size, max_sort_file_size_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-max-temp-length instead",
- 0, 0, MAX_FILE_SIZE, 0, MAX_FILE_SIZE, 1024*1024);
-
-ulong pagecache_age_threshold_alias;
-static MYSQL_SYSVAR_ULONG(pagecache_age_threshold, pagecache_age_threshold_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-pagecache-age-threshold instead",
- 0, 0, 300, 100, ~0L, 100);
-
-ulonglong pagecache_buffer_size_alias;
-static MYSQL_SYSVAR_ULONGLONG(pagecache_buffer_size, pagecache_buffer_size_alias,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated, use --aria-pagecache-buffer-size instead",
- 0, 0, KEY_CACHE_SIZE, MALLOC_OVERHEAD, ~0UL, IO_SIZE);
-
-ulong pagecache_division_limit_alias;
-static MYSQL_SYSVAR_ULONG(pagecache_division_limit, pagecache_division_limit_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-pagecache-division-limit instead",
- 0, 0, 100, 1, 100, 1);
-
-ulong recover_alias;
-static MYSQL_SYSVAR_ENUM(recover, recover_alias, PLUGIN_VAR_OPCMDARG,
- "Deprecated, use --aria-recover instead",
- NULL, NULL, HA_RECOVER_DEFAULT, &maria_recover_typelib);
-
-ulong repair_threads_alias;
-static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-repair-threads instead",
- 0, 0, 1, 1, ~0L, 1);
-
-ulong sort_buffer_size_alias;
-static MYSQL_THDVAR_ULONG(sort_buffer_size, PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-sort-buffer-size instead",
- 0, 0, 128L*1024L*1024L, 4, ~0L, 1);
-
-ulong stats_method_alias;
-static MYSQL_THDVAR_ENUM(stats_method, PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-stats-method instead",
- 0, 0, 0, &maria_stats_method_typelib);
-
-ulong sync_log_dir_alias;
-static MYSQL_SYSVAR_ENUM(sync_log_dir, sync_log_dir_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-sync-log-dir instead",
- NULL, NULL, TRANSLOG_SYNC_DIR_NEWFILE,
- &maria_sync_log_dir_typelib);
-
-my_bool used_for_temp_tables_alias= 1;
-static MYSQL_SYSVAR_BOOL(used_for_temp_tables,
- used_for_temp_tables_alias, PLUGIN_VAR_READONLY | PLUGIN_VAR_NOCMDOPT,
- NULL, 0, 0, 1);
-
-static struct st_mysql_show_var status_variables_aliases[]= {
- {"Maria", (char*) &status_variables, SHOW_ARRAY},
- {NullS, NullS, SHOW_LONG}
-};
-
-/*
- There is one problem with aliases for command-line options.
- Plugin initialization works like this
-
- for all plugins:
- prepare command-line options
- initialize command-line option variables to the default values
- parse command line, assign values as necessary
-
- for all plugins:
- call the plugin initialization function
-
- it means, we cannot have maria* and aria* command-line options to use
- the same underlying variables - because after assigning maria* values,
- MySQL will put there default values again preparing for parsing aria*
- values. So, maria* values will be lost.
-
- So, we create separate set of variables for maria* options,
- and take both values into account in ha_maria_init().
-
- When the command line was parsed, we patch maria* options
- to use the same variables as aria* options so that
- set @@maria_some_var would have the same value as @@aria_some_var
- without forcing us to copy the values around all the time.
-*/
-
-static struct st_mysql_sys_var* system_variables_aliases[]= {
- MYSQL_SYSVAR(block_size),
- MYSQL_SYSVAR(checkpoint_interval),
- MYSQL_SYSVAR(force_start_after_recovery_failures),
- MYSQL_SYSVAR(group_commit),
- MYSQL_SYSVAR(group_commit_interval),
- MYSQL_SYSVAR(log_dir_path),
- MYSQL_SYSVAR(log_file_size),
- MYSQL_SYSVAR(log_purge_type),
- MYSQL_SYSVAR(max_sort_file_size),
- MYSQL_SYSVAR(page_checksum),
- MYSQL_SYSVAR(pagecache_age_threshold),
- MYSQL_SYSVAR(pagecache_buffer_size),
- MYSQL_SYSVAR(pagecache_division_limit),
- MYSQL_SYSVAR(recover),
- MYSQL_SYSVAR(repair_threads),
- MYSQL_SYSVAR(sort_buffer_size),
- MYSQL_SYSVAR(stats_method),
- MYSQL_SYSVAR(sync_log_dir),
- MYSQL_SYSVAR(used_for_temp_tables),
- NULL
-};
-
-#define COPY_SYSVAR(name) \
- memcpy(&MYSQL_SYSVAR_NAME(name), system_variables[i++], \
- sizeof(MYSQL_SYSVAR_NAME(name))); \
- if (name ## _alias != MYSQL_SYSVAR_NAME(name).def_val && \
- *MYSQL_SYSVAR_NAME(name).value == MYSQL_SYSVAR_NAME(name).def_val) \
- *MYSQL_SYSVAR_NAME(name).value= name ## _alias;
-
-#define COPY_THDVAR(name) \
- name ## _alias= THDVAR(0, name); \
- memcpy(&MYSQL_SYSVAR_NAME(name), system_variables[i++], \
- sizeof(MYSQL_SYSVAR_NAME(name))); \
- if (name ## _alias != MYSQL_SYSVAR_NAME(name).def_val && \
- THDVAR(0, name) == MYSQL_SYSVAR_NAME(name).def_val) \
- THDVAR(0, name)= name ## _alias;
-
-/* Note:
- The following list must be identical to the list for system_variables[] in ha_maria.cc
-*/
-
-void copy_variable_aliases()
-{
- int i= 0;
- COPY_SYSVAR(block_size);
- COPY_SYSVAR(checkpoint_interval);
- i++; // Skip checkpoint_min_log_activity
- COPY_SYSVAR(force_start_after_recovery_failures);
- COPY_SYSVAR(group_commit);
- COPY_SYSVAR(group_commit_interval);
- COPY_SYSVAR(log_dir_path);
- COPY_SYSVAR(log_file_size);
- COPY_SYSVAR(log_purge_type);
- COPY_SYSVAR(max_sort_file_size);
- COPY_SYSVAR(page_checksum);
- COPY_SYSVAR(pagecache_age_threshold);
- COPY_SYSVAR(pagecache_buffer_size);
- COPY_SYSVAR(pagecache_division_limit);
- COPY_SYSVAR(recover);
- COPY_THDVAR(repair_threads);
- COPY_THDVAR(sort_buffer_size);
- COPY_THDVAR(stats_method);
- COPY_SYSVAR(sync_log_dir);
- COPY_SYSVAR(used_for_temp_tables);
-}
-
-struct st_maria_plugin compat_aliases= {
- MYSQL_DAEMON_PLUGIN,
- &maria_storage_engine,
- "Maria",
- "Monty Program Ab",
- "Compatibility aliases for the Aria engine",
- PLUGIN_LICENSE_GPL,
- NULL,
- NULL,
- 0x0105,
- status_variables_aliases,
- system_variables_aliases,
- "1.5",
- MariaDB_PLUGIN_MATURITY_GAMMA
-};
-
diff --git a/storage/maria/compat_aliases.h b/storage/maria/compat_aliases.h
deleted file mode 100644
index 46a4da74eec..00000000000
--- a/storage/maria/compat_aliases.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* Copyright (C) 2010 Monty Program Ab
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-
-extern struct st_maria_plugin compat_aliases;
-extern char mysql_real_data_home[FN_REFLEN];
-extern TYPELIB maria_recover_typelib;
-extern TYPELIB maria_stats_method_typelib;
-extern TYPELIB maria_translog_purge_type_typelib;
-extern TYPELIB maria_sync_log_dir_typelib;
-extern TYPELIB maria_group_commit_typelib;
-extern struct st_mysql_storage_engine maria_storage_engine;
-extern my_bool use_maria_for_temp_tables;
-extern struct st_mysql_sys_var* system_variables[];
-extern st_mysql_show_var status_variables[];
-void copy_variable_aliases();
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
index 60fa7a58fb6..e65ac7e443c 100644
--- a/storage/maria/ha_maria.cc
+++ b/storage/maria/ha_maria.cc
@@ -29,7 +29,6 @@
#include "ha_maria.h"
#include "trnman_public.h"
#include "trnman.h"
-#include "compat_aliases.h"
C_MODE_START
#include "maria_def.h"
@@ -219,7 +218,8 @@ static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size,
maria_max_temp_length, PLUGIN_VAR_RQCMDARG,
"Don't use the fast sort index method to created index if the "
"temporary file would get bigger than this.",
- 0, 0, MAX_FILE_SIZE & ~(1*MB-1), 0, MAX_FILE_SIZE, 1*MB);
+ 0, 0, MAX_FILE_SIZE & ~((ulonglong) (1*MB-1)),
+ 0, MAX_FILE_SIZE, 1*MB);
static MYSQL_SYSVAR_ULONG(pagecache_age_threshold,
pagecache_age_threshold, PLUGIN_VAR_RQCMDARG,
@@ -511,6 +511,8 @@ static int table2maria(TABLE *table_arg, data_file_type row_type,
if (found->flags & BLOB_FLAG)
recinfo_pos->type= FIELD_BLOB;
+ else if (found->type() == MYSQL_TYPE_TIMESTAMP)
+ recinfo_pos->type= FIELD_NORMAL;
else if (found->type() == MYSQL_TYPE_VARCHAR)
recinfo_pos->type= FIELD_VARCHAR;
else if (!(options & HA_OPTION_PACK_RECORD) ||
@@ -720,6 +722,34 @@ int _ma_killed_ptr(HA_CHECK *param)
}
+/*
+ Report progress to mysqld
+
+ This is a bit more complex than a progress report function
+ normally is.
+
+ The reason is that this is called by enable_index/repair which
+ is one stage in ALTER TABLE and we can't use the external
+ stage/max_stage for this.
+
+ thd_progress_init/thd_progress_next_stage is to be called by
+ high level commands like CHECK TABLE or REPAIR TABLE, not
+ by sub commands like enable_index().
+
+ In ma_check.c it's easier to work with stages than with a total
+ progress, so we use internal stage/max_stage here to keep the
+ code simple.
+*/
+
+void _ma_report_progress(HA_CHECK *param, ulonglong progress,
+ ulonglong max_progress)
+{
+ thd_progress_report((THD*)param->thd,
+ progress + max_progress * param->stage,
+ max_progress * param->max_stage);
+}
+
+
void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...)
{
va_list args;
@@ -769,7 +799,7 @@ void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...)
static int maria_create_trn_for_mysql(MARIA_HA *info)
{
- THD *thd= (THD*) info->external_ptr;
+ THD *thd= ((TABLE*) info->external_ref)->in_use;
TRN *trn= THD_TRN;
DBUG_ENTER("maria_create_trn_for_mysql");
@@ -808,6 +838,11 @@ static int maria_create_trn_for_mysql(MARIA_HA *info)
DBUG_RETURN(0);
}
+my_bool ma_killed_in_mariadb(MARIA_HA *info)
+{
+ return (((TABLE*) (info->external_ref))->in_use->killed != 0);
+}
+
} /* extern "C" */
/**
@@ -1025,6 +1060,8 @@ int ha_maria::open(const char *name, int mode, uint test_if_locked)
return (my_errno ? my_errno : -1);
file->s->chst_invalidator= query_cache_invalidate_by_MyISAM_filename_ref;
+ /* Set external_ref, mainly for temporary tables */
+ file->external_ref= (void*) table; // For ma_killed()
if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE))
VOID(maria_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0));
@@ -1048,6 +1085,16 @@ int ha_maria::open(const char *name, int mode, uint test_if_locked)
if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
int_table_flags |= HA_HAS_NEW_CHECKSUM;
+ /*
+ For static size rows, tell MariaDB that we will access all bytes
+ in the record when writing it. This signals MariaDB to initialize
+ the full row to ensure we don't get any errors from valgrind and
+ that all bytes in the row are properly reset.
+ */
+ if (file->s->data_file_type == STATIC_RECORD &&
+ (file->s->has_varchar_fields | file->s->has_null_fields))
+ int_table_flags|= HA_RECORD_MUST_BE_CLEAN_ON_WRITE;
+
for (i= 0; i < table->s->keys; i++)
{
plugin_ref parser= table->key_info[i].parser;
@@ -1064,6 +1111,8 @@ int ha_maria::open(const char *name, int mode, uint test_if_locked)
int ha_maria::close(void)
{
MARIA_HA *tmp= file;
+ if (!tmp)
+ return 0;
file= 0;
return maria_close(tmp);
}
@@ -1094,7 +1143,7 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
int error;
HA_CHECK &param= *(HA_CHECK*) thd->alloc(sizeof(param));
MARIA_SHARE *share= file->s;
- const char *old_proc_info= thd_proc_info(thd, "Checking table");
+ const char *old_proc_info;
TRN *old_trn= file->trn;
if (!file || !&param) return HA_ADMIN_INTERNAL_ERROR;
@@ -1122,12 +1171,18 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
return HA_ADMIN_ALREADY_DONE;
maria_chk_init_for_check(&param, file);
+ old_proc_info= thd_proc_info(thd, "Checking status");
+ thd_progress_init(thd, 3);
(void) maria_chk_status(&param, file); // Not fatal
error= maria_chk_size(&param, file);
if (!error)
error|= maria_chk_del(&param, file, param.testflag);
+ thd_proc_info(thd, "Checking keys");
+ thd_progress_next_stage(thd);
if (!error)
error= maria_chk_key(&param, file);
+ thd_proc_info(thd, "Checking data");
+ thd_progress_next_stage(thd);
if (!error)
{
if ((!(param.testflag & T_QUICK) &&
@@ -1178,6 +1233,7 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
/* Reset trn, that may have been set by repair */
_ma_set_trn_for_table(file, old_trn);
thd_proc_info(thd, old_proc_info);
+ thd_progress_end(thd);
return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
}
@@ -1193,6 +1249,7 @@ int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
int error= 0;
HA_CHECK &param= *(HA_CHECK*) thd->alloc(sizeof(param));
MARIA_SHARE *share= file->s;
+ const char *old_proc_info;
if (!&param)
return HA_ADMIN_INTERNAL_ERROR;
@@ -1210,6 +1267,8 @@ int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
if (!(share->state.changed & STATE_NOT_ANALYZED))
return HA_ADMIN_ALREADY_DONE;
+ old_proc_info= thd_proc_info(thd, "Scanning");
+ thd_progress_init(thd, 1);
error= maria_chk_key(&param, file);
if (!error)
{
@@ -1219,6 +1278,8 @@ int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
}
else if (!maria_is_crashed(file) && !thd->killed)
maria_mark_crashed(file);
+ thd_proc_info(thd, old_proc_info);
+ thd_progress_end(thd);
return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
}
@@ -1352,6 +1413,7 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
int error;
HA_CHECK &param= *(HA_CHECK*) thd->alloc(sizeof(param));
ha_rows start_records;
+ const char *old_proc_info;
if (!file || !&param)
return HA_ADMIN_INTERNAL_ERROR;
@@ -1365,6 +1427,8 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
param.sort_buffer_length= THDVAR(thd, sort_buffer_size);
param.backup_time= check_opt->start_time;
start_records= file->state->records;
+ old_proc_info= thd_proc_info(thd, "Checking table");
+ thd_progress_init(thd, 1);
while ((error= repair(thd, &param, 0)) && param.retry_repair)
{
param.retry_repair= 0;
@@ -1400,6 +1464,8 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
llstr(start_records, llbuff2),
table->s->path.str);
}
+ thd_proc_info(thd, old_proc_info);
+ thd_progress_end(thd);
return error;
}
@@ -1447,14 +1513,15 @@ int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt)
param.testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE |
T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX);
param.sort_buffer_length= THDVAR(thd, sort_buffer_size);
+ thd_progress_init(thd, 1);
if ((error= repair(thd, &param, 1)) && param.retry_repair)
{
sql_print_warning("Warning: Optimize table got errno %d on %s.%s, retrying",
my_errno, param.db_name, param.table_name);
param.testflag &= ~T_REP_BY_SORT;
- error= repair(thd, &param, 1);
+ error= repair(thd, &param, 0);
}
-
+ thd_progress_end(thd);
return error;
}
@@ -1628,6 +1695,7 @@ int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize)
}
pthread_mutex_unlock(&share->intern_lock);
thd_proc_info(thd, old_proc_info);
+ thd_progress_end(thd); // Mark done
if (!thd->locked_tables)
maria_lock_database(file, F_UNLCK);
@@ -1954,15 +2022,27 @@ void ha_maria::start_bulk_insert(ha_rows rows)
{
DBUG_ENTER("ha_maria::start_bulk_insert");
THD *thd= table->in_use;
- ulong size= min(thd->variables.read_buff_size,
- (ulong) (table->s->avg_row_length * rows));
MARIA_SHARE *share= file->s;
- DBUG_PRINT("info", ("start_bulk_insert: rows %lu size %lu",
- (ulong) rows, size));
+ DBUG_PRINT("info", ("start_bulk_insert: rows %lu", (ulong) rows));
/* don't enable row cache if too few rows */
if (!rows || (rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE))
- maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &size);
+ {
+ ulonglong size= thd->variables.read_buff_size, tmp;
+ if (rows)
+ {
+ if (file->state->records)
+ {
+ MARIA_INFO maria_info;
+ maria_status(file, &maria_info, HA_STATUS_NO_LOCK |HA_STATUS_VARIABLE);
+ set_if_smaller(size, maria_info.mean_reclength * rows);
+ }
+ else if (table->s->avg_row_length)
+ set_if_smaller(size, (size_t) (table->s->avg_row_length * rows));
+ }
+ tmp= (ulong) size; // Safe because of limits
+ maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &tmp);
+ }
can_enable_indexes= (maria_is_all_keys_active(share->state.key_map,
share->base.keys));
@@ -1997,7 +2077,14 @@ void ha_maria::start_bulk_insert(ha_rows rows)
@todo for a single-row INSERT SELECT, we will go into repair, which
is more costly (flushes, syncs) than a row write.
*/
- maria_disable_non_unique_index(file, rows);
+ if (file->open_flags & HA_OPEN_INTERNAL_TABLE)
+ {
+ /* Internal table; If we get a duplicate something is very wrong */
+ file->update|= HA_STATE_CHANGED;
+ maria_clear_all_keys_active(file->s->state.key_map);
+ }
+ else
+ maria_disable_non_unique_index(file, rows);
if (share->now_transactional)
{
bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR;
@@ -2157,6 +2244,20 @@ int ha_maria::delete_row(const uchar * buf)
return maria_delete(file, buf);
}
+C_MODE_START
+
+ICP_RESULT index_cond_func_maria(void *arg)
+{
+ ha_maria *h= (ha_maria*)arg;
+ if (h->end_range)
+ {
+ if (h->compare_key2(h->end_range) > 0)
+ return ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */
+ }
+ return h->pushed_idx_cond->val_int() ? ICP_MATCH : ICP_NO_MATCH;
+}
+
+C_MODE_END
int ha_maria::index_read_map(uchar * buf, const uchar * key,
key_part_map keypart_map,
@@ -2173,7 +2274,15 @@ int ha_maria::index_read_idx_map(uchar * buf, uint index, const uchar * key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{
- int error= maria_rkey(file, buf, index, key, keypart_map, find_flag);
+ int error;
+ /* Use the pushed index condition if it matches the index we're scanning */
+ end_range= NULL;
+ if (index == pushed_idx_cond_keyno)
+ ma_set_index_cond_func(file, index_cond_func_maria, this);
+
+ error= maria_rkey(file, buf, index, key, keypart_map, find_flag);
+
+ ma_set_index_cond_func(file, NULL, 0);
table->status= error ? STATUS_NOT_FOUND : 0;
return error;
}
@@ -2246,6 +2355,25 @@ int ha_maria::index_next_same(uchar * buf,
}
+int ha_maria::index_init(uint idx, bool sorted)
+{
+ active_index=idx;
+ if (pushed_idx_cond_keyno == idx)
+ ma_set_index_cond_func(file, index_cond_func_maria, this);
+ return 0;
+}
+
+
+int ha_maria::index_end()
+{
+ active_index=MAX_KEY;
+ ma_set_index_cond_func(file, NULL, 0);
+ in_range_check_pushed_down= FALSE;
+ ds_mrr.dsmrr_close();
+ return 0;
+}
+
+
int ha_maria::rnd_init(bool scan)
{
if (scan)
@@ -2256,6 +2384,7 @@ int ha_maria::rnd_init(bool scan)
int ha_maria::rnd_end()
{
+ ds_mrr.dsmrr_close();
/* Safe to call even if we don't have started a scan */
maria_scan_end(file);
return 0;
@@ -2300,7 +2429,7 @@ void ha_maria::position(const uchar *record)
int ha_maria::info(uint flag)
{
- return info(flag, table->s->tmp_table == NO_TMP_TABLE);
+ return (!table ? 1 : info(flag, table->s->tmp_table == NO_TMP_TABLE));
}
int ha_maria::info(uint flag, my_bool lock_table_share)
@@ -2328,6 +2457,7 @@ int ha_maria::info(uint flag, my_bool lock_table_share)
ref_length= maria_info.reflength;
share->db_options_in_use= maria_info.options;
stats.block_size= maria_block_size;
+ stats.mrr_length_per_rec= maria_info.reflength + 8; // 8 = max(sizeof(void *))
/* Update share */
if (lock_table_share)
@@ -2409,6 +2539,11 @@ int ha_maria::extra(enum ha_extra_function operation)
int ha_maria::reset(void)
{
+ pushed_idx_cond= NULL;
+ pushed_idx_cond_keyno= MAX_KEY;
+ in_range_check_pushed_down= FALSE;
+ ma_set_index_cond_func(file, NULL, 0);
+ ds_mrr.dsmrr_close();
if (file->trn)
{
/* Next statement is a new statement. Ensure it's logged */
@@ -2461,7 +2596,7 @@ int ha_maria::delete_table(const char *name)
void ha_maria::drop_table(const char *name)
{
DBUG_ASSERT(file->s->temporary);
- (void) close();
+ (void) ha_close();
(void) maria_delete_table_files(name, 0);
}
@@ -2469,6 +2604,7 @@ void ha_maria::drop_table(const char *name)
int ha_maria::external_lock(THD *thd, int lock_type)
{
DBUG_ENTER("ha_maria::external_lock");
+ file->external_ref= (void*) table; // For ma_killed()
/*
We don't test now_transactional because it may vary between lock/unlock
and thus confuse our reference counting.
@@ -2487,8 +2623,6 @@ int ha_maria::external_lock(THD *thd, int lock_type)
/* Transactional table */
if (lock_type != F_UNLCK)
{
- file->external_ptr= thd; // For maria_register_trn()
-
if (!file->s->lock_key_trees) // If we don't use versioning
{
/*
@@ -2567,7 +2701,7 @@ int ha_maria::external_lock(THD *thd, int lock_type)
changes to commit (rollback shouldn't be tested).
*/
DBUG_ASSERT(!thd->main_da.is_sent ||
- thd->killed == THD::KILL_CONNECTION);
+ thd->killed == KILL_CONNECTION);
/* autocommit ? rollback a transaction */
#ifdef MARIA_CANNOT_ROLLBACK
if (ma_commit(trn))
@@ -3120,6 +3254,14 @@ bool maria_flush_logs(handlerton *hton)
}
+int maria_checkpoint_state(handlerton *hton, bool disabled)
+{
+ maria_checkpoint_disabled= (my_bool) disabled;
+ return 0;
+}
+
+
+
#define SHOW_MSG_LEN (FN_REFLEN + 20)
/**
@brief show status handler
@@ -3299,7 +3441,6 @@ bool ha_maria::is_changed() const
static int ha_maria_init(void *p)
{
int res;
- copy_variable_aliases();
const char *log_dir= maria_data_root;
maria_hton= (handlerton *)p;
maria_hton->state= SHOW_OPTION_YES;
@@ -3308,6 +3449,7 @@ static int ha_maria_init(void *p)
maria_hton->panic= maria_hton_panic;
maria_hton->commit= maria_commit;
maria_hton->rollback= maria_rollback;
+ maria_hton->checkpoint_state= maria_checkpoint_state;
#ifdef MARIA_CANNOT_ROLLBACK
maria_hton->commit= 0;
#endif
@@ -3344,6 +3486,9 @@ static int ha_maria_init(void *p)
#endif
if (res)
maria_hton= 0;
+
+ ma_killed= ma_killed_in_mariadb;
+
return res ? HA_ERR_INITIALIZATION : 0;
}
@@ -3578,11 +3723,73 @@ static struct st_mysql_show_var aria_status_variables[]= {
{NullS, NullS, SHOW_LONG}
};
+/****************************************************************************
+ * Maria MRR implementation: use DS-MRR
+ ***************************************************************************/
+
+int ha_maria::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
+{
+ return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
+}
+
+int ha_maria::multi_range_read_next(range_id_t *range_info)
+{
+ return ds_mrr.dsmrr_next(range_info);
+}
+
+ha_rows ha_maria::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *flags, COST_VECT *cost)
+{
+ /*
+ This call is here because there is no location where this->table would
+ already be known.
+ TODO: consider moving it into some per-query initialization call.
+ */
+ ds_mrr.init(this, table);
+ return ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, bufsz,
+ flags, cost);
+}
+
+ha_rows ha_maria::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost)
+{
+ ds_mrr.init(this, table);
+ return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost);
+}
+
+int ha_maria::multi_range_read_explain_info(uint mrr_mode, char *str,
+ size_t size)
+{
+ return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
+}
+/* Maria MRR implementation ends */
+
+
+/* Index condition pushdown implementation*/
+
+
+Item *ha_maria::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
+{
+ pushed_idx_cond_keyno= keyno_arg;
+ pushed_idx_cond= idx_cond_arg;
+ in_range_check_pushed_down= TRUE;
+ if (active_index == pushed_idx_cond_keyno)
+ ma_set_index_cond_func(file, index_cond_func_maria, this);
+ return NULL;
+}
+
+
+
+
struct st_mysql_storage_engine maria_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
maria_declare_plugin(aria)
-compat_aliases,
{
MYSQL_STORAGE_ENGINE_PLUGIN,
&maria_storage_engine,
diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h
index c7af191c1e7..0ba134a5b45 100644
--- a/storage/maria/ha_maria.h
+++ b/storage/maria/ha_maria.h
@@ -13,6 +13,8 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+#ifndef HA_MARIA_INCLUDED
+#define HA_MARIA_INCLUDED
#ifdef USE_PRAGMA_INTERFACE
#pragma interface /* gcc class implementation */
@@ -22,6 +24,16 @@
#include <maria.h>
+#define HA_RECOVER_NONE 0 /* No automatic recover */
+#define HA_RECOVER_DEFAULT 1 /* Automatic recover active */
+#define HA_RECOVER_BACKUP 2 /* Make a backupfile on recover */
+#define HA_RECOVER_FORCE 4 /* Recover even if we lose rows */
+#define HA_RECOVER_QUICK 8 /* Don't check rows in data file */
+
+C_MODE_START
+ICP_RESULT index_cond_func_maria(void *arg);
+C_MODE_END
+
extern ulong maria_sort_buffer_size;
extern TYPELIB maria_recover_typelib;
extern ulong maria_recover_options;
@@ -56,7 +68,7 @@ public:
{
return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ?
0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
- HA_READ_ORDER | HA_KEYREAD_ONLY);
+ HA_READ_ORDER | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN);
}
uint max_supported_keys() const
{ return MARIA_MAX_KEY; }
@@ -98,6 +110,8 @@ public:
key->charset(), table->record[0]);
}
int ft_read(uchar * buf);
+ int index_init(uint idx, bool sorted);
+ int index_end();
int rnd_init(bool scan);
int rnd_end(void);
int rnd_next(uchar * buf);
@@ -159,4 +173,26 @@ public:
return file;
}
static int implicit_commit(THD *thd, bool new_trn);
+ /**
+ * Multi Range Read interface
+ */
+ int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode, HANDLER_BUFFER *buf);
+ int multi_range_read_next(range_id_t *range_info);
+ ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+ ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+ int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size);
+
+ /* Index condition pushdown implementation */
+ Item *idx_cond_push(uint keyno, Item* idx_cond);
+private:
+ DsMrr_impl ds_mrr;
+ friend ICP_RESULT index_cond_func_maria(void *arg);
};
+
+#endif /* HA_MARIA_INCLUDED */
diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c
index d6d4dcd44e6..459ee5d8ae2 100644
--- a/storage/maria/lockman.c
+++ b/storage/maria/lockman.c
@@ -690,12 +690,12 @@ enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo,
}
/* yuck. waiting */
- deadline= my_getsystime() + lm->lock_timeout * 10000;
- set_timespec_nsec(timeout,lm->lock_timeout * 1000000);
+ deadline= my_hrtime().val*1000 + lm->lock_timeout * 1000000;
+ set_timespec_time_nsec(timeout, deadline);
do
{
pthread_cond_timedwait(wait_for_lo->cond, wait_for_lo->mutex, &timeout);
- } while (!DELETED(blocker->link) && my_getsystime() < deadline);
+ } while (!DELETED(blocker->link) && my_hrtime().val < deadline/1000);
pthread_mutex_unlock(wait_for_lo->mutex);
lf_rwlock_by_pins(pins);
if (!DELETED(blocker->link))
diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c
index f4470f846f5..72c01fc9995 100644
--- a/storage/maria/ma_check.c
+++ b/storage/maria/ma_check.c
@@ -125,6 +125,7 @@ void maria_chk_init(HA_CHECK *param)
param->max_record_length= LONGLONG_MAX;
param->pagecache_block_size= KEY_CACHE_BLOCK_SIZE;
param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
+ param->max_stage= 1;
}
@@ -530,6 +531,7 @@ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info)
continue;
}
found_keys++;
+ _ma_report_progress(param, key, share->base.keys);
param->record_checksum=init_checksum;
@@ -1013,10 +1015,12 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
/* fall through */
}
if ((share->data_file_type != BLOCK_RECORD &&
+ share->data_file_type != NO_RECORD &&
record >= share->state.state.data_file_length) ||
(share->data_file_type == BLOCK_RECORD &&
ma_recordpos_to_page(record) * share->base.min_block_length >=
- share->state.state.data_file_length))
+ share->state.state.data_file_length) ||
+ (share->data_file_type == NO_RECORD && record != 0))
{
#ifndef DBUG_OFF
char llbuff2[22], llbuff3[22];
@@ -1134,10 +1138,14 @@ static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend,
param->tmp_record_checksum+= (ha_checksum) start_recpos;
param->records++;
- if (param->testflag & T_WRITE_LOOP && param->records % WRITE_COUNT == 0)
+ if (param->records % WRITE_COUNT == 0)
{
- printf("%s\r", llstr(param->records, llbuff));
- VOID(fflush(stdout));
+ if (param->testflag & T_WRITE_LOOP)
+ {
+ printf("%s\r", llstr(param->records, llbuff));
+ VOID(fflush(stdout));
+ }
+ _ma_report_progress(param, param->records, share->state.state.records);
}
/* Check if keys match the record */
@@ -2086,6 +2094,12 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend)
case COMPRESSED_RECORD:
error= check_compressed_record(param, info, extend, record);
break;
+ case NO_RECORD:
+ param->records= share->state.state.records;
+ param->record_checksum= 0;
+ extend= 1; /* No row checksums */
+ /* no data, nothing to do */
+ break;
} /* switch */
info->in_check_table= 0;
@@ -2323,6 +2337,13 @@ static int initialize_variables_for_repair(HA_CHECK *param,
{
MARIA_SHARE *share= info->s;
+ if (share->data_file_type == NO_RECORD)
+ {
+ _ma_check_print_error(param,
+ "Can't repair tables with record type NO_DATA");
+ return 1;
+ }
+
/* Make a copy to allow us to restore state and check how state changed */
memcpy(org_share, share, sizeof(*share));
@@ -2369,6 +2390,7 @@ static int initialize_variables_for_repair(HA_CHECK *param,
/* calculate max_records */
sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
+ param->max_progress= sort_info->filelength;
if ((param->testflag & T_CREATE_MISSING_KEYS) ||
sort_info->org_data_file_type == COMPRESSED_RECORD)
sort_info->max_records= share->state.state.records;
@@ -2391,6 +2413,8 @@ static int initialize_variables_for_repair(HA_CHECK *param,
maria_ignore_trids(info);
/* Don't write transid's during repair */
maria_versioning(info, 0);
+ /* remember original number of rows */
+ *info->state= info->s->state.state;
return 0;
}
@@ -3623,7 +3647,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
const char * name, my_bool rep_quick)
{
int got_error;
- uint i;
+ uint i, keys_to_repair;
ha_rows start_records;
my_off_t new_header_length, org_header_length, del;
File new_file;
@@ -3749,6 +3773,17 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
del=share->state.state.del;
+ /* Calculate number of keys to repair */
+ keys_to_repair= 0;
+ for (sort_param.key=0 ; sort_param.key < share->base.keys ;
+ sort_param.key++)
+ {
+ if (maria_is_key_active(key_map, sort_param.key))
+ keys_to_repair++;
+ }
+ /* For each key we scan and merge sort the keys */
+ param->max_stage= keys_to_repair*2;
+
rec_per_key_part= param->new_rec_per_key_part;
for (sort_param.key=0 ; sort_param.key < share->base.keys ;
rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++)
@@ -3869,6 +3904,9 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
/* Set for next loop */
sort_info.max_records= (ha_rows) sort_info.new_info->s->state.state.records;
+ param->stage++; /* Next stage */
+ param->progress= 0;
+
if (param->testflag & T_STATISTICS)
maria_update_key_parts(sort_param.keyinfo, rec_per_key_part,
sort_param.unique,
@@ -3949,6 +3987,10 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
sort_info.org_data_file_type= share->data_file_type;
sort_info.filelength= share->state.state.data_file_length;
sort_param.fix_datafile=0;
+
+ /* Offsets are now in proportion to the new file length */
+ param->max_progress= sort_info.filelength;
+
}
else
share->state.state.data_file_length=sort_param.max_pos;
@@ -4749,6 +4791,11 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
if (_ma_killed_ptr(param))
DBUG_RETURN(1);
+ if (param->progress_counter++ >= WRITE_COUNT)
+ {
+ param->progress_counter= 0;
+ _ma_report_progress(param, param->progress, param->max_progress);
+ }
switch (sort_info->org_data_file_type) {
case BLOCK_RECORD:
@@ -4789,6 +4836,9 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
flag= HA_ERR_ROW_NOT_VISIBLE;
}
}
+ param->progress= (ma_recordpos_to_page(info->cur_row.lastpos)*
+ share->block_size);
+
share->page_type= save_page_type;
if (!flag)
{
@@ -4841,6 +4891,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
DBUG_RETURN(-1);
}
sort_param->start_recpos=sort_param->pos;
+ param->progress= sort_param->pos;
if (!sort_param->fix_datafile)
{
sort_param->current_filepos= sort_param->pos;
@@ -4868,6 +4919,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
LINT_INIT(to);
pos=sort_param->pos;
+ param->progress= pos;
searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND));
parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0;
for (;;)
@@ -5177,6 +5229,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
}
}
case COMPRESSED_RECORD:
+ param->progress= sort_param->pos;
for (searching=0 ;; searching=1, sort_param->pos++)
{
if (_ma_read_cache(info, &sort_param->read_cache, block_info.header,
@@ -5251,8 +5304,10 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
}
DBUG_RETURN(0);
}
+ case NO_RECORD:
+ DBUG_RETURN(1); /* Impossible */
}
- DBUG_RETURN(1); /* Impossible */
+ DBUG_RETURN(1); /* Impossible */
}
@@ -5375,6 +5430,8 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param)
sort_param->filepos+=reclength+length;
share->state.split++;
break;
+ case NO_RECORD:
+ DBUG_RETURN(1); /* Impossible */
}
}
if (sort_param->master)
diff --git a/storage/maria/ma_check_standalone.h b/storage/maria/ma_check_standalone.h
index 8cda285bb99..f9242bbe123 100644
--- a/storage/maria/ma_check_standalone.h
+++ b/storage/maria/ma_check_standalone.h
@@ -35,6 +35,13 @@ int _ma_killed_ptr(HA_CHECK *param __attribute__((unused)))
return 0;
}
+
+void _ma_report_progress(HA_CHECK *param __attribute__((unused)),
+ ulonglong progress __attribute__((unused)),
+ ulonglong max_progress __attribute__((unused)))
+{
+}
+
/* print warnings and errors */
/* VARARGS */
diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c
index 6576c365a47..602e5da3065 100644
--- a/storage/maria/ma_checkpoint.c
+++ b/storage/maria/ma_checkpoint.c
@@ -580,51 +580,59 @@ pthread_handler_t ma_checkpoint_background(void *arg)
{
case 0:
{
- TRANSLOG_ADDRESS horizon= translog_get_horizon();
+ /* If checkpoints are disabled, wait 1 second and try again */
+ if (maria_checkpoint_disabled)
+ {
+ sleep_time= 1;
+ break;
+ }
+ {
+ TRANSLOG_ADDRESS horizon= translog_get_horizon();
- /*
- With background flushing evenly distributed over the time
- between two checkpoints, we should have only little flushing to do
- in the checkpoint.
- */
- /*
- No checkpoint if little work of interest for recovery was done
- since last checkpoint. Such work includes log writing (lengthens
- recovery, checkpoint would shorten it), page flushing (checkpoint
- would decrease the amount of read pages in recovery).
- In case of one short statement per minute (very low load), we don't
- want to checkpoint every minute, hence the positive
- maria_checkpoint_min_activity.
- */
- if ((ulonglong) (horizon - log_horizon_at_last_checkpoint) <=
- maria_checkpoint_min_log_activity &&
- ((ulonglong) (maria_pagecache->global_cache_write -
- pagecache_flushes_at_last_checkpoint) *
+ /*
+ With background flushing evenly distributed over the time
+ between two checkpoints, we should have only little flushing to do
+ in the checkpoint.
+ */
+ /*
+ No checkpoint if little work of interest for recovery was done
+ since last checkpoint. Such work includes log writing (lengthens
+ recovery, checkpoint would shorten it), page flushing (checkpoint
+ would decrease the amount of read pages in recovery).
+ In case of one short statement per minute (very low load), we don't
+ want to checkpoint every minute, hence the positive
+ maria_checkpoint_min_activity.
+ */
+ if ((ulonglong) (horizon - log_horizon_at_last_checkpoint) <=
+ maria_checkpoint_min_log_activity &&
+ ((ulonglong) (maria_pagecache->global_cache_write -
+ pagecache_flushes_at_last_checkpoint) *
maria_pagecache->block_size) <=
- maria_checkpoint_min_cache_activity)
- {
+ maria_checkpoint_min_cache_activity)
+ {
+ /*
+ Not enough has happened since last checkpoint.
+ Sleep for a while and try again later
+ */
+ sleep_time= interval;
+ break;
+ }
+ sleep_time= 1;
+ ma_checkpoint_execute(CHECKPOINT_MEDIUM, TRUE);
/*
- Not enough has happend since last checkpoint.
- Sleep for a while and try again later
+ Snapshot this kind of "state" of the engine. Note that the value
+ below is possibly greater than last_checkpoint_lsn.
+ */
+ log_horizon_at_last_checkpoint= translog_get_horizon();
+ pagecache_flushes_at_last_checkpoint=
+ maria_pagecache->global_cache_write;
+ /*
+ If the checkpoint above succeeded it has set d|kfiles and
+ d|kfiles_end. If it has failed, it has set
+ pages_to_flush_before_next_checkpoint to 0 so we will skip flushing
+ and sleep until the next checkpoint.
*/
- sleep_time= interval;
- break;
}
- sleep_time= 1;
- ma_checkpoint_execute(CHECKPOINT_MEDIUM, TRUE);
- /*
- Snapshot this kind of "state" of the engine. Note that the value below
- is possibly greater than last_checkpoint_lsn.
- */
- log_horizon_at_last_checkpoint= translog_get_horizon();
- pagecache_flushes_at_last_checkpoint=
- maria_pagecache->global_cache_write;
- /*
- If the checkpoint above succeeded it has set d|kfiles and
- d|kfiles_end. If is has failed, it has set
- pages_to_flush_before_next_checkpoint to 0 so we will skip flushing
- and sleep until the next checkpoint.
- */
break;
}
case 1:
diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c
index 54ebcdc709e..ea9671d4b07 100644
--- a/storage/maria/ma_create.c
+++ b/storage/maria/ma_create.c
@@ -251,10 +251,16 @@ int maria_create(const char *name, enum data_file_type datafile_type,
datafile_type= BLOCK_RECORD;
}
+ if (datafile_type == NO_RECORD && uniques)
+ {
+ /* Can't do unique without data, revert to block records */
+ datafile_type= BLOCK_RECORD;
+ }
+
if (datafile_type == DYNAMIC_RECORD)
options|= HA_OPTION_PACK_RECORD; /* Must use packed records */
- if (datafile_type == STATIC_RECORD)
+ if (datafile_type == STATIC_RECORD || datafile_type == NO_RECORD)
{
/* We can't use checksum with static length rows */
flags&= ~HA_CREATE_CHECKSUM;
@@ -375,7 +381,9 @@ int maria_create(const char *name, enum data_file_type datafile_type,
}
else
{
- if (datafile_type != STATIC_RECORD)
+ if (datafile_type == NO_RECORD)
+ pointer= 0;
+ else if (datafile_type != STATIC_RECORD)
pointer= maria_get_pointer_length(ci->data_file_length,
maria_data_pointer_size);
else
diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c
index d2e011fcf24..183f74835b2 100644
--- a/storage/maria/ma_extra.c
+++ b/storage/maria/ma_extra.c
@@ -415,9 +415,8 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
if (!share->temporary)
error= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
FLUSH_KEEP, FLUSH_KEEP);
-#ifdef HAVE_PREAD
+
_ma_decrement_open_count(info, 1);
-#endif
if (share->not_flushed)
{
share->not_flushed= 0;
@@ -490,6 +489,14 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
} /* maria_extra */
+void ma_set_index_cond_func(MARIA_HA *info, index_cond_func_t func,
+ void *func_arg)
+{
+ info->index_cond_func= func;
+ info->index_cond_func_arg= func_arg;
+}
+
+
/*
Start/Stop Inserting Duplicates Into a Table, WL#1648.
*/
@@ -644,3 +651,10 @@ int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index,
_ma_set_fatal_error(info->s, HA_ERR_CRASHED);
DBUG_RETURN(1);
}
+
+
+my_bool ma_killed_standalone(MARIA_HA *info __attribute__((unused)))
+{
+ return 0;
+}
+
diff --git a/storage/maria/ma_ft_boolean_search.c b/storage/maria/ma_ft_boolean_search.c
index 35a34d32713..13c3a24f362 100644
--- a/storage/maria/ma_ft_boolean_search.c
+++ b/storage/maria/ma_ft_boolean_search.c
@@ -475,14 +475,15 @@ static void _ftb_init_index_search(FT_INFO *ftb)
int i;
FTB_WORD *ftbw;
- if ((ftb->state != READY && ftb->state !=INDEX_DONE) ||
- ftb->keynr == NO_SUCH_KEY)
+ if (ftb->state == UNINITIALIZED || ftb->keynr == NO_SUCH_KEY)
return;
ftb->state=INDEX_SEARCH;
- for (i=ftb->queue.elements; i; i--)
+ for (i= queue_last_element(&ftb->queue);
+ (int) i >= (int) queue_first_element(&ftb->queue);
+ i--)
{
- ftbw=(FTB_WORD *)(ftb->queue.root[i]);
+ ftbw=(FTB_WORD *)(queue_element(&ftb->queue, i));
if (ftbw->flags & FTB_FLAG_TRUNC)
{
@@ -587,7 +588,7 @@ FT_INFO * maria_ft_init_boolean_search(MARIA_HA *info, uint keynr,
sizeof(void *))))
goto err;
reinit_queue(&ftb->queue, ftb->queue.max_elements, 0, 0,
- (int (*)(void*, uchar*, uchar*))FTB_WORD_cmp, 0);
+ (int (*)(void*, uchar*, uchar*))FTB_WORD_cmp, 0, 0, 0);
for (ftbw= ftb->last_word; ftbw; ftbw= ftbw->prev)
queue_insert(&ftb->queue, (uchar *)ftbw);
ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root,
@@ -830,7 +831,7 @@ int maria_ft_boolean_read_next(FT_INFO *ftb, char *record)
/* update queue */
_ft2_search(ftb, ftbw, 0);
- queue_replaced(& ftb->queue);
+ queue_replace_top(&ftb->queue);
}
ftbe=ftb->root;
diff --git a/storage/maria/ma_ft_nlq_search.c b/storage/maria/ma_ft_nlq_search.c
index 927f34f8b72..3bb7defcaaf 100644
--- a/storage/maria/ma_ft_nlq_search.c
+++ b/storage/maria/ma_ft_nlq_search.c
@@ -253,12 +253,12 @@ FT_INFO *maria_ft_init_nlq_search(MARIA_HA *info, uint keynr, uchar *query,
{
QUEUE best;
init_queue(&best,ft_query_expansion_limit,0,0, (queue_compare) &FT_DOC_cmp,
- 0);
+ 0, 0, 0);
tree_walk(&aio.dtree, (tree_walk_action) &walk_and_push,
&best, left_root_right);
while (best.elements)
{
- my_off_t docid=((FT_DOC *)queue_remove(& best, 0))->dpos;
+ my_off_t docid= ((FT_DOC *)queue_remove_top(&best))->dpos;
if (!(*info->read_record)(info, record, docid))
{
info->update|= HA_STATE_AKTIV;
diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c
index 207ae9a8bda..1e60e6a8c06 100644
--- a/storage/maria/ma_key.c
+++ b/storage/maria/ma_key.c
@@ -26,7 +26,8 @@
#define CHECK_KEYS /* Enable safety checks */
-static int _ma_put_key_in_record(MARIA_HA *info,uint keynr,uchar *record);
+static int _ma_put_key_in_record(MARIA_HA *info, uint keynr,
+ my_bool unpack_blobs, uchar *record);
#define FIX_LENGTH(cs, pos, length, char_length) \
do { \
@@ -476,6 +477,9 @@ void _ma_copy_key(MARIA_KEY *to, const MARIA_KEY *from)
_ma_put_key_in_record()
info MARIA handler
keynr Key number that was used
+ unpack_blobs TRUE <=> Unpack blob columns
+ FALSE <=> Skip them. This is used by index condition
+ pushdown check function
record Store key here
Last read key is in info->lastkey
@@ -489,7 +493,7 @@ void _ma_copy_key(MARIA_KEY *to, const MARIA_KEY *from)
*/
static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr,
- uchar *record)
+ my_bool unpack_blobs, uchar *record)
{
reg2 uchar *key;
uchar *pos,*key_end;
@@ -582,16 +586,19 @@ static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr,
if (length > keyseg->length || key+length > key_end)
goto err;
#endif
- memcpy(record+keyseg->start+keyseg->bit_start,
- (char*) &blob_ptr,sizeof(char*));
- memcpy(blob_ptr,key,length);
- blob_ptr+=length;
+ if (unpack_blobs)
+ {
+ memcpy(record+keyseg->start+keyseg->bit_start,
+ (char*) &blob_ptr,sizeof(char*));
+ memcpy(blob_ptr,key,length);
+ blob_ptr+=length;
- /* The above changed info->lastkey2. Inform maria_rnext_same(). */
- info->update&= ~HA_STATE_RNEXT_SAME;
+ /* The above changed info->lastkey2. Inform maria_rnext_same(). */
+ info->update&= ~HA_STATE_RNEXT_SAME;
- _ma_store_blob_length(record+keyseg->start,
- (uint) keyseg->bit_start,length);
+ _ma_store_blob_length(record+keyseg->start,
+ (uint) keyseg->bit_start,length);
+ }
key+=length;
}
else if (keyseg->flag & HA_SWAP_KEY)
@@ -621,6 +628,7 @@ static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr,
DBUG_RETURN(0);
err:
+ DBUG_PRINT("info",("error"));
DBUG_RETURN(1); /* Crashed row */
} /* _ma_put_key_in_record */
@@ -634,7 +642,7 @@ int _ma_read_key_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos)
{
if (info->lastinx >= 0)
{ /* Read only key */
- if (_ma_put_key_in_record(info,(uint) info->lastinx,buf))
+ if (_ma_put_key_in_record(info, (uint)info->lastinx, TRUE, buf))
{
_ma_set_fatal_error(info->s, HA_ERR_CRASHED);
return -1;
@@ -648,6 +656,54 @@ int _ma_read_key_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos)
}
+
+/*
+ Save current key tuple to record and call index condition check function
+
+ SYNOPSIS
+ ma_check_index_cond()
+ info MARIA handler
+ keynr Index we're running a scan on
+ record Record buffer to use (it is assumed that index check function
+ will look for column values there)
+
+ RETURN
+ ICP_ERROR Error ; my_errno set to HA_ERR_CRASHED
+ ICP_NO_MATCH Index condition is not satisfied, continue scanning
+ ICP_MATCH Index condition is satisfied
+ ICP_OUT_OF_RANGE Index condition is not satisfied, end the scan.
+ my_errno set to HA_ERR_END_OF_FILE
+
+ info->cur_row.lastpos is set to HA_OFFSET_ERROR in case of ICP_ERROR or
+ ICP_OUT_OF_RANGE to indicate that we don't have any active row.
+*/
+
+ICP_RESULT ma_check_index_cond(register MARIA_HA *info, uint keynr,
+ uchar *record)
+{
+ ICP_RESULT res= ICP_MATCH;
+ if (info->index_cond_func)
+ {
+ if (_ma_put_key_in_record(info, keynr, FALSE, record))
+ {
+ /* Impossible case; Can only happen if bug in code */
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ info->cur_row.lastpos= HA_OFFSET_ERROR; /* No active record */
+ my_errno= HA_ERR_CRASHED;
+ res= ICP_ERROR;
+ }
+ else if ((res= info->index_cond_func(info->index_cond_func_arg)) ==
+ ICP_OUT_OF_RANGE)
+ {
+ /* We got beyond the end of scanned range */
+ info->cur_row.lastpos= HA_OFFSET_ERROR; /* No active record */
+ my_errno= HA_ERR_END_OF_FILE;
+ }
+ }
+ return res;
+}
+
+
/*
Retrieve auto_increment info
diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c
index a01e0949b68..cdb26716e3f 100644
--- a/storage/maria/ma_loghandler.c
+++ b/storage/maria/ma_loghandler.c
@@ -458,7 +458,9 @@ void translog_lock_handler_assert_owner()
@param num how many records should be filled
*/
-static void check_translog_description_table(int num)
+static uint max_allowed_translog_type= 0;
+
+void check_translog_description_table(int num)
{
int i;
DBUG_ENTER("check_translog_description_table");
@@ -467,6 +469,7 @@ static void check_translog_description_table(int num)
/* last is reserved for extending the table */
DBUG_ASSERT(num < LOGREC_NUMBER_OF_TYPES - 1);
DBUG_ASSERT(log_record_type_descriptor[0].rclass == LOGRECTYPE_NOT_ALLOWED);
+ max_allowed_translog_type= num;
for (i= 0; i <= num; i++)
{
@@ -1078,7 +1081,7 @@ static my_bool translog_write_file_header()
memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic));
page+= sizeof(maria_trans_file_magic);
/* timestamp */
- timestamp= my_getsystime();
+ timestamp= my_hrtime().val;
int8store(page, timestamp);
page+= 8;
/* maria version */
@@ -3583,6 +3586,7 @@ my_bool translog_init_with_table(const char *directory,
log_descriptor.flush_no= 0;
log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
+ /* Normally in Aria this calls translog_table_init() */
(*init_table_func)();
compile_time_assert(sizeof(log_descriptor.dirty_buffer_mask) * 8 >=
TRANSLOG_BUFFERS_NO);
@@ -6224,6 +6228,8 @@ my_bool translog_write_record(LSN *lsn,
(uint) short_trid, (ulong) rec_len));
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
translog_status == TRANSLOG_READONLY);
+ DBUG_ASSERT(type != 0);
+ DBUG_ASSERT((uint)type <= max_allowed_translog_type);
if (unlikely(translog_status != TRANSLOG_OK))
{
DBUG_PRINT("error", ("Transaction log is write protected"));
@@ -6322,9 +6328,9 @@ my_bool translog_write_record(LSN *lsn,
/* process this parts */
if (!(rc= (log_record_type_descriptor[type].prewrite_hook &&
- (*log_record_type_descriptor[type].prewrite_hook) (type, trn,
- tbl_info,
- hook_arg))))
+ (*log_record_type_descriptor[type].prewrite_hook)(type, trn,
+ tbl_info,
+ hook_arg))))
{
switch (log_record_type_descriptor[type].rclass) {
case LOGRECTYPE_VARIABLE_LENGTH:
@@ -6337,6 +6343,7 @@ my_bool translog_write_record(LSN *lsn,
short_trid, &parts, trn, hook_arg);
break;
case LOGRECTYPE_NOT_ALLOWED:
+ DBUG_ASSERT(0);
default:
DBUG_ASSERT(0);
rc= 1;
@@ -7712,7 +7719,7 @@ static my_bool translog_sync_files(uint32 min, uint32 max,
flush_interval= group_commit_wait;
if (flush_interval)
- flush_start= my_micro_time();
+ flush_start= microsecond_interval_timer();
for (fn= min; fn <= max; fn++)
{
TRANSLOG_FILE *file= get_logfile_by_number(fn);
@@ -7979,7 +7986,8 @@ retest:
/*
We do not check time here because pthread_mutex_lock rarely takes
a lot of time so we can sacrifice a bit precision to performance
- (taking into account that my_micro_time() might be expensive call).
+ (taking into account that microsecond_interval_timer() might be an
+ expensive call).
*/
if (flush_interval == 0)
break; /* flush pass is ended */
@@ -7988,7 +7996,8 @@ retest:
if (log_descriptor.next_pass_max_lsn == LSN_IMPOSSIBLE)
{
if (flush_interval == 0 ||
- (time_spent= (my_micro_time() - flush_start)) >= flush_interval)
+ (time_spent= (microsecond_interval_timer() - flush_start)) >=
+ flush_interval)
{
pthread_mutex_unlock(&log_descriptor.log_flush_lock);
break;
@@ -8780,7 +8789,7 @@ ma_soft_sync_background( void *arg __attribute__((unused)))
DBUG_ENTER("ma_soft_sync_background");
for(;;)
{
- ulonglong prev_loop= my_micro_time();
+ ulonglong prev_loop= microsecond_interval_timer();
ulonglong time, sleep;
uint32 min, max, sync_request;
min= soft_sync_min;
@@ -8792,7 +8801,7 @@ ma_soft_sync_background( void *arg __attribute__((unused)))
sleep= group_commit_wait;
if (sync_request)
translog_sync_files(min, max, FALSE);
- time= my_micro_time() - prev_loop;
+ time= microsecond_interval_timer() - prev_loop;
if (time > sleep)
sleep= 0;
else
diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h
index f33e92e9771..5ac6d67413a 100644
--- a/storage/maria/ma_loghandler.h
+++ b/storage/maria/ma_loghandler.h
@@ -312,6 +312,9 @@ extern my_bool translog_init_with_table(const char *directory,
my_bool readonly,
void (*init_table_func)(),
my_bool no_error);
+#ifndef DBUG_OFF
+void check_translog_description_table(int num);
+#endif
extern my_bool
translog_write_record(LSN *lsn, enum translog_record_type type, TRN *trn,
diff --git a/storage/maria/ma_norec.c b/storage/maria/ma_norec.c
new file mode 100644
index 00000000000..6d4f37e34fd
--- /dev/null
+++ b/storage/maria/ma_norec.c
@@ -0,0 +1,66 @@
+/* Copyright (C) 2010 Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Functions to handle tables with no row data (only index)
+ This is useful when you just want to do key reads or want to use
+ the index to check against duplicates.
+*/
+
+#include "maria_def.h"
+
+my_bool _ma_write_no_record(MARIA_HA *info __attribute__((unused)),
+ const uchar *record __attribute__((unused)))
+{
+ return 0;
+}
+
+my_bool _ma_update_no_record(MARIA_HA *info __attribute__((unused)),
+ MARIA_RECORD_POS pos __attribute__((unused)),
+ const uchar *oldrec __attribute__((unused)),
+ const uchar *record __attribute__((unused)))
+{
+ return HA_ERR_WRONG_COMMAND;
+}
+
+
+my_bool _ma_delete_no_record(MARIA_HA *info __attribute__((unused)),
+ const uchar *record __attribute__((unused)))
+{
+ return HA_ERR_WRONG_COMMAND;
+}
+
+
+int _ma_read_no_record(MARIA_HA *info __attribute__((unused)),
+ uchar *record __attribute__((unused)),
+ MARIA_RECORD_POS pos __attribute__((unused)))
+{
+ return HA_ERR_WRONG_COMMAND;
+}
+
+
+int _ma_read_rnd_no_record(MARIA_HA *info __attribute__((unused)),
+ uchar *buf __attribute__((unused)),
+ MARIA_RECORD_POS filepos __attribute__((unused)),
+ my_bool skip_deleted_blocks __attribute__((unused)))
+{
+ return HA_ERR_WRONG_COMMAND;
+}
+
+my_off_t _ma_no_keypos_to_recpos(MARIA_SHARE *share __attribute__ ((unused)),
+ my_off_t pos __attribute__ ((unused)))
+{
+ return 0;
+}
diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c
index 498904f89a7..c9dc20ce78b 100644
--- a/storage/maria/ma_open.c
+++ b/storage/maria/ma_open.c
@@ -205,6 +205,9 @@ static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, const char *name,
#ifdef THREAD
thr_lock_data_init(&share->lock,&m_info->lock,(void*) m_info);
#endif
+ if (share->options & HA_OPTION_TMP_TABLE)
+ m_info->lock.type= TL_WRITE;
+
m_info->open_list.data=(void*) m_info;
maria_open_list=list_add(maria_open_list,&m_info->open_list);
@@ -491,6 +494,10 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
(uint) share->base.block_size,
(uint) maria_block_size));
my_errno=HA_ERR_UNSUPPORTED;
+ my_printf_error(my_errno, "Wrong block size %u; Expected %u",
+ MYF(0),
+ (uint) share->base.block_size,
+ (uint) maria_block_size);
goto err;
}
@@ -786,6 +793,10 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
share->blobs[j].offset= share->columndef[i].offset;
j++;
}
+ if (share->columndef[i].type == FIELD_VARCHAR)
+ share->has_varchar_fields= 1;
+ if (share->columndef[i].null_bit)
+ share->has_null_fields= 1;
}
share->columndef[i].type= FIELD_LAST; /* End marker */
disk_pos= _ma_column_nr_read(disk_pos, share->column_nr,
@@ -968,6 +979,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
share->state.changed));
pthread_mutex_unlock(&THR_LOCK_maria);
+
+ m_info->open_flags= open_flags;
DBUG_RETURN(m_info);
err:
@@ -1109,6 +1122,20 @@ void _ma_setup_functions(register MARIA_SHARE *share)
else
share->calc_checksum= _ma_checksum;
break;
+ case NO_RECORD:
+ share->read_record= _ma_read_no_record;
+ share->scan= _ma_read_rnd_no_record;
+ share->delete_record= _ma_delete_no_record;
+ share->update_record= _ma_update_no_record;
+ share->write_record= _ma_write_no_record;
+ share->recpos_to_keypos= _ma_no_keypos_to_recpos;
+ share->keypos_to_recpos= _ma_no_keypos_to_recpos;
+
+ /* Abort if following functions are called */
+ share->compare_record= 0;
+ share->compare_unique= 0;
+ share->calc_checksum= 0;
+ break;
case BLOCK_RECORD:
share->once_init= _ma_once_init_block_record;
share->once_end= _ma_once_end_block_record;
diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c
index 96504cfba2d..33112a1ec5e 100644
--- a/storage/maria/ma_recovery.c
+++ b/storage/maria/ma_recovery.c
@@ -344,7 +344,7 @@ int maria_apply_log(LSN from_lsn, LSN end_lsn,
}
}
- now= my_getsystime();
+ now= microsecond_interval_timer();
in_redo_phase= TRUE;
if (run_redo_phase(from_lsn, end_lsn, apply))
{
@@ -374,10 +374,10 @@ int maria_apply_log(LSN from_lsn, LSN end_lsn,
in_redo_phase= FALSE;
old_now= now;
- now= my_getsystime();
+ now= microsecond_interval_timer();
if (recovery_message_printed == REC_MSG_REDO)
{
- double phase_took= (now - old_now)/10000000.0;
+ double phase_took= (now - old_now)/1000000.0;
/*
Detailed progress info goes to stderr, because ma_message_no_user()
cannot put several messages on one line.
@@ -443,10 +443,10 @@ int maria_apply_log(LSN from_lsn, LSN end_lsn,
}
old_now= now;
- now= my_getsystime();
+ now= microsecond_interval_timer();
if (recovery_message_printed == REC_MSG_UNDO)
{
- double phase_took= (now - old_now)/10000000.0;
+ double phase_took= (now - old_now)/1000000.0;
procent_printed= 1;
fprintf(stderr, " (%.1f seconds); ", phase_took);
fflush(stderr);
@@ -463,10 +463,10 @@ int maria_apply_log(LSN from_lsn, LSN end_lsn,
}
old_now= now;
- now= my_getsystime();
+ now= microsecond_interval_timer();
if (recovery_message_printed == REC_MSG_FLUSH)
{
- double phase_took= (now - old_now)/10000000.0;
+ double phase_took= (now - old_now)/1000000.0;
procent_printed= 1;
fprintf(stderr, " (%.1f seconds); ", phase_took);
fflush(stderr);
diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c
index 976e09ece99..1ece9e0049e 100644
--- a/storage/maria/ma_rkey.c
+++ b/storage/maria/ma_rkey.c
@@ -34,6 +34,7 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
HA_KEYSEG *last_used_keyseg;
uint32 nextflag;
MARIA_KEY key;
+ ICP_RESULT icp_res= ICP_MATCH;
DBUG_ENTER("maria_rkey");
DBUG_PRINT("enter", ("base: 0x%lx buf: 0x%lx inx: %d search_flag: %d",
(long) info, (long) buf, inx, search_flag));
@@ -82,10 +83,11 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
rw_rdlock(&keyinfo->root_lock);
nextflag= maria_read_vec[search_flag] | key.flag;
- if (search_flag != HA_READ_KEY_EXACT ||
- ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME))
+ if (search_flag != HA_READ_KEY_EXACT)
+ {
+ /* Assume we will get a read next/previous call after this one */
nextflag|= SEARCH_SAVE_BUFF;
-
+ }
switch (keyinfo->key_alg) {
#ifdef HAVE_RTREE_KEYS
case HA_KEY_ALG_RTREE:
@@ -101,16 +103,18 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
if (!_ma_search(info, &key, nextflag, info->s->state.key_root[inx]))
{
MARIA_KEY lastkey;
- lastkey.keyinfo= keyinfo;
- lastkey.data= info->lastkey_buff;
/*
Found a key, but it might not be usable. We cannot use rows that
are inserted by other threads after we got our table lock
("concurrent inserts"). The record may not even be present yet.
Keys are inserted into the index(es) before the record is
- inserted into the data file.
+ inserted into the data file.
+
+ If index condition is present, it must be either satisfied or
+ not satisfied with an out-of-range condition.
*/
- if ((*share->row_is_visible)(info))
+ if ((*share->row_is_visible)(info) &&
+ ((icp_res= ma_check_index_cond(info, inx, buf)) != ICP_NO_MATCH))
break;
/* The key references a concurrently inserted record. */
@@ -122,7 +126,9 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
info->cur_row.lastpos= HA_OFFSET_ERROR;
break;
}
-
+
+ lastkey.keyinfo= keyinfo;
+ lastkey.data= info->lastkey_buff;
do
{
uint not_used[2];
@@ -138,6 +144,18 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
if (_ma_search_next(info, &lastkey, maria_readnext_vec[search_flag],
info->s->state.key_root[inx]))
break; /* purecov: inspected */
+
+ /*
+ If we are at the last key on the key page, allow writers to
+ access the index.
+ */
+ if (info->int_keypos >= info->int_maxpos &&
+ ma_yield_and_check_if_killed(info, inx))
+ {
+ DBUG_ASSERT(info->cur_row.lastpos == HA_OFFSET_ERROR);
+ break;
+ }
+
/*
Check that the found key does still match the search.
_ma_search_next() delivers the next key regardless of its
@@ -153,7 +171,13 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
break;
/* purecov: end */
}
- } while (!(*share->row_is_visible)(info));
+
+ } while (!(*share->row_is_visible)(info) ||
+ ((icp_res= ma_check_index_cond(info, inx, buf)) == 0));
+ }
+ else
+ {
+ DBUG_ASSERT(info->cur_row.lastpos);
}
}
if (share->lock_key_trees)
@@ -161,10 +185,15 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
if (info->cur_row.lastpos == HA_OFFSET_ERROR)
{
+ if (icp_res == ICP_OUT_OF_RANGE)
+ {
+ /* We don't want HA_ERR_END_OF_FILE in this particular case */
+ my_errno= HA_ERR_KEY_NOT_FOUND;
+ }
fast_ma_writeinfo(info);
goto err;
}
-
+
/* Calculate length of the found key; Used by maria_rnext_same */
if ((keyinfo->flag & HA_VAR_LENGTH_KEY))
info->last_rkey_length= _ma_keylength_part(keyinfo, info->lastkey_buff,
@@ -200,3 +229,37 @@ err:
info->update|=HA_STATE_NEXT_FOUND; /* Previous gives last row */
DBUG_RETURN(my_errno);
} /* _ma_rkey */
+
+
+/*
+ Yield to possible other writers during an index scan.
+ Check also if we got killed by the user and if yes, set my_errno to
+ HA_ERR_ABORTED_BY_USER
+
+ return 0 ok
+ return 1 Query has been requested to be killed
+*/
+
+my_bool ma_yield_and_check_if_killed(MARIA_HA *info, int inx)
+{
+ MARIA_SHARE *share;
+ if (ma_killed(info))
+ {
+ /* purecov: begin tested */
+ /* Mark that we don't have an active row */
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ /* Set error that we were aborted by kill from application */
+ my_errno= HA_ERR_ABORTED_BY_USER;
+ return 1;
+ /* purecov: end */
+ }
+
+ if ((share= info->s)->lock_key_trees)
+ {
+ /* Give writers a chance to access index */
+ rw_unlock(&share->keyinfo[inx].root_lock);
+ rw_rdlock(&share->keyinfo[inx].root_lock);
+ }
+ return 0;
+}
+
diff --git a/storage/maria/ma_rnext.c b/storage/maria/ma_rnext.c
index c49bbb19e83..e5ee84b61c7 100644
--- a/storage/maria/ma_rnext.c
+++ b/storage/maria/ma_rnext.c
@@ -30,6 +30,7 @@ int maria_rnext(MARIA_HA *info, uchar *buf, int inx)
uint flag;
MARIA_SHARE *share= info->s;
MARIA_KEYDEF *keyinfo;
+ ICP_RESULT icp_res= ICP_MATCH;
uint update_mask= HA_STATE_NEXT_FOUND;
DBUG_ENTER("maria_rnext");
@@ -105,8 +106,21 @@ int maria_rnext(MARIA_HA *info, uchar *buf, int inx)
if (!error)
{
- while (!(*share->row_is_visible)(info))
+ while (!(*share->row_is_visible)(info) ||
+ ((icp_res= ma_check_index_cond(info, inx, buf)) == ICP_NO_MATCH))
{
+ /*
+ If we are at the last key on the key page, allow writers to
+ access the index.
+ */
+ if (info->int_keypos >= info->int_maxpos &&
+ ma_yield_and_check_if_killed(info, inx))
+ {
+ /* my_errno is set by ma_yield_and_check_if_killed() */
+ error= 1;
+ break;
+ }
+
/* Skip rows inserted by other threads since we got a lock */
if ((error= _ma_search_next(info, &info->last_key,
SEARCH_BIGGER,
@@ -120,14 +134,16 @@ int maria_rnext(MARIA_HA *info, uchar *buf, int inx)
/* Don't clear if database-changed */
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
info->update|= update_mask;
-
- if (error)
+
+ if (error || icp_res != ICP_MATCH)
{
+ fast_ma_writeinfo(info);
if (my_errno == HA_ERR_KEY_NOT_FOUND)
- my_errno=HA_ERR_END_OF_FILE;
+ my_errno= HA_ERR_END_OF_FILE;
}
else if (!buf)
{
+ fast_ma_writeinfo(info);
DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0);
}
else if (!(*info->read_record)(info, buf, info->cur_row.lastpos))
diff --git a/storage/maria/ma_rnext_same.c b/storage/maria/ma_rnext_same.c
index cbd81d20816..c35d8ae0222 100644
--- a/storage/maria/ma_rnext_same.c
+++ b/storage/maria/ma_rnext_same.c
@@ -30,6 +30,7 @@ int maria_rnext_same(MARIA_HA *info, uchar *buf)
int error;
uint inx,not_used[2];
MARIA_KEYDEF *keyinfo;
+ ICP_RESULT icp_res= ICP_MATCH;
DBUG_ENTER("maria_rnext_same");
if ((int) (inx= info->lastinx) < 0 ||
@@ -79,8 +80,19 @@ int maria_rnext_same(MARIA_HA *info, uchar *buf)
info->cur_row.lastpos= HA_OFFSET_ERROR;
break;
}
+ /*
+ If we are at the last key on the key page, allow writers to
+ access the index.
+ */
+ if (info->int_keypos >= info->int_maxpos &&
+ ma_yield_and_check_if_killed(info, inx))
+ {
+ error= 1;
+ break;
+ }
/* Skip rows that are inserted by other threads since we got a lock */
- if ((info->s->row_is_visible)(info))
+ if ((info->s->row_is_visible)(info) &&
+ ((icp_res= ma_check_index_cond(info, inx, buf)) != ICP_NO_MATCH))
break;
}
}
@@ -90,13 +102,15 @@ int maria_rnext_same(MARIA_HA *info, uchar *buf)
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
info->update|= HA_STATE_NEXT_FOUND | HA_STATE_RNEXT_SAME;
- if (error)
+ if (error || icp_res != ICP_MATCH)
{
+ fast_ma_writeinfo(info);
if (my_errno == HA_ERR_KEY_NOT_FOUND)
- my_errno=HA_ERR_END_OF_FILE;
+ my_errno= HA_ERR_END_OF_FILE;
}
else if (!buf)
{
+ fast_ma_writeinfo(info);
DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0);
}
else if (!(*info->read_record)(info, buf, info->cur_row.lastpos))
diff --git a/storage/maria/ma_rprev.c b/storage/maria/ma_rprev.c
index b9f46d7c405..c4bcb9de967 100644
--- a/storage/maria/ma_rprev.c
+++ b/storage/maria/ma_rprev.c
@@ -28,6 +28,7 @@ int maria_rprev(MARIA_HA *info, uchar *buf, int inx)
register uint flag;
MARIA_SHARE *share= info->s;
MARIA_KEYDEF *keyinfo;
+ ICP_RESULT icp_res= ICP_MATCH;
DBUG_ENTER("maria_rprev");
if ((inx = _ma_check_index(info,inx)) < 0)
@@ -55,8 +56,24 @@ int maria_rprev(MARIA_HA *info, uchar *buf, int inx)
if (!error)
{
- while (!(*share->row_is_visible)(info))
+ my_off_t cur_keypage= info->last_keypage;
+ while (!(*share->row_is_visible)(info) ||
+ ((icp_res= ma_check_index_cond(info, inx, buf)) == ICP_NO_MATCH))
{
+ /*
+ If the scan has moved on to another key page,
+ allow writers to access the index.
+ */
+ if (info->last_keypage != cur_keypage)
+ {
+ cur_keypage= info->last_keypage;
+ if (ma_yield_and_check_if_killed(info, inx))
+ {
+ error= 1;
+ break;
+ }
+ }
+
/* Skip rows that are inserted by other threads since we got a lock */
if ((error= _ma_search_next(info, &info->last_key,
SEARCH_SMALLER,
@@ -68,13 +85,16 @@ int maria_rprev(MARIA_HA *info, uchar *buf, int inx)
rw_unlock(&keyinfo->root_lock);
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
info->update|= HA_STATE_PREV_FOUND;
- if (error)
+
+ if (error || icp_res != ICP_MATCH)
{
+ fast_ma_writeinfo(info);
if (my_errno == HA_ERR_KEY_NOT_FOUND)
- my_errno=HA_ERR_END_OF_FILE;
+ my_errno= HA_ERR_END_OF_FILE;
}
else if (!buf)
{
+ fast_ma_writeinfo(info);
DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0);
}
else if (!(*info->read_record)(info, buf, info->cur_row.lastpos))
diff --git a/storage/maria/ma_search.c b/storage/maria/ma_search.c
index d6270daacee..84123fec93a 100644
--- a/storage/maria/ma_search.c
+++ b/storage/maria/ma_search.c
@@ -149,7 +149,11 @@ static int _ma_search_no_save(register MARIA_HA *info, MARIA_KEY *key,
flag= (*keyinfo->bin_search)(key, &page, nextflag, &keypos, lastkey,
&last_key_not_used);
if (flag == MARIA_FOUND_WRONG_KEY)
- DBUG_RETURN(-1);
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno= HA_ERR_CRASHED;
+ goto err;
+ }
page_flag= page.flag;
used_length= page.size;
nod_flag= page.node;
@@ -790,6 +794,7 @@ MARIA_RECORD_POS _ma_row_pos_from_key(const MARIA_KEY *key)
case 4: pos= (my_off_t) mi_uint4korr(after_key); break;
case 3: pos= (my_off_t) mi_uint3korr(after_key); break;
case 2: pos= (my_off_t) mi_uint2korr(after_key); break;
+ case 0: /* NO_RECORD */
default:
pos=0L; /* Shut compiler up */
}
@@ -899,6 +904,7 @@ void _ma_dpointer(MARIA_SHARE *share, uchar *buff, my_off_t pos)
case 4: mi_int4store(buff,pos); break;
case 3: mi_int3store(buff,pos); break;
case 2: mi_int2store(buff,(uint) pos); break;
+ case 0: break; /* For NO_RECORD */
default: abort(); /* Impossible */
}
} /* _ma_dpointer */
diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c
index 5c5bd8f8d6c..7bfb53ca0a1 100644
--- a/storage/maria/ma_sort.c
+++ b/storage/maria/ma_sort.c
@@ -191,6 +191,9 @@ int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages,
&tempfile,&tempfile_for_exceptions))
== HA_POS_ERROR)
goto err; /* purecov: tested */
+
+ info->sort_info->param->stage++; /* Merge stage */
+
if (maxbuffer == 0)
{
if (!no_messages)
@@ -769,6 +772,8 @@ static int write_index(MARIA_SORT_PARAM *info,
if ((*info->key_write)(info, *sort_keys++))
DBUG_RETURN(-1); /* purecov: inspected */
}
+ if (info->sort_info->param->max_stage != 1) /* If not parallel */
+ _ma_report_progress(info->sort_info->param, 1, 1);
DBUG_RETURN(0);
} /* write_index */
@@ -779,7 +784,7 @@ static int merge_many_buff(MARIA_SORT_PARAM *info, uint keys,
uchar **sort_keys, BUFFPEK *buffpek,
int *maxbuffer, IO_CACHE *t_file)
{
- register int i;
+ int tmp, merges, max_merges;
IO_CACHE t_file2, *from_file, *to_file, *temp;
BUFFPEK *lastbuff;
DBUG_ENTER("merge_many_buff");
@@ -791,9 +796,21 @@ static int merge_many_buff(MARIA_SORT_PARAM *info, uint keys,
DISK_BUFFER_SIZE, info->sort_info->param->myf_rw))
DBUG_RETURN(1); /* purecov: inspected */
+ /* Calculate how many merges are needed */
+ max_merges= 1; /* Count merge_index */
+ tmp= *maxbuffer;
+ while (tmp >= MERGEBUFF2)
+ {
+ merges= (tmp-MERGEBUFF*3/2 + 1) / MERGEBUFF + 1;
+ max_merges+= merges;
+ tmp= merges;
+ }
+ merges= 0;
+
from_file= t_file ; to_file= &t_file2;
while (*maxbuffer >= MERGEBUFF2)
{
+ int i;
reinit_io_cache(from_file,READ_CACHE,0L,0,0);
reinit_io_cache(to_file,WRITE_CACHE,0L,0,0);
lastbuff=buffpek;
@@ -802,6 +819,8 @@ static int merge_many_buff(MARIA_SORT_PARAM *info, uint keys,
if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++,
buffpek+i,buffpek+i+MERGEBUFF-1))
goto cleanup;
+ if (info->sort_info->param->max_stage != 1) /* If not parallel */
+ _ma_report_progress(info->sort_info->param, merges++, max_merges);
}
if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++,
buffpek+i,buffpek+ *maxbuffer))
@@ -810,6 +829,8 @@ static int merge_many_buff(MARIA_SORT_PARAM *info, uint keys,
break; /* purecov: inspected */
temp=from_file; from_file=to_file; to_file=temp;
*maxbuffer= (int) (lastbuff-buffpek)-1;
+ if (info->sort_info->param->max_stage != 1) /* If not parallel */
+ _ma_report_progress(info->sort_info->param, merges++, max_merges);
}
cleanup:
close_cached_file(to_file); /* This holds old result */
@@ -939,7 +960,7 @@ merge_buffers(MARIA_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
if (init_queue(&queue,(uint) (Tb-Fb)+1,offsetof(BUFFPEK,key),0,
(int (*)(void*, uchar *,uchar*)) info->key_cmp,
- (void*) info))
+ (void*) info, 0, 0))
DBUG_RETURN(1); /* purecov: inspected */
for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
@@ -988,7 +1009,7 @@ merge_buffers(MARIA_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
uchar *base= buffpek->base;
uint max_keys=buffpek->max_keys;
- VOID(queue_remove(&queue,0));
+ VOID(queue_remove_top(&queue));
/* Put room used by buffer to use in other buffer */
for (refpek= (BUFFPEK**) &queue_top(&queue);
@@ -1013,7 +1034,7 @@ merge_buffers(MARIA_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
}
else if (error == -1)
goto err; /* purecov: inspected */
- queue_replaced(&queue); /* Top element has been replaced */
+ queue_replace_top(&queue); /* Top element has been replaced */
}
}
buffpek=(BUFFPEK*) queue_top(&queue);
@@ -1066,6 +1087,8 @@ merge_index(MARIA_SORT_PARAM *info, uint keys, uchar **sort_keys,
if (merge_buffers(info,keys,tempfile,(IO_CACHE*) 0,sort_keys,buffpek,buffpek,
buffpek+maxbuffer))
DBUG_RETURN(1); /* purecov: inspected */
+ if (info->sort_info->param->max_stage != 1) /* If not parallel */
+ _ma_report_progress(info->sort_info->param, 1, 1);
DBUG_RETURN(0);
} /* merge_index */
diff --git a/storage/maria/ma_static.c b/storage/maria/ma_static.c
index 5c88ba6745b..71094fb0343 100644
--- a/storage/maria/ma_static.c
+++ b/storage/maria/ma_static.c
@@ -39,6 +39,7 @@ my_bool maria_inited= FALSE;
my_bool maria_in_ha_maria= FALSE; /* If used from ha_maria or not */
my_bool maria_recovery_changed_data= 0, maria_recovery_verbose= 0;
my_bool maria_assert_if_crashed_table= 0;
+my_bool maria_checkpoint_disabled= 0;
pthread_mutex_t THR_LOCK_maria;
#if defined(THREAD) && !defined(DONT_USE_RW_LOCKS)
@@ -109,3 +110,6 @@ static int always_valid(const char *filename __attribute__((unused)))
}
int (*maria_test_invalid_symlink)(const char *filename)= always_valid;
+
+my_bool (*ma_killed)(MARIA_HA *)= ma_killed_standalone;
+
diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c
index ec3060822f9..4b0eb0f5644 100644
--- a/storage/maria/ma_test1.c
+++ b/storage/maria/ma_test1.c
@@ -410,6 +410,10 @@ static int run_test(const char *filename)
if (!silent)
printf("- Reading rows with key\n");
record[1]= 0; /* For nicer printf */
+
+ if (record_type == NO_RECORD)
+ maria_extra(file, HA_EXTRA_KEYREAD, 0);
+
for (i=0 ; i <= 25 ; i++)
{
create_key(key,i);
@@ -423,9 +427,15 @@ static int run_test(const char *filename)
(int) key_length,key+offset_to_key,error,my_errno,record+1);
}
}
+ if (record_type == NO_RECORD)
+ {
+ maria_extra(file, HA_EXTRA_NO_KEYREAD, 0);
+ goto end;
+ }
if (!silent)
printf("- Reading rows with position\n");
+
if (maria_scan_init(file))
{
fprintf(stderr, "maria_scan_init failed\n");
@@ -760,6 +770,8 @@ static struct my_option my_long_options[] =
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"rows-in-block", 'M', "Store rows in block format",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"rows-no-data", 'n', "Don't store any data, only keys",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"row-pointer-size", 'R', "Undocumented", (uchar**) &rec_pointer_size,
(uchar**) &rec_pointer_size, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"silent", 's', "Undocumented",
@@ -819,6 +831,9 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
case 'M':
record_type= BLOCK_RECORD;
break;
+ case 'n':
+ record_type= NO_RECORD;
+ break;
case 'S':
if (key_field == FIELD_VARCHAR)
{
@@ -890,6 +905,10 @@ static void get_options(int argc, char *argv[])
exit(ho_error);
if (transactional)
record_type= BLOCK_RECORD;
+ if (record_type == NO_RECORD)
+ skip_update= skip_delete= 1;
+
+
return;
} /* get options */
diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c
index 2fcca9cbe10..6f92ca1559c 100644
--- a/storage/maria/ma_unique.c
+++ b/storage/maria/ma_unique.c
@@ -52,7 +52,8 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, uchar *record,
info->lastinx= ~0;
DBUG_ASSERT(key.data_length == MARIA_UNIQUE_HASH_LENGTH);
- if (_ma_search(info, &key, SEARCH_FIND, info->s->state.key_root[def->key]))
+ if (_ma_search(info, &key, SEARCH_FIND | SEARCH_SAVE_BUFF,
+ info->s->state.key_root[def->key]))
{
info->page_changed=1; /* Can't optimize read next */
info->cur_row.lastpos= lastpos;
@@ -145,13 +146,14 @@ ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const uchar *record)
keyseg->charset->coll->hash_sort(keyseg->charset,
(const uchar*) pos, length, &seed1,
&seed2);
- crc^= seed1;
+ crc+= seed1;
}
else
- while (pos != end)
- crc=((crc << 8) +
- (((uchar) *pos++))) +
- (crc >> (8*sizeof(ha_checksum)-8));
+ {
+ my_hash_sort_bin((CHARSET_INFO*) 0, pos, (size_t) (end-pos),
+ &seed1, &seed2);
+ crc+= seed1;
+ }
}
return crc;
}
diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c
index 629e07b872d..7b80ca5a5b8 100644
--- a/storage/maria/ma_write.c
+++ b/storage/maria/ma_write.c
@@ -124,12 +124,23 @@ int maria_write(MARIA_HA *info, uchar *record)
goto err2;
/* Calculate and check all unique constraints */
- for (i=0 ; i < share->state.header.uniques ; i++)
+
+ if (share->state.header.uniques)
{
- if (_ma_check_unique(info,share->uniqueinfo+i,record,
- _ma_unique_hash(share->uniqueinfo+i,record),
- HA_OFFSET_ERROR))
- goto err2;
+ for (i=0 ; i < share->state.header.uniques ; i++)
+ {
+ MARIA_UNIQUEDEF *def= share->uniqueinfo + i;
+ ha_checksum unique_hash= _ma_unique_hash(share->uniqueinfo+i,record);
+ if (maria_is_key_active(share->state.key_map, def->key))
+ {
+ if (_ma_check_unique(info, def, record,
+ unique_hash, HA_OFFSET_ERROR))
+ goto err2;
+ }
+ else
+ maria_unique_store(record+ share->keyinfo[def->key].seg->start,
+ unique_hash);
+ }
}
/* Ensure we don't try to restore auto_increment if it doesn't change */
diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c
index 52a30dce235..34e9ed108ba 100644
--- a/storage/maria/maria_chk.c
+++ b/storage/maria/maria_chk.c
@@ -70,7 +70,7 @@ static const char *field_pack[]=
static const char *record_formats[]=
{
- "Fixed length", "Packed", "Compressed", "Block", "?"
+ "Fixed length", "Packed", "Compressed", "Block", "No data", "?", "?"
};
static const char *bitmap_description[]=
@@ -436,7 +436,7 @@ static struct my_option my_long_options[] =
static void print_version(void)
{
- printf("%s Ver 1.0 for %s at %s\n", my_progname, SYSTEM_TYPE,
+ printf("%s Ver 1.1 for %s at %s\n", my_progname, SYSTEM_TYPE,
MACHINE_TYPE);
NETWARE_SET_SCREEN_MODE(1);
}
diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h
index 4692896212d..392e0f8d95c 100644
--- a/storage/maria/maria_def.h
+++ b/storage/maria/maria_def.h
@@ -386,6 +386,8 @@ typedef struct st_maria_share
my_bool temporary;
/* Below flag is needed to make log tables work with concurrent insert */
my_bool is_log_table;
+ my_bool has_null_fields;
+ my_bool has_varchar_fields; /* If table has varchar fields */
/*
Set to 1 if open_count was wrong at open. Set to avoid asserts for
wrong open count on close.
@@ -497,12 +499,12 @@ typedef struct st_maria_block_scan
MARIA_RECORD_POS row_base_page;
} MARIA_BLOCK_SCAN;
+typedef ICP_RESULT (*index_cond_func_t)(void *param);
struct st_maria_handler
{
MARIA_SHARE *s; /* Shared between open:s */
struct st_ma_transaction *trn; /* Pointer to active transaction */
- void *external_ptr; /* Pointer to THD in mysql */
MARIA_STATUS_INFO *state, state_save;
MARIA_STATUS_INFO *state_start; /* State at start of transaction */
MARIA_ROW cur_row; /* The active row that we just read */
@@ -519,6 +521,7 @@ struct st_maria_handler
DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */
MEM_ROOT ft_memroot; /* used by the parser */
MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */
+ void *external_ref; /* For MariaDB TABLE */
uchar *buff; /* page buffer */
uchar *keyread_buff; /* Buffer for last key read */
uchar *lastkey_buff; /* Last used search key */
@@ -560,6 +563,7 @@ struct st_maria_handler
ulong row_base_length; /* Length of row header */
uint row_flag; /* Flag to store in row header */
uint opt_flag; /* Optim. for space/speed */
+ uint open_flags; /* Flags used in open() */
uint update; /* If file changed since open */
int lastinx; /* Last used index */
uint last_rkey_length; /* Last length in maria_rkey() */
@@ -601,6 +605,9 @@ struct st_maria_handler
uchar *maria_rtree_recursion_state; /* For RTREE */
uchar length_buff[5]; /* temp buff to store blob lengths */
int maria_rtree_recursion_depth;
+
+ index_cond_func_t index_cond_func; /* Index condition function */
+ void *index_cond_func_arg; /* parameter for the func */
};
/* Some defines used by maria-functions */
@@ -819,11 +826,12 @@ extern uint maria_quick_table_bits;
extern char *maria_data_root;
extern uchar maria_zero_string[];
extern my_bool maria_inited, maria_in_ha_maria, maria_recovery_changed_data;
-extern my_bool maria_recovery_verbose;
+extern my_bool maria_recovery_verbose, maria_checkpoint_disabled;
extern my_bool maria_assert_if_crashed_table;
extern ulong maria_checkpoint_min_log_activity;
extern HASH maria_stored_state;
extern int (*maria_create_trn_hook)(MARIA_HA *);
+extern my_bool (*ma_killed)(MARIA_HA *);
/* This is used by _ma_calc_xxx_key_length och _ma_store_key */
typedef struct st_maria_s_param
@@ -883,6 +891,18 @@ extern my_bool _ma_update_static_record(MARIA_HA *, MARIA_RECORD_POS,
const uchar *, const uchar *);
extern my_bool _ma_delete_static_record(MARIA_HA *info, const uchar *record);
extern my_bool _ma_cmp_static_record(MARIA_HA *info, const uchar *record);
+
+extern my_bool _ma_write_no_record(MARIA_HA *info, const uchar *record);
+extern my_bool _ma_update_no_record(MARIA_HA *info, MARIA_RECORD_POS pos,
+ const uchar *oldrec, const uchar *record);
+extern my_bool _ma_delete_no_record(MARIA_HA *info, const uchar *record);
+extern int _ma_read_no_record(MARIA_HA *info, uchar *record,
+ MARIA_RECORD_POS pos);
+extern int _ma_read_rnd_no_record(MARIA_HA *info, uchar *buf,
+ MARIA_RECORD_POS filepos,
+ my_bool skip_deleted_blocks);
+my_off_t _ma_no_keypos_to_recpos(MARIA_SHARE *share, my_off_t pos);
+
extern my_bool _ma_ck_write(MARIA_HA *info, MARIA_KEY *key);
extern my_bool _ma_enlarge_root(MARIA_HA *info, MARIA_KEY *key,
MARIA_RECORD_POS *root);
@@ -1207,6 +1227,8 @@ int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index,
See ma_check_standalone.h .
*/
int _ma_killed_ptr(HA_CHECK *param);
+void _ma_report_progress(HA_CHECK *param, ulonglong progress,
+ ulonglong max_progress);
void _ma_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...))
ATTRIBUTE_FORMAT(printf, 2, 3);
void _ma_check_print_warning _VARARGS((HA_CHECK *param, const char *fmt, ...))
@@ -1281,4 +1303,11 @@ extern my_bool maria_flush_log_for_page_none(uchar *page,
pgcache_page_no_t page_no,
uchar *data_ptr);
extern PAGECACHE *maria_log_pagecache;
+extern void ma_set_index_cond_func(MARIA_HA *info, index_cond_func_t func,
+ void *func_arg);
+ICP_RESULT ma_check_index_cond(register MARIA_HA *info, uint keynr, uchar *record);
+
+extern my_bool ma_yield_and_check_if_killed(MARIA_HA *info, int inx);
+extern my_bool ma_killed_standalone(MARIA_HA *);
+
extern uint _ma_file_callback_to_id(void *callback_data);
diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c
index 983a38e69e7..8332af60de5 100644
--- a/storage/maria/maria_pack.c
+++ b/storage/maria/maria_pack.c
@@ -590,7 +590,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
Create a global priority queue in preparation for making
temporary Huffman trees.
*/
- if (init_queue(&queue,256,0,0,compare_huff_elements,0))
+ if (init_queue(&queue, 256, 0, 0, compare_huff_elements, 0, 0, 0))
goto err;
/*
@@ -1523,7 +1523,7 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
if (queue.max_elements < found)
{
delete_queue(&queue);
- if (init_queue(&queue,found,0,0,compare_huff_elements,0))
+ if (init_queue(&queue,found, 0, 0, compare_huff_elements, 0, 0, 0))
return -1;
}
@@ -1627,8 +1627,7 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
Make a priority queue from the queue. Construct its index so that we
have a partially ordered tree.
*/
- for (i=found/2 ; i > 0 ; i--)
- _downheap(&queue,i);
+ queue_fix(&queue);
/* The Huffman algorithm. */
bytes_packed=0; bits_packed=0;
@@ -1639,12 +1638,9 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
Popping from a priority queue includes a re-ordering of the queue,
to get the next least incidence element to the top.
*/
- a=(HUFF_ELEMENT*) queue_remove(&queue,0);
- /*
- Copy the next least incidence element. The queue implementation
- reserves root[0] for temporary purposes. root[1] is the top.
- */
- b=(HUFF_ELEMENT*) queue.root[1];
+ a=(HUFF_ELEMENT*) queue_remove_top(&queue);
+ /* Copy the next least incidence element */
+ b=(HUFF_ELEMENT*) queue_top(&queue);
/* Get a new element from the element buffer. */
new_huff_el=huff_tree->element_buffer+found+i;
/* The new element gets the sum of the two least incidence elements. */
@@ -1666,8 +1662,8 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
Replace the copied top element by the new element and re-order the
queue.
*/
- queue.root[1]=(uchar*) new_huff_el;
- queue_replaced(&queue);
+ queue_top(&queue)= (uchar*) new_huff_el;
+ queue_replace_top(&queue);
}
huff_tree->root=(HUFF_ELEMENT*) queue.root[1];
huff_tree->bytes_packed=bytes_packed+(bits_packed+7)/8;
@@ -1798,8 +1794,7 @@ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
Make a priority queue from the queue. Construct its index so that we
have a partially ordered tree.
*/
- for (i=(found+1)/2 ; i > 0 ; i--)
- _downheap(&queue,i);
+ queue_fix(&queue);
/* The Huffman algorithm. */
for (i=0 ; i < found-1 ; i++)
@@ -1813,12 +1808,9 @@ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
incidence). Popping from a priority queue includes a re-ordering
of the queue, to get the next least incidence element to the top.
*/
- a= (my_off_t*) queue_remove(&queue, 0);
- /*
- Copy the next least incidence element. The queue implementation
- reserves root[0] for temporary purposes. root[1] is the top.
- */
- b= (my_off_t*) queue.root[1];
+ a= (my_off_t*) queue_remove_top(&queue);
+ /* Copy the next least incidence element. */
+ b= (my_off_t*) queue_top(&queue);
/* Create a new element in a local (automatic) buffer. */
new_huff_el= element_buffer + i;
/* The new element gets the sum of the two least incidence elements. */
@@ -1838,8 +1830,8 @@ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
queue. This successively replaces the references to counts by
references to HUFF_ELEMENTs.
*/
- queue.root[1]=(uchar*) new_huff_el;
- queue_replaced(&queue);
+ queue_top(&queue)= (uchar*) new_huff_el;
+ queue_replace_top(&queue);
}
DBUG_RETURN(bytes_packed+(bits_packed+7)/8);
}
diff --git a/storage/maria/tablockman.c b/storage/maria/tablockman.c
index 1bb8889aaa7..eb494a8d19b 100644
--- a/storage/maria/tablockman.c
+++ b/storage/maria/tablockman.c
@@ -607,7 +607,7 @@ void tablockman_init(TABLOCKMAN *lm, loid_to_tlo_func *func, uint timeout)
lm->loid_to_tlo= func;
lm->lock_timeout= timeout;
pthread_mutex_init(& lm->pool_mutex, MY_MUTEX_INIT_FAST);
- my_getsystime(); /* ensure that my_getsystime() is initialized */
+ my_interval_timer(); /* ensure that my_interval_timer() is initialized */
}
void tablockman_destroy(TABLOCKMAN *lm)
diff --git a/storage/maria/unittest/ma_loghandler_examples.c b/storage/maria/unittest/ma_loghandler_examples.c
index 0c11a3b9a8e..cd5d927587a 100644
--- a/storage/maria/unittest/ma_loghandler_examples.c
+++ b/storage/maria/unittest/ma_loghandler_examples.c
@@ -59,6 +59,9 @@ void translog_example_table_init()
i < LOGREC_NUMBER_OF_TYPES;
i++)
log_record_type_descriptor[i].rclass= LOGRECTYPE_NOT_ALLOWED;
+#ifndef DBUG_OFF
+ check_translog_description_table(LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE);
+#endif
}
diff --git a/storage/maria/unittest/ma_test_all-t b/storage/maria/unittest/ma_test_all-t
index 4d72c6dfadf..e66d269ab93 100755
--- a/storage/maria/unittest/ma_test_all-t
+++ b/storage/maria/unittest/ma_test_all-t
@@ -283,6 +283,7 @@ sub run_check_tests
["-p -B --key_length=480","-sm"],
["--checksum --unique","-se"],
["--unique","-se"],
+ ["--rows-no-data", "-s"],
["--key_multiple -N -S","-sm"],
["--key_multiple -a -p --key_length=480","-sm"],
["--key_multiple -a -B --key_length=480","-sm"],
diff --git a/storage/maria/unittest/trnman-t.c b/storage/maria/unittest/trnman-t.c
index 43cf982a7f2..92919b37152 100644
--- a/storage/maria/unittest/trnman-t.c
+++ b/storage/maria/unittest/trnman-t.c
@@ -75,7 +75,7 @@ pthread_handler_t test_trnman(void *arg)
void run_test(const char *test, pthread_handler handler, int n, int m)
{
pthread_t *threads;
- ulonglong now= my_getsystime();
+ ulonglong now= microsecond_interval_timer();
int i;
litmus= 0;
@@ -97,8 +97,8 @@ void run_test(const char *test, pthread_handler handler, int n, int m)
}
for (i= 0 ; i < n ; i++)
pthread_join(threads[i], 0);
- now= my_getsystime()-now;
- ok(litmus == 0, "Tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus);
+ now= microsecond_interval_timer() - now;
+ ok(litmus == 0, "Tested %s in %g secs (%d)", test, ((double)now)/1e6, litmus);
my_free((void*)threads, MYF(0));
}
@@ -162,10 +162,10 @@ int main(int argc __attribute__((unused)), char **argv)
diag("mallocs: %d", trnman_allocated_transactions);
{
- ulonglong now= my_getsystime();
+ ulonglong now= microsecond_interval_timer();
trnman_destroy();
- now= my_getsystime()-now;
- diag("trnman_destroy: %g", ((double)now)/1e7);
+ now= microsecond_interval_timer() - now;
+ diag("trnman_destroy: %g", ((double)now)/1e6);
}
pthread_mutex_destroy(&rt_mutex);
diff --git a/storage/myisam/ft_boolean_search.c b/storage/myisam/ft_boolean_search.c
index 3a6368f338d..dc7eb763b77 100644
--- a/storage/myisam/ft_boolean_search.c
+++ b/storage/myisam/ft_boolean_search.c
@@ -482,16 +482,18 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
static void _ftb_init_index_search(FT_INFO *ftb)
{
- int i;
+ uint i;
FTB_WORD *ftbw;
if (ftb->state == UNINITIALIZED || ftb->keynr == NO_SUCH_KEY)
return;
ftb->state=INDEX_SEARCH;
- for (i=ftb->queue.elements; i; i--)
+ for (i= queue_last_element(&ftb->queue);
+ (int) i >= (int) queue_first_element(&ftb->queue);
+ i--)
{
- ftbw=(FTB_WORD *)(ftb->queue.root[i]);
+ ftbw=(FTB_WORD *)(queue_element(&ftb->queue, i));
if (ftbw->flags & FTB_FLAG_TRUNC)
{
@@ -595,12 +597,12 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, uchar *query,
sizeof(void *))))
goto err;
reinit_queue(&ftb->queue, ftb->queue.max_elements, 0, 0,
- (int (*)(void*, uchar*, uchar*))FTB_WORD_cmp, 0);
+ (int (*)(void*, uchar*, uchar*))FTB_WORD_cmp, 0, 0, 0);
for (ftbw= ftb->last_word; ftbw; ftbw= ftbw->prev)
queue_insert(&ftb->queue, (uchar *)ftbw);
ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root,
sizeof(FTB_WORD *)*ftb->queue.elements);
- memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements);
+ memcpy(ftb->list, &queue_top(&ftb->queue), sizeof(FTB_WORD *)*ftb->queue.elements);
my_qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *),
(qsort2_cmp)FTB_WORD_cmp_list, (void*) ftb->charset);
if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC;
@@ -839,7 +841,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
/* update queue */
_ft2_search(ftb, ftbw, 0);
- queue_replaced(& ftb->queue);
+ queue_replace_top(&ftb->queue);
}
ftbe=ftb->root;
diff --git a/storage/myisam/ft_nlq_search.c b/storage/myisam/ft_nlq_search.c
index a00fa8e840c..edca8c25105 100644
--- a/storage/myisam/ft_nlq_search.c
+++ b/storage/myisam/ft_nlq_search.c
@@ -249,12 +249,12 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, uchar *query,
{
QUEUE best;
init_queue(&best,ft_query_expansion_limit,0,0, (queue_compare) &FT_DOC_cmp,
- 0);
+ 0, 0, 0);
tree_walk(&aio.dtree, (tree_walk_action) &walk_and_push,
&best, left_root_right);
while (best.elements)
{
- my_off_t docid=((FT_DOC *)queue_remove(& best, 0))->dpos;
+ my_off_t docid= ((FT_DOC *)queue_remove_top(&best))->dpos;
if (!(*info->read_record)(info,docid,record))
{
info->update|= HA_STATE_AKTIV;
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index 13d59b63527..e6ad635c885 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -298,6 +298,8 @@ int table2myisam(TABLE *table_arg, MI_KEYDEF **keydef_out,
if (found->flags & BLOB_FLAG)
recinfo_pos->type= FIELD_BLOB;
+ else if (found->type() == MYSQL_TYPE_TIMESTAMP)
+ recinfo_pos->type= FIELD_NORMAL;
else if (found->type() == MYSQL_TYPE_VARCHAR)
recinfo_pos->type= FIELD_VARCHAR;
else if (!(options & HA_OPTION_PACK_RECORD))
@@ -540,6 +542,13 @@ void mi_check_print_warning(HA_CHECK *param, const char *fmt,...)
va_end(args);
}
+/* Return 1 if the user has requested that the query be killed */
+
+my_bool mi_killed_in_mariadb(MI_INFO *info)
+{
+ return (((TABLE*) (info->external_ref))->in_use->killed != 0);
+}
+
}
@@ -711,6 +720,8 @@ int ha_myisam::open(const char *name, int mode, uint test_if_locked)
return (my_errno ? my_errno : -1);
file->s->chst_invalidator= query_cache_invalidate_by_MyISAM_filename_ref;
+ /* Set external_ref, mainly for temporary tables */
+ file->external_ref= (void*) table; // For mi_killed()
if (!table->s->tmp_table) /* No need to perform a check for tmp table */
{
@@ -757,6 +768,16 @@ int ha_myisam::open(const char *name, int mode, uint test_if_locked)
int_table_flags|= HA_HAS_OLD_CHECKSUM;
}
+ /*
+ For static size rows, tell MariaDB that we will access all bytes
+ in the record when writing it. This signals MariaDB to initialize
+ the full row to ensure we don't get any errors from valgrind and
+ that all bytes in the row are properly reset.
+ */
+ if ((file->s->options & HA_OPTION_PACK_RECORD) &&
+ (file->s->has_varchar_fields | file->s->has_null_fields))
+ int_table_flags|= HA_RECORD_MUST_BE_CLEAN_ON_WRITE;
+
for (i= 0; i < table->s->keys; i++)
{
plugin_ref parser= table->key_info[i].parser;
@@ -782,6 +803,8 @@ int ha_myisam::open(const char *name, int mode, uint test_if_locked)
int ha_myisam::close(void)
{
MI_INFO *tmp=file;
+ if (!tmp)
+ return 0;
file=0;
return mi_close(tmp);
}
@@ -1596,7 +1619,15 @@ void ha_myisam::start_bulk_insert(ha_rows rows)
*/
if (file->state->records == 0 && can_enable_indexes &&
(!rows || rows >= MI_MIN_ROWS_TO_DISABLE_INDEXES))
- mi_disable_non_unique_index(file,rows);
+ {
+ if (file->open_flag & HA_OPEN_INTERNAL_TABLE)
+ {
+ file->update|= HA_STATE_CHANGED;
+ mi_clear_all_keys_active(file->s->state.key_map);
+ }
+ else
+ mi_disable_non_unique_index(file,rows);
+ }
else
if (!file->bulk_insert &&
(!rows || rows >= MI_MIN_ROWS_TO_USE_BULK_INSERT))
@@ -1716,6 +1747,48 @@ int ha_myisam::delete_row(const uchar *buf)
return mi_delete(file,buf);
}
+
+C_MODE_START
+
+ICP_RESULT index_cond_func_myisam(void *arg)
+{
+ ha_myisam *h= (ha_myisam*)arg;
+ if (h->end_range)
+ {
+ if (h->compare_key2(h->end_range) > 0)
+ return ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */
+ }
+ return (ICP_RESULT) test(h->pushed_idx_cond->val_int());
+}
+
+C_MODE_END
+
+
+int ha_myisam::index_init(uint idx, bool sorted)
+{
+ active_index=idx;
+ if (pushed_idx_cond_keyno == idx)
+ mi_set_index_cond_func(file, index_cond_func_myisam, this);
+ return 0;
+}
+
+
+int ha_myisam::index_end()
+{
+ active_index=MAX_KEY;
+ //pushed_idx_cond_keyno= MAX_KEY;
+ mi_set_index_cond_func(file, NULL, 0);
+ in_range_check_pushed_down= FALSE;
+ ds_mrr.dsmrr_close();
+ return 0;
+}
+
+int ha_myisam::rnd_end()
+{
+ ds_mrr.dsmrr_close();
+ return 0;
+}
+
int ha_myisam::index_read_map(uchar *buf, const uchar *key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
@@ -1727,7 +1800,14 @@ int ha_myisam::index_read_idx_map(uchar *buf, uint index, const uchar *key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{
- return mi_rkey(file, buf, index, key, keypart_map, find_flag);
+ int res;
+ /* Use the pushed index condition if it matches the index we're scanning */
+ end_range= NULL;
+ if (index == pushed_idx_cond_keyno)
+ mi_set_index_cond_func(file, index_cond_func_myisam, this);
+ res= mi_rkey(file, buf, index, key, keypart_map, find_flag);
+ mi_set_index_cond_func(file, NULL, 0);
+ return res;
}
int ha_myisam::index_next(uchar *buf)
@@ -1804,6 +1884,9 @@ int ha_myisam::info(uint flag)
MI_ISAMINFO misam_info;
char name_buff[FN_REFLEN];
+ if (!table)
+ return 1;
+
(void) mi_status(file,&misam_info,flag);
if (flag & HA_STATUS_VARIABLE)
{
@@ -1821,6 +1904,16 @@ int ha_myisam::info(uint flag)
stats.max_data_file_length= misam_info.max_data_file_length;
stats.max_index_file_length= misam_info.max_index_file_length;
stats.create_time= (ulong) misam_info.create_time;
+ /*
+ We want the value of stats.mrr_length_per_rec to be platform independent.
+ The size of the chunk at the end of the join buffer used for MRR needs
+ is now calculated based on the values passed in the stats structure.
+ The remaining part of the join buffer is used for records. A different
+ number of records in the buffer results in a different number of buffer
+ refills and in a different order of records in the result set.
+ */
+ stats.mrr_length_per_rec= misam_info.reflength + 8; // 8=max(sizeof(void *))
+
ref_length= misam_info.reflength;
share->db_options_in_use= misam_info.options;
stats.block_size= myisam_block_size; /* record block size */
@@ -1876,8 +1969,14 @@ int ha_myisam::extra(enum ha_extra_function operation)
return mi_extra(file, operation, 0);
}
+
int ha_myisam::reset(void)
{
+ pushed_idx_cond= NULL;
+ pushed_idx_cond_keyno= MAX_KEY;
+ in_range_check_pushed_down= FALSE;
+ mi_set_index_cond_func(file, NULL, 0);
+ ds_mrr.dsmrr_close();
return mi_reset(file);
}
@@ -1909,6 +2008,7 @@ int ha_myisam::delete_table(const char *name)
int ha_myisam::external_lock(THD *thd, int lock_type)
{
+ file->external_ref= (void*) table; // For mi_killed()
return mi_lock_database(file, !table->s->tmp_table ?
lock_type : ((lock_type == F_UNLCK) ?
F_UNLCK : F_EXTRA_LCK));
@@ -2157,9 +2257,73 @@ static int myisam_init(void *p)
myisam_hton->create= myisam_create_handler;
myisam_hton->panic= myisam_panic;
myisam_hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES;
+ mi_killed= mi_killed_in_mariadb;
return 0;
}
+/****************************************************************************
+ * MyISAM MRR implementation: use DS-MRR
+ ***************************************************************************/
+
+int ha_myisam::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
+{
+ return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
+}
+
+int ha_myisam::multi_range_read_next(range_id_t *range_info)
+{
+ return ds_mrr.dsmrr_next(range_info);
+}
+
+ha_rows ha_myisam::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *flags, COST_VECT *cost)
+{
+ /*
+ This call is here because there is no location where this->table would
+ already be known.
+ TODO: consider moving it into some per-query initialization call.
+ */
+ ds_mrr.init(this, table);
+ return ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, bufsz,
+ flags, cost);
+}
+
+ha_rows ha_myisam::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost)
+{
+ ds_mrr.init(this, table);
+ return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost);
+}
+
+
+int ha_myisam::multi_range_read_explain_info(uint mrr_mode, char *str,
+ size_t size)
+{
+ return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
+}
+
+/* MyISAM MRR implementation ends */
+
+
+/* Index condition pushdown implementation*/
+
+
+Item *ha_myisam::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
+{
+ pushed_idx_cond_keyno= keyno_arg;
+ pushed_idx_cond= idx_cond_arg;
+ in_range_check_pushed_down= TRUE;
+ if (active_index == pushed_idx_cond_keyno)
+ mi_set_index_cond_func(file, index_cond_func_myisam, this);
+ return NULL;
+}
+
+
struct st_mysql_storage_engine myisam_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h
index e6ad6b03476..f895a5bf449 100644
--- a/storage/myisam/ha_myisam.h
+++ b/storage/myisam/ha_myisam.h
@@ -34,6 +34,10 @@ extern ulong myisam_sort_buffer_size;
extern TYPELIB myisam_recover_typelib;
extern ulong myisam_recover_options;
+C_MODE_START
+ICP_RESULT index_cond_func_myisam(void *arg);
+C_MODE_END
+
class ha_myisam: public handler
{
MI_INFO *file;
@@ -50,11 +54,15 @@ class ha_myisam: public handler
const char *index_type(uint key_number);
const char **bas_ext() const;
ulonglong table_flags() const { return int_table_flags; }
+ int index_init(uint idx, bool sorted);
+ int index_end();
+ int rnd_end();
+
ulong index_flags(uint inx, uint part, bool all_parts) const
{
return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ?
0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
- HA_READ_ORDER | HA_KEYREAD_ONLY);
+ HA_READ_ORDER | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN);
}
uint max_supported_keys() const { return MI_MAX_KEY; }
uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; }
@@ -147,4 +155,25 @@ class ha_myisam: public handler
{
return file;
}
+public:
+ /**
+ * Multi Range Read interface
+ */
+ int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode, HANDLER_BUFFER *buf);
+ int multi_range_read_next(range_id_t *range_info);
+ ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+ ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+ int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size);
+
+ /* Index condition pushdown implementation */
+ Item *idx_cond_push(uint keyno, Item* idx_cond);
+private:
+ DsMrr_impl ds_mrr;
+ friend ICP_RESULT index_cond_func_myisam(void *arg);
};
diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c
index 0e5da184f58..515b506bad4 100644
--- a/storage/myisam/mi_check.c
+++ b/storage/myisam/mi_check.c
@@ -995,9 +995,6 @@ int chk_data_link(HA_CHECK *param, MI_INFO *info, my_bool extend)
if (killed_ptr(param))
goto err2;
switch (info->s->data_file_type) {
- case BLOCK_RECORD:
- DBUG_ASSERT(0); /* Impossible */
- break;
case STATIC_RECORD:
if (my_b_read(&param->read_cache,(uchar*) record,
info->s->base.pack_reclength))
@@ -1215,6 +1212,9 @@ int chk_data_link(HA_CHECK *param, MI_INFO *info, my_bool extend)
link_used+= (block_info.filepos - start_recpos);
used+= (pos-start_recpos);
break;
+ default:
+ DBUG_ASSERT(0); /* Impossible */
+ break;
} /* switch */
if (! got_error)
{
@@ -3280,9 +3280,6 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
}
switch (share->data_file_type) {
- case BLOCK_RECORD:
- DBUG_ASSERT(0); /* Impossible */
- break;
case STATIC_RECORD:
for (;;)
{
@@ -3683,6 +3680,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
record));
DBUG_RETURN(0);
}
+ default:
+ DBUG_ASSERT(0); /* Impossible */
+ break;
}
DBUG_ASSERT(0); /* Impossible */
DBUG_RETURN(1); /* Impossible */
@@ -3720,9 +3720,6 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
if (sort_param->fix_datafile)
{
switch (sort_info->new_data_file_type) {
- case BLOCK_RECORD:
- DBUG_ASSERT(0); /* Impossible */
- break;
case STATIC_RECORD:
if (my_b_write(&info->rec_cache,sort_param->record,
share->base.pack_reclength))
@@ -3796,6 +3793,9 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
sort_param->filepos+=reclength+length;
info->s->state.split++;
break;
+ default:
+ DBUG_ASSERT(0); /* Impossible */
+ break;
}
}
if (sort_param->master)
diff --git a/storage/myisam/mi_extra.c b/storage/myisam/mi_extra.c
index 7b8f5249b3e..2f8af89c27d 100644
--- a/storage/myisam/mi_extra.c
+++ b/storage/myisam/mi_extra.c
@@ -410,6 +410,12 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg)
DBUG_RETURN(error);
} /* mi_extra */
+void mi_set_index_cond_func(MI_INFO *info, index_cond_func_t func,
+ void *func_arg)
+{
+ info->index_cond_func= func;
+ info->index_cond_func_arg= func_arg;
+}
/*
Start/Stop Inserting Duplicates Into a Table, WL#1648.
@@ -467,3 +473,8 @@ int mi_reset(MI_INFO *info)
HA_STATE_PREV_FOUND);
DBUG_RETURN(error);
}
+
+my_bool mi_killed_standalone(MI_INFO *info __attribute__((unused)))
+{
+ return 0; /* Never reports a kill request; 'info' is ignored */
+}
diff --git a/storage/myisam/mi_key.c b/storage/myisam/mi_key.c
index 94f3f34ec58..a3d38269e61 100644
--- a/storage/myisam/mi_key.c
+++ b/storage/myisam/mi_key.c
@@ -31,7 +31,8 @@
set_if_smaller(char_length,length); \
} while(0)
-static int _mi_put_key_in_record(MI_INFO *info,uint keynr,uchar *record);
+static int _mi_put_key_in_record(MI_INFO *info,uint keynr,
+ my_bool unpack_blobs, uchar *record);
/*
Make a intern key from a record
@@ -312,6 +313,9 @@ uint _mi_pack_key(register MI_INFO *info, uint keynr, uchar *key, uchar *old,
_mi_put_key_in_record()
info MyISAM handler
keynr Key number that was used
+ unpack_blobs TRUE <=> Unpack blob columns
+ FALSE <=> Skip them. This is used by index condition
+ pushdown check function
record Store key here
Last read key is in info->lastkey
@@ -325,7 +329,7 @@ uint _mi_pack_key(register MI_INFO *info, uint keynr, uchar *key, uchar *old,
*/
static int _mi_put_key_in_record(register MI_INFO *info, uint keynr,
- uchar *record)
+ my_bool unpack_blobs, uchar *record)
{
reg2 uchar *key;
uchar *pos,*key_end;
@@ -418,16 +422,19 @@ static int _mi_put_key_in_record(register MI_INFO *info, uint keynr,
if (length > keyseg->length || key+length > key_end)
goto err;
#endif
- memcpy(record+keyseg->start+keyseg->bit_start,
- (char*) &blob_ptr,sizeof(char*));
- memcpy(blob_ptr,key,length);
- blob_ptr+=length;
+ if (unpack_blobs)
+ {
+ memcpy(record+keyseg->start+keyseg->bit_start,
+ (char*) &blob_ptr,sizeof(char*));
+ memcpy(blob_ptr,key,length);
+ blob_ptr+=length;
- /* The above changed info->lastkey2. Inform mi_rnext_same(). */
- info->update&= ~HA_STATE_RNEXT_SAME;
+ /* The above changed info->lastkey2. Inform mi_rnext_same(). */
+ info->update&= ~HA_STATE_RNEXT_SAME;
- _mi_store_blob_length(record+keyseg->start,
- (uint) keyseg->bit_start,length);
+ _mi_store_blob_length(record+keyseg->start,
+ (uint) keyseg->bit_start,length);
+ }
key+=length;
}
else if (keyseg->flag & HA_SWAP_KEY)
@@ -471,7 +478,7 @@ int _mi_read_key_record(MI_INFO *info, my_off_t filepos, uchar *buf)
{
if (info->lastinx >= 0)
{ /* Read only key */
- if (_mi_put_key_in_record(info,(uint) info->lastinx,buf))
+ if (_mi_put_key_in_record(info,(uint) info->lastinx, TRUE, buf))
{
mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
@@ -487,6 +494,45 @@ int _mi_read_key_record(MI_INFO *info, my_off_t filepos, uchar *buf)
/*
+ Put the current key tuple into record (blob columns are skipped) and call
+ the index condition check function
+ SYNOPSIS
+ mi_check_index_cond()
+ info MyISAM handler
+ keynr Index we're running a scan on
+ record Record buffer to use (it is assumed that index check function
+ will look for column values there)
+
+ RETURN
+ ICP_ERROR Key could not be unpacked; my_errno= HA_ERR_CRASHED
+ ICP_NO_MATCH Index condition is not satisfied, continue scanning
+ ICP_MATCH Index condition is satisfied
+ ICP_OUT_OF_RANGE Not satisfied, end the scan; my_errno= HA_ERR_END_OF_FILE
+*/
+
+ICP_RESULT mi_check_index_cond(register MI_INFO *info, uint keynr,
+ uchar *record)
+{
+ ICP_RESULT res;
+ if (_mi_put_key_in_record(info, keynr, FALSE, record))
+ {
+ /* Impossible case; can only happen if there is a bug in the code */
+ mi_print_error(info->s, HA_ERR_CRASHED);
+ info->lastpos= HA_OFFSET_ERROR; /* No active record */
+ my_errno= HA_ERR_CRASHED;
+ res= ICP_ERROR;
+ }
+ else if ((res= info->index_cond_func(info->index_cond_func_arg)) ==
+ ICP_OUT_OF_RANGE)
+ {
+ /* The callback reported that we got beyond the end of the scanned range */
+ info->lastpos= HA_OFFSET_ERROR; /* No active record */
+ my_errno= HA_ERR_END_OF_FILE;
+ }
+ return res;
+}
+
+/*
Retrieve auto_increment info
SYNOPSIS
diff --git a/storage/myisam/mi_locking.c b/storage/myisam/mi_locking.c
index 97011831af8..17c1fc83f13 100644
--- a/storage/myisam/mi_locking.c
+++ b/storage/myisam/mi_locking.c
@@ -239,7 +239,7 @@ int mi_lock_database(MI_INFO *info, int lock_type)
break; /* Impossible */
}
}
-#ifdef __WIN__
+#ifdef _WIN32
else
{
/*
@@ -521,11 +521,11 @@ int _mi_writeinfo(register MI_INFO *info, uint operation)
share->state.update_count= info->last_loop= ++info->this_loop;
if ((error=mi_state_info_write(share->kfile, &share->state, 1)))
olderror=my_errno;
-#ifdef __WIN__
+#ifdef _WIN32
if (myisam_flush)
{
- _commit(share->kfile);
- _commit(info->dfile);
+ my_sync(share->kfile,0);
+ my_sync(info->dfile,0);
}
#endif
}
diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c
index 9d69f8622e8..89a0ef61081 100644
--- a/storage/myisam/mi_open.c
+++ b/storage/myisam/mi_open.c
@@ -79,14 +79,14 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
{
int lock_error,kfile,open_mode,save_errno,have_rtree=0, realpath_err;
uint i,j,len,errpos,head_length,base_pos,offset,info_length,keys,
- key_parts,unique_key_parts,fulltext_keys,uniques;
+ key_parts,unique_key_parts,base_key_parts,fulltext_keys,uniques;
char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN],
data_name[FN_REFLEN];
uchar *disk_cache, *disk_pos, *end_pos;
MI_INFO info,*m_info,*old_info;
MYISAM_SHARE share_buff,*share;
- ulong rec_per_key_part[HA_MAX_POSSIBLE_KEY*HA_MAX_KEY_SEG];
- my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE];
+ ulong *rec_per_key_part= 0;
+ my_off_t *key_root, *key_del;
ulonglong max_key_file_length, max_data_file_length;
DBUG_ENTER("mi_open");
@@ -111,9 +111,6 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
{
share= &share_buff;
bzero((uchar*) &share_buff,sizeof(share_buff));
- share_buff.state.rec_per_key_part=rec_per_key_part;
- share_buff.state.key_root=key_root;
- share_buff.state.key_del=key_del;
share_buff.key_cache= multi_key_cache_search((uchar*) name_buff,
strlen(name_buff),
dflt_key_cache);
@@ -203,7 +200,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
keys= (uint) share->state.header.keys;
uniques= (uint) share->state.header.uniques;
fulltext_keys= (uint) share->state.header.fulltext_keys;
- key_parts= mi_uint2korr(share->state.header.key_parts);
+ base_key_parts= key_parts= mi_uint2korr(share->state.header.key_parts);
unique_key_parts= mi_uint2korr(share->state.header.unique_key_parts);
if (len != MI_STATE_INFO_SIZE)
{
@@ -213,7 +210,12 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
}
share->state_diff_length=len-MI_STATE_INFO_SIZE;
- mi_state_info_read(disk_cache, &share->state);
+ if (!mi_state_info_read(disk_cache, &share->state))
+ goto err;
+ rec_per_key_part= share->state.rec_per_key_part;
+ key_root= share->state.key_root;
+ key_del= share->state.key_del;
+
len= mi_uint2korr(share->state.header.base_info_length);
if (len != MI_BASE_INFO_SIZE)
{
@@ -296,7 +298,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
if (!my_multi_malloc(MY_WME,
&share,sizeof(*share),
- &share->state.rec_per_key_part,sizeof(long)*key_parts,
+ &share->state.rec_per_key_part,
+ sizeof(long)*base_key_parts,
&share->keyinfo,keys*sizeof(MI_KEYDEF),
&share->uniqueinfo,uniques*sizeof(MI_UNIQUEDEF),
&share->keyparts,
@@ -320,7 +323,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
errpos=4;
*share=share_buff;
memcpy((char*) share->state.rec_per_key_part,
- (char*) rec_per_key_part, sizeof(long)*key_parts);
+ (char*) rec_per_key_part, sizeof(long)*base_key_parts);
memcpy((char*) share->state.key_root,
(char*) key_root, sizeof(my_off_t)*keys);
memcpy((char*) share->state.key_del,
@@ -666,6 +669,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
pthread_mutex_unlock(&THR_LOCK_myisam);
bzero(info.buff, share->base.max_key_block_length * 2);
+ my_free(rec_per_key_part, MYF(MY_ALLOW_ZERO_PTR));
if (myisam_log_file >= 0)
{
@@ -695,6 +699,7 @@ err:
case 3:
if (! lock_error)
VOID(my_lock(kfile, F_UNLCK, 0L, F_TO_EOF, MYF(MY_SEEK_NOT_DONE)));
+ my_free(rec_per_key_part, MYF(MY_ALLOW_ZERO_PTR));
/* fall through */
case 2:
my_afree(disk_cache);
@@ -982,6 +987,16 @@ uchar *mi_state_info_read(uchar *ptr, MI_STATE_INFO *state)
ptr+= state->state_diff_length;
+ if (!state->rec_per_key_part)
+ {
+ if (!my_multi_malloc(MY_WME,
+ &state->rec_per_key_part,sizeof(long)*key_parts,
+ &state->key_root, keys*sizeof(my_off_t),
+ &state->key_del, key_blocks*sizeof(my_off_t),
+ NullS))
+ return(0);
+ }
+
for (i=0; i < keys; i++)
{
state->key_root[i]= mi_sizekorr(ptr); ptr +=8;
diff --git a/storage/myisam/mi_rkey.c b/storage/myisam/mi_rkey.c
index f20b0366683..dbe4d59ee90 100644
--- a/storage/myisam/mi_rkey.c
+++ b/storage/myisam/mi_rkey.c
@@ -29,6 +29,7 @@ int mi_rkey(MI_INFO *info, uchar *buf, int inx, const uchar *key,
MI_KEYDEF *keyinfo;
HA_KEYSEG *last_used_keyseg;
uint pack_key_length, use_key_length, nextflag;
+ ICP_RESULT res= ICP_NO_MATCH;
DBUG_ENTER("mi_rkey");
DBUG_PRINT("enter", ("base: 0x%lx buf: 0x%lx inx: %d search_flag: %d",
(long) info, (long) buf, inx, search_flag));
@@ -87,6 +88,7 @@ int mi_rkey(MI_INFO *info, uchar *buf, int inx, const uchar *key,
my_errno=HA_ERR_CRASHED;
if (share->concurrent_insert)
rw_unlock(&share->key_root_lock[inx]);
+ fast_mi_writeinfo(info);
goto err;
}
break;
@@ -105,80 +107,113 @@ int mi_rkey(MI_INFO *info, uchar *buf, int inx, const uchar *key,
saved the current data_file_length. Concurrent inserts always go
to the end of the file. So we can test if the found key
references a new record.
+
+ If we are searching for a partial key (or using >, >=, < or <=) and
+ the data is outside of the data file, we need to continue searching
+ for the first key inside the data file.
+
+ We do also continue searching if an index condition check function
+ is available.
*/
- if (info->lastpos >= info->state->data_file_length)
+ while ((info->lastpos >= info->state->data_file_length &&
+ (search_flag != HA_READ_KEY_EXACT ||
+ last_used_keyseg != keyinfo->seg + keyinfo->keysegs)) ||
+ (info->index_cond_func &&
+ (res= mi_check_index_cond(info, inx, buf)) == ICP_NO_MATCH))
{
- /* The key references a concurrently inserted record. */
+ uint not_used[2];
+ /*
+ Skip rows that are inserted by other threads since we got a lock
+ or that fail the index condition. The former can only happen if
+ we are not searching after a full length exact key, because the
+ keys are sorted according to position
+ */
+ if (_mi_search_next(info, keyinfo, info->lastkey,
+ info->lastkey_length,
+ myisam_readnext_vec[search_flag],
+ info->s->state.key_root[inx]))
+ {
+ info->lastpos= HA_OFFSET_ERROR;
+ break;
+ }
+ /*
+ Check that the found key does still match the search.
+ _mi_search_next() delivers the next key regardless of its
+ value.
+ */
if (search_flag == HA_READ_KEY_EXACT &&
- last_used_keyseg == keyinfo->seg + keyinfo->keysegs)
+ ha_key_cmp(keyinfo->seg, key_buff, info->lastkey, use_key_length,
+ SEARCH_FIND, not_used))
{
- /* Simply ignore the key if it matches exactly. (Bug #29838) */
my_errno= HA_ERR_KEY_NOT_FOUND;
info->lastpos= HA_OFFSET_ERROR;
+ break;
}
- else
+ /*
+ If we are at the last key on the key page, allow writers to
+ access the index.
+ */
+ if (info->int_keypos >= info->int_maxpos &&
+ mi_yield_and_check_if_killed(info, inx))
{
- /*
- If searching for a partial key (or using >, >=, < or <=) and
- the data is outside of the data file, we need to continue
- searching for the first key inside the data file.
- */
- do
- {
- uint not_used[2];
- /*
- Skip rows that are inserted by other threads since we got
- a lock. Note that this can only happen if we are not
- searching after a full length exact key, because the keys
- are sorted according to position.
- */
- if (_mi_search_next(info, keyinfo, info->lastkey,
- info->lastkey_length,
- myisam_readnext_vec[search_flag],
- info->s->state.key_root[inx]))
- break; /* purecov: inspected */
- /*
- Check that the found key does still match the search.
- _mi_search_next() delivers the next key regardless of its
- value.
- */
- if (search_flag == HA_READ_KEY_EXACT &&
- ha_key_cmp(keyinfo->seg, key_buff, info->lastkey,
- use_key_length, SEARCH_FIND, not_used))
- {
- /* purecov: begin inspected */
- my_errno= HA_ERR_KEY_NOT_FOUND;
- info->lastpos= HA_OFFSET_ERROR;
- break;
- /* purecov: end */
- }
- } while (info->lastpos >= info->state->data_file_length);
+ buf= 0; /* Aborted by user: fast abort */
+ break; /* Keep lastpos/my_errno set by the kill check; stop searching */
}
}
+ if (res == ICP_OUT_OF_RANGE)
+ {
+ /* Change error from HA_ERR_END_OF_FILE */
+ DBUG_ASSERT(info->lastpos == HA_OFFSET_ERROR);
+ my_errno= HA_ERR_KEY_NOT_FOUND;
+ }
+ /*
+ Error if no row found within the data file. (Bug #29838)
+ Do not overwrite my_errno if already at HA_OFFSET_ERROR.
+ */
+ if (info->lastpos != HA_OFFSET_ERROR &&
+ info->lastpos >= info->state->data_file_length)
+ {
+ info->lastpos= HA_OFFSET_ERROR;
+ my_errno= HA_ERR_KEY_NOT_FOUND;
+ }
+ }
+ else
+ {
+ DBUG_ASSERT(info->lastpos == HA_OFFSET_ERROR); /* Compare, don't assign */
+ }
}
if (share->concurrent_insert)
rw_unlock(&share->key_root_lock[inx]);
- /* Calculate length of the found key; Used by mi_rnext_same */
- if ((keyinfo->flag & HA_VAR_LENGTH_KEY) && last_used_keyseg &&
- info->lastpos != HA_OFFSET_ERROR)
- info->last_rkey_length= _mi_keylength_part(keyinfo, info->lastkey,
- last_used_keyseg);
- else
- info->last_rkey_length= pack_key_length;
-
- /* Check if we don't want to have record back, only error message */
- if (!buf)
- DBUG_RETURN(info->lastpos == HA_OFFSET_ERROR ? my_errno : 0);
+ info->last_rkey_length= pack_key_length;
- if (!(*info->read_record)(info,info->lastpos,buf))
+ if (info->lastpos == HA_OFFSET_ERROR) /* No such record */
{
- info->update|= HA_STATE_AKTIV; /* Record is read */
- DBUG_RETURN(0);
+ fast_mi_writeinfo(info);
+ if (!buf)
+ DBUG_RETURN(my_errno);
}
+ else
+ {
+ /* Calculate length of the found key; Used by mi_rnext_same */
+ if ((keyinfo->flag & HA_VAR_LENGTH_KEY) && last_used_keyseg)
+ info->last_rkey_length= _mi_keylength_part(keyinfo, info->lastkey,
+ last_used_keyseg);
- info->lastpos = HA_OFFSET_ERROR; /* Didn't find key */
+ /* Check if we don't want to have record back, only error message */
+ if (!buf)
+ {
+ fast_mi_writeinfo(info);
+ DBUG_RETURN(0);
+ }
+ if (!(*info->read_record)(info,info->lastpos,buf))
+ {
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ DBUG_RETURN(0);
+ }
+ DBUG_PRINT("error", ("Didn't find row. Error %d", my_errno));
+ info->lastpos= HA_OFFSET_ERROR; /* Didn't find row */
+ }
/* Store last used key as a base for read next */
memcpy(info->lastkey,key_buff,pack_key_length);
@@ -191,3 +226,36 @@ int mi_rkey(MI_INFO *info, uchar *buf, int inx, const uchar *key,
err:
DBUG_RETURN(my_errno);
} /* _mi_rkey */
+
+
+/*
+ Yield to possible other writers during an index scan.
+ Check also if we got killed by the user and if yes, set
+ my_errno to HA_ERR_ABORTED_BY_USER and info->lastpos to HA_OFFSET_ERROR
+
+ return 0 ok
+ return 1 Query has been requested to be killed
+*/
+
+my_bool mi_yield_and_check_if_killed(MI_INFO *info, int inx)
+{
+ MYISAM_SHARE *share;
+ if (mi_killed(info))
+ {
+ /* purecov: begin tested */
+ info->lastpos= HA_OFFSET_ERROR;
+ /* Set error that we were aborted by kill from application */
+ my_errno= HA_ERR_ABORTED_BY_USER;
+ return 1;
+ /* purecov: end */
+
+ }
+
+ if ((share= info->s)->concurrent_insert)
+ {
+ /* Give writers a chance to access index: drop and re-take the read lock */
+ rw_unlock(&share->key_root_lock[inx]);
+ rw_rdlock(&share->key_root_lock[inx]);
+ }
+ return 0;
+}
diff --git a/storage/myisam/mi_rnext.c b/storage/myisam/mi_rnext.c
index b9bbda3cacb..79db5fb992d 100644
--- a/storage/myisam/mi_rnext.c
+++ b/storage/myisam/mi_rnext.c
@@ -28,6 +28,7 @@ int mi_rnext(MI_INFO *info, uchar *buf, int inx)
{
int error,changed;
uint flag;
+ ICP_RESULT icp_res= ICP_MATCH;
uint update_mask= HA_STATE_NEXT_FOUND;
DBUG_ENTER("mi_rnext");
@@ -96,34 +97,53 @@ int mi_rnext(MI_INFO *info, uchar *buf, int inx)
}
}
- if (info->s->concurrent_insert)
+ if (!error)
{
- if (!error)
+ while ((info->s->concurrent_insert &&
+ info->lastpos >= info->state->data_file_length) ||
+ (info->index_cond_func &&
+ (icp_res= mi_check_index_cond(info, inx, buf)) == ICP_NO_MATCH))
{
- while (info->lastpos >= info->state->data_file_length)
+ /*
+ If we are at the last key on the key page, allow writers to
+ access the index.
+ */
+ if (info->int_keypos >= info->int_maxpos &&
+ mi_yield_and_check_if_killed(info, inx))
{
- /* Skip rows inserted by other threads since we got a lock */
- if ((error=_mi_search_next(info,info->s->keyinfo+inx,
- info->lastkey,
- info->lastkey_length,
- SEARCH_BIGGER,
- info->s->state.key_root[inx])))
- break;
+ error= 1;
+ break;
}
+
+ /*
+ Skip rows that are either inserted by other threads since
+ we got a lock or do not match pushed index conditions
+ */
+ if ((error=_mi_search_next(info,info->s->keyinfo+inx,
+ info->lastkey,
+ info->lastkey_length,
+ SEARCH_BIGGER,
+ info->s->state.key_root[inx])))
+ break;
}
- rw_unlock(&info->s->key_root_lock[inx]);
}
+
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->key_root_lock[inx]);
+
/* Don't clear if database-changed */
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
info->update|= update_mask;
- if (error)
+ if (error || icp_res != ICP_MATCH)
{
+ fast_mi_writeinfo(info);
if (my_errno == HA_ERR_KEY_NOT_FOUND)
my_errno=HA_ERR_END_OF_FILE;
}
else if (!buf)
{
+ fast_mi_writeinfo(info);
DBUG_RETURN(info->lastpos==HA_OFFSET_ERROR ? my_errno : 0);
}
else if (!(*info->read_record)(info,info->lastpos,buf))
diff --git a/storage/myisam/mi_rnext_same.c b/storage/myisam/mi_rnext_same.c
index 1892fe3e1e0..3aa7e93dfd5 100644
--- a/storage/myisam/mi_rnext_same.c
+++ b/storage/myisam/mi_rnext_same.c
@@ -29,6 +29,7 @@ int mi_rnext_same(MI_INFO *info, uchar *buf)
int error;
uint inx,not_used[2];
MI_KEYDEF *keyinfo;
+ ICP_RESULT icp_res= ICP_MATCH;
DBUG_ENTER("mi_rnext_same");
if ((int) (inx=info->lastinx) < 0 || info->lastpos == HA_OFFSET_ERROR)
@@ -63,6 +64,17 @@ int mi_rnext_same(MI_INFO *info, uchar *buf)
}
for (;;)
{
+ /*
+ If we are at the last key on the key page, allow writers to
+ access the index.
+ */
+ if (info->int_keypos >= info->int_maxpos &&
+ mi_yield_and_check_if_killed(info, inx))
+ {
+ error=1;
+ break;
+ }
+
if ((error=_mi_search_next(info,keyinfo,info->lastkey,
info->lastkey_length,SEARCH_BIGGER,
info->s->state.key_root[inx])))
@@ -75,24 +87,33 @@ int mi_rnext_same(MI_INFO *info, uchar *buf)
info->lastpos= HA_OFFSET_ERROR;
break;
}
- /* Skip rows that are inserted by other threads since we got a lock */
- if (info->lastpos < info->state->data_file_length)
+ /*
+ Skip
+ - rows that are inserted by other threads since we got a lock
+ - rows that don't match index condition
+ */
+ if (info->lastpos < info->state->data_file_length &&
+ (!info->index_cond_func ||
+ (icp_res= mi_check_index_cond(info, inx, buf)) != ICP_NO_MATCH))
break;
}
}
if (info->s->concurrent_insert)
rw_unlock(&info->s->key_root_lock[inx]);
+
/* Don't clear if database-changed */
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
info->update|= HA_STATE_NEXT_FOUND | HA_STATE_RNEXT_SAME;
- if (error)
+ if (error || icp_res != ICP_MATCH)
{
+ fast_mi_writeinfo(info);
if (my_errno == HA_ERR_KEY_NOT_FOUND)
my_errno=HA_ERR_END_OF_FILE;
}
else if (!buf)
{
+ fast_mi_writeinfo(info);
DBUG_RETURN(info->lastpos==HA_OFFSET_ERROR ? my_errno : 0);
}
else if (!(*info->read_record)(info,info->lastpos,buf))
diff --git a/storage/myisam/mi_rprev.c b/storage/myisam/mi_rprev.c
index d1407012590..040b08b428c 100644
--- a/storage/myisam/mi_rprev.c
+++ b/storage/myisam/mi_rprev.c
@@ -27,6 +27,7 @@ int mi_rprev(MI_INFO *info, uchar *buf, int inx)
int error,changed;
register uint flag;
MYISAM_SHARE *share=info->s;
+ ICP_RESULT icp_res= ICP_MATCH;
DBUG_ENTER("mi_rprev");
if ((inx = _mi_check_index(info,inx)) < 0)
@@ -51,31 +52,55 @@ int mi_rprev(MI_INFO *info, uchar *buf, int inx)
error=_mi_search(info,share->keyinfo+inx,info->lastkey,
USE_WHOLE_KEY, flag, share->state.key_root[inx]);
- if (share->concurrent_insert)
+ if (!error)
{
- if (!error)
+ my_off_t cur_keypage= info->last_keypage;
+ while ((share->concurrent_insert &&
+ info->lastpos >= info->state->data_file_length) ||
+ (info->index_cond_func &&
+ (icp_res= mi_check_index_cond(info, inx, buf)) == ICP_NO_MATCH))
{
- while (info->lastpos >= info->state->data_file_length)
+ /*
+ If we are at the last (i.e. first?) key on the key page,
+ allow writers to access the index.
+ */
+ if (info->last_keypage != cur_keypage)
{
- /* Skip rows that are inserted by other threads since we got a lock */
- if ((error=_mi_search_next(info,share->keyinfo+inx,info->lastkey,
- info->lastkey_length,
- SEARCH_SMALLER,
- share->state.key_root[inx])))
- break;
+ cur_keypage= info->last_keypage;
+ if (mi_yield_and_check_if_killed(info, inx))
+ {
+ error= 1;
+ break;
+ }
}
+
+ /*
+ Skip rows that are either inserted by other threads since
+ we got a lock or do not match pushed index conditions
+ */
+ if ((error=_mi_search_next(info,share->keyinfo+inx,info->lastkey,
+ info->lastkey_length,
+ SEARCH_SMALLER,
+ share->state.key_root[inx])))
+ break;
}
- rw_unlock(&share->key_root_lock[inx]);
}
+
+ if (share->concurrent_insert)
+ rw_unlock(&share->key_root_lock[inx]);
+
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
info->update|= HA_STATE_PREV_FOUND;
- if (error)
+
+ if (error || icp_res != ICP_MATCH)
{
+ fast_mi_writeinfo(info);
if (my_errno == HA_ERR_KEY_NOT_FOUND)
my_errno=HA_ERR_END_OF_FILE;
}
else if (!buf)
{
+ fast_mi_writeinfo(info);
DBUG_RETURN(info->lastpos==HA_OFFSET_ERROR ? my_errno : 0);
}
else if (!(*info->read_record)(info,info->lastpos,buf))
diff --git a/storage/myisam/mi_search.c b/storage/myisam/mi_search.c
index 1f8399f25ca..13ee552e7ec 100644
--- a/storage/myisam/mi_search.c
+++ b/storage/myisam/mi_search.c
@@ -89,7 +89,10 @@ int _mi_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
flag=(*keyinfo->bin_search)(info,keyinfo,buff,key,key_len,nextflag,
&keypos,lastkey, &last_key);
if (flag == MI_FOUND_WRONG_KEY)
- DBUG_RETURN(-1);
+ {
+ my_errno= HA_ERR_CRASHED;
+ goto err;
+ }
nod_flag=mi_test_if_nod(buff);
maxpos=buff+mi_getint(buff)-1;
diff --git a/storage/myisam/mi_static.c b/storage/myisam/mi_static.c
index 27485e101ff..2d297ddc907 100644
--- a/storage/myisam/mi_static.c
+++ b/storage/myisam/mi_static.c
@@ -41,6 +41,7 @@ my_off_t myisam_max_temp_length= MAX_FILE_SIZE;
ulong myisam_bulk_insert_tree_size=8192*1024;
ulong myisam_data_pointer_size=4;
ulonglong myisam_mmap_size= SIZE_T_MAX, myisam_mmap_used= 0;
+my_bool (*mi_killed)(MI_INFO *)= mi_killed_standalone;
static int always_valid(const char *filename __attribute__((unused)))
{
diff --git a/storage/myisam/mi_test_all.sh b/storage/myisam/mi_test_all.sh
index 5989d9cfaf0..c6bc686e885 100755
--- a/storage/myisam/mi_test_all.sh
+++ b/storage/myisam/mi_test_all.sh
@@ -5,6 +5,7 @@
valgrind="valgrind --alignment=8 --leak-check=yes"
silent="-s"
+rm -f test1.TMD
if test -f mi_test1$MACH ; then suffix=$MACH ; else suffix=""; fi
./mi_test1$suffix $silent
diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h
index f6f58236423..bf7e4d1ce41 100644
--- a/storage/myisam/myisamdef.h
+++ b/storage/myisam/myisamdef.h
@@ -25,6 +25,10 @@
#include <my_no_pthread.h>
#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#if defined(my_write) && !defined(MAP_TO_USE_RAID)
/* undef map from my_nosys; We need test-if-disk full */
#undef my_write
@@ -235,6 +239,7 @@ typedef struct st_mi_isam_share
rw_lock_t mmap_lock;
} MYISAM_SHARE;
+typedef ICP_RESULT (*index_cond_func_t)(void *param);
struct st_myisam_info
{
@@ -247,6 +252,7 @@ struct st_myisam_info
DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */
MEM_ROOT ft_memroot; /* used by the parser */
MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */
+ void *external_ref; /* For MariaDB TABLE */
char *filename; /* parameter to open filename */
uchar *buff, /* Temp area for key */
*lastkey, *lastkey2; /* Last used search key */
@@ -304,6 +310,8 @@ struct st_myisam_info
my_bool page_changed;
/* If info->buff has to be reread for rnext */
my_bool buff_used;
+ index_cond_func_t index_cond_func; /* Index condition function */
+ void *index_cond_func_arg; /* parameter for the func */
#ifdef THREAD
THR_LOCK_DATA lock;
#endif
@@ -430,6 +438,7 @@ extern uint NEAR myisam_read_vec[], NEAR myisam_readnext_vec[];
extern uint myisam_quick_table_bits;
extern File myisam_log_file;
extern ulong myisam_pid;
+extern my_bool (*mi_killed)(MI_INFO *);
/* This is used by _mi_calc_xxx_key_length och _mi_store_key */
@@ -590,6 +599,8 @@ extern ulonglong mi_safe_mul(ulonglong a, ulonglong b);
extern int _mi_ft_update(MI_INFO *info, uint keynr, uchar *keybuf,
const uchar *oldrec, const uchar *newrec,
my_off_t pos);
+extern my_bool mi_yield_and_check_if_killed(MI_INFO *info, int inx);
+extern my_bool mi_killed_standalone(MI_INFO *);
struct st_sort_info;
@@ -648,9 +659,7 @@ enum myisam_log_commands
#define fast_mi_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _mi_writeinfo((INFO),0)
#define fast_mi_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _mi_readinfo((INFO),F_RDLCK,1)
-#ifdef __cplusplus
-extern "C" {
-#endif
+
extern uint _mi_get_block_info(MI_BLOCK_INFO *, File, my_off_t);
extern uint _mi_rec_pack(MI_INFO *info, uchar *to, const uchar *from);
extern uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
@@ -726,6 +735,7 @@ my_bool mi_dynmap_file(MI_INFO *info, my_off_t size);
int mi_munmap_file(MI_INFO *info);
void mi_remap_file(MI_INFO *info, my_off_t size);
+ICP_RESULT mi_check_index_cond(register MI_INFO *info, uint keynr, uchar *record);
/* Functions needed by mi_check */
int killed_ptr(HA_CHECK *param);
void mi_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...));
@@ -734,6 +744,8 @@ void mi_check_print_info _VARARGS((HA_CHECK *param, const char *fmt, ...));
#ifdef THREAD
pthread_handler_t thr_find_all_keys(void *arg);
#endif
+extern void mi_set_index_cond_func(MI_INFO *info, index_cond_func_t func,
+ void *func_arg);
int flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file,
ulonglong *dirty_part_map);
#ifdef __cplusplus
diff --git a/storage/myisam/myisampack.c b/storage/myisam/myisampack.c
index f1b9e04c474..6c63f5afaef 100644
--- a/storage/myisam/myisampack.c
+++ b/storage/myisam/myisampack.c
@@ -576,7 +576,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
Create a global priority queue in preparation for making
temporary Huffman trees.
*/
- if (init_queue(&queue,256,0,0,compare_huff_elements,0))
+ if (init_queue(&queue, 256, 0, 0, compare_huff_elements, 0, 0, 0))
goto err;
/*
@@ -1511,7 +1511,7 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
if (queue.max_elements < found)
{
delete_queue(&queue);
- if (init_queue(&queue,found,0,0,compare_huff_elements,0))
+ if (init_queue(&queue,found, 0, 0, compare_huff_elements, 0, 0, 0))
return -1;
}
@@ -1615,8 +1615,7 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
Make a priority queue from the queue. Construct its index so that we
have a partially ordered tree.
*/
- for (i=found/2 ; i > 0 ; i--)
- _downheap(&queue,i);
+ queue_fix(&queue);
/* The Huffman algorithm. */
bytes_packed=0; bits_packed=0;
@@ -1627,12 +1626,9 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
Popping from a priority queue includes a re-ordering of the queue,
to get the next least incidence element to the top.
*/
- a=(HUFF_ELEMENT*) queue_remove(&queue,0);
- /*
- Copy the next least incidence element. The queue implementation
- reserves root[0] for temporary purposes. root[1] is the top.
- */
- b=(HUFF_ELEMENT*) queue.root[1];
+ a=(HUFF_ELEMENT*) queue_remove_top(&queue);
+ /* Copy the next least incidence element */
+ b=(HUFF_ELEMENT*) queue_top(&queue);
/* Get a new element from the element buffer. */
new_huff_el=huff_tree->element_buffer+found+i;
/* The new element gets the sum of the two least incidence elements. */
@@ -1654,8 +1650,8 @@ static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
Replace the copied top element by the new element and re-order the
queue.
*/
- queue.root[1]=(uchar*) new_huff_el;
- queue_replaced(&queue);
+ queue_top(&queue)= (uchar*) new_huff_el;
+ queue_replace_top(&queue);
}
huff_tree->root=(HUFF_ELEMENT*) queue.root[1];
huff_tree->bytes_packed=bytes_packed+(bits_packed+7)/8;
@@ -1786,8 +1782,7 @@ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
Make a priority queue from the queue. Construct its index so that we
have a partially ordered tree.
*/
- for (i=(found+1)/2 ; i > 0 ; i--)
- _downheap(&queue,i);
+ queue_fix(&queue);
/* The Huffman algorithm. */
for (i=0 ; i < found-1 ; i++)
@@ -1801,12 +1796,9 @@ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
incidence). Popping from a priority queue includes a re-ordering
of the queue, to get the next least incidence element to the top.
*/
- a= (my_off_t*) queue_remove(&queue, 0);
- /*
- Copy the next least incidence element. The queue implementation
- reserves root[0] for temporary purposes. root[1] is the top.
- */
- b= (my_off_t*) queue.root[1];
+ a= (my_off_t*) queue_remove_top(&queue);
+ /* Copy the next least incidence element. */
+ b= (my_off_t*) queue_top(&queue);
/* Create a new element in a local (automatic) buffer. */
new_huff_el= element_buffer + i;
/* The new element gets the sum of the two least incidence elements. */
@@ -1826,8 +1818,8 @@ static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
queue. This successively replaces the references to counts by
references to HUFF_ELEMENTs.
*/
- queue.root[1]=(uchar*) new_huff_el;
- queue_replaced(&queue);
+ queue_top(&queue)= (uchar*) new_huff_el;
+ queue_replace_top(&queue);
}
DBUG_RETURN(bytes_packed+(bits_packed+7)/8);
}
diff --git a/storage/myisam/sort.c b/storage/myisam/sort.c
index 5274d8da5ca..db589759128 100644
--- a/storage/myisam/sort.c
+++ b/storage/myisam/sort.c
@@ -922,7 +922,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
if (init_queue(&queue,(uint) (Tb-Fb)+1,offsetof(BUFFPEK,key),0,
(int (*)(void*, uchar *,uchar*)) info->key_cmp,
- (void*) info))
+ (void*) info, 0, 0))
DBUG_RETURN(1); /* purecov: inspected */
for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
@@ -971,7 +971,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
uchar *base= buffpek->base;
uint max_keys=buffpek->max_keys;
- VOID(queue_remove(&queue,0));
+ VOID(queue_remove_top(&queue));
/* Put room used by buffer to use in other buffer */
for (refpek= (BUFFPEK**) &queue_top(&queue);
@@ -996,7 +996,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
}
else if (error == -1)
goto err; /* purecov: inspected */
- queue_replaced(&queue); /* Top element has been replaced */
+ queue_replace_top(&queue); /* Top element has been replaced */
}
}
buffpek=(BUFFPEK*) queue_top(&queue);
diff --git a/storage/myisammrg/myrg_queue.c b/storage/myisammrg/myrg_queue.c
index d2579053784..cf862b53b86 100644
--- a/storage/myisammrg/myrg_queue.c
+++ b/storage/myisammrg/myrg_queue.c
@@ -52,7 +52,7 @@ int _myrg_init_queue(MYRG_INFO *info,int inx,enum ha_rkey_function search_flag)
if (init_queue(q,info->tables, 0,
(myisam_readnext_vec[search_flag] == SEARCH_SMALLER),
queue_key_cmp,
- info->open_tables->table->s->keyinfo[inx].seg))
+ info->open_tables->table->s->keyinfo[inx].seg, 0, 0))
error=my_errno;
}
else
@@ -60,7 +60,7 @@ int _myrg_init_queue(MYRG_INFO *info,int inx,enum ha_rkey_function search_flag)
if (reinit_queue(q,info->tables, 0,
(myisam_readnext_vec[search_flag] == SEARCH_SMALLER),
queue_key_cmp,
- info->open_tables->table->s->keyinfo[inx].seg))
+ info->open_tables->table->s->keyinfo[inx].seg, 0, 0))
error=my_errno;
}
}
diff --git a/storage/myisammrg/myrg_rnext.c b/storage/myisammrg/myrg_rnext.c
index 82d5cbf38b1..1442ee08dd4 100644
--- a/storage/myisammrg/myrg_rnext.c
+++ b/storage/myisammrg/myrg_rnext.c
@@ -32,7 +32,7 @@ int myrg_rnext(MYRG_INFO *info, uchar *buf, int inx)
{
if (err == HA_ERR_END_OF_FILE)
{
- queue_remove(&(info->by_key),0);
+ queue_remove_top(&(info->by_key));
if (!info->by_key.elements)
return HA_ERR_END_OF_FILE;
}
@@ -43,7 +43,7 @@ int myrg_rnext(MYRG_INFO *info, uchar *buf, int inx)
{
/* Found here, adding to queue */
queue_top(&(info->by_key))=(uchar *)(info->current_table);
- queue_replaced(&(info->by_key));
+ queue_replace_top(&(info->by_key));
}
/* now, mymerge's read_next is as simple as one queue_top */
diff --git a/storage/myisammrg/myrg_rnext_same.c b/storage/myisammrg/myrg_rnext_same.c
index ad7bbfb0f6e..14b41dbe756 100644
--- a/storage/myisammrg/myrg_rnext_same.c
+++ b/storage/myisammrg/myrg_rnext_same.c
@@ -29,7 +29,7 @@ int myrg_rnext_same(MYRG_INFO *info, uchar *buf)
{
if (err == HA_ERR_END_OF_FILE)
{
- queue_remove(&(info->by_key),0);
+ queue_remove_top(&(info->by_key));
if (!info->by_key.elements)
return HA_ERR_END_OF_FILE;
}
@@ -40,7 +40,7 @@ int myrg_rnext_same(MYRG_INFO *info, uchar *buf)
{
/* Found here, adding to queue */
queue_top(&(info->by_key))=(uchar *)(info->current_table);
- queue_replaced(&(info->by_key));
+ queue_replace_top(&(info->by_key));
}
/* now, mymerge's read_next is as simple as one queue_top */
diff --git a/storage/myisammrg/myrg_rprev.c b/storage/myisammrg/myrg_rprev.c
index 66c94974940..0c560a0b73d 100644
--- a/storage/myisammrg/myrg_rprev.c
+++ b/storage/myisammrg/myrg_rprev.c
@@ -32,7 +32,7 @@ int myrg_rprev(MYRG_INFO *info, uchar *buf, int inx)
{
if (err == HA_ERR_END_OF_FILE)
{
- queue_remove(&(info->by_key),0);
+ queue_remove_top(&(info->by_key));
if (!info->by_key.elements)
return HA_ERR_END_OF_FILE;
}
@@ -43,7 +43,7 @@ int myrg_rprev(MYRG_INFO *info, uchar *buf, int inx)
{
/* Found here, adding to queue */
queue_top(&(info->by_key))=(uchar *)(info->current_table);
- queue_replaced(&(info->by_key));
+ queue_replace_top(&(info->by_key));
}
/* now, mymerge's read_prev is as simple as one queue_top */
diff --git a/storage/pbxt/src/filesys_xt.cc b/storage/pbxt/src/filesys_xt.cc
index 31e2cf961b6..ebe0ed146b0 100644
--- a/storage/pbxt/src/filesys_xt.cc
+++ b/storage/pbxt/src/filesys_xt.cc
@@ -369,8 +369,7 @@ xtPublic xtBool xt_fs_stat(XTThreadPtr self, char *path, off_t *size, struct tim
CloseHandle(fh);
if (size)
*size = (off_t) info.nFileSizeLow | (((off_t) info.nFileSizeHigh) << 32);
- if (mod_time)
- mod_time->tv.ft = info.ftLastWriteTime;
+ memset(mod_time, 0, sizeof(*mod_time));
#else
struct stat sb;
diff --git a/storage/pbxt/src/ha_pbxt.cc b/storage/pbxt/src/ha_pbxt.cc
index 8640c079a37..b5cc7d7f34a 100644
--- a/storage/pbxt/src/ha_pbxt.cc
+++ b/storage/pbxt/src/ha_pbxt.cc
@@ -108,6 +108,9 @@ static int pbxt_end(void *p);
static int pbxt_panic(handlerton *hton, enum ha_panic_function flag);
static void pbxt_drop_database(handlerton *hton, char *path);
static int pbxt_close_connection(handlerton *hton, THD* thd);
+#ifdef MARIADB_BASE_VERSION
+static void pbxt_commit_ordered(handlerton *hton, THD *thd, bool all);
+#endif
static int pbxt_commit(handlerton *hton, THD *thd, bool all);
static int pbxt_rollback(handlerton *hton, THD *thd, bool all);
static int pbxt_prepare(handlerton *hton, THD *thd, bool all);
@@ -1147,6 +1150,9 @@ static int pbxt_init(void *p)
pbxt_hton->state = SHOW_OPTION_YES;
pbxt_hton->db_type = DB_TYPE_PBXT; // Wow! I have my own!
pbxt_hton->close_connection = pbxt_close_connection; /* close_connection, cleanup thread related data. */
+#ifdef MARIADB_BASE_VERSION
+ pbxt_hton->commit_ordered = pbxt_commit_ordered;
+#endif
pbxt_hton->commit = pbxt_commit; /* commit */
pbxt_hton->rollback = pbxt_rollback; /* rollback */
if (pbxt_support_xa) {
@@ -1484,6 +1490,29 @@ static int pbxt_start_consistent_snapshot(handlerton *hton, THD *thd)
return err;
}
+#ifdef MARIADB_BASE_VERSION
+/*
+ * Quickly commit the transaction to memory and make it visible to others.
+ * The remaining part of commit will happen later, in pbxt_commit().
+ */
+static void pbxt_commit_ordered(handlerton *hton, THD *thd, bool all)
+{
+ XTThreadPtr self;
+
+ if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
+ XT_PRINT2(self, "%s pbxt_commit_ordered all=%d\n", all ? "END CONN XACT" : "END STAT", all);
+
+ if (self->st_xact_data) {
+ if (all || self->st_auto_commit) {
+ self->st_commit_ordered = TRUE;
+ self->st_writer = self->st_xact_writer;
+ self->st_delayed_error= !xt_xn_commit_fast(self, self->st_writer);
+ }
+ }
+ }
+}
+#endif
+
/*
* Commit the PBXT transaction of the given thread.
* thd is the MySQL thread structure.
@@ -1512,7 +1541,13 @@ static int pbxt_commit(handlerton *hton, THD *thd, bool all)
if (all || self->st_auto_commit) {
XT_PRINT0(self, "xt_xn_commit in pbxt_commit\n");
- if (!xt_xn_commit(self))
+ if (self->st_commit_ordered) {
+ self->st_commit_ordered = FALSE;
+ err = !xt_xn_commit_slow(self, self->st_writer) || self->st_delayed_error;
+ } else {
+ err = !xt_xn_commit(self);
+ }
+ if (err)
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
}
}
@@ -6064,7 +6099,7 @@ static MYSQL_SYSVAR_INT(max_threads, pbxt_max_threads,
NULL, NULL, 0, 0, 20000, 1);
#endif
-#ifndef DEBUG
+#if !defined(DEBUG) || defined(MARIADB_BASE_VERSION)
static MYSQL_SYSVAR_BOOL(support_xa, pbxt_support_xa,
PLUGIN_VAR_OPCMDARG,
"Enable PBXT support for the XA two-phase commit, default is enabled",
diff --git a/storage/pbxt/src/pthread_xt.cc b/storage/pbxt/src/pthread_xt.cc
index 64c03db734c..d704e977c21 100755
--- a/storage/pbxt/src/pthread_xt.cc
+++ b/storage/pbxt/src/pthread_xt.cc
@@ -396,48 +396,7 @@ xtPublic int xt_p_cond_wait(xt_cond_type *cond, xt_mutex_type *mutex)
xtPublic int xt_p_cond_timedwait(xt_cond_type *cond, xt_mutex_type *mt, struct timespec *abstime)
{
- pthread_mutex_t *mutex = &mt->mt_cs;
- int result;
- long timeout;
- union ft64 now;
-
- if (abstime != NULL) {
- GetSystemTimeAsFileTime(&now.ft);
-
- timeout = (long)((abstime->tv.i64 - now.i64) / 10000);
- if (timeout < 0)
- timeout = 0L;
- if (timeout > abstime->max_timeout_msec)
- timeout = abstime->max_timeout_msec;
- }
- else
- timeout= INFINITE;
-
- WaitForSingleObject(cond->broadcast_block_event, INFINITE);
-
- EnterCriticalSection(&cond->lock_waiting);
- cond->waiting++;
- LeaveCriticalSection(&cond->lock_waiting);
-
- LeaveCriticalSection(mutex);
-
- result= WaitForMultipleObjects(2, cond->events, FALSE, timeout);
-
- EnterCriticalSection(&cond->lock_waiting);
- cond->waiting--;
-
- if (cond->waiting == 0) {
- /* The last waiter must reset the broadcast
- * state (whther there was a broadcast or not)!
- */
- ResetEvent(cond->events[xt_cond_type::BROADCAST]);
- SetEvent(cond->broadcast_block_event);
- }
- LeaveCriticalSection(&cond->lock_waiting);
-
- EnterCriticalSection(mutex);
-
- return result == WAIT_TIMEOUT ? ETIMEDOUT : 0;
+ return pthread_cond_timedwait(cond, &mt->mt_cs, abstime);
}
xtPublic int xt_p_join(pthread_t thread, void **value)
diff --git a/storage/pbxt/src/thread_xt.cc b/storage/pbxt/src/thread_xt.cc
index 985d33840d3..16abd96f983 100644
--- a/storage/pbxt/src/thread_xt.cc
+++ b/storage/pbxt/src/thread_xt.cc
@@ -54,6 +54,9 @@ void xt_db_exit_thread(XTThreadPtr self);
static void thr_accumulate_statistics(XTThreadPtr self);
+#ifdef _WIN32
+#include <my_sys.h>
+#endif
/*
* -----------------------------------------------------------------------
* THREAD GLOBALS
@@ -1962,18 +1965,7 @@ xtPublic xtBool xt_timed_wait_cond(XTThreadPtr self, xt_cond_type *cond, xt_mute
XTThreadPtr me = self ? self : xt_get_self();
#ifdef XT_WIN
- union ft64 now;
-
- GetSystemTimeAsFileTime(&now.ft);
-
- /* System time is measured in 100ns units.
- * This calculation will be reversed by the Windows implementation
- * of pthread_cond_timedwait(), in order to extract the
- * milli-second timeout!
- */
- abstime.tv.i64 = now.i64 + (milli_sec * 10000);
-
- abstime.max_timeout_msec = milli_sec;
+ set_timespec_nsec(abstime, 1000000ULL* milli_sec);
#else
struct timeval now;
u_llong micro_sec;
diff --git a/storage/pbxt/src/thread_xt.h b/storage/pbxt/src/thread_xt.h
index a07f7b7ae01..282df46a5d5 100644
--- a/storage/pbxt/src/thread_xt.h
+++ b/storage/pbxt/src/thread_xt.h
@@ -299,6 +299,9 @@ typedef struct XTThread {
xtBool st_stat_ended; /* TRUE if the statement was ended. */
xtBool st_stat_trans; /* TRUE if a statement transaction is running (started on UPDATE). */
xtBool st_stat_modify; /* TRUE if the statement is an INSERT/UPDATE/DELETE */
+ xtBool st_commit_ordered; /* TRUE if we have run commit_ordered() */
+ xtBool st_delayed_error; /* TRUE if we got an error in commit_ordered() */
+ xtBool st_writer; /* Copy of thread->st_xact_writer (which is clobbered by xlog_append()) */
#ifdef XT_IMPLEMENT_NO_ACTION
XTBasicListRec st_restrict_list; /* These records have been deleted and should have no reference. */
#endif
diff --git a/storage/pbxt/src/xaction_xt.cc b/storage/pbxt/src/xaction_xt.cc
index 0d94449c3da..fd7ae88a4ae 100644
--- a/storage/pbxt/src/xaction_xt.cc
+++ b/storage/pbxt/src/xaction_xt.cc
@@ -1286,27 +1286,61 @@ xtPublic xtBool xt_xn_begin(XTThreadPtr self)
return OK;
}
-static xtBool xn_end_xact(XTThreadPtr thread, u_int status)
+static void xn_end_release_locks(XTThreadPtr thread)
+{
+ XTXactDataPtr xact = thread->st_xact_data;
+ XTDatabaseHPtr db = thread->st_database;
+ ASSERT_NS(xact);
+
+ /* {REMOVE-LOCKS} Drop locks if you have any: */
+ thread->st_lock_list.xt_remove_all_locks(db, thread);
+
+ /* Do this afterwards to make sure the sweeper
+ * does not start cleaning up transactions
+ * before any transactions that were waiting for
+ * this transaction have completed!
+ */
+ xact->xd_end_xn_id = db->db_xn_curr_id;
+
+ /* Now you can sweep! */
+ xact->xd_flags |= XT_XN_XAC_SWEEP;
+}
+
+/* The commit is split into two phases: one "fast" for MariaDB commit_ordered(),
+ * and one "slow" for commit(). When not using internal 2pc, there is only one
+ * call combining both phases.
+ */
+
+enum {
+ XN_END_PHASE_FAST = 1,
+ XN_END_PHASE_SLOW = 2,
+ XN_END_PHASE_BOTH = 3
+};
+
+static xtBool xn_end_xact(XTThreadPtr thread, u_int status, xtBool writer, int phase)
{
XTXactDataPtr xact;
xtBool ok = TRUE;
+ xtBool err;
ASSERT_NS(thread->st_xact_data);
if ((xact = thread->st_xact_data)) {
XTDatabaseHPtr db = thread->st_database;
xtXactID xn_id = xact->xd_start_xn_id;
- xtBool writer;
- if ((writer = thread->st_xact_writer)) {
+ if (writer) {
/* The transaction wrote something: */
XTXactEndEntryDRec entry;
xtWord4 sum;
- sum = XT_CHECKSUM4_XACT(xn_id) ^ XT_CHECKSUM4_XACT(0);
- entry.xe_status_1 = status;
- entry.xe_checksum_1 = XT_CHECKSUM_1(sum);
- XT_SET_DISK_4(entry.xe_xact_id_4, xn_id);
- XT_SET_DISK_4(entry.xe_not_used_4, 0);
+ if (phase & XN_END_PHASE_FAST)
+ {
+ sum = XT_CHECKSUM4_XACT(xn_id) ^ XT_CHECKSUM4_XACT(0);
+ entry.xe_status_1 = status;
+ entry.xe_checksum_1 = XT_CHECKSUM_1(sum);
+ XT_SET_DISK_4(entry.xe_xact_id_4, xn_id);
+ XT_SET_DISK_4(entry.xe_not_used_4, 0);
+ }
#ifdef XT_IMPLEMENT_NO_ACTION
/* This will check any resticts that have been delayed to the end of the statement. */
@@ -1318,20 +1352,35 @@ static xtBool xn_end_xact(XTThreadPtr thread, u_int status)
}
#endif
- /* Flush the data log: */
- if (!thread->st_dlog_buf.dlb_flush_log(TRUE, thread)) {
+ /* Flush the data log (in the "fast" case we already did it in prepare): */
+ if ((phase & XN_END_PHASE_SLOW) && !thread->st_dlog_buf.dlb_flush_log(TRUE, thread)) {
ok = FALSE;
status = XT_LOG_ENT_ABORT;
}
/* Write and flush the transaction log: */
- if (!xt_xlog_log_data(thread, sizeof(XTXactEndEntryDRec), (XTXactLogBufferDPtr) &entry, xt_db_flush_log_at_trx_commit)) {
+ if (phase == XN_END_PHASE_FAST) {
+ /* Fast phase, delay any write or flush to later. */
+ err = !xt_xlog_log_data(thread, sizeof(XTXactEndEntryDRec), (XTXactLogBufferDPtr) &entry, XT_XLOG_NO_WRITE_NO_FLUSH);
+ } else if (phase == XN_END_PHASE_SLOW) {
+ /* We already appended the commit record in the fast phase.
+ * Now just call with empty record to ensure we write/flush
+ * the log as needed for this commit.
+ */
+ err = !xt_xlog_log_data(thread, 0, NULL, xt_db_flush_log_at_trx_commit);
+ } else /* phase == XN_END_PHASE_BOTH */ {
+ /* Both phases at once, append commit record and write/flush normally. */
+ ASSERT_NS(phase == XN_END_PHASE_BOTH);
+ err = !xt_xlog_log_data(thread, sizeof(XTXactEndEntryDRec), (XTXactLogBufferDPtr) &entry, xt_db_flush_log_at_trx_commit);
+ }
+
+ if (err) {
ok = FALSE;
status = XT_LOG_ENT_ABORT;
/* Make sure this is done, if we failed to log
* the transction end!
*/
- if (thread->st_xact_writer) {
+ if (writer) {
/* Adjust this in case of error, but don't forget
* to lock!
*/
@@ -1346,46 +1395,46 @@ static xtBool xn_end_xact(XTThreadPtr thread, u_int status)
}
}
- /* Setting this flag completes the transaction,
- * Do this before we release the locks, because
- * the unlocked transactions expect the
- * transaction they are waiting for to be
- * gone!
- */
- xact->xd_end_time = ++db->db_xn_end_time;
- if (status == XT_LOG_ENT_COMMIT) {
- thread->st_statistics.st_commits++;
- xact->xd_flags |= (XT_XN_XAC_COMMITTED | XT_XN_XAC_ENDED);
- }
- else {
- thread->st_statistics.st_rollbacks++;
- xact->xd_flags |= XT_XN_XAC_ENDED;
+ if (phase & XN_END_PHASE_FAST) {
+ /* Setting this flag completes the transaction,
+ * Do this before we release the locks, because
+ * the unlocked transactions expect the
+ * transaction they are waiting for to be
+ * gone!
+ */
+ xact->xd_end_time = ++db->db_xn_end_time;
+ if (status == XT_LOG_ENT_COMMIT) {
+ thread->st_statistics.st_commits++;
+ xact->xd_flags |= (XT_XN_XAC_COMMITTED | XT_XN_XAC_ENDED);
+ }
+ else {
+ thread->st_statistics.st_rollbacks++;
+ xact->xd_flags |= XT_XN_XAC_ENDED;
+ }
}
- /* {REMOVE-LOCKS} Drop locks is you have any: */
- thread->st_lock_list.xt_remove_all_locks(db, thread);
-
- /* Do this afterwards to make sure the sweeper
- * does not cleanup transactions start cleaning up
- * before any transactions that were waiting for
- * this transaction have completed!
+ /* Return as early as possible in the "fast" path, since it must do
+ * as little work as possible (the locks are released immediately
+ * afterwards, in the "slow" part).
+ * ToDo: If we ran the fast part, the slow part could release locks
+ * _before_ fsync(), rather than after.
*/
- xact->xd_end_xn_id = db->db_xn_curr_id;
+ if (!(phase & XN_END_PHASE_SLOW))
+ return ok;
- /* Now you can sweep! */
- xact->xd_flags |= XT_XN_XAC_SWEEP;
+ xn_end_release_locks(thread);
}
else {
/* Read-only transaction can be removed, immediately */
- xact->xd_end_time = ++db->db_xn_end_time;
- xact->xd_flags |= (XT_XN_XAC_COMMITTED | XT_XN_XAC_ENDED);
-
- /* Drop locks is you have any: */
- thread->st_lock_list.xt_remove_all_locks(db, thread);
+ if (phase & XN_END_PHASE_FAST) {
+ xact->xd_end_time = ++db->db_xn_end_time;
+ xact->xd_flags |= (XT_XN_XAC_COMMITTED | XT_XN_XAC_ENDED);
- xact->xd_end_xn_id = db->db_xn_curr_id;
+ if (!(phase & XN_END_PHASE_SLOW))
+ return ok;
+ }
- xact->xd_flags |= XT_XN_XAC_SWEEP;
+ xn_end_release_locks(thread);
if (xt_xn_delete_xact(db, xn_id, thread)) {
if (db->db_xn_min_ram_id == xn_id)
@@ -1477,12 +1526,22 @@ static xtBool xn_end_xact(XTThreadPtr thread, u_int status)
xtPublic xtBool xt_xn_commit(XTThreadPtr thread)
{
- return xn_end_xact(thread, XT_LOG_ENT_COMMIT);
+ return xn_end_xact(thread, XT_LOG_ENT_COMMIT, thread->st_xact_writer, XN_END_PHASE_BOTH);
+}
+
+xtPublic xtBool xt_xn_commit_fast(XTThreadPtr thread, xtBool writer)
+{
+ return xn_end_xact(thread, XT_LOG_ENT_COMMIT, writer, XN_END_PHASE_FAST);
+}
+
+xtPublic xtBool xt_xn_commit_slow(XTThreadPtr thread, xtBool writer)
+{
+ return xn_end_xact(thread, XT_LOG_ENT_COMMIT, writer, XN_END_PHASE_SLOW);
}
xtPublic xtBool xt_xn_rollback(XTThreadPtr thread)
{
- return xn_end_xact(thread, XT_LOG_ENT_ABORT);
+ return xn_end_xact(thread, XT_LOG_ENT_ABORT, thread->st_xact_writer, XN_END_PHASE_BOTH);
}
xtPublic xtBool xt_xn_log_tab_id(XTThreadPtr self, xtTableID tab_id)
diff --git a/storage/pbxt/src/xaction_xt.h b/storage/pbxt/src/xaction_xt.h
index e679a0f38f0..cd350200506 100644
--- a/storage/pbxt/src/xaction_xt.h
+++ b/storage/pbxt/src/xaction_xt.h
@@ -193,6 +193,8 @@ void xt_wakeup_sweeper(struct XTDatabase *db);
xtBool xt_xn_begin(struct XTThread *self);
xtBool xt_xn_commit(struct XTThread *self);
+xtBool xt_xn_commit_fast(struct XTThread *self, xtBool writer);
+xtBool xt_xn_commit_slow(struct XTThread *self, xtBool writer);
xtBool xt_xn_rollback(struct XTThread *self);
xtBool xt_xn_log_tab_id(struct XTThread *self, xtTableID tab_id);
int xt_xn_status(struct XTOpenTable *ot, xtXactID xn_id, xtRecordID rec_id);
diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt
index 50f2dba5cf9..8426804615d 100644
--- a/storage/xtradb/CMakeLists.txt
+++ b/storage/xtradb/CMakeLists.txt
@@ -21,8 +21,6 @@ IF (CMAKE_SIZEOF_VOID_P MATCHES 8)
SET(WIN64 TRUE)
ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8)
-ADD_DEFINITIONS(-D_WIN32 -D_LIB -DMYSQL_SERVER)
-
# Include directories under xtradb
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/xtradb/include
${CMAKE_SOURCE_DIR}/storage/xtradb/handler)
@@ -83,9 +81,6 @@ SET(XTRADB_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
ut/ut0list.c ut/ut0wqueue.c)
-# Windows atomics do not perform well. Disable Windows atomics by default.
-# See bug#52102 for details.
-#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
-ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION)
+
MYSQL_STORAGE_ENGINE(XTRADB)
diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c
index 020896b5739..5ea8056bed4 100644
--- a/storage/xtradb/buf/buf0buf.c
+++ b/storage/xtradb/buf/buf0buf.c
@@ -2661,7 +2661,7 @@ buf_page_get_gen(
ulint fix_type;
ibool must_read;
ulint retries = 0;
- mutex_t* block_mutex= 0;
+ mutex_t* block_mutex= NULL;
trx_t* trx = NULL;
ulint sec;
ulint ms;
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index dfe98f09e4f..a511d764b24 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -32,7 +32,8 @@ Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/* TODO list for the InnoDB handler in 5.0:
- - Remove the flag trx->active_trans and look at trx->conc_state
+ - Remove the flag trx->active_flag & TRX_ACTIVE_IN_MYSQL and look
+ at trx->conc_state
- fix savepoint functions to use savepoint storage area
- Find out what kind of problems the OS X case-insensitivity causes to
table and database names; should we 'normalize' the names like we do
@@ -55,7 +56,9 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#include <m_ctype.h>
#include <mysys_err.h>
#include <mysql/plugin.h>
-
+#ifdef _WIN32
+#include <io.h>
+#endif
/** @file ha_innodb.cc */
/* Include necessary InnoDB headers */
@@ -117,14 +120,18 @@ bool check_global_access(THD *thd, ulong want_access);
/** to protect innobase_open_files */
static pthread_mutex_t innobase_share_mutex;
-/** to force correct commit order in binlog */
-static pthread_mutex_t prepare_commit_mutex;
static ulong commit_threads = 0;
static pthread_mutex_t commit_threads_m;
static pthread_cond_t commit_cond;
static pthread_mutex_t commit_cond_m;
static bool innodb_inited = 0;
+C_MODE_START
+static xtradb_icp_result_t index_cond_func_innodb(void *arg);
+C_MODE_END
+
+
+
#define INSIDE_HA_INNOBASE_CC
/* In the Windows plugin, the return value of current_thd is
@@ -243,6 +250,7 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
+static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
@@ -508,6 +516,17 @@ bool innobase_show_status(handlerton *hton, THD* thd,
stat_print_fn* stat_print,
enum ha_stat_type stat_type);
+/* Enable / disable checkpoints */
+static int innobase_checkpoint_state(handlerton *hton, bool disable)
+{
+ if (disable)
+ (void) log_disable_checkpoint();
+ else
+ log_enable_checkpoint();
+ return 0;
+}
+
+
/*****************************************************************//**
Commits a transaction in an InnoDB database. */
static
@@ -868,6 +887,9 @@ convert_error_code_to_mysql(
case DB_RECORD_NOT_FOUND:
return(HA_ERR_NO_ACTIVE_RECORD);
+ case DB_SEARCH_ABORTED_BY_USER:
+ return(HA_ERR_ABORTED_BY_USER);
+
case DB_DEADLOCK:
/* Since we rolled back the whole transaction, we must
tell it also to MySQL so that MySQL knows to empty the
@@ -1219,7 +1241,28 @@ innobase_mysql_tmpfile(void)
will be passed to fdopen(), it will be closed by invoking
fclose(), which in turn will invoke close() instead of
my_close(). */
+#ifdef _WIN32
+ /* Note that on Windows, the integer returned by mysql_tmpfile
+ has no relation to C runtime file descriptor. Here, we need
+ to call my_get_osfhandle to get the HANDLE and then convert it
+ to a C runtime file descriptor. */
+ {
+ HANDLE hFile = my_get_osfhandle(fd);
+ HANDLE hDup;
+ BOOL bOK =
+ DuplicateHandle(GetCurrentProcess(), hFile, GetCurrentProcess(),
+ &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS);
+ if(bOK) {
+ fd2 = _open_osfhandle((intptr_t)hDup,0);
+ }
+ else {
+ my_osmaperr(GetLastError());
+ fd2 = -1;
+ }
+ }
+#else
fd2 = dup(fd);
+#endif
if (fd2 < 0) {
DBUG_PRINT("error",("Got error %d on dup",fd2));
my_errno=errno;
@@ -1386,7 +1429,6 @@ innobase_trx_init(
trx_t* trx) /*!< in/out: InnoDB transaction handle */
{
DBUG_ENTER("innobase_trx_init");
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
DBUG_ASSERT(thd == trx->mysql_thd);
trx->check_foreigns = !thd_test_options(
@@ -1445,8 +1487,6 @@ check_trx_exists(
{
trx_t*& trx = thd_to_trx(thd);
- ut_ad(EQ_CURRENT_THD(thd));
-
if (trx == NULL) {
trx = innobase_trx_allocate(thd);
} else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
@@ -1738,10 +1778,10 @@ innobase_query_caching_of_table_permitted(
/* The call of row_search_.. will start a new transaction if it is
not yet started */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(innodb_hton_ptr, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
if (row_search_check_if_query_cache_permitted(trx, norm_name)) {
@@ -2011,11 +2051,11 @@ ha_innobase::init_table_handle_for_HANDLER(void)
/* Set the MySQL flag to mark that there is an active transaction */
- if (prebuilt->trx->active_trans == 0) {
+ if ((prebuilt->trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, user_thd);
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
/* We did the necessary inits in this function, no need to repeat them
@@ -2066,12 +2106,14 @@ innobase_init(
innobase_hton->savepoint_set=innobase_savepoint;
innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
innobase_hton->savepoint_release=innobase_release_savepoint;
+ innobase_hton->commit_ordered=innobase_commit_ordered;
innobase_hton->commit=innobase_commit;
innobase_hton->rollback=innobase_rollback;
innobase_hton->prepare=innobase_xa_prepare;
innobase_hton->recover=innobase_xa_recover;
innobase_hton->commit_by_xid=innobase_commit_by_xid;
innobase_hton->rollback_by_xid=innobase_rollback_by_xid;
+ innobase_hton->checkpoint_state= innobase_checkpoint_state;
innobase_hton->create_cursor_read_view=innobase_create_cursor_view;
innobase_hton->set_cursor_read_view=innobase_set_cursor_view;
innobase_hton->close_cursor_read_view=innobase_close_cursor_view;
@@ -2595,7 +2637,6 @@ skip_overwrite:
innobase_open_tables = hash_create(200);
pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
- pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
pthread_cond_init(&commit_cond, NULL);
@@ -2650,7 +2691,6 @@ innobase_end(
my_free(internal_innobase_data_file_path,
MYF(MY_ALLOW_ZERO_PTR));
pthread_mutex_destroy(&innobase_share_mutex);
- pthread_mutex_destroy(&prepare_commit_mutex);
pthread_mutex_destroy(&commit_threads_m);
pthread_mutex_destroy(&commit_cond_m);
pthread_cond_destroy(&commit_cond);
@@ -2771,14 +2811,118 @@ innobase_start_trx_and_assign_read_view(
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(hton, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
DBUG_RETURN(0);
}
+static
+void
+innobase_commit_ordered_2(
+/*============*/
+ trx_t* trx, /*!< in: Innodb transaction */
+ THD* thd) /*!< in: MySQL thread handle */
+{
+ ulonglong tmp_pos;
+ DBUG_ENTER("innobase_commit_ordered");
+
+ /* We need current binlog position for ibbackup to work.
+ Note, the position is current because commit_ordered is guaranteed
+ to be called in the same sequence as writing to the binlog. */
+
+retry:
+ if (innobase_commit_concurrency > 0) {
+ pthread_mutex_lock(&commit_cond_m);
+ commit_threads++;
+
+ if (commit_threads > innobase_commit_concurrency) {
+ commit_threads--;
+ pthread_cond_wait(&commit_cond,
+ &commit_cond_m);
+ pthread_mutex_unlock(&commit_cond_m);
+ goto retry;
+ }
+ else {
+ pthread_mutex_unlock(&commit_cond_m);
+ }
+ }
+
+ mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name));
+ trx->mysql_log_offset = (ib_int64_t) tmp_pos;
+
+ /* Don't do write + flush right now. For group commit
+ to work we want to do the flush in the innobase_commit()
+ method, which runs without holding any locks. */
+ trx->flush_log_later = TRUE;
+ innobase_commit_low(trx);
+ trx->flush_log_later = FALSE;
+
+ if (innobase_commit_concurrency > 0) {
+ pthread_mutex_lock(&commit_cond_m);
+ commit_threads--;
+ pthread_cond_signal(&commit_cond);
+ pthread_mutex_unlock(&commit_cond_m);
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/*****************************************************************//**
+Perform the first, fast part of InnoDB commit.
+
+Doing it in this call ensures that we get the same commit order here
+as in binlog and any other participating transactional storage engines.
+
+Note that we want to do as little as really needed here, as we run
+under a global mutex. The expensive fsync() is done later, in
+innobase_commit(), without a lock so group commit can take place.
+
+Note also that this method can be called from a different thread than
+the one handling the rest of the transaction. */
+static
+void
+innobase_commit_ordered(
+/*============*/
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: MySQL thread handle of the user for whom
+ the transaction should be committed */
+ bool all) /*!< in: TRUE - commit transaction
+ FALSE - the current SQL statement ended */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_commit_ordered");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ trx = check_trx_exists(thd);
+
+ /* Since we will reserve the kernel mutex, we must not be holding the
+ search system latch, or we will disobey the latching order. But we
+ already released it in innobase_xa_prepare() (if not before), so just
+ have an assert here.*/
+ ut_ad(!trx->has_search_latch);
+
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
+ && trx->conc_state != TRX_NOT_STARTED) {
+ /* We cannot throw error here; instead we will catch this error
+ again in innobase_commit() and report it from there. */
+ DBUG_VOID_RETURN;
+ }
+
+ /* commit_ordered is only called when committing the whole transaction
+ (or an SQL statement when autocommit is on). */
+ DBUG_ASSERT(all ||
+ (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
+
+ innobase_commit_ordered_2(trx, thd);
+
+ trx->active_flag |= TRX_ACTIVE_COMMIT_ORDERED;
+
+ DBUG_VOID_RETURN;
+}
+
/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
ended.
@@ -2804,11 +2948,12 @@ innobase_commit(
/* Since we will reserve the kernel mutex, we have to release
the search system latch first to obey the latching order. */
- if (trx->has_search_latch) {
+ if (trx->has_search_latch &&
+ (trx->active_flag & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
trx_search_latch_release_if_reserved(trx);
}
- /* The flag trx->active_trans is set to 1 in
+ /* The flag TRX_ACTIVE_IN_MYSQL in trx->active_flag is set in
1. ::external_lock(),
2. ::start_stmt(),
@@ -2818,81 +2963,33 @@ innobase_commit(
6. innobase_start_trx_and_assign_read_view(),
7. ::transactional_table_lock()
- and it is only set to 0 in a commit or a rollback. If it is 0 we know
+ and it is only cleared in a commit or a rollback. If it is unset we know
there cannot be resources to be freed and we could return immediately.
For the time being, we play safe and do the cleanup though there should
be nothing to clean up. */
- if (trx->active_trans == 0
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
&& trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but"
- " trx->conc_state != TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL== 0,"
+ " but trx->conc_state != TRX_NOT_STARTED");
}
+
if (all
|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
- /* We were instructed to commit the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
- /* We need current binlog position for ibbackup to work.
- Note, the position is current because of
- prepare_commit_mutex */
-retry:
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads++;
-
- if (commit_threads > innobase_commit_concurrency) {
- commit_threads--;
- pthread_cond_wait(&commit_cond,
- &commit_cond_m);
- pthread_mutex_unlock(&commit_cond_m);
- goto retry;
- }
- else {
- pthread_mutex_unlock(&commit_cond_m);
- }
- }
-
- /* The following calls to read the MySQL binary log
- file name and the position return consistent results:
- 1) Other InnoDB transactions cannot intervene between
- these calls as we are holding prepare_commit_mutex.
- 2) Binary logging of other engines is not relevant
- to InnoDB as all InnoDB requires is that committing
- InnoDB transactions appear in the same order in the
- MySQL binary log as they appear in InnoDB logs.
- 3) A MySQL log file rotation cannot happen because
- MySQL protects against this by having a counter of
- transactions in prepared state and it only allows
- a rotation when the counter drops to zero. See
- LOCK_prep_xids and COND_prep_xids in log.cc. */
- trx->mysql_log_file_name = mysql_bin_log_file_name();
- trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
-
- /* Don't do write + flush right now. For group commit
- to work we want to do the flush after releasing the
- prepare_commit_mutex. */
- trx->flush_log_later = TRUE;
- innobase_commit_low(trx);
- trx->flush_log_later = FALSE;
-
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads--;
- pthread_cond_signal(&commit_cond);
- pthread_mutex_unlock(&commit_cond_m);
+ /* Run the fast part of commit if we did not already. */
+ if ((trx->active_flag & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
+ innobase_commit_ordered_2(trx, thd);
}
- if (trx->active_trans == 2) {
-
- pthread_mutex_unlock(&prepare_commit_mutex);
- }
+ /* We were instructed to commit the whole transaction, or
+ this is an SQL statement end and autocommit is on */
- /* Now do a write + flush of logs. */
+ /* We did the first part already in innobase_commit_ordered(),
+ Now finish by doing a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
- trx->active_trans = 0;
+ trx->active_flag = 0;
} else {
/* We just mark the SQL statement ended and do not do a
@@ -2965,7 +3062,7 @@ innobase_rollback(
|| !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
error = trx_rollback_for_mysql(trx);
- trx->active_trans = 0;
+ trx->active_flag = 0;
} else {
error = trx_rollback_last_sql_stat_for_mysql(trx);
}
@@ -3109,7 +3206,7 @@ innobase_savepoint(
innobase_release_stat_resources(trx);
/* cannot happen outside of transaction */
- DBUG_ASSERT(trx->active_trans);
+ DBUG_ASSERT(trx->active_flag & TRX_ACTIVE_IN_MYSQL);
/* TODO: use provided savepoint data area to store savepoint data */
char name[64];
@@ -3139,11 +3236,11 @@ innobase_close_connection(
ut_a(trx);
- if (trx->active_trans == 0
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
&& trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but"
- " trx->conc_state != TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL == 0,"
+ " but trx->conc_state != TRX_NOT_STARTED");
}
@@ -3271,13 +3368,16 @@ UNIV_INTERN
ulong
ha_innobase::index_flags(
/*=====================*/
- uint,
- uint,
- bool)
+ uint index,
+ uint part,
+ bool all_parts)
const
{
- return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
- | HA_READ_RANGE | HA_KEYREAD_ONLY);
+ ulong extra_flag= 0;
+ if (table && index == table->s->primary_key)
+ extra_flag= HA_CLUSTERED_INDEX;
+ return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | extra_flag
+ | HA_READ_RANGE | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN);
}
/****************************************************************//**
@@ -4321,12 +4421,24 @@ get_innobase_type_from_mysql_type(
case MYSQL_TYPE_SHORT:
case MYSQL_TYPE_INT24:
case MYSQL_TYPE_DATE:
- case MYSQL_TYPE_DATETIME:
case MYSQL_TYPE_YEAR:
case MYSQL_TYPE_NEWDATE:
+ return(DATA_INT);
+
case MYSQL_TYPE_TIME:
+ case MYSQL_TYPE_DATETIME:
case MYSQL_TYPE_TIMESTAMP:
- return(DATA_INT);
+ /*
+ XtraDB should ideally just check field->key_type() and never
+ field->type(). The following check is here only to
+ change the new high-resolution datetime/timestamp/time fields
+ to use DATA_FIXBINARY. We can't convert this function to
+ just test field->key_type(), as then the check of whether a
+ table is compatible would fail for old tables.
+ */
+ if (field->key_type() == HA_KEYTYPE_BINARY)
+ return(DATA_FIXBINARY);
+ return(DATA_INT);
case MYSQL_TYPE_FLOAT:
return(DATA_FLOAT);
case MYSQL_TYPE_DOUBLE:
@@ -4340,10 +4452,7 @@ get_innobase_type_from_mysql_type(
case MYSQL_TYPE_LONG_BLOB:
return(DATA_BLOB);
case MYSQL_TYPE_NULL:
- /* MySQL currently accepts "NULL" datatype, but will
- reject such datatype in the next release. We will cope
- with it and not trigger assertion failure in 5.1 */
- break;
+ return(DATA_FIXBINARY);
default:
ut_error;
}
@@ -4668,8 +4777,9 @@ build_template(
THD* thd, /*!< in: current user thread, used
only if templ_type is
ROW_MYSQL_REC_FIELDS */
- TABLE* table, /*!< in: MySQL table */
- uint templ_type) /*!< in: ROW_MYSQL_WHOLE_ROW or
+ TABLE* table, /* in: MySQL table */
+ ha_innobase* file, /* in: ha_innobase handler */
+ uint templ_type) /* in: ROW_MYSQL_WHOLE_ROW or
ROW_MYSQL_REC_FIELDS */
{
dict_index_t* index;
@@ -4683,7 +4793,9 @@ build_template(
ulint sql_idx, innodb_idx=0;
/* byte offset of the end of last requested column */
ulint mysql_prefix_len = 0;
-
+ ibool do_idx_cond_push= FALSE;
+ ibool need_second_pass= FALSE;
+
if (prebuilt->select_lock_type == LOCK_X) {
/* We always retrieve the whole clustered index record if we
use exclusive row level locks, for example, if the read is
@@ -4753,6 +4865,16 @@ build_template(
prebuilt->templ_contains_blob = FALSE;
+
+ /*
+ Setup index condition pushdown (note: we don't need to check if
+ this is a scan on primary key as that is checked in idx_cond_push)
+ */
+ if (file->active_index == file->pushed_idx_cond_keyno &&
+ file->active_index != MAX_KEY &&
+ templ_type == ROW_MYSQL_REC_FIELDS)
+ do_idx_cond_push= need_second_pass= TRUE;
+
/* Note that in InnoDB, i is the column number. MySQL calls columns
'fields'. */
for (sql_idx = 0; sql_idx < n_fields; sql_idx++) {
@@ -4766,6 +4888,8 @@ build_template(
and which we can skip. */
register const ibool index_contains_field =
dict_index_contains_col_or_prefix(index, innodb_idx);
+ register const ibool index_covers_field =
+ field->part_of_key.is_set(file->active_index);
if (!index_contains_field && prebuilt->read_just_key) {
/* If this is a 'key read', we do not need
@@ -4798,8 +4922,12 @@ build_template(
/* This field is not needed in the query, skip it */
goto skip_field;
- }
include_field:
+ if (do_idx_cond_push &&
+ ((need_second_pass && !index_covers_field) ||
+ (!need_second_pass && index_covers_field)))
+ goto skip_field;
+ }
n_requested_fields++;
templ->col_no = innodb_idx;
@@ -4854,6 +4982,13 @@ include_field:
prebuilt->templ_contains_blob = TRUE;
}
skip_field:
+ if (need_second_pass && (sql_idx+1 == n_fields))
+ {
+ prebuilt->n_index_fields= n_requested_fields;
+ need_second_pass= FALSE;
+ sql_idx= (~(ulint)0); /* to start from 0 */
+ innodb_idx= (~(ulint)0); /* to start from 0 */ ///psergey-merge-merge-last-change
+ }
if (field->stored_in_db) {
innodb_idx++;
}
@@ -4862,12 +4997,23 @@ skip_field:
prebuilt->n_template = n_requested_fields;
prebuilt->mysql_prefix_len = mysql_prefix_len;
+ if (do_idx_cond_push)
+ {
+ prebuilt->idx_cond_func= index_cond_func_innodb;
+ prebuilt->idx_cond_func_arg= file;
+ }
+ else
+ {
+ prebuilt->idx_cond_func= NULL;
+ prebuilt->n_index_fields= n_requested_fields;
+ }
+
if (index != clust_index && prebuilt->need_to_access_clustered) {
/* Change rec_field_no's to correspond to the clustered index
record */
- for (ulint i = 0; i < n_requested_fields; i++) {
+ for (ulint i = do_idx_cond_push? prebuilt->n_index_fields : 0;
+ i < n_requested_fields; i++) {
templ = prebuilt->mysql_template + i;
-
templ->rec_field_no = templ->clust_rec_field_no;
}
}
@@ -5068,7 +5214,7 @@ no_commit:
/* Altering to InnoDB format */
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
/* We will need an IX lock on the destination table. */
prebuilt->sql_stat_start = TRUE;
} else {
@@ -5084,7 +5230,7 @@ no_commit:
locks, so they have to be acquired again. */
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
/* Re-acquire the table lock on the source table. */
row_lock_table_for_mysql(prebuilt, src_table, mode);
/* We will need an IX lock on the destination table. */
@@ -5130,7 +5276,7 @@ no_commit:
/* Build the template used in converting quickly between
the two database formats */
- build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
+ build_template(prebuilt, NULL, table, this, ROW_MYSQL_WHOLE_ROW);
}
innodb_srv_conc_enter_innodb(prebuilt->trx);
@@ -5669,6 +5815,8 @@ ha_innobase::index_end(void)
int error = 0;
DBUG_ENTER("index_end");
active_index=MAX_KEY;
+ in_range_check_pushed_down= FALSE;
+ ds_mrr.dsmrr_close();
DBUG_RETURN(error);
}
@@ -5829,7 +5977,8 @@ ha_innobase::index_read(
necessarily prebuilt->index, but can also be the clustered index */
if (prebuilt->sql_stat_start) {
- build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
+ build_template(prebuilt, user_thd, table, this,
+ ROW_MYSQL_REC_FIELDS);
}
if (key_ptr) {
@@ -6044,7 +6193,7 @@ ha_innobase::change_active_index(
the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
copying. Starting from MySQL-4.1 we use a more efficient flag here. */
- build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
+ build_template(prebuilt, user_thd, table, this, ROW_MYSQL_REC_FIELDS);
DBUG_RETURN(0);
}
@@ -8240,6 +8389,7 @@ ha_innobase::info_low(
}
stats.check_time = 0;
+ stats.mrr_length_per_rec= ref_length + 8; // 8 = max(sizeof(void *));
if (stats.records == 0) {
stats.mean_rec_length = 0;
@@ -8438,7 +8588,7 @@ ha_innobase::check(
/* Build the template; we will use a dummy template
in index scans done in checking */
- build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
+ build_template(prebuilt, NULL, table, this, ROW_MYSQL_WHOLE_ROW);
}
if (prebuilt->table->ibd_file_missing) {
@@ -8931,6 +9081,11 @@ ha_innobase::extra(
break;
case HA_EXTRA_RESET_STATE:
reset_template(prebuilt);
+ /* Reset index condition pushdown state */
+ pushed_idx_cond= FALSE;
+ pushed_idx_cond_keyno= MAX_KEY;
+ prebuilt->idx_cond_func= NULL;
+ in_range_check_pushed_down= FALSE;
break;
case HA_EXTRA_NO_KEYREAD:
prebuilt->read_just_key = 0;
@@ -8978,6 +9133,13 @@ ha_innobase::reset()
reset_template(prebuilt);
+ /* Reset index condition pushdown state */
+ pushed_idx_cond_keyno= MAX_KEY;
+ pushed_idx_cond= NULL;
+ in_range_check_pushed_down= FALSE;
+ ds_mrr.dsmrr_close();
+ prebuilt->idx_cond_func= NULL;
+
/* TODO: This should really be reset in reset_template() but for now
it's safer to do it explicitly here. */
@@ -9061,10 +9223,10 @@ ha_innobase::start_stmt(
trx->detailed_error[0] = '\0';
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
} else {
innobase_register_stmt(ht, thd);
}
@@ -9162,10 +9324,10 @@ ha_innobase::external_lock(
/* Set the MySQL flag to mark that there is an active
transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
} else if (trx->n_mysql_tables_in_use == 0) {
innobase_register_stmt(ht, thd);
}
@@ -9263,7 +9425,7 @@ ha_innobase::external_lock(
prebuilt->used_in_HANDLER = FALSE;
if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
- if (trx->active_trans != 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) != 0) {
innobase_commit(ht, thd, TRUE);
}
} else {
@@ -9348,10 +9510,10 @@ ha_innobase::transactional_table_lock(
/* MySQL is setting a new transactional table lock */
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
@@ -10405,10 +10567,11 @@ innobase_xa_prepare(
innobase_release_stat_resources(trx);
- if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0 &&
+ trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but trx->conc_state != "
- "TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL == 0, but"
+ " trx->conc_state != TRX_NOT_STARTED");
}
if (all
@@ -10417,7 +10580,7 @@ innobase_xa_prepare(
/* We were instructed to prepare the whole transaction, or
this is an SQL statement end and autocommit is on */
- ut_ad(trx->active_trans);
+ ut_ad(trx->active_flag & TRX_ACTIVE_IN_MYSQL);
error = (int) trx_prepare_for_mysql(trx);
} else {
@@ -10441,36 +10604,6 @@ innobase_xa_prepare(
srv_active_wake_master_thread();
- if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
- (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
- {
- if (srv_enable_unsafe_group_commit && !THDVAR(thd, support_xa)) {
- /* choose group commit rather than binlog order */
- return(error);
- }
-
- /* For ibbackup to work the order of transactions in binlog
- and InnoDB must be the same. Consider the situation
-
- thread1> prepare; write to binlog; ...
- <context switch>
- thread2> prepare; write to binlog; commit
- thread1> ... commit
-
- To ensure this will not happen we're taking the mutex on
- prepare, and releasing it on commit.
-
- Note: only do it for normal commits, done via ha_commit_trans.
- If 2pc protocol is executed by external transaction
- coordinator, it will be just a regular MySQL client
- executing XA PREPARE and XA COMMIT commands.
- In this case we cannot know how many minutes or hours
- will be between XA PREPARE and XA COMMIT, and we don't want
- to block for undefined period of time. */
- pthread_mutex_lock(&prepare_commit_mutex);
- trx->active_trans = 2;
- }
-
return(error);
}
@@ -11760,9 +11893,10 @@ static MYSQL_SYSVAR_ENUM(adaptive_checkpoint, srv_adaptive_checkpoint,
"Enable/Disable flushing along modified age. (none, reflex, [estimate], keep_average)",
NULL, innodb_adaptive_checkpoint_update, 2, &adaptive_checkpoint_typelib);
-static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_enable_unsafe_group_commit,
+static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_deprecated_enable_unsafe_group_commit,
PLUGIN_VAR_RQCMDARG,
- "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
+ "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine. "
+ "(Deprecated, and does nothing, group commit is always enabled in a safe way)",
NULL, NULL, 0, 0, 1, 0);
static MYSQL_SYSVAR_ULONG(expand_import, srv_expand_import,
@@ -12105,3 +12239,106 @@ test_innobase_convert_name()
}
#endif /* UNIV_COMPILE_TEST_FUNCS */
+
+
+/****************************************************************************
+ * DS-MRR implementation
+ ***************************************************************************/
+
+/**
+ * Multi Range Read interface, DS-MRR calls
+ */
+
+int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
+{
+ return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
+}
+
+int ha_innobase::multi_range_read_next(range_id_t *range_info)
+{
+ return ds_mrr.dsmrr_next(range_info);
+}
+
+ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *flags,
+ COST_VECT *cost)
+{
+ /* See comments in ha_myisam::multi_range_read_info_const */
+ ds_mrr.init(this, table);
+
+ if (prebuilt->select_lock_type != LOCK_NONE)
+ *flags |= HA_MRR_USE_DEFAULT_IMPL;
+
+ ha_rows res= ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges,
+ bufsz, flags, cost);
+ return res;
+}
+
+ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost)
+{
+ ds_mrr.init(this, table);
+ ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
+ flags, cost);
+ return res;
+}
+
+int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str, size_t size)
+{
+ return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
+}
+
+/*
+ A helper function used only in index_cond_func_innodb
+*/
+
+bool ha_innobase::is_thd_killed()
+{
+ return thd_killed(user_thd);
+}
+
+/**
+ * Index Condition Pushdown interface implementation
+ */
+
+C_MODE_START
+
+/*
+ Index condition check function to be called from within Innobase.
+ See note on ICP_RESULT for return values description.
+*/
+
+static xtradb_icp_result_t index_cond_func_innodb(void *arg)
+{
+ ha_innobase *h= (ha_innobase*)arg;
+ if (h->is_thd_killed())
+ return XTRADB_ICP_ABORTED_BY_USER;
+
+ if (h->end_range)
+ {
+ if (h->compare_key2(h->end_range) > 0)
+ return XTRADB_ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */
+ }
+ return h->pushed_idx_cond->val_int()? XTRADB_ICP_MATCH : XTRADB_ICP_NO_MATCH;
+}
+
+C_MODE_END
+
+
+Item *ha_innobase::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
+{
+ if (keyno_arg != primary_key && prebuilt->select_lock_type != LOCK_X)
+ {
+ pushed_idx_cond_keyno= keyno_arg;
+ pushed_idx_cond= idx_cond_arg;
+ in_range_check_pushed_down= TRUE;
+ return NULL; /* Table handler will check the entire condition */
+ }
+ return idx_cond_arg; /* Table handler will not make any checks */
+}
+
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h
index c60a5eae19e..599b48287e3 100644
--- a/storage/xtradb/handler/ha_innodb.h
+++ b/storage/xtradb/handler/ha_innodb.h
@@ -222,6 +222,28 @@ class ha_innobase: public handler
/** @} */
bool check_if_incompatible_data(HA_CREATE_INFO *info,
uint table_changes);
+ bool check_if_supported_virtual_columns(void) { return TRUE; }
+public:
+ /**
+ * Multi Range Read interface
+ */
+ int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode, HANDLER_BUFFER *buf);
+ int multi_range_read_next(range_id_t *range_info);
+ ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+ ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+ int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size);
+ DsMrr_impl ds_mrr;
+
+ Item *idx_cond_push(uint keyno, Item* idx_cond);
+
+ /* A helper function for index_cond_func_innodb: */
+ bool is_thd_killed();
};
/* Some accessor functions which the InnoDB plugin needs, but which
@@ -240,16 +262,6 @@ LEX_STRING *thd_query_string(MYSQL_THD thd);
char **thd_query(MYSQL_THD thd);
#endif
-/** Get the file name of the MySQL binlog.
- * @return the name of the binlog file
- */
-const char* mysql_bin_log_file_name(void);
-
-/** Get the current position of the MySQL binlog.
- * @return byte offset from the beginning of the binlog
- */
-ulonglong mysql_bin_log_file_pos(void);
-
/**
Check if a user thread is a replication slave thread
@param thd user thread
@@ -290,6 +302,11 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd);
#endif /* MYSQL_VERSION_ID > 50140 */
}
+/** Get the file name and position of the MySQL binlog corresponding to the
+ * current commit.
+ */
+extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
+
typedef struct trx_struct trx_t;
/********************************************************************//**
@file handler/ha_innodb.h
diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc
index f6160614686..d989ce87aa3 100644
--- a/storage/xtradb/handler/i_s.cc
+++ b/storage/xtradb/handler/i_s.cc
@@ -168,7 +168,7 @@ field_store_time_t(
my_time.time_type = MYSQL_TIMESTAMP_DATETIME;
#endif
- return(field->store_time(&my_time, MYSQL_TIMESTAMP_DATETIME));
+ return(field->store_time(&my_time));
}
/*******************************************************************//**
diff --git a/storage/xtradb/include/db0err.h b/storage/xtradb/include/db0err.h
index c7fa6d2a444..220878629fb 100644
--- a/storage/xtradb/include/db0err.h
+++ b/storage/xtradb/include/db0err.h
@@ -105,7 +105,8 @@ enum db_err {
DB_STRONG_FAIL,
DB_ZIP_OVERFLOW,
DB_RECORD_NOT_FOUND = 1500,
- DB_END_OF_INDEX
+ DB_END_OF_INDEX,
+ DB_SEARCH_ABORTED_BY_USER= 1533
};
#endif
diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h
index 2b4b34f2600..1bca9029648 100644
--- a/storage/xtradb/include/log0log.h
+++ b/storage/xtradb/include/log0log.h
@@ -249,12 +249,15 @@ log_checkpoint(
/*===========*/
ibool sync, /*!< in: TRUE if synchronous operation is
desired */
- ibool write_always); /*!< in: the function normally checks if the
+ ibool write_always, /*!< in: the function normally checks if the
the new checkpoint would have a greater
lsn than the previous one: if not, then no
physical write is done; by setting this
parameter TRUE, a physical write will always be
made to log files */
+ ibool safe_to_ignore);/*!< in: TRUE if checkpoint can be ignored in
+ the case checkpoints are disabled */
+
/****************************************************************//**
Makes a checkpoint at a given lsn or later. */
UNIV_INTERN
@@ -272,6 +275,18 @@ log_make_checkpoint_at(
physical write will always be made to
log files */
/****************************************************************//**
+Disable checkpoints. This is used when doing a volume snapshot
+to ensure that we don't get a checkpoint between snapshotting two
+different volumes */
+UNIV_INTERN
+ibool log_disable_checkpoint();
+
+/****************************************************************//**
+Enable checkpoints that were disabled with log_disable_checkpoint() */
+UNIV_INTERN
+void log_enable_checkpoint();
+
+/****************************************************************//**
Makes a checkpoint at the latest lsn and writes it to first page of each
data file in the database, so that we know that the file spaces contain
all modifications up to that lsn. This can only be called at database
diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h
index 732e930517b..46bda4c6b45 100644
--- a/storage/xtradb/include/os0file.h
+++ b/storage/xtradb/include/os0file.h
@@ -152,8 +152,8 @@ log. */
#define OS_FILE_LOG 256 /* This can be ORed to type */
/* @} */
-#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /*!< Win NT does not allow more
- than 64 */
+#define OS_AIO_N_PENDING_IOS_PER_THREAD 256 /*!< Windows might be able to handle
+more */
/** Modes for aio operations @{ */
#define OS_AIO_NORMAL 21 /*!< Normal asynchronous i/o not for ibuf
@@ -183,6 +183,10 @@ log. */
#define OS_WIN95 2 /*!< Microsoft Windows 95 */
#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
+#define OS_WINXP 5 /*!< Microsoft Windows XP */
+#define OS_WINVISTA 6 /*!< Microsoft Windows Vista */
+#define OS_WIN7 7 /*!< Microsoft Windows 7 */
+
extern ulint os_n_file_reads;
extern ulint os_n_file_writes;
diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h
index 7366e2c3402..002abebcb0b 100644
--- a/storage/xtradb/include/os0sync.h
+++ b/storage/xtradb/include/os0sync.h
@@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri
#include "univ.i"
#include "ut0lst.h"
-#ifdef __WIN__
-
+#ifdef _WIN32
+/** Native event (slow)*/
+typedef HANDLE os_native_event_t;
/** Native mutex */
-#define os_fast_mutex_t CRITICAL_SECTION
-
-/** Native event */
-typedef HANDLE os_native_event_t;
-
-/** Operating system event */
-typedef struct os_event_struct os_event_struct_t;
-/** Operating system event handle */
-typedef os_event_struct_t* os_event_t;
-
-/** An asynchronous signal sent between threads */
-struct os_event_struct {
- os_native_event_t handle;
- /*!< Windows event */
- UT_LIST_NODE_T(os_event_struct_t) os_event_list;
- /*!< list of all created events */
-};
+typedef CRITICAL_SECTION os_fast_mutex_t;
+/** Native condition variable */
+typedef CONDITION_VARIABLE os_cond_t;
#else
/** Native mutex */
typedef pthread_mutex_t os_fast_mutex_t;
+/** Native condition variable */
+typedef pthread_cond_t os_cond_t;
+#endif
/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
@@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
+#ifdef _WIN32
+ HANDLE handle; /*!< kernel event object, slow, used on older Windows */
+#endif
os_fast_mutex_t os_mutex; /*!< this mutex protects the next
fields */
ibool is_set; /*!< this is TRUE when the event is
@@ -76,12 +69,14 @@ struct os_event_struct {
this event */
ib_int64_t signal_count; /*!< this is incremented each time
the event becomes signaled */
- pthread_cond_t cond_var; /*!< condition variable is used in
+ os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
-#endif
+
+
+
/** Operating system mutex */
typedef struct os_mutex_struct os_mutex_str_t;
@@ -186,33 +181,23 @@ os_event_wait_low(
os_event_reset(). */
#define os_event_wait(event) os_event_wait_low(event, 0)
-
+#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0)
/**********************************************************//**
Waits for an event object until it is in the signaled state or
-a timeout is exceeded.
+a timeout is exceeded. In Unix the timeout is always infinite.
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
UNIV_INTERN
ulint
-os_event_wait_time(
-/*===============*/
- os_event_t event, /*!< in: event to wait */
- ulint wtime); /*!< in: timeout in microseconds, or
- OS_SYNC_INFINITE_TIME */
-#ifdef __WIN__
-/**********************************************************//**
-Waits for any event in an OS native event array. Returns if even a single
-one is signaled or becomes signaled.
-@return index of the event which was signaled */
-UNIV_INTERN
-ulint
-os_event_wait_multiple(
+os_event_wait_time_low(
/*===================*/
- ulint n, /*!< in: number of events in the
- array */
- os_native_event_t* native_event_array);
- /*!< in: pointer to an array of event
- handles */
-#endif
+ os_event_t event, /*!< in: event to wait */
+ ulint time_in_usec, /*!< in: timeout in
+ microseconds, or
+ OS_SYNC_INFINITE_TIME */
+ ib_int64_t reset_sig_count); /*!< in: zero or the value
+ returned by previous call of
+ os_event_reset(). */
+
/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
@@ -385,7 +370,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
# define os_atomic_test_and_set_byte(ptr, new_val) \
atomic_swap_uchar(ptr, new_val)
-#elif defined(HAVE_WINDOWS_ATOMICS)
+#elif defined(_WIN32)
#define HAVE_ATOMIC_BUILTINS
diff --git a/storage/xtradb/include/os0sync.ic b/storage/xtradb/include/os0sync.ic
index 1f3ce38fa65..2c6c1dbe629 100644
--- a/storage/xtradb/include/os0sync.ic
+++ b/storage/xtradb/include/os0sync.ic
@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri
#endif
/**********************************************************//**
-Acquires ownership of a fast mutex. Currently in Windows this is the same
-as os_fast_mutex_lock!
+Acquires ownership of a fast mutex.
@return 0 if success, != 0 if was reserved by another thread */
UNIV_INLINE
ulint
@@ -38,9 +37,9 @@ os_fast_mutex_trylock(
os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
{
#ifdef __WIN__
- EnterCriticalSection(fast_mutex);
-
- return(0);
+ if (TryEnterCriticalSection(fast_mutex))
+ return 0;
+ return(1);
#else
/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
so that it returns 0 on success. In the operating system
diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h
index 4acfd2e793b..cd9dec2f089 100644
--- a/storage/xtradb/include/row0mysql.h
+++ b/storage/xtradb/include/row0mysql.h
@@ -585,7 +585,18 @@ struct mysql_row_templ_struct {
#define ROW_PREBUILT_ALLOCATED 78540783
#define ROW_PREBUILT_FREED 26423527
+
+typedef enum xtradb_icp_result {
+ XTRADB_ICP_ERROR=-1,
+ XTRADB_ICP_NO_MATCH=0,
+ XTRADB_ICP_MATCH=1,
+ XTRADB_ICP_OUT_OF_RANGE=2,
+ XTRADB_ICP_ABORTED_BY_USER=3,
+} xtradb_icp_result_t;
+
+typedef xtradb_icp_result_t (*index_cond_func_t)(void *param);
/** A struct for (sometimes lazily) prebuilt structures in an Innobase table
+
handle used within MySQL; these are used to save CPU time. */
struct row_prebuilt_struct {
@@ -783,6 +794,14 @@ struct row_prebuilt_struct {
/*----------------------*/
ulint magic_n2; /*!< this should be the same as
magic_n */
+ /*----------------------*/
+ index_cond_func_t idx_cond_func;/* Index Condition Pushdown function,
+ or NULL if there is none set */
+ void* idx_cond_func_arg;/* ICP function argument */
+ ulint n_index_fields; /* Number of fields at the start of
+ mysql_template. Valid only when using
+ ICP. */
+ /*----------------------*/
};
#define ROW_PREBUILT_FETCH_MAGIC_N 465765687
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index d4329d16a62..29d88331532 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -112,7 +112,9 @@ extern ulint srv_check_file_format_at_startup;
on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog;
#endif /* !UNIV_HOTBACKUP */
-
+#ifdef __WIN__
+extern ibool srv_use_native_conditions;
+#endif
extern ulint srv_n_data_files;
extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes;
@@ -234,7 +236,7 @@ extern ulong srv_ibuf_active_contract;
extern ulong srv_ibuf_accel_rate;
extern ulint srv_checkpoint_age_target;
extern ulong srv_flush_neighbor_pages;
-extern ulong srv_enable_unsafe_group_commit;
+extern ulong srv_deprecated_enable_unsafe_group_commit;
extern ulong srv_read_ahead;
extern ulong srv_adaptive_checkpoint;
diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h
index f2ff83101ab..6aaab1cc7d7 100644
--- a/storage/xtradb/include/sync0sync.h
+++ b/storage/xtradb/include/sync0sync.h
@@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri
extern my_bool timed_mutexes;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
-#ifdef HAVE_WINDOWS_ATOMICS
+#ifdef _WIN32
typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
on LONG variable */
#else
diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h
index 173c63918d3..8858fe2fafa 100644
--- a/storage/xtradb/include/trx0trx.h
+++ b/storage/xtradb/include/trx0trx.h
@@ -511,9 +511,10 @@ struct trx_struct{
in that case we must flush the log
in trx_commit_complete_for_mysql() */
ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
- ulint active_trans; /*!< 1 - if a transaction in MySQL
- is active. 2 - if prepare_commit_mutex
- was taken */
+ ulint active_flag; /*!< TRX_ACTIVE_IN_MYSQL - set if a
+ transaction in MySQL is active.
+ TRX_ACTIVE_COMMIT_ORDERED - set if
+ innobase_commit_ordered has run */
ulint has_search_latch;
/* TRUE if this trx has latched the
search system latch in S-mode */
@@ -824,6 +825,10 @@ Multiple flags can be combined with bitwise OR. */
#define TRX_SIG_OTHER_SESS 1 /* sent by another session (which
must hold rights to this) */
+/* Flag bits for trx_struct.active_flag */
+#define TRX_ACTIVE_IN_MYSQL (1<<0)
+#define TRX_ACTIVE_COMMIT_ORDERED (1<<1)
+
/** Commit node states */
enum commit_node_state {
COMMIT_NODE_SEND = 1, /*!< about to send a commit signal to
diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c
index c39f60bd4b9..82c6c3da23e 100644
--- a/storage/xtradb/log/log0log.c
+++ b/storage/xtradb/log/log0log.c
@@ -97,6 +97,8 @@ archive */
UNIV_INTERN byte log_archive_io;
#endif /* UNIV_LOG_ARCHIVE */
+UNIV_INTERN ulint log_disable_checkpoint_active= 0;
+
/* A margin for free space in the log buffer before a log entry is catenated */
#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE)
@@ -174,7 +176,7 @@ log_fsp_current_free_limit_set_and_checkpoint(
success = FALSE;
while (!success) {
- success = log_checkpoint(TRUE, TRUE);
+ success = log_checkpoint(TRUE, TRUE, FALSE);
}
}
@@ -1988,12 +1990,14 @@ log_checkpoint(
/*===========*/
ibool sync, /*!< in: TRUE if synchronous operation is
desired */
- ibool write_always) /*!< in: the function normally checks if the
+ ibool write_always, /*!< in: the function normally checks if the
the new checkpoint would have a greater
lsn than the previous one: if not, then no
physical write is done; by setting this
parameter TRUE, a physical write will always be
made to log files */
+ ibool safe_to_ignore) /*!< in: TRUE if checkpoint can be ignored in
+ the case checkpoints are disabled */
{
ib_uint64_t oldest_lsn;
@@ -2024,14 +2028,27 @@ log_checkpoint(
mutex_enter(&(log_sys->mutex));
+ /* Return if this is not a forced checkpoint and either there is no
+ need for a checkpoint or if checkpoints are disabled */
if (!write_always
- && log_sys->last_checkpoint_lsn >= oldest_lsn) {
+ && (log_sys->last_checkpoint_lsn >= oldest_lsn ||
+ (safe_to_ignore && log_disable_checkpoint_active)))
+ {
mutex_exit(&(log_sys->mutex));
return(TRUE);
}
+ if (log_disable_checkpoint_active)
+ {
+ /* Wait until we are allowed to do a checkpoint */
+ mutex_exit(&(log_sys->mutex));
+ rw_lock_s_lock(&(log_sys->checkpoint_lock));
+ rw_lock_s_unlock(&(log_sys->checkpoint_lock));
+ mutex_enter(&(log_sys->mutex));
+ }
+
ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn);
if (log_sys->n_pending_checkpoint_writes > 0) {
@@ -2092,7 +2109,73 @@ log_make_checkpoint_at(
while (!log_preflush_pool_modified_pages(lsn, TRUE));
- while (!log_checkpoint(TRUE, write_always));
+ while (!log_checkpoint(TRUE, write_always, FALSE));
+}
+
+/****************************************************************//**
+Disable checkpoints. This is used when doing a volume snapshot to ensure
+that we don't get a checkpoint between snapshotting two different volumes.
+@return 0 if checkpoints were disabled by this call, 1 if already disabled */
+
+UNIV_INTERN
+ibool log_disable_checkpoint()
+{
+ mutex_enter(&(log_sys->mutex));
+
+ /*
+ Wait if a checkpoint write is running.
+ This is the same code that is used in log_checkpoint() to ensure
+ that two checkpoints are not happening at the same time.
+ */
+ while (log_sys->n_pending_checkpoint_writes > 0)
+ {
+ mutex_exit(&(log_sys->mutex));
+ rw_lock_s_lock(&(log_sys->checkpoint_lock));
+ rw_lock_s_unlock(&(log_sys->checkpoint_lock));
+ mutex_enter(&(log_sys->mutex));
+ }
+ /*
+ The following should never be true; it is here just in case of
+ wrong usage of this function. (Better safe than sorry).
+ */
+
+ if (log_disable_checkpoint_active)
+ {
+ mutex_exit(&(log_sys->mutex));
+ return 1; /* Already disabled */
+ }
+ /*
+ Take the checkpoint lock in X mode to ensure that no checkpoint can
+ run; it stays locked until log_enable_checkpoint() releases it
+ */
+ rw_lock_x_lock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
+ log_disable_checkpoint_active= 1;
+ mutex_exit(&(log_sys->mutex));
+ return 0;
+}
+
+
+/****************************************************************//**
+Re-enable checkpoints that were disabled with log_disable_checkpoint().
+Does nothing if checkpoints are not currently disabled; the shutdown
+code calls this function unconditionally.
+
+Note: log_sys->mutex is not taken here, because a log_checkpoint() call
+that is waiting for log_sys->checkpoint_lock might be holding it.
+This is safe without the mutex, as log_disable_checkpoint_active is only
+changed while log_sys->checkpoint_lock is X-locked.
+*/
+
+UNIV_INTERN
+void log_enable_checkpoint()
+{
+ /* No ut_ad() on the flag: logs_empty_and_mark_files_at_shutdown()
+ calls this unconditionally, so "already enabled" must be a no-op */
+ if (log_disable_checkpoint_active)
+ {
+ log_disable_checkpoint_active= 0;
+ rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
+ }
}
/****************************************************************//**
@@ -2189,7 +2272,7 @@ loop:
}
if (do_checkpoint) {
- log_checkpoint(checkpoint_sync, FALSE);
+ log_checkpoint(checkpoint_sync, FALSE, FALSE);
if (checkpoint_sync) {
@@ -3099,6 +3182,10 @@ logs_empty_and_mark_files_at_shutdown(void)
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: Starting shutdown...\n");
}
+
+ /* Enable checkpoints if someone had turned them off */
+ log_enable_checkpoint();
+
/* Wait until the master thread and all other operations are idle: our
algorithm only works if the server is idle at shutdown */
diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c
index 5b8e656d8b2..e0acb9abd65 100644
--- a/storage/xtradb/os/os0file.c
+++ b/storage/xtradb/os/os0file.c
@@ -53,6 +53,10 @@ Created 10/21/1995 Heikki Tuuri
# endif /* __WIN__ */
#endif /* !UNIV_HOTBACKUP */
+#ifdef _WIN32
+#define IOCP_SHUTDOWN_KEY (ULONG_PTR)-1
+#endif
+
/* This specifies the file permissions InnoDB uses when it creates files in
Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
my_umask */
@@ -121,6 +125,12 @@ typedef struct os_aio_slot_struct os_aio_slot_t;
/** The asynchronous i/o array slot structure */
struct os_aio_slot_struct{
+#ifdef WIN_ASYNC_IO
+ OVERLAPPED control; /*!< Windows control block for the
+ aio request, MUST be first element in the structure*/
+ void *arr; /*!< Array this slot belongs to*/
+#endif
+
ibool is_read; /*!< TRUE if a read operation */
ulint pos; /*!< index of the slot in the aio
array */
@@ -148,12 +158,6 @@ struct os_aio_slot_struct{
and which can be used to identify
which pending aio operation was
completed */
-#ifdef WIN_ASYNC_IO
- os_event_t event; /*!< event object we need in the
- OVERLAPPED struct */
- OVERLAPPED control; /*!< Windows control block for the
- aio request */
-#endif
};
/** The asynchronous i/o array structure */
@@ -182,15 +186,6 @@ struct os_aio_array_struct{
/*!< Number of reserved slots in the
aio array outside the ibuf segment */
os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
-#ifdef __WIN__
- os_native_event_t* native_events;
- /*!< Pointer to an array of OS native
- event handles where we copied the
- handles from slots, in the same
- order. This can be used in
- WaitForMultipleObjects; used only in
- Windows */
-#endif
};
/** Array of events used in simulated aio */
@@ -250,6 +245,14 @@ UNIV_INTERN ulint os_n_pending_writes = 0;
/** Number of pending read operations */
UNIV_INTERN ulint os_n_pending_reads = 0;
+
+#ifdef _WIN32
+/** IO completion port used by background io threads */
+static HANDLE completion_port;
+/** Thread local storage index for the per-thread event used for synchronous IO */
+static DWORD tls_sync_io = TLS_OUT_OF_INDEXES;
+#endif
+
/***********************************************************************//**
Gets the operating system version. Currently works only on Windows.
@return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */
@@ -270,10 +273,16 @@ os_get_os_version(void)
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
return(OS_WIN95);
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
- if (os_info.dwMajorVersion <= 4) {
- return(OS_WINNT);
- } else {
- return(OS_WIN2000);
+ switch(os_info.dwMajorVersion){
+ case 3:
+ case 4:
+ return OS_WINNT;
+ case 5:
+ return (os_info.dwMinorVersion == 0)?OS_WIN2000 : OS_WINXP;
+ case 6:
+ return (os_info.dwMinorVersion == 0)?OS_WINVISTA : OS_WIN7;
+ default:
+ return OS_WIN7;
}
} else {
ut_error;
@@ -286,6 +295,86 @@ os_get_os_version(void)
#endif
}
+
+#ifdef _WIN32
+/*
+Windows : Handling synchronous IO on files opened asynchronously.
+
+If file is opened for asynchronous IO (FILE_FLAG_OVERLAPPED) and also bound to
+a completion port, then every IO on this file would normally be enqueued to the
+completion port. Sometimes however we would like to do a synchronous IO. This is
+possible if we initialize overlapped.hEvent with a valid event and set its
+lowest order bit to 1 (see MSDN ReadFile and WriteFile description for more info)
+
+We'll create this special event once for each thread and store in thread local
+storage.
+*/
+
+
+/***********************************************************************//**
+Allocate the TLS index for the per-thread event handle used for synchronous
+IO on files that might be opened with FILE_FLAG_OVERLAPPED.
+*/
+static void win_init_syncio_event()
+{
+ tls_sync_io = TlsAlloc();
+ ut_a(tls_sync_io != TLS_OUT_OF_INDEXES); /* TlsAlloc() must succeed */
+}
+
+/***********************************************************************//**
+Retrieve (creating it on first use) the calling thread's event for doing
+synchronous IO on asynchronously opened files; the handle has its low bit set */
+static HANDLE win_get_syncio_event()
+{
+ HANDLE h;
+ if(tls_sync_io == TLS_OUT_OF_INDEXES){
+ win_init_syncio_event(); /* TLS index not allocated yet */
+ }
+
+ h = (HANDLE)TlsGetValue(tls_sync_io);
+ if (h)
+ return h; /* this thread already has its event */
+ h = CreateEventA(NULL, FALSE, FALSE, NULL);
+ ut_a(h);
+ h = (HANDLE)((uintptr_t)h | 1); /* set lowest order bit, see comment above */
+ TlsSetValue(tls_sync_io, h);
+ return h;
+}
+
+/*
+ TLS destructor, inspired by Chromium code
+ http://src.chromium.org/svn/trunk/src/base/threading/thread_local_storage_win.cc
+*/
+
+static void win_free_syncio_event()
+{
+ /* Close this thread's sync IO event if one exists; unlike
+ win_get_syncio_event(), never create an event just to close it */
+ HANDLE h = (tls_sync_io == TLS_OUT_OF_INDEXES) ? NULL : (HANDLE)TlsGetValue(tls_sync_io);
+ if (h) CloseHandle(h);
+}
+
+static void NTAPI win_tls_thread_exit(PVOID module, DWORD reason, PVOID reserved) {
+ if (DLL_THREAD_DETACH == reason || DLL_PROCESS_DETACH == reason)
+ win_free_syncio_event();
+}
+
+#ifdef _WIN64
+#pragma comment(linker, "/INCLUDE:_tls_used")
+#pragma comment(linker, "/INCLUDE:p_thread_callback_base")
+#pragma const_seg(".CRT$XLB")
+extern const PIMAGE_TLS_CALLBACK p_thread_callback_base;
+const PIMAGE_TLS_CALLBACK p_thread_callback_base = win_tls_thread_exit; /* TLS callback entry in .CRT$XLB */
+#pragma const_seg() /* reset the segment opened by const_seg above (was data_seg) */
+#else
+#pragma comment(linker, "/INCLUDE:__tls_used")
+#pragma comment(linker, "/INCLUDE:_p_thread_callback_base")
+#pragma data_seg(".CRT$XLB")
+PIMAGE_TLS_CALLBACK p_thread_callback_base = win_tls_thread_exit; /* TLS callback entry in .CRT$XLB */
+#pragma data_seg()
+#endif
+#endif /*_WIN32 */
+
/***********************************************************************//**
Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may
@@ -611,6 +700,9 @@ os_io_init_simple(void)
for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
os_file_seek_mutexes[i] = os_mutex_create(NULL);
}
+#ifdef _WIN32
+ win_init_syncio_event();
+#endif
}
/***********************************************************************//**
@@ -1358,6 +1450,16 @@ try_again:
ut_error;
}
+ if (type == OS_LOG_FILE) {
+ if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
+ /* Map O_DSYNC to WRITE_THROUGH */
+ attributes |= FILE_FLAG_WRITE_THROUGH;
+ } else if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
+ /* Open log file without buffering */
+ attributes |= FILE_FLAG_NO_BUFFERING;
+ }
+ }
+
file = CreateFile((LPCTSTR) name,
GENERIC_READ | GENERIC_WRITE, /* read and write
access */
@@ -1402,6 +1504,9 @@ try_again:
}
} else {
*success = TRUE;
+ if (os_aio_use_native_aio && ((attributes & FILE_FLAG_OVERLAPPED) != 0)) {
+ ut_a(CreateIoCompletionPort(file, completion_port, 0, 0));
+ }
}
return(file);
@@ -2350,13 +2455,9 @@ _os_file_read(
#ifdef __WIN__
BOOL ret;
DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
ibool retry;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
+ OVERLAPPED overlapped;
+
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
@@ -2371,41 +2472,21 @@ try_again:
ut_ad(buf);
ut_ad(n > 0);
- low = (DWORD) offset;
- high = (DWORD) offset_high;
-
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
-#ifndef UNIV_HOTBACKUP
- /* Protect the seek / read operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- goto error_handling;
+ memset (&overlapped, 0, sizeof (overlapped));
+ overlapped.Offset = (DWORD)offset;
+ overlapped.OffsetHigh = (DWORD)offset_high;
+ overlapped.hEvent = win_get_syncio_event();
+ ret = ReadFile(file, buf, n, NULL, &overlapped);
+ if (ret) {
+ ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
}
-
- ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
+ else if(GetLastError() == ERROR_IO_PENDING) {
+ ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
+ }
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
@@ -2433,9 +2514,6 @@ try_again:
(ulong)n, (ulong)offset_high,
(ulong)offset, (long)ret);
#endif /* __WIN__ */
-#ifdef __WIN__
-error_handling:
-#endif
retry = os_file_handle_error(NULL, "read");
if (retry) {
@@ -2477,13 +2555,11 @@ os_file_read_no_error_handling(
#ifdef __WIN__
BOOL ret;
DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
ibool retry;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
+ OVERLAPPED overlapped;
+ overlapped.Offset = (DWORD)offset;
+ overlapped.OffsetHigh = (DWORD)offset_high;
+
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
@@ -2498,41 +2574,21 @@ try_again:
ut_ad(buf);
ut_ad(n > 0);
- low = (DWORD) offset;
- high = (DWORD) offset_high;
-
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
-#ifndef UNIV_HOTBACKUP
- /* Protect the seek / read operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- goto error_handling;
+ memset (&overlapped, 0, sizeof (overlapped));
+ overlapped.Offset = (DWORD)offset;
+ overlapped.OffsetHigh = (DWORD)offset_high;
+ overlapped.hEvent = win_get_syncio_event();
+ ret = ReadFile(file, buf, n, NULL, &overlapped);
+ if (ret) {
+ ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
}
-
- ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
+ else if(GetLastError() == ERROR_IO_PENDING) {
+ ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
+ }
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
@@ -2554,9 +2610,6 @@ try_again:
return(TRUE);
}
#endif /* __WIN__ */
-#ifdef __WIN__
-error_handling:
-#endif
retry = os_file_handle_error_no_exit(NULL, "read");
if (retry) {
@@ -2609,14 +2662,9 @@ os_file_write(
#ifdef __WIN__
BOOL ret;
DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
ulint n_retries = 0;
ulint err;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
+ OVERLAPPED overlapped;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
@@ -2629,64 +2677,23 @@ os_file_write(
ut_ad(buf);
ut_ad(n > 0);
retry:
- low = (DWORD) offset;
- high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes++;
os_mutex_exit(os_file_count_mutex);
-#ifndef UNIV_HOTBACKUP
- /* Protect the seek / write operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_writes--;
- os_mutex_exit(os_file_count_mutex);
+ memset (&overlapped, 0, sizeof (overlapped));
+ overlapped.Offset = (DWORD)offset;
+ overlapped.OffsetHigh = (DWORD)offset_high;
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: File pointer positioning to"
- " file %s failed at\n"
- "InnoDB: offset %lu %lu. Operating system"
- " error number %lu.\n"
- "InnoDB: Some operating system error numbers"
- " are described at\n"
- "InnoDB: "
- REFMAN "operating-system-error-codes.html\n",
- name, (ulong) offset_high, (ulong) offset,
- (ulong) GetLastError());
-
- return(FALSE);
+ overlapped.hEvent = win_get_syncio_event();
+ ret = WriteFile(file, buf, n, NULL, &overlapped);
+ if (ret) {
+ ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
}
-
- ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
-
- /* Always do fsync to reduce the probability that when the OS crashes,
- a database page is only partially physically written to disk. */
-
-# ifdef UNIV_DO_FLUSH
- if (!os_do_not_call_flush_at_each_write) {
- ut_a(TRUE == os_file_flush(file));
+ else if(GetLastError() == ERROR_IO_PENDING) {
+ ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
}
-# endif /* UNIV_DO_FLUSH */
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex);
@@ -3071,9 +3078,6 @@ os_aio_array_create(
os_aio_array_t* array;
ulint i;
os_aio_slot_t* slot;
-#ifdef WIN_ASYNC_IO
- OVERLAPPED* over;
-#endif
ut_a(n > 0);
ut_a(n_segments > 0);
@@ -3089,23 +3093,12 @@ os_aio_array_create(
array->n_segments = n_segments;
array->n_reserved = 0;
array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
-#ifdef __WIN__
- array->native_events = ut_malloc(n * sizeof(os_native_event_t));
-#endif
+
for (i = 0; i < n; i++) {
slot = os_aio_array_get_nth_slot(array, i);
-
slot->pos = i;
slot->reserved = FALSE;
-#ifdef WIN_ASYNC_IO
- slot->event = os_event_create(NULL);
-
- over = &(slot->control);
- over->hEvent = slot->event->handle;
-
- *((array->native_events) + i) = over->hEvent;
-#endif
}
return(array);
@@ -3119,18 +3112,7 @@ os_aio_array_free(
/*==============*/
os_aio_array_t* array) /*!< in, own: array to free */
{
-#ifdef WIN_ASYNC_IO
- ulint i;
- for (i = 0; i < array->n_slots; i++) {
- os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
- os_event_free(slot->event);
- }
-#endif /* WIN_ASYNC_IO */
-
-#ifdef __WIN__
- ut_free(array->native_events);
-#endif /* __WIN__ */
os_mutex_free(array->mutex);
os_event_free(array->not_full);
os_event_free(array->is_empty);
@@ -3209,7 +3191,11 @@ os_aio_init(
}
os_last_printout = time(NULL);
-
+#ifdef _WIN32
+ ut_a(completion_port == 0);
+ completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
+ ut_a(completion_port);
+#endif
}
/***********************************************************************
@@ -3251,11 +3237,9 @@ os_aio_array_wake_win_aio_at_shutdown(
/*==================================*/
os_aio_array_t* array) /*!< in: aio array */
{
- ulint i;
-
- for (i = 0; i < array->n_slots; i++) {
-
- os_event_set((array->slots + i)->event);
+ if(completion_port)
+ {
+ PostQueuedCompletionStatus(completion_port, 0, IOCP_SHUTDOWN_KEY, NULL);
}
}
#endif
@@ -3480,7 +3464,8 @@ found:
control = &(slot->control);
control->Offset = (DWORD)offset;
control->OffsetHigh = (DWORD)offset_high;
- os_event_reset(slot->event);
+ control->hEvent = 0;
+ slot->arr = array;
#endif
os_mutex_exit(array->mutex);
@@ -3517,9 +3502,6 @@ os_aio_array_free_slot(
os_event_set(array->is_empty);
}
-#ifdef WIN_ASYNC_IO
- os_event_reset(slot->event);
-#endif
os_mutex_exit(array->mutex);
}
@@ -3689,12 +3671,8 @@ os_aio(
os_aio_array_t* array;
os_aio_slot_t* slot;
#ifdef WIN_ASYNC_IO
- ibool retval;
- BOOL ret = TRUE;
DWORD len = (DWORD) n;
- struct fil_node_struct * dummy_mess1;
- void* dummy_mess2;
- ulint dummy_type;
+ BOOL ret;
#endif
ulint err = 0;
ibool retry;
@@ -3713,26 +3691,23 @@ os_aio(
wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
- if (mode == OS_AIO_SYNC
-#ifdef WIN_ASYNC_IO
- && !os_aio_use_native_aio
-#endif
- ) {
+ if (mode == OS_AIO_SYNC)
+ {
+ ibool ret;
/* This is actually an ordinary synchronous read or write:
- no need to use an i/o-handler thread. NOTE that if we use
- Windows async i/o, Windows does not allow us to use
- ordinary synchronous os_file_read etc. on the same file,
- therefore we have built a special mechanism for synchronous
- wait in the Windows case. */
+ no need to use an i/o-handler thread */
if (type == OS_FILE_READ) {
- return(_os_file_read(file, buf, offset,
- offset_high, n, trx));
+ ret = _os_file_read(file, buf, offset,
+ offset_high, n, trx);
}
+ else {
+ ut_a(type == OS_FILE_WRITE);
- ut_a(type == OS_FILE_WRITE);
-
- return(os_file_write(name, file, buf, offset, offset_high, n));
+ ret = os_file_write(name, file, buf, offset, offset_high, n);
+ }
+ ut_a(ret);
+ return ret;
}
try_again:
@@ -3775,6 +3750,8 @@ try_again:
ret = ReadFile(file, buf, (DWORD)n, &len,
&(slot->control));
+ if(!ret && GetLastError() != ERROR_IO_PENDING)
+ err = 1;
#endif
} else {
if (!wake_later) {
@@ -3789,6 +3766,8 @@ try_again:
os_n_file_writes++;
ret = WriteFile(file, buf, (DWORD)n, &len,
&(slot->control));
+ if(!ret && GetLastError() != ERROR_IO_PENDING)
+ err = 1;
#endif
} else {
if (!wake_later) {
@@ -3801,34 +3780,7 @@ try_again:
ut_error;
}
-#ifdef WIN_ASYNC_IO
- if (os_aio_use_native_aio) {
- if ((ret && len == n)
- || (!ret && GetLastError() == ERROR_IO_PENDING)) {
- /* aio was queued successfully! */
-
- if (mode == OS_AIO_SYNC) {
- /* We want a synchronous i/o operation on a
- file where we also use async i/o: in Windows
- we must use the same wait mechanism as for
- async i/o */
-
- retval = os_aio_windows_handle(ULINT_UNDEFINED,
- slot->pos,
- &dummy_mess1,
- &dummy_mess2,
- &dummy_type,
- &space_id);
-
- return(retval);
- }
- return(TRUE);
- }
-
- err = 1; /* Fall through the next if */
- }
-#endif
if (err == 0) {
/* aio was queued successfully! */
@@ -3881,52 +3833,26 @@ os_aio_windows_handle(
ulint* space_id)
{
ulint orig_seg = segment;
- os_aio_array_t* array;
os_aio_slot_t* slot;
- ulint n;
- ulint i;
ibool ret_val;
BOOL ret;
DWORD len;
BOOL retry = FALSE;
+ ULONG_PTR key;
- if (segment == ULINT_UNDEFINED) {
- array = os_aio_sync_array;
- segment = 0;
- } else {
- segment = os_aio_get_array_and_local_segment(&array, segment);
- }
-
- /* NOTE! We only access constant fields in os_aio_array. Therefore
- we do not have to acquire the protecting mutex yet */
-
- ut_ad(os_aio_validate());
- ut_ad(segment < array->n_segments);
+ ret = GetQueuedCompletionStatus(completion_port, &len, &key,
+ (OVERLAPPED **)&slot, INFINITE);
- n = array->n_slots;
-
- if (array == os_aio_sync_array) {
- os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
- i = pos;
- } else {
- srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
- i = os_event_wait_multiple(n,
- (array->native_events)
- );
+ /* If shutdown key was received, repost the shutdown message and exit */
+ if (ret && (key == IOCP_SHUTDOWN_KEY)) {
+ PostQueuedCompletionStatus(completion_port, 0, key, NULL);
+ os_thread_exit(NULL);
}
- os_mutex_enter(array->mutex);
-
- slot = os_aio_array_get_nth_slot(array, i);
-
- ut_a(slot->reserved);
-
- if (orig_seg != ULINT_UNDEFINED) {
- srv_set_io_thread_op_info(orig_seg,
- "get windows aio return value");
+ if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
+ os_thread_exit(NULL);
}
- ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE);
*message1 = slot->message1;
*message2 = slot->message2;
@@ -3951,8 +3877,6 @@ os_aio_windows_handle(
ret_val = FALSE;
}
- os_mutex_exit(array->mutex);
-
if (retry) {
/* retry failed read/write operation synchronously.
No need to hold array->mutex. */
@@ -3961,37 +3885,19 @@ os_aio_windows_handle(
switch (slot->type) {
case OS_FILE_WRITE:
- ret = WriteFile(slot->file, slot->buf,
- (DWORD) slot->len, &len,
- &(slot->control));
-
+ ret_val = os_file_write(slot->name, slot->file, slot->buf,
+ slot->control.Offset, slot->control.OffsetHigh, slot->len);
break;
case OS_FILE_READ:
- ret = ReadFile(slot->file, slot->buf,
- (DWORD) slot->len, &len,
- &(slot->control));
-
+ ret_val = os_file_read(slot->file, slot->buf,
+ slot->control.Offset, slot->control.OffsetHigh, slot->len);
break;
default:
ut_error;
}
-
- if (!ret && GetLastError() == ERROR_IO_PENDING) {
- /* aio was queued successfully!
- We want a synchronous i/o operation on a
- file where we also use async i/o: in Windows
- we must use the same wait mechanism as for
- async i/o */
-
- ret = GetOverlappedResult(slot->file,
- &(slot->control),
- &len, TRUE);
- }
-
- ret_val = ret && len == slot->len;
}
- os_aio_array_free_slot(array, slot);
+ os_aio_array_free_slot((os_aio_array_t *)slot->arr, slot);
return(ret_val);
}
diff --git a/storage/xtradb/os/os0sync.c b/storage/xtradb/os/os0sync.c
index dba997927cb..12153c309c0 100644
--- a/storage/xtradb/os/os0sync.c
+++ b/storage/xtradb/os/os0sync.c
@@ -38,6 +38,7 @@ Created 9/6/1995 Heikki Tuuri
#include "ut0mem.h"
#include "srv0start.h"
+#include "srv0srv.h"
/* Type definition for an operating system mutex struct */
struct os_mutex_struct{
@@ -74,11 +75,227 @@ UNIV_INTERN ulint os_event_count = 0;
UNIV_INTERN ulint os_mutex_count = 0;
UNIV_INTERN ulint os_fast_mutex_count = 0;
+/* The number of microseconds in a second. */
+static const ulint MICROSECS_IN_A_SECOND = 1000000;
+
/* Because a mutex is embedded inside an event and there is an
event embedded inside a mutex, on free, this generates a recursive call.
This version of the free event function doesn't acquire the global lock */
static void os_event_free_internal(os_event_t event);
+/* On Windows (Vista and later), load function pointers for condition
+variable handling. Those functions are not available in prior versions,
+so we have to use them via runtime loading, as long as we support XP. */
+static void os_cond_module_init(void);
+
+#ifdef __WIN__
+/* Prototypes and function pointers for condition variable functions */
+typedef VOID (WINAPI* InitializeConditionVariableProc)
+ (PCONDITION_VARIABLE ConditionVariable);
+static InitializeConditionVariableProc initialize_condition_variable;
+
+typedef BOOL (WINAPI* SleepConditionVariableCSProc)
+ (PCONDITION_VARIABLE ConditionVariable,
+ PCRITICAL_SECTION CriticalSection,
+ DWORD dwMilliseconds);
+static SleepConditionVariableCSProc sleep_condition_variable;
+
+typedef VOID (WINAPI* WakeAllConditionVariableProc)
+ (PCONDITION_VARIABLE ConditionVariable);
+static WakeAllConditionVariableProc wake_all_condition_variable;
+
+typedef VOID (WINAPI* WakeConditionVariableProc)
+ (PCONDITION_VARIABLE ConditionVariable);
+static WakeConditionVariableProc wake_condition_variable;
+#endif
+
+/*********************************************************//**
+Initialize a condition variable; asserts if cond is NULL or init is not possible */
+UNIV_INLINE
+void
+os_cond_init(
+/*=========*/
+ os_cond_t* cond) /*!< in: condition variable. */
+{
+ ut_a(cond);
+
+#ifdef __WIN__
+ ut_a(initialize_condition_variable != NULL);
+ initialize_condition_variable(cond);
+#else
+ ut_a(pthread_cond_init(cond, NULL) == 0);
+#endif
+}
+
+/*********************************************************//**
+Do a timed wait on condition variable.
+@return TRUE if timed out, FALSE otherwise */
+UNIV_INLINE
+ibool
+os_cond_wait_timed(
+/*===============*/
+ os_cond_t* cond, /*!< in: condition variable. */
+ os_fast_mutex_t* mutex, /*!< in: fast mutex */
+#ifndef __WIN__
+ const struct timespec* abstime /*!< in: timeout */
+#else
+ DWORD time_in_ms /*!< in: timeout in
+ milliseconds*/
+#endif /* !__WIN__ */
+)
+{
+#ifdef __WIN__
+ BOOL ret;
+ DWORD err;
+
+ ut_a(sleep_condition_variable != NULL);
+
+ ret = sleep_condition_variable(cond, mutex, time_in_ms);
+
+ if (!ret) {
+ err = GetLastError();
+ /* From http://msdn.microsoft.com/en-us/library/ms686301%28VS.85%29.aspx,
+ "Condition variables are subject to spurious wakeups
+ (those not associated with an explicit wake) and stolen wakeups
+ (another thread manages to run before the woken thread)."
+ Check for both types of timeouts.
+ Conditions are checked by the caller.*/
+ if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) {
+ return(TRUE);
+ }
+ }
+
+ ut_a(ret);
+
+ return(FALSE);
+#else
+ int ret;
+
+ ret = pthread_cond_timedwait(cond, mutex, abstime);
+
+ switch (ret) {
+ case 0:
+ case ETIMEDOUT:
+ /* We play it safe by checking for EINTR even though
+ according to the POSIX documentation it can't return EINTR. */
+ case EINTR:
+ break;
+
+ default:
+ fprintf(stderr, " InnoDB: pthread_cond_timedwait() returned: "
+ "%d: abstime={%lu,%lu}\n",
+ ret, (ulong) abstime->tv_sec, (ulong) abstime->tv_nsec);
+ ut_error;
+ }
+
+ return(ret == ETIMEDOUT);
+#endif
+}
+/*********************************************************//**
+Wait on condition variable */
+UNIV_INLINE
+void
+os_cond_wait(
+/*=========*/
+ os_cond_t* cond, /*!< in: condition variable. */
+ os_fast_mutex_t* mutex) /*!< in: fast mutex */
+{
+ ut_a(cond);
+ ut_a(mutex);
+
+#ifdef __WIN__
+ ut_a(sleep_condition_variable != NULL);
+ ut_a(sleep_condition_variable(cond, mutex, INFINITE));
+#else
+ ut_a(pthread_cond_wait(cond, mutex) == 0);
+#endif
+}
+
+/*********************************************************//**
+Wakes all threads waiting for condition variable */
+UNIV_INLINE
+void
+os_cond_broadcast(
+/*==============*/
+ os_cond_t* cond) /*!< in: condition variable. */
+{
+ ut_a(cond);
+
+#ifdef __WIN__
+ ut_a(wake_all_condition_variable != NULL);
+ wake_all_condition_variable(cond);
+#else
+ ut_a(pthread_cond_broadcast(cond) == 0);
+#endif
+}
+
+/*********************************************************//**
+Wakes one thread waiting for condition variable */
+UNIV_INLINE
+void
+os_cond_signal(
+/*==========*/
+ os_cond_t* cond) /*!< in: condition variable. */
+{
+ ut_a(cond);
+
+#ifdef __WIN__
+ ut_a(wake_condition_variable != NULL);
+ wake_condition_variable(cond);
+#else
+ ut_a(pthread_cond_signal(cond) == 0);
+#endif
+}
+
+/*********************************************************//**
+Destroys condition variable */
+UNIV_INLINE
+void
+os_cond_destroy(
+/*============*/
+ os_cond_t* cond) /*!< in: condition variable. */
+{
+#ifdef __WIN__
+ /* Do nothing */
+#else
+ ut_a(pthread_cond_destroy(cond) == 0);
+#endif
+}
+
+/*********************************************************//**
+On Windows (Vista and later), load function pointers for condition variable
+handling. Those functions are not available in prior versions, so we have to
+use them via runtime loading, as long as we support XP. */
+static
+void
+os_cond_module_init(void)
+/*=====================*/
+{
+#ifdef __WIN__
+ HMODULE h_dll;
+
+ if (!srv_use_native_conditions)
+ return;
+
+ h_dll = GetModuleHandle("kernel32");
+
+ initialize_condition_variable = (InitializeConditionVariableProc)
+ GetProcAddress(h_dll, "InitializeConditionVariable");
+ sleep_condition_variable = (SleepConditionVariableCSProc)
+ GetProcAddress(h_dll, "SleepConditionVariableCS");
+ wake_all_condition_variable = (WakeAllConditionVariableProc)
+ GetProcAddress(h_dll, "WakeAllConditionVariable");
+ wake_condition_variable = (WakeConditionVariableProc)
+ GetProcAddress(h_dll, "WakeConditionVariable");
+
+ /* When using native condition variables, check function pointers */
+ ut_a(initialize_condition_variable);
+ ut_a(sleep_condition_variable);
+ ut_a(wake_all_condition_variable);
+ ut_a(wake_condition_variable);
+#endif
+}
+
/*********************************************************//**
Initializes global event and OS 'slow' mutex lists. */
UNIV_INTERN
@@ -92,6 +309,9 @@ os_sync_init(void)
os_sync_mutex = NULL;
os_sync_mutex_inited = FALSE;
+ /* Now for Windows only */
+ os_cond_module_init();
+
os_sync_mutex = os_mutex_create(NULL);
os_sync_mutex_inited = TRUE;
@@ -146,42 +366,45 @@ os_event_create(
const char* name) /*!< in: the name of the event, if NULL
the event is created without a name */
{
-#ifdef __WIN__
- os_event_t event;
-
- event = ut_malloc(sizeof(struct os_event_struct));
-
- event->handle = CreateEvent(NULL, /* No security attributes */
- TRUE, /* Manual reset */
- FALSE, /* Initial state nonsignaled */
- (LPCTSTR) name);
- if (!event->handle) {
- fprintf(stderr,
- "InnoDB: Could not create a Windows event semaphore;"
- " Windows error %lu\n",
- (ulong) GetLastError());
- }
-#else /* Unix */
os_event_t event;
- UT_NOT_USED(name);
+#ifdef __WIN__
+ if(!srv_use_native_conditions) {
+
+ event = ut_malloc(sizeof(struct os_event_struct));
+
+ event->handle = CreateEvent(NULL,
+ TRUE,
+ FALSE,
+ (LPCTSTR) name);
+ if (!event->handle) {
+ fprintf(stderr,
+ "InnoDB: Could not create a Windows event"
+ " semaphore; Windows error %lu\n",
+ (ulong) GetLastError());
+ }
+ } else /* Windows with condition variables */
+#endif
- event = ut_malloc(sizeof(struct os_event_struct));
+ {
+ UT_NOT_USED(name);
- os_fast_mutex_init(&(event->os_mutex));
+ event = ut_malloc(sizeof(struct os_event_struct));
- ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
+ os_fast_mutex_init(&(event->os_mutex));
- event->is_set = FALSE;
+ os_cond_init(&(event->cond_var));
- /* We return this value in os_event_reset(), which can then be
- be used to pass to the os_event_wait_low(). The value of zero
- is reserved in os_event_wait_low() for the case when the
- caller does not want to pass any signal_count value. To
- distinguish between the two cases we initialize signal_count
- to 1 here. */
- event->signal_count = 1;
-#endif /* __WIN__ */
+ event->is_set = FALSE;
+
+ /* We return this value in os_event_reset(), which can then
+ be used to pass to the os_event_wait_low(). The value of zero
+ is reserved in os_event_wait_low() for the case when the
+ caller does not want to pass any signal_count value. To
+ distinguish between the two cases we initialize signal_count
+ to 1 here. */
+ event->signal_count = 1;
+ }
/* The os_sync_mutex can be NULL because during startup an event
can be created [ because it's embedded in the mutex/rwlock ] before
@@ -211,10 +434,15 @@ os_event_set(
/*=========*/
os_event_t event) /*!< in: event to set */
{
-#ifdef __WIN__
ut_a(event);
- ut_a(SetEvent(event->handle));
-#else
+
+#ifdef __WIN__
+ if (!srv_use_native_conditions) {
+ ut_a(SetEvent(event->handle));
+ return;
+ }
+#endif
+
ut_a(event);
os_fast_mutex_lock(&(event->os_mutex));
@@ -224,11 +452,10 @@ os_event_set(
} else {
event->is_set = TRUE;
event->signal_count += 1;
- ut_a(0 == pthread_cond_broadcast(&(event->cond_var)));
+ os_cond_broadcast(&(event->cond_var));
}
os_fast_mutex_unlock(&(event->os_mutex));
-#endif
}
/**********************************************************//**
@@ -247,12 +474,14 @@ os_event_reset(
{
ib_int64_t ret = 0;
-#ifdef __WIN__
ut_a(event);
- ut_a(ResetEvent(event->handle));
-#else
- ut_a(event);
+#ifdef __WIN__
+ if(!srv_use_native_conditions) {
+ ut_a(ResetEvent(event->handle));
+ return(0);
+ }
+#endif
os_fast_mutex_lock(&(event->os_mutex));
@@ -264,7 +493,6 @@ os_event_reset(
ret = event->signal_count;
os_fast_mutex_unlock(&(event->os_mutex));
-#endif
return(ret);
}
@@ -277,17 +505,20 @@ os_event_free_internal(
os_event_t event) /*!< in: event to free */
{
#ifdef __WIN__
- ut_a(event);
+ if(!srv_use_native_conditions) {
+ ut_a(event);
+ ut_a(CloseHandle(event->handle));
+ } else
+#endif
+ {
+ ut_a(event);
- ut_a(CloseHandle(event->handle));
-#else
- ut_a(event);
+ /* This is to avoid freeing the mutex twice */
+ os_fast_mutex_free(&(event->os_mutex));
- /* This is to avoid freeing the mutex twice */
- os_fast_mutex_free(&(event->os_mutex));
+ os_cond_destroy(&(event->cond_var));
+ }
- ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
-#endif
/* Remove from the list of events */
UT_LIST_REMOVE(os_event_list, os_event_list, event);
@@ -306,16 +537,18 @@ os_event_free(
os_event_t event) /*!< in: event to free */
{
-#ifdef __WIN__
ut_a(event);
+#ifdef __WIN__
+ if(!srv_use_native_conditions){
+ ut_a(CloseHandle(event->handle));
+ } else /*Windows with condition variables */
+#endif
+ {
+ os_fast_mutex_free(&(event->os_mutex));
- ut_a(CloseHandle(event->handle));
-#else
- ut_a(event);
+ os_cond_destroy(&(event->cond_var));
+ }
- os_fast_mutex_free(&(event->os_mutex));
- ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
-#endif
/* Remove from the list of events */
os_mutex_enter(os_sync_mutex);
@@ -358,23 +591,24 @@ os_event_wait_low(
returned by previous call of
os_event_reset(). */
{
-#ifdef __WIN__
- DWORD err;
+
+ ib_int64_t old_signal_count;
- ut_a(event);
+#ifdef __WIN__
+ if(!srv_use_native_conditions) {
+ DWORD err;
- UT_NOT_USED(reset_sig_count);
+ ut_a(event);
- /* Specify an infinite time limit for waiting */
- err = WaitForSingleObject(event->handle, INFINITE);
+ UT_NOT_USED(reset_sig_count);
- ut_a(err == WAIT_OBJECT_0);
+ /* Specify an infinite wait */
+ err = WaitForSingleObject(event->handle, INFINITE);
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- os_thread_exit(NULL);
+ ut_a(err == WAIT_OBJECT_0);
+ return;
}
-#else
- ib_int64_t old_signal_count;
+#endif
os_fast_mutex_lock(&(event->os_mutex));
@@ -399,13 +633,12 @@ os_event_wait_low(
return;
}
- pthread_cond_wait(&(event->cond_var), &(event->os_mutex));
+ os_cond_wait(&(event->cond_var), &(event->os_mutex));
/* Solaris manual said that spurious wakeups may occur: we
have to check if the event really has been signaled after
we came here to wait */
}
-#endif
}
/**********************************************************//**
@@ -414,112 +647,112 @@ a timeout is exceeded.
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
UNIV_INTERN
ulint
-os_event_wait_time(
-/*===============*/
- os_event_t event, /*!< in: event to wait */
- ulint wtime) /*!< in: timeout in microseconds, or
- OS_SYNC_INFINITE_TIME */
+os_event_wait_time_low(
+/*===================*/
+ os_event_t event, /*!< in: event to wait */
+ ulint time_in_usec, /*!< in: timeout in
+ microseconds, or
+ OS_SYNC_INFINITE_TIME */
+ ib_int64_t reset_sig_count) /*!< in: zero or the value
+ returned by previous call of
+ os_event_reset(). */
+
{
+ ibool timed_out = FALSE;
+
#ifdef __WIN__
- DWORD err;
+ DWORD time_in_ms;
- ut_a(event);
+ if (!srv_use_native_conditions) {
+ DWORD err;
- if (wtime != OS_SYNC_INFINITE_TIME) {
- err = WaitForSingleObject(event->handle, (DWORD) wtime / 1000);
- } else {
- err = WaitForSingleObject(event->handle, INFINITE);
- }
+ ut_a(event);
- if (err == WAIT_OBJECT_0) {
+ if (time_in_usec != OS_SYNC_INFINITE_TIME) {
+ time_in_ms = time_in_usec / 1000;
+ err = WaitForSingleObject(event->handle, time_in_ms);
+ } else {
+ err = WaitForSingleObject(event->handle, INFINITE);
+ }
- return(0);
- } else if (err == WAIT_TIMEOUT) {
+ if (err == WAIT_OBJECT_0) {
+ return(0);
+ } else if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) {
+ return(OS_SYNC_TIME_EXCEEDED);
+ }
- return(OS_SYNC_TIME_EXCEEDED);
- } else {
ut_error;
- return(1000000); /* dummy value to eliminate compiler warn. */
+ /* Dummy value to eliminate compiler warning. */
+ return(42);
+ } else {
+ ut_a(sleep_condition_variable != NULL);
+
+ if (time_in_usec != OS_SYNC_INFINITE_TIME) {
+ time_in_ms = time_in_usec / 1000;
+ } else {
+ time_in_ms = INFINITE;
+ }
}
#else
- int err;
- int ret = 0;
- ulint tmp;
- ib_int64_t old_count;
- struct timeval tv_start;
- struct timespec timeout;
-
- if (wtime == OS_SYNC_INFINITE_TIME) {
- os_event_wait(event);
- return 0;
- }
+ struct timespec abstime;
- /* Compute the absolute point in time at which to time out. */
- gettimeofday(&tv_start, NULL);
- tmp = tv_start.tv_usec + wtime;
- timeout.tv_sec = tv_start.tv_sec + (tmp / 1000000);
- timeout.tv_nsec = (tmp % 1000000) * 1000;
+ if (time_in_usec != OS_SYNC_INFINITE_TIME) {
+ struct timeval tv;
+ int ret;
+ ulint sec;
+ ulint usec;
- os_fast_mutex_lock(&(event->os_mutex));
- old_count = event->signal_count;
+ ret = ut_usectime(&sec, &usec);
+ ut_a(ret == 0);
- for (;;) {
- if (event->is_set == TRUE || event->signal_count != old_count)
- break;
+ tv.tv_sec = sec;
+ tv.tv_usec = usec;
- err = pthread_cond_timedwait(&(event->cond_var),
- &(event->os_mutex), &timeout);
- if (err == ETIMEDOUT) {
- ret = OS_SYNC_TIME_EXCEEDED;
- break;
+ tv.tv_usec += time_in_usec;
+ /* Carry whole seconds out of the summed microseconds, not out of the requested timeout alone. */
+ if ((ulint) tv.tv_usec >= MICROSECS_IN_A_SECOND) {
+ tv.tv_sec += tv.tv_usec / MICROSECS_IN_A_SECOND;
+ tv.tv_usec %= MICROSECS_IN_A_SECOND;
}
+
+ abstime.tv_sec = tv.tv_sec;
+ abstime.tv_nsec = tv.tv_usec * 1000;
+ } else {
+ abstime.tv_nsec = 999999999;
+ abstime.tv_sec = (time_t) ULINT_MAX;
}
- os_fast_mutex_unlock(&(event->os_mutex));
+ ut_a(abstime.tv_nsec <= 999999999);
+
+#endif /* __WIN__ */
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
+ os_fast_mutex_lock(&event->os_mutex);
- os_thread_exit(NULL);
+ if (!reset_sig_count) {
+ reset_sig_count = event->signal_count;
}
- return ret;
-#endif
-}
+ do {
+ if (event->is_set || event->signal_count != reset_sig_count) {
-#ifdef __WIN__
-/**********************************************************//**
-Waits for any event in an OS native event array. Returns if even a single
-one is signaled or becomes signaled.
-@return index of the event which was signaled */
-UNIV_INTERN
-ulint
-os_event_wait_multiple(
-/*===================*/
- ulint n, /*!< in: number of events in the
- array */
- os_native_event_t* native_event_array)
- /*!< in: pointer to an array of event
- handles */
-{
- DWORD index;
+ break;
+ }
- ut_a(native_event_array);
- ut_a(n > 0);
+ timed_out = os_cond_wait_timed(
+ &event->cond_var, &event->os_mutex,
+#ifndef __WIN__
+ &abstime
+#else
+ time_in_ms
+#endif /* !__WIN__ */
+ );
- index = WaitForMultipleObjects((DWORD) n, native_event_array,
- FALSE, /* Wait for any 1 event */
- INFINITE); /* Infinite wait time
- limit */
- ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparison */
- ut_a(index < WAIT_OBJECT_0 + n);
+ } while (!timed_out);
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- os_thread_exit(NULL);
- }
+ os_fast_mutex_unlock(&event->os_mutex);
- return(index - WAIT_OBJECT_0);
+ return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0);
}
-#endif
/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
@@ -532,15 +765,6 @@ os_mutex_create(
const char* name) /*!< in: the name of the mutex, if NULL
the mutex is created without a name */
{
-#ifdef __WIN__
- HANDLE mutex;
- os_mutex_t mutex_str;
-
- mutex = CreateMutex(NULL, /* No security attributes */
- FALSE, /* Initial state: no owner */
- (LPCTSTR) name);
- ut_a(mutex);
-#else
os_fast_mutex_t* mutex;
os_mutex_t mutex_str;
@@ -549,7 +773,6 @@ os_mutex_create(
mutex = ut_malloc(sizeof(os_fast_mutex_t));
os_fast_mutex_init(mutex);
-#endif
mutex_str = ut_malloc(sizeof(os_mutex_str_t));
mutex_str->handle = mutex;
@@ -580,25 +803,11 @@ os_mutex_enter(
/*===========*/
os_mutex_t mutex) /*!< in: mutex to acquire */
{
-#ifdef __WIN__
- DWORD err;
-
- ut_a(mutex);
-
- /* Specify infinite time limit for waiting */
- err = WaitForSingleObject(mutex->handle, INFINITE);
-
- ut_a(err == WAIT_OBJECT_0);
-
- (mutex->count)++;
- ut_a(mutex->count == 1);
-#else
os_fast_mutex_lock(mutex->handle);
(mutex->count)++;
ut_a(mutex->count == 1);
-#endif
}
/**********************************************************//**
@@ -614,11 +823,7 @@ os_mutex_exit(
ut_a(mutex->count == 1);
(mutex->count)--;
-#ifdef __WIN__
- ut_a(ReleaseMutex(mutex->handle));
-#else
os_fast_mutex_unlock(mutex->handle);
-#endif
}
/**********************************************************//**
@@ -647,15 +852,9 @@ os_mutex_free(
os_mutex_exit(os_sync_mutex);
}
-#ifdef __WIN__
- ut_a(CloseHandle(mutex->handle));
-
- ut_free(mutex);
-#else
os_fast_mutex_free(mutex->handle);
ut_free(mutex->handle);
ut_free(mutex);
-#endif
}
/*********************************************************//**
diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c
index 1ced125b7bd..13dccdffb96 100644
--- a/storage/xtradb/row/row0sel.c
+++ b/storage/xtradb/row/row0sel.c
@@ -2692,8 +2692,10 @@ row_sel_store_mysql_rec(
ibool rec_clust, /*!< in: TRUE if rec is in the
clustered index instead of
prebuilt->index */
- const ulint* offsets) /*!< in: array returned by
- rec_get_offsets(rec) */
+ const ulint* offsets, /* in: array returned by
+ rec_get_offsets() */
+ ulint start_field_no, /* in: start from this field */
+ ulint end_field_no) /* in: end at this field */
{
mem_heap_t* extern_field_heap = NULL;
mem_heap_t* heap;
@@ -2709,7 +2711,7 @@ row_sel_store_mysql_rec(
prebuilt->blob_heap = NULL;
}
- for (i = 0; i < prebuilt->n_template; i++) {
+ for (i = start_field_no; i < end_field_no /* prebuilt->n_template */ ; i++) {
const mysql_row_templ_t*templ = prebuilt->mysql_template + i;
const byte* data;
@@ -3154,10 +3156,14 @@ row_sel_pop_cached_row_for_mysql(
/* Copy NULL bit of the current field from cached_rec
to buf */
if (templ->mysql_null_bit_mask) {
- buf[templ->mysql_null_byte_offset]
+ /*buf[templ->mysql_null_byte_offset]
^= (buf[templ->mysql_null_byte_offset]
^ cached_rec[templ->mysql_null_byte_offset])
- & (byte)templ->mysql_null_bit_mask;
+ & (byte)templ->mysql_null_bit_mask;*/
+ byte *null_byte= buf + templ->mysql_null_byte_offset;
+ (*null_byte)&= ~templ->mysql_null_bit_mask;
+ (*null_byte)|= cached_rec[templ->mysql_null_byte_offset] &
+ templ->mysql_null_bit_mask;
}
}
}
@@ -3194,7 +3200,10 @@ row_sel_push_cache_row_for_mysql(
ibool rec_clust, /*!< in: TRUE if rec is in the
clustered index instead of
prebuilt->index */
- const ulint* offsets) /*!< in: rec_get_offsets(rec) */
+ const ulint* offsets, /* in: rec_get_offsets() */
+ ulint start_field_no, /* in: start from this field */
+ byte* remainder_buf) /* in: if start_field_no != 0,
+ buffer holding the fields before it */
{
byte* buf;
ulint i;
@@ -3228,12 +3237,44 @@ row_sel_push_cache_row_for_mysql(
prebuilt->mysql_row_len);
if (UNIV_UNLIKELY(!row_sel_store_mysql_rec(
- prebuilt->fetch_cache[
+ prebuilt->fetch_cache[
prebuilt->n_fetch_cached],
- prebuilt, rec, rec_clust, offsets))) {
+ prebuilt,
+ rec,
+ rec_clust,
+ offsets,
+ start_field_no,
+ prebuilt->n_template))) {
return(FALSE);
}
+ if (start_field_no) {
+
+ for (i=0; i < start_field_no; i++) {
+ register ulint offs;
+ mysql_row_templ_t* templ;
+ register byte * null_byte;
+
+ templ = prebuilt->mysql_template + i;
+
+ if (templ->mysql_null_bit_mask) {
+ offs = templ->mysql_null_byte_offset;
+
+ null_byte= prebuilt->fetch_cache[
+ prebuilt->n_fetch_cached]+offs;
+ (*null_byte)&= ~templ->mysql_null_bit_mask;
+ (*null_byte)|= (*(remainder_buf + offs) &
+ templ->mysql_null_bit_mask);
+ }
+
+ offs = templ->mysql_col_offset;
+ memcpy(prebuilt->fetch_cache[prebuilt->n_fetch_cached]
+ + offs,
+ remainder_buf + offs,
+ templ->mysql_col_len);
+ }
+ }
+
prebuilt->n_fetch_cached++;
return(TRUE);
}
@@ -3319,7 +3360,8 @@ and fetch prev. NOTE that if we do a search with a full key value
from a unique index (ROW_SEL_EXACT), then we will not store the cursor
position and fetch next or fetch prev must not be tried to the cursor!
@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
-DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
+DB_LOCK_TABLE_FULL, DB_CORRUPTION, DB_SEARCH_ABORTED_BY_USER or
+DB_TOO_BIG_RECORD */
UNIV_INTERN
ulint
row_search_for_mysql(
@@ -3373,8 +3415,10 @@ row_search_for_mysql(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
+ ibool some_fields_in_buffer;
ibool table_lock_waited = FALSE;
ibool problematic_use = FALSE;
+ ibool get_clust_rec = 0;
rec_offs_init(offsets_);
@@ -3638,8 +3682,9 @@ row_search_for_mysql(
ut_ad(!rec_get_deleted_flag(rec, comp));
if (!row_sel_store_mysql_rec(buf, prebuilt,
- rec, FALSE,
- offsets)) {
+ rec, FALSE,
+ offsets, 0,
+ prebuilt->n_template)) {
/* Only fresh inserts may contain
incomplete externally stored
columns. Pretend that such
@@ -4301,7 +4346,8 @@ no_gap_lock:
if (!lock_sec_rec_cons_read_sees(
rec, trx->read_view)) {
- goto requires_clust_rec;
+ get_clust_rec = TRUE;
+ goto idx_cond_check;
}
}
}
@@ -4346,12 +4392,39 @@ no_gap_lock:
goto next_rec;
}
+
+idx_cond_check:
+ if (prebuilt->idx_cond_func) {
+ int res;
+ ibool ib_res;
+ ut_ad(prebuilt->template_type != ROW_MYSQL_DUMMY_TEMPLATE);
+ offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+ ib_res= row_sel_store_mysql_rec(buf, prebuilt, rec, FALSE,
+ offsets, 0, prebuilt->n_index_fields);
+ /*
+ The above call will fail and return FALSE when requested to
+ store an "externally stored column" (as far as I understand, a BLOB). Index
+ Condition Pushdown is not supported for indexes with blob
+ columns, so we should never get this error.
+ */
+ ut_ad(ib_res);
+ res= prebuilt->idx_cond_func(prebuilt->idx_cond_func_arg);
+ if (res == XTRADB_ICP_NO_MATCH)
+ goto next_rec;
+ else if (res != XTRADB_ICP_MATCH) {
+ err= (res == XTRADB_ICP_ABORTED_BY_USER ?
+ DB_SEARCH_ABORTED_BY_USER :
+ DB_RECORD_NOT_FOUND);
+ goto idx_cond_failed;
+ }
+ /* res == XTRADB_ICP_MATCH */
+ }
+
/* Get the clustered index record if needed, if we did not do the
search using the clustered index. */
+ if (get_clust_rec || (index != clust_index
+ && prebuilt->need_to_access_clustered)) {
- if (index != clust_index && prebuilt->need_to_access_clustered) {
-
-requires_clust_rec:
/* We use a 'goto' to the preceding label if a consistent
read of a secondary index record requires us to look up old
versions of the associated clustered index record. */
@@ -4447,10 +4520,15 @@ requires_clust_rec:
are BLOBs in the fields to be fetched. In HANDLER we do
not cache rows because there the cursor is a scrollable
cursor. */
+ some_fields_in_buffer = (index != clust_index
+ && prebuilt->idx_cond_func);
if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
result_rec != rec,
- offsets)) {
+ offsets,
+ some_fields_in_buffer?
+ prebuilt->n_index_fields : 0,
+ buf)) {
/* Only fresh inserts may contain incomplete
externally stored columns. Pretend that such
records do not exist. Such records may only be
@@ -4491,7 +4569,10 @@ requires_clust_rec:
if (!row_sel_store_mysql_rec(buf, prebuilt, result_rec,
result_rec != rec,
- offsets)) {
+ offsets,
+ prebuilt->idx_cond_func?
+ prebuilt->n_index_fields: 0,
+ prebuilt->n_template)) {
/* Only fresh inserts may contain
incomplete externally stored
columns. Pretend that such records do
@@ -4527,6 +4608,9 @@ got_row:
HANDLER command where the user can move the cursor with PREV or NEXT
even after a unique search. */
+ err = DB_SUCCESS;
+
+idx_cond_failed:
if (!unique_search_from_clust_index
|| prebuilt->select_lock_type != LOCK_NONE
|| prebuilt->used_in_HANDLER) {
@@ -4536,12 +4620,11 @@ got_row:
btr_pcur_store_position(pcur, &mtr);
}
- err = DB_SUCCESS;
-
goto normal_return;
next_rec:
/* Reset the old and new "did semi-consistent read" flags. */
+ get_clust_rec = FALSE;
if (UNIV_UNLIKELY(prebuilt->row_read_type
== ROW_READ_DID_SEMI_CONSISTENT)) {
prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c
index f39d1b8a758..9a142e1ca86 100644
--- a/storage/xtradb/srv/srv0srv.c
+++ b/storage/xtradb/srv/srv0srv.c
@@ -139,6 +139,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
/** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
+#ifdef __WIN__
+/* Windows native condition variables. We use runtime loading / function
+pointers, because they are not available on Windows Server 2003 and
+Windows XP/2000.
+
+We use condition variables for events on Windows if possible, even though
+os_event resembles the Windows kernel event object well API-wise. The reason
+is performance: kernel objects are heavyweight, and WaitForSingleObject() is a
+performance killer that causes the calling thread to context switch. Besides,
+InnoDB preallocates a large number (often millions) of os_events. With kernel
+event objects this takes a big chunk out of the non-paged pool, which is better
+suited for tasks like IO than for storing idle event objects. */
+UNIV_INTERN ibool srv_use_native_conditions = FALSE;
+#endif /* __WIN__ */
UNIV_INTERN ulint srv_n_data_files = 0;
UNIV_INTERN char** srv_data_file_names = NULL;
@@ -414,7 +428,7 @@ UNIV_INTERN ulong srv_ibuf_accel_rate = 100;
UNIV_INTERN ulint srv_checkpoint_age_target = 0;
UNIV_INTERN ulong srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
-UNIV_INTERN ulong srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
+UNIV_INTERN ulong srv_deprecated_enable_unsafe_group_commit = 0;
UNIV_INTERN ulong srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
UNIV_INTERN ulong srv_adaptive_checkpoint = 0; /* 0: none 1: reflex 2: estimate */
@@ -3239,7 +3253,7 @@ retry_flush_batch:
/* Make a new checkpoint about once in 10 seconds */
- log_checkpoint(TRUE, FALSE);
+ log_checkpoint(TRUE, FALSE, TRUE);
srv_main_thread_op_info = "reserving kernel mutex";
@@ -3360,7 +3374,7 @@ flush_loop:
srv_main_thread_op_info = "making checkpoint";
- log_checkpoint(TRUE, FALSE);
+ log_checkpoint(TRUE, FALSE, TRUE);
if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {
diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c
index cef045d72e1..d002a1bb682 100644
--- a/storage/xtradb/srv/srv0start.c
+++ b/storage/xtradb/srv/srv0start.c
@@ -1265,6 +1265,7 @@ innobase_start_or_create_for_mysql(void)
case OS_WIN95:
case OS_WIN31:
case OS_WINNT:
+ srv_use_native_conditions = FALSE;
/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
and NT use simulated aio. In NT Windows provides async i/o,
but when run in conjunction with InnoDB Hot Backup, it seemed
@@ -1272,24 +1273,26 @@ innobase_start_or_create_for_mysql(void)
os_aio_use_native_aio = FALSE;
break;
- default:
- /* On Win 2000 and XP use async i/o */
- //os_aio_use_native_aio = TRUE;
- os_aio_use_native_aio = FALSE;
- fprintf(stderr,
- "InnoDB: Windows native async i/o is disabled as default.\n"
- "InnoDB: It is not applicable for the current"
- " multi io threads implementation.\n");
- break;
+
+ case OS_WIN2000:
+ case OS_WINXP:
+ /* On 2000 and XP, async IO is available, but no condition variables. */
+ os_aio_use_native_aio = TRUE;
+ srv_use_native_conditions = FALSE;
+ break;
+
+ default:
+ os_aio_use_native_aio = TRUE;
+ srv_use_native_conditions = TRUE;
}
#endif
+
if (srv_file_flush_method_str == NULL) {
/* These are the default options */
srv_unix_file_flush_method = SRV_UNIX_FSYNC;
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-#ifndef __WIN__
} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
srv_unix_file_flush_method = SRV_UNIX_FSYNC;
@@ -1307,7 +1310,7 @@ innobase_start_or_create_for_mysql(void)
} else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
-#else
+#ifdef _WIN32
} else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
os_aio_use_native_aio = FALSE;
@@ -1315,16 +1318,10 @@ innobase_start_or_create_for_mysql(void)
} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
os_aio_use_native_aio = FALSE;
-
} else if (0 == ut_strcmp(srv_file_flush_method_str,
"async_unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
os_aio_use_native_aio = TRUE;
- srv_n_read_io_threads = srv_n_write_io_threads = 1;
- fprintf(stderr,
- "InnoDB: 'async_unbuffered' was detected as innodb_flush_method.\n"
- "InnoDB: Windows native async i/o is enabled.\n"
- "InnoDB: And io threads are restricted.\n");
#endif
} else {
fprintf(stderr,
diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c
index 98bd9e4ac58..7ea3e09036f 100644
--- a/storage/xtradb/trx/trx0trx.c
+++ b/storage/xtradb/trx/trx0trx.c
@@ -121,7 +121,7 @@ trx_create(
trx->table_id = ut_dulint_zero;
trx->mysql_thd = NULL;
- trx->active_trans = 0;
+ trx->active_flag = 0;
trx->duplicates = 0;
trx->n_mysql_tables_in_use = 0;