diff options
author | unknown <heikki@donna.mysql.fi> | 2001-08-04 19:36:14 +0300 |
---|---|---|
committer | unknown <heikki@donna.mysql.fi> | 2001-08-04 19:36:14 +0300 |
commit | c67510f0939fbcad4f2f0efcc79272322d5ffa57 (patch) | |
tree | e6dc89cb458f496f2b93e907afb60d3cd886cc18 /innobase/row | |
parent | ee82985a98c2c302e54a81ef586ffc8a867a550c (diff) | |
download | mariadb-git-c67510f0939fbcad4f2f0efcc79272322d5ffa57.tar.gz |
srv0srv.h Support raw disk partitions as data files
srv0start.c Support raw disk partitions as data files
srv0srv.c Support raw disk partitions as data files
row0purge.c < 4 GB rows, doublewrite, hang fixes
row0row.c < 4 GB rows, doublewrite, hang fixes
row0sel.c < 4 GB rows, doublewrite, hang fixes
row0uins.c < 4 GB rows, doublewrite, hang fixes
row0umod.c < 4 GB rows, doublewrite, hang fixes
row0undo.c < 4 GB rows, doublewrite, hang fixes
row0upd.c < 4 GB rows, doublewrite, hang fixes
srv0srv.c < 4 GB rows, doublewrite, hang fixes
srv0start.c < 4 GB rows, doublewrite, hang fixes
sync0rw.c < 4 GB rows, doublewrite, hang fixes
sync0sync.c < 4 GB rows, doublewrite, hang fixes
trx0purge.c < 4 GB rows, doublewrite, hang fixes
trx0rec.c < 4 GB rows, doublewrite, hang fixes
trx0sys.c < 4 GB rows, doublewrite, hang fixes
btr0btr.c < 4 GB rows, doublewrite, hang fixes
btr0cur.c < 4 GB rows, doublewrite, hang fixes
buf0buf.c < 4 GB rows, doublewrite, hang fixes
buf0flu.c < 4 GB rows, doublewrite, hang fixes
buf0rea.c < 4 GB rows, doublewrite, hang fixes
data0data.c < 4 GB rows, doublewrite, hang fixes
fil0fil.c < 4 GB rows, doublewrite, hang fixes
fsp0fsp.c < 4 GB rows, doublewrite, hang fixes
ibuf0ibuf.c < 4 GB rows, doublewrite, hang fixes
lock0lock.c < 4 GB rows, doublewrite, hang fixes
log0log.c < 4 GB rows, doublewrite, hang fixes
log0recv.c < 4 GB rows, doublewrite, hang fixes
os0file.c < 4 GB rows, doublewrite, hang fixes
page0cur.c < 4 GB rows, doublewrite, hang fixes
pars0pars.c < 4 GB rows, doublewrite, hang fixes
rem0cmp.c < 4 GB rows, doublewrite, hang fixes
rem0rec.c < 4 GB rows, doublewrite, hang fixes
row0ins.c < 4 GB rows, doublewrite, hang fixes
row0mysql.c < 4 GB rows, doublewrite, hang fixes
univ.i < 4 GB rows, doublewrite, hang fixes
data0data.ic < 4 GB rows, doublewrite, hang fixes
mach0data.ic < 4 GB rows, doublewrite, hang fixes
rem0rec.ic < 4 GB rows, doublewrite, hang fixes
row0upd.ic < 4 GB rows, doublewrite, hang fixes
trx0rec.ic < 4 GB rows, doublewrite, hang fixes
rem0cmp.h < 4 GB rows, doublewrite, hang fixes
rem0rec.h < 4 GB rows, doublewrite, hang fixes
row0ins.h < 4 GB rows, doublewrite, hang fixes
row0mysql.h < 4 GB rows, doublewrite, hang fixes
row0row.h < 4 GB rows, doublewrite, hang fixes
row0upd.h < 4 GB rows, doublewrite, hang fixes
srv0srv.h < 4 GB rows, doublewrite, hang fixes
sync0sync.h < 4 GB rows, doublewrite, hang fixes
trx0rec.h < 4 GB rows, doublewrite, hang fixes
trx0sys.h < 4 GB rows, doublewrite, hang fixes
trx0types.h < 4 GB rows, doublewrite, hang fixes
trx0undo.h < 4 GB rows, doublewrite, hang fixes
ut0dbg.h < 4 GB rows, doublewrite, hang fixes
ut0ut.h < 4 GB rows, doublewrite, hang fixes
btr0btr.h < 4 GB rows, doublewrite, hang fixes
btr0cur.h < 4 GB rows, doublewrite, hang fixes
buf0buf.h < 4 GB rows, doublewrite, hang fixes
buf0flu.h < 4 GB rows, doublewrite, hang fixes
data0data.h < 4 GB rows, doublewrite, hang fixes
dict0mem.h < 4 GB rows, doublewrite, hang fixes
fil0fil.h < 4 GB rows, doublewrite, hang fixes
fsp0fsp.h < 4 GB rows, doublewrite, hang fixes
os0file.h < 4 GB rows, doublewrite, hang fixes
innobase/include/btr0btr.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/btr0cur.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/buf0buf.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/buf0flu.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/data0data.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/dict0mem.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/fil0fil.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/fsp0fsp.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/os0file.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/rem0cmp.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/rem0rec.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/row0ins.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/row0mysql.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/row0row.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/row0upd.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/sync0sync.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/trx0rec.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/trx0sys.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/trx0types.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/trx0undo.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/ut0dbg.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/ut0ut.h:
< 4 GB rows, doublewrite, hang fixes
innobase/include/data0data.ic:
< 4 GB rows, doublewrite, hang fixes
innobase/include/mach0data.ic:
< 4 GB rows, doublewrite, hang fixes
innobase/include/rem0rec.ic:
< 4 GB rows, doublewrite, hang fixes
innobase/include/row0upd.ic:
< 4 GB rows, doublewrite, hang fixes
innobase/include/trx0rec.ic:
< 4 GB rows, doublewrite, hang fixes
innobase/include/univ.i:
< 4 GB rows, doublewrite, hang fixes
innobase/btr/btr0btr.c:
< 4 GB rows, doublewrite, hang fixes
innobase/btr/btr0cur.c:
< 4 GB rows, doublewrite, hang fixes
innobase/buf/buf0buf.c:
< 4 GB rows, doublewrite, hang fixes
innobase/buf/buf0flu.c:
< 4 GB rows, doublewrite, hang fixes
innobase/buf/buf0rea.c:
< 4 GB rows, doublewrite, hang fixes
innobase/data/data0data.c:
< 4 GB rows, doublewrite, hang fixes
innobase/fil/fil0fil.c:
< 4 GB rows, doublewrite, hang fixes
innobase/fsp/fsp0fsp.c:
< 4 GB rows, doublewrite, hang fixes
innobase/ibuf/ibuf0ibuf.c:
< 4 GB rows, doublewrite, hang fixes
innobase/lock/lock0lock.c:
< 4 GB rows, doublewrite, hang fixes
innobase/log/log0log.c:
< 4 GB rows, doublewrite, hang fixes
innobase/log/log0recv.c:
< 4 GB rows, doublewrite, hang fixes
innobase/os/os0file.c:
< 4 GB rows, doublewrite, hang fixes
innobase/page/page0cur.c:
< 4 GB rows, doublewrite, hang fixes
innobase/pars/pars0pars.c:
< 4 GB rows, doublewrite, hang fixes
innobase/rem/rem0cmp.c:
< 4 GB rows, doublewrite, hang fixes
innobase/rem/rem0rec.c:
< 4 GB rows, doublewrite, hang fixes
innobase/row/row0ins.c:
< 4 GB rows, doublewrite, hang fixes
innobase/row/row0mysql.c:
< 4 GB rows, doublewrite, hang fixes
innobase/row/row0purge.c:
< 4 GB rows, doublewrite, hang fixes
innobase/row/row0row.c:
< 4 GB rows, doublewrite, hang fixes
innobase/row/row0sel.c:
< 4 GB rows, doublewrite, hang fixes
innobase/row/row0uins.c:
< 4 GB rows, doublewrite, hang fixes
innobase/row/row0umod.c:
< 4 GB rows, doublewrite, hang fixes
innobase/row/row0undo.c:
< 4 GB rows, doublewrite, hang fixes
innobase/row/row0upd.c:
< 4 GB rows, doublewrite, hang fixes
innobase/sync/sync0rw.c:
< 4 GB rows, doublewrite, hang fixes
innobase/sync/sync0sync.c:
< 4 GB rows, doublewrite, hang fixes
innobase/trx/trx0purge.c:
< 4 GB rows, doublewrite, hang fixes
innobase/trx/trx0rec.c:
< 4 GB rows, doublewrite, hang fixes
innobase/trx/trx0sys.c:
< 4 GB rows, doublewrite, hang fixes
innobase/srv/srv0srv.c:
Support raw disk partitions as data files
innobase/srv/srv0start.c:
Support raw disk partitions as data files
innobase/include/srv0srv.h:
Support raw disk partitions as data files
Diffstat (limited to 'innobase/row')
-rw-r--r-- | innobase/row/row0ins.c | 91 | ||||
-rw-r--r-- | innobase/row/row0mysql.c | 42 | ||||
-rw-r--r-- | innobase/row/row0purge.c | 99 | ||||
-rw-r--r-- | innobase/row/row0row.c | 34 | ||||
-rw-r--r-- | innobase/row/row0sel.c | 63 | ||||
-rw-r--r-- | innobase/row/row0uins.c | 9 | ||||
-rw-r--r-- | innobase/row/row0umod.c | 121 | ||||
-rw-r--r-- | innobase/row/row0undo.c | 12 | ||||
-rw-r--r-- | innobase/row/row0upd.c | 143 |
9 files changed, 495 insertions, 119 deletions
diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c index e57622fd1c5..8542dcae326 100644 --- a/innobase/row/row0ins.c +++ b/innobase/row/row0ins.c @@ -234,7 +234,13 @@ row_ins_clust_index_entry_by_modify( depending on whether mtr holds just a leaf latch or also a tree latch */ btr_cur_t* cursor, /* in: B-tree cursor */ + big_rec_t** big_rec,/* out: possible big rec vector of fields + which have to be stored externally by the + caller */ dtuple_t* entry, /* in: index entry to insert */ + ulint* ext_vec,/* in: array containing field numbers of + externally stored fields in entry, or NULL */ + ulint n_ext_vec,/* in: number of fields in ext_vec */ que_thr_t* thr, /* in: query thread */ mtr_t* mtr) /* in: mtr */ { @@ -243,8 +249,10 @@ row_ins_clust_index_entry_by_modify( upd_t* update; ulint err; - ut_ad((cursor->index)->type & DICT_CLUSTERED); + ut_ad(cursor->index->type & DICT_CLUSTERED); + *big_rec = NULL; + rec = btr_cur_get_rec(cursor); ut_ad(rec_get_deleted_flag(rec)); @@ -254,21 +262,21 @@ row_ins_clust_index_entry_by_modify( /* Build an update vector containing all the fields to be modified; NOTE that this vector may contain also system columns! 
*/ - update = row_upd_build_difference(cursor->index, entry, rec, heap); - + update = row_upd_build_difference(cursor->index, entry, ext_vec, + n_ext_vec, rec, heap); if (mode == BTR_MODIFY_LEAF) { /* Try optimistic updating of the record, keeping changes within the page */ - err = btr_cur_optimistic_update(0, cursor, update, 0, thr, - mtr); - if ((err == DB_OVERFLOW) || (err == DB_UNDERFLOW)) { + err = btr_cur_optimistic_update(0, cursor, update, 0, thr, mtr); + + if (err == DB_OVERFLOW || err == DB_UNDERFLOW) { err = DB_FAIL; } } else { - ut_ad(mode == BTR_MODIFY_TREE); - err = btr_cur_pessimistic_update(0, cursor, update, 0, thr, - mtr); + ut_a(mode == BTR_MODIFY_TREE); + err = btr_cur_pessimistic_update(0, cursor, big_rec, update, + 0, thr, mtr); } mem_heap_free(heap); @@ -597,14 +605,18 @@ row_ins_index_entry_low( pessimistic descent down the index tree */ dict_index_t* index, /* in: index */ dtuple_t* entry, /* in: index entry to insert */ + ulint* ext_vec,/* in: array containing field numbers of + externally stored fields in entry, or NULL */ + ulint n_ext_vec,/* in: number of fields in ext_vec */ que_thr_t* thr) /* in: query thread */ { btr_cur_t cursor; ulint modify; - rec_t* dummy_rec; + rec_t* insert_rec; rec_t* rec; ulint err; ulint n_unique; + big_rec_t* big_rec = NULL; mtr_t mtr; log_free_check(); @@ -682,24 +694,54 @@ row_ins_index_entry_low( if (index->type & DICT_CLUSTERED) { err = row_ins_clust_index_entry_by_modify(mode, - &cursor, entry, - thr, &mtr); + &cursor, &big_rec, + entry, + ext_vec, n_ext_vec, + thr, &mtr); } else { err = row_ins_sec_index_entry_by_modify(&cursor, thr, &mtr); } - } else if (mode == BTR_MODIFY_LEAF) { - err = btr_cur_optimistic_insert(0, &cursor, entry, - &dummy_rec, thr, &mtr); } else { - ut_ad(mode == BTR_MODIFY_TREE); - err = btr_cur_pessimistic_insert(0, &cursor, entry, - &dummy_rec, thr, &mtr); + if (mode == BTR_MODIFY_LEAF) { + err = btr_cur_optimistic_insert(0, &cursor, entry, + &insert_rec, &big_rec, thr, &mtr); 
+ } else { + ut_a(mode == BTR_MODIFY_TREE); + err = btr_cur_pessimistic_insert(0, &cursor, entry, + &insert_rec, &big_rec, thr, &mtr); + } + + if (err == DB_SUCCESS) { + if (ext_vec) { + rec_set_field_extern_bits(insert_rec, + ext_vec, n_ext_vec, &mtr); + } + } } + function_exit: mtr_commit(&mtr); + if (big_rec) { + mtr_start(&mtr); + + btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, + BTR_MODIFY_TREE, &cursor, 0, &mtr); + + err = btr_store_big_rec_extern_fields(index, + btr_cur_get_rec(&cursor), + big_rec, &mtr); + if (modify) { + dtuple_big_rec_free(big_rec); + } else { + dtuple_convert_back_big_rec(index, entry, big_rec); + } + + mtr_commit(&mtr); + } + return(err); } @@ -716,14 +758,17 @@ row_ins_index_entry( DB_DUPLICATE_KEY, or some other error code */ dict_index_t* index, /* in: index */ dtuple_t* entry, /* in: index entry to insert */ + ulint* ext_vec,/* in: array containing field numbers of + externally stored fields in entry, or NULL */ + ulint n_ext_vec,/* in: number of fields in ext_vec */ que_thr_t* thr) /* in: query thread */ { ulint err; /* Try first optimistic descent to the B-tree */ - err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry, thr); - + err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry, + ext_vec, n_ext_vec, thr); if (err != DB_FAIL) { return(err); @@ -731,8 +776,8 @@ row_ins_index_entry( /* Try then pessimistic descent to the B-tree */ - err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry, thr); - + err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry, + ext_vec, n_ext_vec, thr); return(err); } @@ -784,7 +829,7 @@ row_ins_index_entry_step( ut_ad(dtuple_check_typed(node->entry)); - err = row_ins_index_entry(node->index, node->entry, thr); + err = row_ins_index_entry(node->index, node->entry, NULL, 0, thr); return(err); } diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c index 8e1a584f667..9bbc45a5c9a 100644 --- a/innobase/row/row0mysql.c +++ b/innobase/row/row0mysql.c @@ -625,7 
+625,8 @@ row_update_for_mysql( ut_ad(prebuilt && trx); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - + UT_NOT_USED(mysql_rec); + node = prebuilt->upd_node; clust_index = dict_table_get_first_index(table); @@ -777,7 +778,9 @@ row_get_mysql_key_number_for_index( } /************************************************************************* -Does a table creation operation for MySQL. */ +Does a table creation operation for MySQL. If the name of the created +table ends to characters INNODB_MONITOR, then this also starts +printing of monitor output by the master thread. */ int row_create_table_for_mysql( @@ -789,6 +792,8 @@ row_create_table_for_mysql( tab_node_t* node; mem_heap_t* heap; que_thr_t* thr; + ulint namelen; + ulint keywordlen; ulint err; ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); @@ -833,6 +838,20 @@ row_create_table_for_mysql( } trx->error_state = DB_SUCCESS; + } else { + namelen = ut_strlen(table->name); + + keywordlen = ut_strlen("innodb_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(table->name + namelen - keywordlen, + "innodb_monitor", keywordlen)) { + + /* Table name ends to characters innodb_monitor: + start monitor prints */ + + srv_print_innodb_monitor = TRUE; + } } mutex_exit(&(dict_sys->mutex)); @@ -900,7 +919,9 @@ row_create_index_for_mysql( } /************************************************************************* -Drops a table for MySQL. */ +Drops a table for MySQL. If the name of the dropped table ends to +characters INNODB_MONITOR, then this also stops printing of monitor +output by the master thread. 
*/ int row_drop_table_for_mysql( @@ -918,11 +939,26 @@ row_drop_table_for_mysql( char* str1; char* str2; ulint len; + ulint namelen; + ulint keywordlen; char buf[10000]; ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); ut_a(name != NULL); + namelen = ut_strlen(name); + keywordlen = ut_strlen("innodb_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(name + namelen - keywordlen, + "innodb_monitor", keywordlen)) { + + /* Table name ends to characters innodb_monitor: + stop monitor prints */ + + srv_print_innodb_monitor = FALSE; + } + /* We use the private SQL parser of Innobase to generate the query graphs needed in deleting the dictionary data from system tables in Innobase. Deleting a row from SYS_INDEXES table also diff --git a/innobase/row/row0purge.c b/innobase/row/row0purge.c index 0a6fabe584c..ec880d3fe04 100644 --- a/innobase/row/row0purge.c +++ b/innobase/row/row0purge.c @@ -347,20 +347,36 @@ row_purge_del_mark( } /*************************************************************** -Purges an update of an existing record. */ +Purges an update of an existing record. Also purges an update of a delete +marked record if that record contained an externally stored field. 
*/ static void -row_purge_upd_exist( -/*================*/ +row_purge_upd_exist_or_extern( +/*==========================*/ purge_node_t* node, /* in: row purge node */ que_thr_t* thr) /* in: query thread */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; + upd_field_t* ufield; + ibool is_insert; + ulint rseg_id; + ulint page_no; + ulint offset; + ulint internal_offset; + byte* data_field; + ulint data_field_len; + ulint i; + mtr_t mtr; ut_ad(node && thr); + if (node->rec_type == TRX_UNDO_UPD_DEL_REC) { + + goto skip_secondaries; + } + heap = mem_heap_create(1024); while (node->index != NULL) { @@ -378,6 +394,53 @@ row_purge_upd_exist( } mem_heap_free(heap); + +skip_secondaries: + /* Free possible externally stored fields */ + for (i = 0; i < upd_get_n_fields(node->update); i++) { + + ufield = upd_get_nth_field(node->update, i); + + if (ufield->extern_storage) { + /* We use the fact that new_val points to + node->undo_rec and get thus the offset of + dfield data inside the undo record. Then we + can calculate from node->roll_ptr the file + address of the new_val data */ + + internal_offset = ((byte*)ufield->new_val.data) + - node->undo_rec; + + ut_a(internal_offset < UNIV_PAGE_SIZE); + + trx_undo_decode_roll_ptr(node->roll_ptr, + &is_insert, &rseg_id, + &page_no, &offset); + mtr_start(&mtr); + + /* We have to acquire an X-latch to the clustered + index tree */ + + index = dict_table_get_first_index(node->table); + + mtr_x_lock(dict_tree_get_lock(index->tree), &mtr); + + /* We assume in purge of externally stored fields + that the space id of the undo log record is 0! 
*/ + + data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr) + + offset + internal_offset; + + buf_page_dbg_add_level(buf_frame_align(data_field), + SYNC_TRX_UNDO_PAGE); + + data_field_len = ufield->new_val.len; + + btr_free_externally_stored_field(index, data_field, + data_field_len, &mtr); + mtr_commit(&mtr); + } + } } /*************************************************************** @@ -388,6 +451,9 @@ row_purge_parse_undo_rec( /*=====================*/ /* out: TRUE if purge operation required */ purge_node_t* node, /* in: row undo node */ + ibool* updated_extern, + /* out: TRUE if an externally stored field + was updated */ que_thr_t* thr) /* in: query thread */ { dict_index_t* clust_index; @@ -403,10 +469,10 @@ row_purge_parse_undo_rec( ut_ad(node && thr); ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info, - &undo_no, &table_id); + updated_extern, &undo_no, &table_id); node->rec_type = type; - if (type == TRX_UNDO_UPD_DEL_REC) { + if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) { return(FALSE); } @@ -416,7 +482,7 @@ row_purge_parse_undo_rec( node->table = NULL; if (type == TRX_UNDO_UPD_EXIST_REC - && cmpl_info & UPD_NODE_NO_ORD_CHANGE) { + && cmpl_info & UPD_NODE_NO_ORD_CHANGE && !(*updated_extern)) { /* Purge requires no changes to indexes: we may return */ @@ -455,8 +521,11 @@ row_purge_parse_undo_rec( /* Read to the partial row the fields that occur in indexes */ - ptr = trx_undo_rec_get_partial_row(ptr, clust_index, &(node->row), - node->heap); + if (!cmpl_info & UPD_NODE_NO_ORD_CHANGE) { + ptr = trx_undo_rec_get_partial_row(ptr, clust_index, + &(node->row), node->heap); + } + return(TRUE); } @@ -475,6 +544,7 @@ row_purge( { dulint roll_ptr; ibool purge_needed; + ibool updated_extern; ut_ad(node && thr); @@ -494,7 +564,8 @@ row_purge( if (node->undo_rec == &trx_purge_dummy_rec) { purge_needed = FALSE; } else { - purge_needed = row_purge_parse_undo_rec(node, thr); + purge_needed = row_purge_parse_undo_rec(node, &updated_extern, + thr); 
} if (purge_needed) { @@ -503,11 +574,13 @@ row_purge( node->index = dict_table_get_next_index( dict_table_get_first_index(node->table)); - if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) { - row_purge_upd_exist(node, thr); - } else { - ut_ad(node->rec_type == TRX_UNDO_DEL_MARK_REC); + if (node->rec_type == TRX_UNDO_DEL_MARK_REC) { row_purge_del_mark(node, thr); + + } else if (updated_extern + || node->rec_type == TRX_UNDO_UPD_EXIST_REC) { + + row_purge_upd_exist_or_extern(node, thr); } if (node->found_clust) { diff --git a/innobase/row/row0row.c b/innobase/row/row0row.c index f85789fa0d6..59169ef2a98 100644 --- a/innobase/row/row0row.c +++ b/innobase/row/row0row.c @@ -146,15 +146,17 @@ row_build_index_entry( /*********************************************************************** An inverse function to dict_row_build_index_entry. Builds a row from a -record in a clustered index. */ +record in a clustered index. NOTE that externally stored (often big) +fields are always copied to heap. */ dtuple_t* row_build( /*======*/ /* out, own: row built; see the NOTE below! 
*/ - ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS: - the former copies also the data fields to - heap as the latter only places pointers to + ulint type, /* in: ROW_COPY_POINTERS, ROW_COPY_DATA, or + ROW_COPY_ALSO_EXTERNALS, + the two last copy also the data fields to + heap as the first only places pointers to data fields on the index page, and thus is more efficient */ dict_index_t* index, /* in: clustered index */ @@ -170,19 +172,19 @@ row_build( { dtuple_t* row; dict_table_t* table; - ulint n_fields; - ulint i; + dict_col_t* col; dfield_t* dfield; + ulint n_fields; byte* field; ulint len; ulint row_len; - dict_col_t* col; byte* buf; + ulint i; ut_ad(index && rec && heap); ut_ad(index->type & DICT_CLUSTERED); - if (type == ROW_COPY_DATA) { + if (type != ROW_COPY_POINTERS) { /* Take a copy of rec to heap */ buf = mem_heap_alloc(heap, rec_get_size(rec)); rec = rec_copy(buf, rec); @@ -207,6 +209,13 @@ row_build( dfield = dtuple_get_nth_field(row, dict_col_get_no(col)); field = rec_get_nth_field(rec, i, &len); + if (type == ROW_COPY_ALSO_EXTERNALS + && rec_get_nth_field_extern_bit(rec, i)) { + + field = btr_rec_copy_externally_stored_field(rec, + i, &len, heap); + } + dfield_set_data(dfield, field, len); } @@ -215,6 +224,7 @@ row_build( return(row); } +#ifdef notdefined /*********************************************************************** An inverse function to dict_row_build_index_entry. Builds a row from a record in a clustered index. */ @@ -229,7 +239,9 @@ row_build_to_tuple( directly into this record, therefore, the buffer page of this record must be at least s-latched and the latch held - as long as the row dtuple is used! */ + as long as the row dtuple is used! + NOTE 2: does not work with externally + stored fields! 
*/ { dict_table_t* table; ulint n_fields; @@ -265,9 +277,11 @@ row_build_to_tuple( ut_ad(dtuple_check_typed(row)); } +#endif /*********************************************************************** -Converts an index record to a typed data tuple. */ +Converts an index record to a typed data tuple. NOTE that externally +stored (often big) fields are NOT copied to heap. */ dtuple_t* row_rec_to_index_entry( diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c index 5a77fc5daaa..b74bd29a89e 100644 --- a/innobase/row/row0sel.c +++ b/innobase/row/row0sel.c @@ -2036,7 +2036,8 @@ row_sel_store_mysql_rec( which was described in prebuilt's template */ { - mysql_row_templ_t* templ; + mysql_row_templ_t* templ; + mem_heap_t* extern_field_heap = NULL; byte* data; ulint len; byte* blob_buf; @@ -2059,6 +2060,24 @@ row_sel_store_mysql_rec( data = rec_get_nth_field(rec, templ->rec_field_no, &len); + if (rec_get_nth_field_extern_bit(rec, templ->rec_field_no)) { + /* Copy an externally stored field to the temporary + heap */ + + if (prebuilt->trx->has_search_latch) { + rw_lock_s_unlock(&btr_search_latch); + prebuilt->trx->has_search_latch = FALSE; + } + + extern_field_heap = mem_heap_create(UNIV_PAGE_SIZE); + + data = btr_rec_copy_externally_stored_field(rec, + templ->rec_field_no, &len, + extern_field_heap); + + ut_a(len != UNIV_SQL_NULL); + } + if (len != UNIV_SQL_NULL) { if (templ->type == DATA_BLOB) { @@ -2081,6 +2100,10 @@ row_sel_store_mysql_rec( mysql_rec + templ->mysql_col_offset, templ->mysql_col_len, data, len, templ->type, templ->is_unsigned); + + if (extern_field_heap) { + mem_heap_free(extern_field_heap); + } } else { mysql_rec[templ->mysql_null_byte_offset] |= (byte) (templ->mysql_null_bit_mask); @@ -2450,6 +2473,7 @@ row_search_for_mysql( ibool unique_search_from_clust_index = FALSE; ibool mtr_has_extra_clust_latch = FALSE; ibool moves_up = FALSE; + ulint cnt = 0; mtr_t mtr; ut_ad(index && pcur && search_tuple); @@ -2457,6 +2481,11 @@ row_search_for_mysql( 
ut_ad(sync_thread_levels_empty_gen(FALSE)); +/* printf("Match mode %lu\n search tuple ", match_mode); + dtuple_print(search_tuple); + + printf("N tables locked %lu\n", trx->mysql_n_tables_locked); +*/ if (direction == 0) { prebuilt->n_rows_fetched = 0; prebuilt->n_fetch_cached = 0; @@ -2528,6 +2557,8 @@ row_search_for_mysql( mtr_commit(&mtr); + /* printf("%s record not found 1\n", index->name); */ + return(DB_RECORD_NOT_FOUND); } @@ -2565,17 +2596,18 @@ row_search_for_mysql( mtr_commit(&mtr); + /* printf("%s shortcut\n", index->name); */ + return(DB_SUCCESS); } else if (shortcut == SEL_EXHAUSTED) { mtr_commit(&mtr); + /* printf("%s record not found 2\n", + index->name); */ return(DB_RECORD_NOT_FOUND); } - - /* Commit the mini-transaction since it can - hold latches */ mtr_commit(&mtr); mtr_start(&mtr); @@ -2659,7 +2691,12 @@ rec_loop: cons_read_requires_clust_rec = FALSE; rec = btr_pcur_get_rec(pcur); - +/* + printf("Using index %s cnt %lu ", index->name, cnt); + printf("; Page no %lu\n", + buf_frame_get_page_no(buf_frame_align(rec))); + rec_print(rec); +*/ if (rec == page_get_infimum_rec(buf_frame_align(rec))) { /* The infimum record on a page cannot be in the result set, @@ -2700,12 +2737,15 @@ rec_loop: /* Test if the index record matches completely to search_tuple in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */ + /* printf("Comparing rec and search tuple\n"); */ + if (0 != cmp_dtuple_rec(search_tuple, rec)) { btr_pcur_store_position(pcur, &mtr); ret = DB_RECORD_NOT_FOUND; - + /* printf("%s record not found 3\n", index->name); */ + goto normal_return; } @@ -2716,6 +2756,7 @@ rec_loop: btr_pcur_store_position(pcur, &mtr); ret = DB_RECORD_NOT_FOUND; + /* printf("%s record not found 4\n", index->name); */ goto normal_return; } @@ -2884,6 +2925,8 @@ next_rec: moved = sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur, moves_up, &mtr); if (moved) { + cnt++; + goto rec_loop; } } @@ -2906,6 +2949,8 @@ next_rec: goto normal_return; } + cnt++; + goto 
rec_loop; /*-------------------------------------------------------------*/ lock_wait_or_error: @@ -2931,7 +2976,9 @@ lock_wait_or_error: goto rec_loop; } - + + /* printf("Using index %s cnt %lu ret value %lu err\n", index->name, + cnt, err); */ return(err); normal_return: @@ -2945,5 +2992,7 @@ normal_return: ret = DB_SUCCESS; } + /* printf("Using index %s cnt %lu ret value %lu\n", index->name, + cnt, err); */ return(ret); } diff --git a/innobase/row/row0uins.c b/innobase/row/row0uins.c index c9330318ac0..47807877779 100644 --- a/innobase/row/row0uins.c +++ b/innobase/row/row0uins.c @@ -242,11 +242,12 @@ row_undo_ins_parse_undo_rec( dulint table_id; ulint type; ulint dummy; + ibool dummy_extern; ut_ad(node && thr); - ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy, &undo_no, - &table_id); + ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy, + &dummy_extern, &undo_no, &table_id); ut_ad(type == TRX_UNDO_INSERT_REC); node->rec_type = type; @@ -284,9 +285,9 @@ row_undo_ins( row_undo_ins_parse_undo_rec(node, thr); if (node->table == NULL) { - found = FALSE; + found = FALSE; } else { - found = row_undo_search_clust_to_pcur(node, thr); + found = row_undo_search_clust_to_pcur(node, thr); } if (!found) { diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c index c8db428bade..0221c51b985 100644 --- a/innobase/row/row0umod.c +++ b/innobase/row/row0umod.c @@ -94,12 +94,12 @@ row_undo_mod_clust_low( mtr_t* mtr, /* in: mtr */ ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ { + big_rec_t* dummy_big_rec; dict_index_t* index; btr_pcur_t* pcur; btr_cur_t* btr_cur; ulint err; ibool success; - ibool do_remove; index = dict_table_get_first_index(node->table); @@ -110,49 +110,80 @@ row_undo_mod_clust_low( ut_ad(success); + if (mode == BTR_MODIFY_LEAF) { + + err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG + | BTR_NO_UNDO_LOG_FLAG + | BTR_KEEP_SYS_FLAG, + btr_cur, node->update, + node->cmpl_info, thr, mtr); + } else { + ut_ad(mode == 
BTR_MODIFY_TREE); + + err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG + | BTR_NO_UNDO_LOG_FLAG + | BTR_KEEP_SYS_FLAG, + btr_cur, &dummy_big_rec, node->update, + node->cmpl_info, thr, mtr); + } + + return(err); +} + +/*************************************************************** +Removes a clustered index record after undo if possible. */ +static +ulint +row_undo_mod_remove_clust_low( +/*==========================*/ + /* out: DB_SUCCESS, DB_FAIL, or error code: + we may run out of file space */ + undo_node_t* node, /* in: row undo node */ + que_thr_t* thr, /* in: query thread */ + mtr_t* mtr, /* in: mtr */ + ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ +{ + btr_pcur_t* pcur; + btr_cur_t* btr_cur; + ulint err; + ibool success; + + pcur = &(node->pcur); + btr_cur = btr_pcur_get_btr_cur(pcur); + + success = btr_pcur_restore_position(mode, pcur, mtr); + + if (!success) { + + return(DB_SUCCESS); + } + /* Find out if we can remove the whole clustered index record */ if (node->rec_type == TRX_UNDO_UPD_DEL_REC && !row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) { - do_remove = TRUE; + /* Ok, we can remove */ } else { - do_remove = FALSE; + return(DB_SUCCESS); } if (mode == BTR_MODIFY_LEAF) { + success = btr_cur_optimistic_delete(btr_cur, mtr); - if (do_remove) { - success = btr_cur_optimistic_delete(btr_cur, mtr); - - if (success) { - err = DB_SUCCESS; - } else { - err = DB_FAIL; - } + if (success) { + err = DB_SUCCESS; } else { - err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG - | BTR_KEEP_SYS_FLAG, - btr_cur, node->update, - node->cmpl_info, thr, mtr); + err = DB_FAIL; } } else { ut_ad(mode == BTR_MODIFY_TREE); - if (do_remove) { - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, mtr); + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, mtr); - /* The delete operation may fail if we have little - file space left: TODO: easiest to crash the database - and restart with more file space */ - } else { - err = 
btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG - | BTR_KEEP_SYS_FLAG, - btr_cur, node->update, - node->cmpl_info, thr, mtr); - } + /* The delete operation may fail if we have little + file space left: TODO: easiest to crash the database + and restart with more file space */ } return(err); @@ -204,10 +235,31 @@ row_undo_mod_clust( err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_TREE); } - node->state = UNDO_NODE_FETCH_NEXT; - btr_pcur_commit_specify_mtr(pcur, &mtr); + if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) { + + mtr_start(&mtr); + + err = row_undo_mod_remove_clust_low(node, thr, &mtr, + BTR_MODIFY_LEAF); + if (err != DB_SUCCESS) { + btr_pcur_commit_specify_mtr(pcur, &mtr); + + /* We may have to modify tree structure: do a + pessimistic descent down the index tree */ + + mtr_start(&mtr); + + err = row_undo_mod_remove_clust_low(node, thr, &mtr, + BTR_MODIFY_TREE); + } + + btr_pcur_commit_specify_mtr(pcur, &mtr); + } + + node->state = UNDO_NODE_FETCH_NEXT; + trx_undo_rec_release(node->trx, node->undo_no); if (more_vers && err == DB_SUCCESS) { @@ -388,7 +440,6 @@ row_undo_mod_del_unmark_sec( mem_free(err_buf); } else { - btr_cur = btr_pcur_get_btr_cur(&pcur); err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG, @@ -546,11 +597,12 @@ row_undo_mod_parse_undo_rec( ulint info_bits; ulint type; ulint cmpl_info; + ibool dummy_extern; ut_ad(node && thr); ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info, - &undo_no, &table_id); + &dummy_extern, &undo_no, &table_id); node->rec_type = type; node->table = dict_table_get_on_id(table_id, thr_get_trx(thr)); @@ -598,10 +650,9 @@ row_undo_mod( row_undo_mod_parse_undo_rec(node, thr); if (node->table == NULL) { - found = FALSE; + found = FALSE; } else { - - found = row_undo_search_clust_to_pcur(node, thr); + found = row_undo_search_clust_to_pcur(node, thr); } if (!found) { diff --git a/innobase/row/row0undo.c b/innobase/row/row0undo.c index 
10ac3af6de9..5119254f405 100644 --- a/innobase/row/row0undo.c +++ b/innobase/row/row0undo.c @@ -124,6 +124,8 @@ row_undo_node_create( undo->state = UNDO_NODE_FETCH_NEXT; undo->trx = trx; + btr_pcur_init(&(undo->pcur)); + undo->heap = mem_heap_create(256); return(undo); @@ -303,6 +305,16 @@ row_undo_step( if (err != DB_SUCCESS) { /* SQL error detected */ + fprintf(stderr, "InnoDB: Fatal error %lu in rollback.\n", err); + + if (err == DB_OUT_OF_FILE_SPACE) { + fprintf(stderr, + "InnoDB: Error 13 means out of tablespace.\n" + "InnoDB: Consider increasing your tablespace.\n"); + + exit(1); + } + ut_a(0); return(NULL); diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c index 5bca2a24c01..d339474df61 100644 --- a/innobase/row/row0upd.c +++ b/innobase/row/row0upd.c @@ -90,8 +90,10 @@ upd_node_create( node->in_mysql_interface = FALSE; node->row = NULL; + node->ext_vec = NULL; node->index = NULL; - + node->update = NULL; + node->select = NULL; node->heap = mem_heap_create(128); @@ -160,7 +162,8 @@ row_upd_index_entry_sys_field( } /*************************************************************** -Returns TRUE if row update changes size of some field in index. */ +Returns TRUE if row update changes size of some field in index +or if some field to be updated is stored externally in rec or update. */ ibool row_upd_changes_field_size( @@ -199,6 +202,16 @@ row_upd_changes_field_size( return(TRUE); } + + if (rec_get_nth_field_extern_bit(rec, upd_field->field_no)) { + + return(TRUE); + } + + if (upd_field->extern_storage) { + + return(TRUE); + } } return(FALSE); @@ -441,6 +454,34 @@ row_upd_index_parse( return(ptr); } + +/******************************************************************* +Returns TRUE if ext_vec contains i. 
*/ +UNIV_INLINE +ibool +upd_ext_vec_contains( +/*=================*/ + /* out: TRUE if i is in ext_vec */ + ulint* ext_vec, /* in: array of indexes or NULL */ + ulint n_ext_vec, /* in: number of numbers in ext_vec */ + ulint i) /* in: a number */ +{ + ulint j; + + if (ext_vec == NULL) { + + return(FALSE); + } + + for (j = 0; j < n_ext_vec; j++) { + if (ext_vec[j] == i) { + + return(TRUE); + } + } + + return(FALSE); +} /******************************************************************* Builds an update vector from those fields, excluding the roll ptr and @@ -454,6 +495,9 @@ row_upd_build_difference( fields, excluding roll ptr and trx id */ dict_index_t* index, /* in: clustered index */ dtuple_t* entry, /* in: entry to insert */ + ulint* ext_vec,/* in: array containing field numbers of + externally stored fields in entry, or NULL */ + ulint n_ext_vec,/* in: number of fields in ext_vec */ rec_t* rec, /* in: clustered index record */ mem_heap_t* heap) /* in: memory heap from which allocated */ { @@ -480,16 +524,25 @@ row_upd_build_difference( for (i = 0; i < dtuple_get_n_fields(entry); i++) { data = rec_get_nth_field(rec, i, &len); + dfield = dtuple_get_nth_field(entry, i); - if ((i != trx_id_pos) && (i != roll_ptr_pos) - && !dfield_data_is_equal(dfield, len, data)) { + if ((rec_get_nth_field_extern_bit(rec, i) + != upd_ext_vec_contains(ext_vec, n_ext_vec, i)) + || ((i != trx_id_pos) && (i != roll_ptr_pos) + && !dfield_data_is_equal(dfield, len, data))) { upd_field = upd_get_nth_field(update, n_diff); dfield_copy(&(upd_field->new_val), dfield); upd_field_set_field_no(upd_field, i, index); + + if (upd_ext_vec_contains(ext_vec, n_ext_vec, i)) { + upd_field->extern_storage = TRUE; + } else { + upd_field->extern_storage = FALSE; + } n_diff++; } @@ -630,9 +683,7 @@ row_upd_changes_ord_field( } /*************************************************************** -Checks if an update vector changes an ordering field of an index record. 
-This function is fast if the update vector is short or the number of ordering -fields in the index is small. Otherwise, this can be quadratic. */ +Checks if an update vector changes an ordering field of an index record. */ ibool row_upd_changes_some_index_ord_field( @@ -642,19 +693,24 @@ row_upd_changes_some_index_ord_field( dict_table_t* table, /* in: table */ upd_t* update) /* in: update vector for the row */ { + upd_field_t* upd_field; dict_index_t* index; - + ulint i; + index = dict_table_get_first_index(table); - while (index) { - if (row_upd_changes_ord_field(NULL, index, update)) { + for (i = 0; i < upd_get_n_fields(update); i++) { - return(TRUE); - } + upd_field = upd_get_nth_field(update, i); - index = dict_table_get_next_index(index); - } + if (dict_field_get_col(dict_index_get_nth_field(index, + upd_field->field_no)) + ->ord_part) { + return(TRUE); + } + } + return(FALSE); } @@ -710,15 +766,17 @@ row_upd_eval_new_vals( /*************************************************************** Stores to the heap the row on which the node->pcur is positioned. 
*/ -UNIV_INLINE +static void row_upd_store_row( /*==============*/ upd_node_t* node) /* in: row update node */ { dict_index_t* clust_index; + upd_t* update; + rec_t* rec; - ut_ad((node->pcur)->latch_mode != BTR_NO_LATCHES); + ut_ad(node->pcur->latch_mode != BTR_NO_LATCHES); if (node->row != NULL) { mem_heap_empty(node->heap); @@ -727,8 +785,20 @@ row_upd_store_row( clust_index = dict_table_get_first_index(node->table); - node->row = row_build(ROW_COPY_DATA, clust_index, - btr_pcur_get_rec(node->pcur), node->heap); + rec = btr_pcur_get_rec(node->pcur); + + node->row = row_build(ROW_COPY_DATA, clust_index, rec, node->heap); + + node->ext_vec = mem_heap_alloc(node->heap, rec_get_n_fields(rec)); + + if (node->is_delete) { + update = NULL; + } else { + update = node->update; + } + + node->n_ext_vec = btr_push_update_extern_fields(node->ext_vec, + rec, update); } /*************************************************************** @@ -812,7 +882,7 @@ row_upd_sec_index_entry( row_upd_index_replace_new_col_vals(entry, index, node->update); /* Insert new index entry */ - err = row_ins_index_entry(index, entry, thr); + err = row_ins_index_entry(index, entry, NULL, 0, thr); mem_heap_free(heap); @@ -870,6 +940,8 @@ row_upd_clust_rec_by_insert( dict_table_t* table; mem_heap_t* heap; dtuple_t* entry; + ulint* ext_vec; + ulint n_ext_vec; ulint err; ut_ad(node); @@ -897,14 +969,18 @@ row_upd_clust_rec_by_insert( heap = mem_heap_create(1024); + ext_vec = mem_heap_alloc(heap, + sizeof(ulint) * dtuple_get_n_fields(node->row)); + n_ext_vec = 0; + entry = row_build_index_entry(node->row, index, heap); row_upd_clust_index_replace_new_col_vals(entry, node->update); - + row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id); - err = row_ins_index_entry(index, entry, thr); - + err = row_ins_index_entry(index, entry, node->ext_vec, + node->n_ext_vec, thr); mem_heap_free(heap); return(err); @@ -924,6 +1000,7 @@ row_upd_clust_rec( que_thr_t* thr, /* in: query thread */ mtr_t* mtr) /* 
in: mtr; gets committed here */ { + big_rec_t* big_rec = NULL; btr_pcur_t* pcur; btr_cur_t* btr_cur; ulint err; @@ -973,9 +1050,24 @@ row_upd_clust_rec( ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur))); err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur, - node->update, node->cmpl_info, thr, mtr); + &big_rec, node->update, + node->cmpl_info, thr, mtr); mtr_commit(mtr); + if (err == DB_SUCCESS && big_rec) { + mtr_start(mtr); + ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); + + err = btr_store_big_rec_extern_fields(index, + btr_cur_get_rec(btr_cur), + big_rec, mtr); + mtr_commit(mtr); + } + + if (big_rec) { + dtuple_big_rec_free(big_rec); + } + return(err); } @@ -1194,10 +1286,12 @@ row_upd( ut_ad(node && thr); if (node->in_mysql_interface) { + /* We do not get the cmpl_info value from the MySQL interpreter: we must calculate it on the fly: */ - if (row_upd_changes_some_index_ord_field(node->table, + if (node->is_delete || + row_upd_changes_some_index_ord_field(node->table, node->update)) { node->cmpl_info = 0; } else { @@ -1239,6 +1333,7 @@ function_exit: if (node->row != NULL) { mem_heap_empty(node->heap); node->row = NULL; + node->n_ext_vec = 0; } node->state = UPD_NODE_UPDATE_CLUSTERED; |