diff options
Diffstat (limited to 'innobase/row/row0upd.c')
-rw-r--r-- | innobase/row/row0upd.c | 1394 |
1 files changed, 1394 insertions, 0 deletions
diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c new file mode 100644 index 00000000000..44843494247 --- /dev/null +++ b/innobase/row/row0upd.c @@ -0,0 +1,1394 @@ +/****************************************************** +Update of a row + +(c) 1996 Innobase Oy + +Created 12/27/1996 Heikki Tuuri +*******************************************************/ + +#include "row0upd.h" + +#ifdef UNIV_NONINL +#include "row0upd.ic" +#endif + +#include "dict0dict.h" +#include "dict0boot.h" +#include "dict0crea.h" +#include "mach0data.h" +#include "trx0undo.h" +#include "btr0btr.h" +#include "btr0cur.h" +#include "que0que.h" +#include "row0ins.h" +#include "row0sel.h" +#include "row0row.h" +#include "rem0cmp.h" +#include "lock0lock.h" +#include "log0log.h" +#include "pars0sym.h" +#include "eval0eval.h" + + +/* What kind of latch and lock can we assume when the control comes to + ------------------------------------------------------------------- +an update node? +-------------- +Efficiency of massive updates would require keeping an x-latch on a +clustered index page through many updates, and not setting an explicit +x-lock on clustered index records, as they anyway will get an implicit +x-lock when they are updated. A problem is that the read nodes in the +graph should know that they must keep the latch when passing the control +up to the update node, and not set any record lock on the record which +will be updated. Another problem occurs if the execution is stopped, +as the kernel switches to another query thread, or the transaction must +wait for a lock. Then we should be able to release the latch and, maybe, +acquire an explicit x-lock on the record. + Because this seems too complicated, we conclude that the less +efficient solution of releasing all the latches when the control is +transferred to another node, and acquiring explicit x-locks, is better. */ + +/* How is a delete performed? 
If there is a delete without an +explicit cursor, i.e., a searched delete, there are at least +two different situations: +the implicit select cursor may run on (1) the clustered index or +on (2) a secondary index. The delete is performed by setting +the delete bit in the record and substituting the id of the +deleting transaction for the original trx id, and substituting a +new roll ptr for previous roll ptr. The old trx id and roll ptr +are saved in the undo log record. Thus, no physical changes occur +in the index tree structure at the time of the delete. Only +when the undo log is purged, the index records will be physically +deleted from the index trees. + +The query graph executing a searched delete would consist of +a delete node which has as a subtree a select subgraph. +The select subgraph should return a (persistent) cursor +in the clustered index, placed on page which is x-latched. +The delete node should look for all secondary index records for +this clustered index entry and mark them as deleted. When is +the x-latch freed? The most efficient way for performing a +searched delete is obviously to keep the x-latch for several +steps of query graph execution. */ + +/************************************************************************* +Creates an update node for a query graph. 
*/ + +upd_node_t* +upd_node_create( +/*============*/ + /* out, own: update node */ + mem_heap_t* heap) /* in: mem heap where created */ +{ + upd_node_t* node; + + node = mem_heap_alloc(heap, sizeof(upd_node_t)); + node->common.type = QUE_NODE_UPDATE; + + node->state = UPD_NODE_UPDATE_CLUSTERED; + node->select_will_do_update = FALSE; + node->in_mysql_interface = FALSE; + + node->row = NULL; + node->index = NULL; + + node->select = NULL; + + node->heap = mem_heap_create(128); + node->magic_n = UPD_NODE_MAGIC_N; + + node->cmpl_info = 0; + + return(node); +} + +/************************************************************************* +Updates the trx id and roll ptr field in a clustered index record in database +recovery. */ + +void +row_upd_rec_sys_fields_in_recovery( +/*===============================*/ + rec_t* rec, /* in: record */ + ulint pos, /* in: TRX_ID position in rec */ + dulint trx_id, /* in: transaction id */ + dulint roll_ptr)/* in: roll ptr of the undo log record */ +{ + byte* field; + ulint len; + + field = rec_get_nth_field(rec, pos, &len); + ut_ad(len == DATA_TRX_ID_LEN); + trx_write_trx_id(field, trx_id); + + field = rec_get_nth_field(rec, pos + 1, &len); + ut_ad(len == DATA_ROLL_PTR_LEN); + trx_write_roll_ptr(field, roll_ptr); +} + +/************************************************************************* +Sets the trx id or roll ptr field of a clustered index entry. 
*/ + +void +row_upd_index_entry_sys_field( +/*==========================*/ + dtuple_t* entry, /* in: index entry, where the memory buffers + for sys fields are already allocated: + the function just copies the new values to + them */ + dict_index_t* index, /* in: clustered index */ + ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ + dulint val) /* in: value to write */ +{ + dfield_t* dfield; + byte* field; + ulint pos; + + ut_ad(index->type & DICT_CLUSTERED); + + pos = dict_index_get_sys_col_pos(index, type); + + dfield = dtuple_get_nth_field(entry, pos); + field = dfield_get_data(dfield); + + if (type == DATA_TRX_ID) { + trx_write_trx_id(field, val); + } else { + ut_ad(type == DATA_ROLL_PTR); + trx_write_roll_ptr(field, val); + } +} + +/*************************************************************** +Returns TRUE if row update changes size of some field in index. */ + +ibool +row_upd_changes_field_size( +/*=======================*/ + /* out: TRUE if the update changes the size of + some field in index */ + rec_t* rec, /* in: record in clustered index */ + dict_index_t* index, /* in: clustered index */ + upd_t* update) /* in: update vector */ +{ + upd_field_t* upd_field; + dfield_t* new_val; + ulint old_len; + ulint new_len; + ulint n_fields; + ulint i; + + ut_ad(index->type & DICT_CLUSTERED); + + n_fields = upd_get_n_fields(update); + + for (i = 0; i < n_fields; i++) { + upd_field = upd_get_nth_field(update, i); + + new_val = &(upd_field->new_val); + new_len = new_val->len; + + if (new_len == UNIV_SQL_NULL) { + new_len = dtype_get_sql_null_size( + dict_index_get_nth_type(index, i)); + } + + old_len = rec_get_nth_field_size(rec, upd_field->field_no); + + if (old_len != new_len) { + + return(TRUE); + } + } + + return(FALSE); +} + +/*************************************************************** +Replaces the new column values stored in the update vector to the record +given. No field size changes are allowed. 
This function is used only for +a clustered index */ + +void +row_upd_rec_in_place( +/*=================*/ + rec_t* rec, /* in/out: record where replaced */ + upd_t* update) /* in: update vector */ +{ + upd_field_t* upd_field; + dfield_t* new_val; + ulint n_fields; + ulint i; + + rec_set_info_bits(rec, update->info_bits); + + n_fields = upd_get_n_fields(update); + + for (i = 0; i < n_fields; i++) { + upd_field = upd_get_nth_field(update, i); + new_val = &(upd_field->new_val); + + rec_set_nth_field(rec, upd_field->field_no, + dfield_get_data(new_val), + dfield_get_len(new_val)); + } +} + +/************************************************************************* +Writes into the redo log the values of trx id and roll ptr and enough info +to determine their positions within a clustered index record. */ + +byte* +row_upd_write_sys_vals_to_log( +/*==========================*/ + /* out: new pointer to mlog */ + dict_index_t* index, /* in: clustered index */ + trx_t* trx, /* in: transaction */ + dulint roll_ptr,/* in: roll ptr of the undo log record */ + byte* log_ptr,/* pointer to a buffer of size > 20 opened + in mlog */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(index->type & DICT_CLUSTERED); + ut_ad(mtr); + + log_ptr += mach_write_compressed(log_ptr, + dict_index_get_sys_col_pos(index, DATA_TRX_ID)); + + trx_write_roll_ptr(log_ptr, roll_ptr); + log_ptr += DATA_ROLL_PTR_LEN; + + log_ptr += mach_dulint_write_compressed(log_ptr, trx->id); + + return(log_ptr); +} + +/************************************************************************* +Parses the log data of system field values. 
*/ + +byte* +row_upd_parse_sys_vals( +/*===================*/ + /* out: log data end or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + ulint* pos, /* out: TRX_ID position in record */ + dulint* trx_id, /* out: trx id */ + dulint* roll_ptr)/* out: roll ptr */ +{ + ptr = mach_parse_compressed(ptr, end_ptr, pos); + + if (ptr == NULL) { + + return(NULL); + } + + if (end_ptr < ptr + DATA_ROLL_PTR_LEN) { + + return(NULL); + } + + *roll_ptr = trx_read_roll_ptr(ptr); + ptr += DATA_ROLL_PTR_LEN; + + ptr = mach_dulint_parse_compressed(ptr, end_ptr, trx_id); + + return(ptr); +} + +/*************************************************************** +Writes to the redo log the new values of the fields occurring in the index. */ + +void +row_upd_index_write_log( +/*====================*/ + upd_t* update, /* in: update vector */ + byte* log_ptr,/* in: pointer to mlog buffer: must contain at least + MLOG_BUF_MARGIN bytes of free space; the buffer is + closed within this function */ + mtr_t* mtr) /* in: mtr into whose log to write */ +{ + upd_field_t* upd_field; + dfield_t* new_val; + ulint len; + ulint n_fields; + byte* buf_end; + ulint i; + + n_fields = upd_get_n_fields(update); + + buf_end = log_ptr + MLOG_BUF_MARGIN; + + mach_write_to_1(log_ptr, update->info_bits); + log_ptr++; + log_ptr += mach_write_compressed(log_ptr, n_fields); + + for (i = 0; i < n_fields; i++) { + + ut_ad(MLOG_BUF_MARGIN > 30); + + if (log_ptr + 30 > buf_end) { + mlog_close(mtr, log_ptr); + + log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN); + buf_end = log_ptr + MLOG_BUF_MARGIN; + } + + upd_field = upd_get_nth_field(update, i); + + new_val = &(upd_field->new_val); + + len = new_val->len; + + log_ptr += mach_write_compressed(log_ptr, upd_field->field_no); + log_ptr += mach_write_compressed(log_ptr, len); + + if (len != UNIV_SQL_NULL) { + if (log_ptr + len < buf_end) { + ut_memcpy(log_ptr, new_val->data, len); + + log_ptr += len; + } else { + mlog_close(mtr, log_ptr); + + 
mlog_catenate_string(mtr, new_val->data, len); + + log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN); + buf_end = log_ptr + MLOG_BUF_MARGIN; + } + } + } + + mlog_close(mtr, log_ptr); +} + +/************************************************************************* +Parses the log data written by row_upd_index_write_log. */ + +byte* +row_upd_index_parse( +/*================*/ + /* out: log data end or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + mem_heap_t* heap, /* in: memory heap where update vector is + built */ + upd_t** update_out)/* out: update vector */ +{ + upd_t* update; + upd_field_t* upd_field; + dfield_t* new_val; + ulint len; + ulint n_fields; + byte* buf; + ulint info_bits; + ulint i; + + if (end_ptr < ptr + 1) { + + return(NULL); + } + + info_bits = mach_read_from_1(ptr); + ptr++; + ptr = mach_parse_compressed(ptr, end_ptr, &n_fields); + + if (ptr == NULL) { + + return(NULL); + } + + update = upd_create(n_fields, heap); + update->info_bits = info_bits; + + for (i = 0; i < n_fields; i++) { + upd_field = upd_get_nth_field(update, i); + new_val = &(upd_field->new_val); + + ptr = mach_parse_compressed(ptr, end_ptr, + &(upd_field->field_no)); + if (ptr == NULL) { + + return(NULL); + } + + ptr = mach_parse_compressed(ptr, end_ptr, &len); + + if (ptr == NULL) { + + return(NULL); + } + + new_val->len = len; + + if (len != UNIV_SQL_NULL) { + + if (end_ptr < ptr + len) { + + return(NULL); + } else { + buf = mem_heap_alloc(heap, len); + ut_memcpy(buf, ptr, len); + + ptr += len; + + new_val->data = buf; + } + } + } + + *update_out = update; + + return(ptr); +} + +/******************************************************************* +Builds an update vector from those fields, excluding the roll ptr and +trx id fields, which in an index entry differ from a record that has +the equal ordering fields. 
*/

upd_t*
row_upd_build_difference(
/*=====================*/
				/* out, own: update vector of differing
				fields, excluding roll ptr and trx id */
	dict_index_t*	index,	/* in: clustered index */
	dtuple_t*	entry,	/* in: entry to insert */
	rec_t*		rec,	/* in: clustered index record */
	mem_heap_t*	heap)	/* in: memory heap from which allocated */
{
	upd_t*		diff;
	upd_field_t*	slot;
	dfield_t*	entry_field;
	byte*		rec_data;
	ulint		rec_len;
	ulint		n_found;
	ulint		trx_id_pos;
	ulint		roll_ptr_pos;
	ulint		pos;

	/* Only a clustered index is supported here */
	ut_ad(index->type & DICT_CLUSTERED);

	/* Reserve room for the case where every field differs; the vector
	is cut down to the real count at the end */
	diff = upd_create(dtuple_get_n_fields(entry), heap);

	roll_ptr_pos = dict_index_get_sys_col_pos(index, DATA_ROLL_PTR);
	trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);

	n_found = 0;

	for (pos = 0; pos < dtuple_get_n_fields(entry); pos++) {

		rec_data = rec_get_nth_field(rec, pos, &rec_len);
		entry_field = dtuple_get_nth_field(entry, pos);

		if (pos == trx_id_pos || pos == roll_ptr_pos) {
			/* System columns never go to the vector */

			continue;
		}

		if (dfield_data_is_equal(entry_field, rec_len, rec_data)) {

			continue;
		}

		/* The field differs: record the value of the entry */
		slot = upd_get_nth_field(diff, n_found);

		dfield_copy(&(slot->new_val), entry_field);

		upd_field_set_field_no(slot, pos, index);

		n_found++;
	}

	diff->n_fields = n_found;

	return(diff);
}

/***************************************************************
Replaces the new column values stored in the update vector to the index entry
given.
*/ + +void +row_upd_index_replace_new_col_vals( +/*===============================*/ + dtuple_t* entry, /* in/out: index entry where replaced */ + dict_index_t* index, /* in: index; NOTE that may also be a + non-clustered index */ + upd_t* update) /* in: update vector */ +{ + upd_field_t* upd_field; + dfield_t* dfield; + dfield_t* new_val; + ulint field_no; + dict_index_t* clust_index; + ulint i; + + ut_ad(index); + + clust_index = dict_table_get_first_index(index->table); + + dtuple_set_info_bits(entry, update->info_bits); + + for (i = 0; i < upd_get_n_fields(update); i++) { + + upd_field = upd_get_nth_field(update, i); + + field_no = dict_index_get_nth_col_pos(index, + dict_index_get_nth_col_no(clust_index, + upd_field->field_no)); + if (field_no != ULINT_UNDEFINED) { + dfield = dtuple_get_nth_field(entry, field_no); + + new_val = &(upd_field->new_val); + + dfield_set_data(dfield, new_val->data, new_val->len); + } + } +} + +/*************************************************************** +Replaces the new column values stored in the update vector to the +clustered index entry given. */ + +void +row_upd_clust_index_replace_new_col_vals( +/*=====================================*/ + dtuple_t* entry, /* in/out: index entry where replaced */ + upd_t* update) /* in: update vector */ +{ + upd_field_t* upd_field; + dfield_t* dfield; + dfield_t* new_val; + ulint field_no; + ulint i; + + dtuple_set_info_bits(entry, update->info_bits); + + for (i = 0; i < upd_get_n_fields(update); i++) { + + upd_field = upd_get_nth_field(update, i); + + field_no = upd_field->field_no; + + dfield = dtuple_get_nth_field(entry, field_no); + + new_val = &(upd_field->new_val); + + dfield_set_data(dfield, new_val->data, new_val->len); + } +} + +/*************************************************************** +Checks if an update vector changes an ordering field of an index record. +This function is fast if the update vector is short or the number of ordering +fields in the index is small. 
Otherwise, this can be quadratic. */

ibool
row_upd_changes_ord_field(
/*======================*/
				/* out: TRUE if update vector changes
				an ordering field in the index record */
	dtuple_t*	row,	/* in: old value of row, or NULL if the
				row and the data values in update are not
				known when this function is called, e.g., at
				compile time */
	dict_index_t*	index,	/* in: index of the record */
	upd_t*		update)	/* in: update vector for the row */
{
	upd_field_t*	uf;
	dict_col_t*	col;
	ulint		n_ord;
	ulint		n_upd;
	ulint		clust_pos;
	ulint		col_no;
	ulint		ord_i;
	ulint		upd_i;

	ut_ad(update && index);

	n_ord = dict_index_get_n_unique(index);
	n_upd = upd_get_n_fields(update);

	for (ord_i = 0; ord_i < n_ord; ord_i++) {

		col = dict_field_get_col(
				dict_index_get_nth_field(index, ord_i));
		clust_pos = dict_col_get_clust_pos(col);
		col_no = dict_col_get_no(col);

		for (upd_i = 0; upd_i < n_upd; upd_i++) {

			uf = upd_get_nth_field(update, upd_i);

			if (clust_pos != uf->field_no) {
				/* This update field is some other column */

				continue;
			}

			if (row == NULL) {
				/* Values are not known: the ordering field
				being in the update vector is enough */

				return(TRUE);
			}

			if (!dfield_datas_are_equal(
					dtuple_get_nth_field(row, col_no),
					&(uf->new_val))) {
				/* The new value differs from the old one */

				return(TRUE);
			}
		}
	}

	return(FALSE);
}

/***************************************************************
Checks if an update vector changes an ordering field of an index record.
This function is fast if the update vector is short or the number of ordering
fields in the index is small. Otherwise, this can be quadratic.
*/ + +ibool +row_upd_changes_some_index_ord_field( +/*=================================*/ + /* out: TRUE if update vector may change + an ordering field in an index record */ + dict_table_t* table, /* in: table */ + upd_t* update) /* in: update vector for the row */ +{ + dict_index_t* index; + + index = dict_table_get_first_index(table); + + while (index) { + if (row_upd_changes_ord_field(NULL, index, update)) { + + return(TRUE); + } + + index = dict_table_get_next_index(index); + } + + return(FALSE); +} + +/************************************************************************* +Copies the column values from a record. */ +UNIV_INLINE +void +row_upd_copy_columns( +/*=================*/ + rec_t* rec, /* in: record in a clustered index */ + sym_node_t* column) /* in: first column in a column list, or + NULL */ +{ + byte* data; + ulint len; + + while (column) { + data = rec_get_nth_field(rec, + column->field_nos[SYM_CLUST_FIELD_NO], + &len); + eval_node_copy_and_alloc_val(column, data, len); + + column = UT_LIST_GET_NEXT(col_var_list, column); + } +} + +/************************************************************************* +Calculates the new values for fields to update. Note that row_upd_copy_columns +must have been called first. */ +UNIV_INLINE +void +row_upd_eval_new_vals( +/*==================*/ + upd_t* update) /* in: update vector */ +{ + que_node_t* exp; + upd_field_t* upd_field; + ulint n_fields; + ulint i; + + n_fields = upd_get_n_fields(update); + + for (i = 0; i < n_fields; i++) { + upd_field = upd_get_nth_field(update, i); + + exp = upd_field->exp; + + eval_exp(exp); + + dfield_copy_data(&(upd_field->new_val), que_node_get_val(exp)); + } +} + +/*************************************************************** +Stores to the heap the row on which the node->pcur is positioned. 
*/ +UNIV_INLINE +void +row_upd_store_row( +/*==============*/ + upd_node_t* node) /* in: row update node */ +{ + dict_index_t* clust_index; + + ut_ad((node->pcur)->latch_mode != BTR_NO_LATCHES); + + if (node->row != NULL) { + mem_heap_empty(node->heap); + node->row = NULL; + } + + clust_index = dict_table_get_first_index(node->table); + + node->row = row_build(ROW_COPY_DATA, clust_index, + btr_pcur_get_rec(node->pcur), node->heap); +} + +/*************************************************************** +Updates a secondary index entry of a row. */ +static +ulint +row_upd_sec_index_entry( +/*====================*/ + /* out: DB_SUCCESS if operation successfully + completed, else error code or DB_LOCK_WAIT */ + upd_node_t* node, /* in: row update node */ + que_thr_t* thr) /* in: query thread */ +{ + ibool found; + dict_index_t* index; + dtuple_t* entry; + mtr_t mtr; + btr_pcur_t pcur; + btr_cur_t* btr_cur; + mem_heap_t* heap; + rec_t* rec; + ulint err; + + index = node->index; + + heap = mem_heap_create(1024); + + /* Build old index entry */ + entry = row_build_index_entry(node->row, index, heap); + + log_free_check(); + mtr_start(&mtr); + + found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, + &mtr); + ut_ad(found); + + btr_cur = btr_pcur_get_btr_cur(&pcur); + + rec = btr_cur_get_rec(btr_cur); + + /* Delete mark the old index record; it can already be delete marked if + we return after a lock wait in row_ins_index_entry below */ + + if (!rec_get_deleted_flag(rec)) { + err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, thr, + &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + if (node->is_delete || (err != DB_SUCCESS)) { + + mem_heap_free(heap); + + return(err); + } + + /* Build a new index entry */ + row_upd_index_replace_new_col_vals(entry, index, node->update); + + /* Insert new index entry */ + err = row_ins_index_entry(index, entry, thr); + + mem_heap_free(heap); + + return(err); +} + 
+/*************************************************************** +Updates secondary index record if it is changed in the row update. This +should be quite rare in database applications. */ +UNIV_INLINE +ulint +row_upd_sec_step( +/*=============*/ + /* out: DB_SUCCESS if operation successfully + completed, else error code or DB_LOCK_WAIT */ + upd_node_t* node, /* in: row update node */ + que_thr_t* thr) /* in: query thread */ +{ + ulint err; + + ut_ad((node->state == UPD_NODE_UPDATE_ALL_SEC) + || (node->state == UPD_NODE_UPDATE_SOME_SEC)); + ut_ad(!(node->index->type & DICT_CLUSTERED)); + + if ((node->state == UPD_NODE_UPDATE_ALL_SEC) + || row_upd_changes_ord_field(node->row, node->index, + node->update)) { + err = row_upd_sec_index_entry(node, thr); + + return(err); + } + + return(DB_SUCCESS); +} + +/*************************************************************** +Marks the clustered index record deleted and inserts the updated version +of the record to the index. This function should be used when the ordering +fields of the clustered index record change. This should be quite rare in +database applications. 
*/
static
ulint
row_upd_clust_rec_by_insert(
/*========================*/
				/* out: DB_SUCCESS if operation successfully
				completed, else error code or DB_LOCK_WAIT */
	upd_node_t*	node,	/* in: row update node */
	dict_index_t*	index,	/* in: clustered index of the record */
	que_thr_t*	thr,	/* in: query thread */
	mtr_t*		mtr)	/* in: mtr; gets committed here */
{
	btr_cur_t*	cursor;
	trx_t*		trx;
	mem_heap_t*	entry_heap;
	dtuple_t*	new_entry;
	ulint		err;

	ut_ad(node);
	ut_ad(index->type & DICT_CLUSTERED);

	trx = thr_get_trx(thr);
	cursor = btr_pcur_get_btr_cur(node->pcur);

	/* In the state UPD_NODE_INSERT_CLUSTERED the delete marking was
	already done on an earlier call and only the insert remains */

	if (node->state == UPD_NODE_INSERT_CLUSTERED) {
		err = DB_SUCCESS;
	} else {
		err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
						cursor, TRUE, thr, mtr);
	}

	/* The mtr is committed whether or not the marking succeeded */
	mtr_commit(mtr);

	if (err != DB_SUCCESS) {

		return(err);
	}

	node->state = UPD_NODE_INSERT_CLUSTERED;

	entry_heap = mem_heap_create(1024);

	/* Build the new version of the entry from the stored old row and
	stamp it with the id of this transaction */

	new_entry = row_build_index_entry(node->row, index, entry_heap);

	row_upd_clust_index_replace_new_col_vals(new_entry, node->update);

	row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
								trx->id);

	err = row_ins_index_entry(index, new_entry, thr);

	mem_heap_free(entry_heap);

	return(err);
}

/***************************************************************
Updates a clustered index record of a row when the ordering fields do
not change.
*/
static
ulint
row_upd_clust_rec(
/*==============*/
				/* out: DB_SUCCESS if operation successfully
				completed, else error code or DB_LOCK_WAIT */
	upd_node_t*	node,	/* in: row update node */
	dict_index_t*	index,	/* in: clustered index */
	que_thr_t*	thr,	/* in: query thread */
	mtr_t*		mtr)	/* in: mtr; gets committed here */
{
	btr_pcur_t*	pcur;
	btr_cur_t*	cursor;
	ibool		restored;
	ulint		err;

	ut_ad(node);
	ut_ad(index->type & DICT_CLUSTERED);

	pcur = node->pcur;
	cursor = btr_pcur_get_btr_cur(pcur);

	ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));

	/* First attempt: an update which keeps the changes within the
	page. Locks are not checked because we assume the x-lock on the
	record to update. If no field size changes, the record is
	overwritten in place. */

	if (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE) {
		err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG,
						cursor, node->update,
						node->cmpl_info, thr, mtr);
	} else {
		err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG,
						cursor, node->update,
						node->cmpl_info, thr, mtr);
	}

	mtr_commit(mtr);

	if (err == DB_SUCCESS) {

		return(err);
	}

	/* Second attempt: the tree structure may have to be modified, so
	descend the index tree again in the pessimistic mode */

	mtr_start(mtr);

	/* NOTE: this transaction has an s-lock or x-lock on the record and
	therefore other transactions cannot modify the record when we have no
	latch on the page. In addition, we assume that other query threads of
	the same transaction do not modify the record in the meantime.
	Therefore we can assert that the restoration of the cursor succeeds. */

	restored = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr);
	ut_a(restored);

	ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));

	err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, cursor,
					node->update, node->cmpl_info,
					thr, mtr);
	mtr_commit(mtr);

	return(err);
}

/***************************************************************
Delete marks a clustered index record.
*/ +static +ulint +row_upd_del_mark_clust_rec( +/*=======================*/ + /* out: DB_SUCCESS if operation successfully + completed, else error code or DB_LOCK_WAIT */ + upd_node_t* node, /* in: row update node */ + dict_index_t* index, /* in: clustered index */ + que_thr_t* thr, /* in: query thread */ + mtr_t* mtr) /* in: mtr; gets committed here */ +{ + btr_pcur_t* pcur; + btr_cur_t* btr_cur; + ulint err; + + ut_ad(node); + ut_ad(index->type & DICT_CLUSTERED); + ut_ad(node->is_delete); + + pcur = node->pcur; + btr_cur = btr_pcur_get_btr_cur(pcur); + + ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur))); + + /* Store row because we have to build also the secondary index + entries */ + + row_upd_store_row(node); + + /* Mark the clustered index record deleted; we do not have to check + locks, because we assume that we have an x-lock on the record */ + + err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, btr_cur, + TRUE, thr, mtr); + mtr_commit(mtr); + + return(err); +} + +/*************************************************************** +Updates the clustered index record. */ +static +ulint +row_upd_clust_step( +/*===============*/ + /* out: DB_SUCCESS if operation successfully + completed, DB_LOCK_WAIT in case of a lock wait, + else error code */ + upd_node_t* node, /* in: row update node */ + que_thr_t* thr) /* in: query thread */ +{ + dict_index_t* index; + btr_pcur_t* pcur; + ibool success; + ulint err; + mtr_t mtr_buf; + mtr_t* mtr; + + index = dict_table_get_first_index(node->table); + + pcur = node->pcur; + + /* We have to restore the cursor to its position */ + mtr = &mtr_buf; + + mtr_start(mtr); + + /* If the restoration does not succeed, then the same + transaction has deleted the record on which the cursor was, + and that is an SQL error. 
If the restoration succeeds, it may + still be that the same transaction has successively deleted + and inserted a record with the same ordering fields, but in + that case we know that the transaction has at least an + implicit x-lock on the record. */ + + ut_a(pcur->rel_pos == BTR_PCUR_ON); + + success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr); + + if (!success) { + err = DB_RECORD_NOT_FOUND; + + mtr_commit(mtr); + + return(err); + } + + /* If this is a row in SYS_INDEXES table of the data dictionary, + then we have to free the file segments of the index tree associated + with the index */ + + if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) { + + dict_drop_index_tree(btr_pcur_get_rec(pcur), mtr); + + mtr_commit(mtr); + + mtr_start(mtr); + + success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, + mtr); + if (!success) { + err = DB_ERROR; + + mtr_commit(mtr); + + return(err); + } + } + + if (!node->has_clust_rec_x_lock) { + err = lock_clust_rec_modify_check_and_lock(0, + btr_pcur_get_rec(pcur), + index, thr); + if (err != DB_SUCCESS) { + mtr_commit(mtr); + + return(err); + } + } + + /* NOTE: the following function calls will also commit mtr */ + + if (node->is_delete) { + err = row_upd_del_mark_clust_rec(node, index, thr, mtr); + + if (err != DB_SUCCESS) { + + return(err); + } + + node->state = UPD_NODE_UPDATE_ALL_SEC; + node->index = dict_table_get_next_index(index); + + return(err); + } + + /* If the update is made for MySQL, we already have the update vector + ready, else we have to do some evaluation: */ + + if (!node->in_mysql_interface) { + /* Copy the necessary columns from clust_rec and calculate the + new values to set */ + + row_upd_copy_columns(btr_pcur_get_rec(pcur), + UT_LIST_GET_FIRST(node->columns)); + row_upd_eval_new_vals(node->update); + } + + if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) { + + err = row_upd_clust_rec(node, index, thr, mtr); + + return(err); + } + + row_upd_store_row(node); + + if 
(row_upd_changes_ord_field(node->row, index, node->update)) { + + /* Update causes an ordering field (ordering fields within + the B-tree) of the clustered index record to change: perform + the update by delete marking and inserting. + + TODO! What to do to the 'Halloween problem', where an update + moves the record forward in index so that it is again + updated when the cursor arrives there? Solution: the + read operation must check the undo record undo number when + choosing records to update. MySQL solves now the problem + externally! */ + + err = row_upd_clust_rec_by_insert(node, index, thr, mtr); + + if (err != DB_SUCCESS) { + + return(err); + } + + node->state = UPD_NODE_UPDATE_ALL_SEC; + } else { + err = row_upd_clust_rec(node, index, thr, mtr); + + if (err != DB_SUCCESS) { + + return(err); + } + + node->state = UPD_NODE_UPDATE_SOME_SEC; + } + + node->index = dict_table_get_next_index(index); + + return(err); +} + +/*************************************************************** +Updates the affected index records of a row. When the control is transferred +to this node, we assume that we have a persistent cursor which was on a +record, and the position of the cursor is stored in the cursor. 
*/
static
ulint
row_upd(
/*====*/
				/* out: DB_SUCCESS if operation successfully
				completed, else error code or DB_LOCK_WAIT */
	upd_node_t*	node,	/* in: row update node */
	que_thr_t*	thr)	/* in: query thread */
{
	ulint	err	= DB_SUCCESS;	/* must start as success: when the
					node is in a secondary index state
					(the function was re-entered) the
					clustered step is skipped, and
					function_exit can be reached
					without any assignment to err */

	ut_ad(node && thr);

	if (node->in_mysql_interface) {
		/* We do not get the cmpl_info value from the MySQL
		interpreter: we must calculate it on the fly: */

		if (row_upd_changes_some_index_ord_field(node->table,
							node->update)) {
			node->cmpl_info = 0;
		} else {
			node->cmpl_info = UPD_NODE_NO_ORD_CHANGE;
		}
	}

	if (node->state == UPD_NODE_UPDATE_CLUSTERED
	    || node->state == UPD_NODE_INSERT_CLUSTERED) {

		err = row_upd_clust_step(node, thr);

		if (err != DB_SUCCESS) {

			goto function_exit;
		}
	}

	if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
		/* No ordering field changes in any index: the secondary
		indexes need not be touched */

		goto function_exit;
	}

	/* Process the remaining indexes starting from node->index; on an
	error node->index stays on the failed index, so a later call
	continues from there */

	while (node->index != NULL) {
		err = row_upd_sec_step(node, thr);

		if (err != DB_SUCCESS) {

			goto function_exit;
		}

		node->index = dict_table_get_next_index(node->index);
	}

function_exit:
	if (err == DB_SUCCESS) {
		/* Do some cleanup */

		if (node->row != NULL) {
			mem_heap_empty(node->heap);
			node->row = NULL;
		}

		node->state = UPD_NODE_UPDATE_CLUSTERED;
	}

	return(err);
}

/***************************************************************
Updates a row in a table. This is a high-level function used in SQL execution
graphs.
*/ + +que_thr_t* +row_upd_step( +/*=========*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr) /* in: query thread */ +{ + upd_node_t* node; + sel_node_t* sel_node; + que_node_t* parent; + ulint err = DB_SUCCESS; + trx_t* trx; + + ut_ad(thr); + + trx = thr_get_trx(thr); + + node = thr->run_node; + + sel_node = node->select; + + parent = que_node_get_parent(node); + + ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE); + + if (thr->prev_node == parent) { + node->state = UPD_NODE_SET_IX_LOCK; + } + + if (node->state == UPD_NODE_SET_IX_LOCK) { + + if (!node->has_clust_rec_x_lock) { + /* It may be that the current session has not yet + started its transaction, or it has been committed: */ + + trx_start_if_not_started(thr_get_trx(thr)); + + err = lock_table(0, node->table, LOCK_IX, thr); + + if (err != DB_SUCCESS) { + + goto error_handling; + } + } + + node->state = UPD_NODE_UPDATE_CLUSTERED; + + if (node->searched_update) { + /* Reset the cursor */ + sel_node->state = SEL_NODE_OPEN; + + /* Fetch a row to update */ + + thr->run_node = sel_node; + + return(thr); + } + } + + /* sel_node is NULL if we are in the MySQL interface */ + + if (sel_node && (sel_node->state != SEL_NODE_FETCH)) { + + if (!node->searched_update) { + /* An explicit cursor should be positioned on a row + to update */ + + ut_error; + + err = DB_ERROR; + + goto error_handling; + } + + ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS); + + /* No more rows to update, or the select node performed the + updates directly in-place */ + + thr->run_node = parent; + + return(thr); + } + + /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */ + + err = row_upd(node, thr); + +error_handling: + trx->error_state = err; + + if (err == DB_SUCCESS) { + /* Ok: do nothing */ + } else if (err == DB_LOCK_WAIT) { + + return(NULL); + } else { + return(NULL); + } + + /* DO THE TRIGGER ACTIONS HERE */ + + if (node->searched_update) { + /* Fetch next row to update */ + + thr->run_node = sel_node; + } else { + /* 
It was an explicit cursor update */ + + thr->run_node = parent; + } + + node->state = UPD_NODE_UPDATE_CLUSTERED; + + return(thr); +} + +/************************************************************************* +Performs an in-place update for the current clustered index record in +select. */ + +void +row_upd_in_place_in_select( +/*=======================*/ + sel_node_t* sel_node, /* in: select node */ + que_thr_t* thr, /* in: query thread */ + mtr_t* mtr) /* in: mtr */ +{ + upd_node_t* node; + btr_pcur_t* pcur; + btr_cur_t* btr_cur; + ulint err; + + ut_ad(sel_node->select_will_do_update); + ut_ad(sel_node->latch_mode == BTR_MODIFY_LEAF); + ut_ad(sel_node->asc); + + node = que_node_get_parent(sel_node); + + ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE); + + pcur = node->pcur; + btr_cur = btr_pcur_get_btr_cur(pcur); + + /* Copy the necessary columns from clust_rec and calculate the new + values to set */ + + row_upd_copy_columns(btr_pcur_get_rec(pcur), + UT_LIST_GET_FIRST(node->columns)); + row_upd_eval_new_vals(node->update); + + ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur))); + + ut_ad(node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE); + ut_ad(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE); + ut_ad(node->select_will_do_update); + + err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG, btr_cur, + node->update, node->cmpl_info, + thr, mtr); + ut_ad(err == DB_SUCCESS); +} |