summaryrefslogtreecommitdiff
path: root/innobase/row/row0upd.c
diff options
context:
space:
mode:
Diffstat (limited to 'innobase/row/row0upd.c')
-rw-r--r--innobase/row/row0upd.c1394
1 files changed, 1394 insertions, 0 deletions
diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c
new file mode 100644
index 00000000000..44843494247
--- /dev/null
+++ b/innobase/row/row0upd.c
@@ -0,0 +1,1394 @@
+/******************************************************
+Update of a row
+
+(c) 1996 Innobase Oy
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#include "row0upd.h"
+
+#ifdef UNIV_NONINL
+#include "row0upd.ic"
+#endif
+
+#include "dict0dict.h"
+#include "dict0boot.h"
+#include "dict0crea.h"
+#include "mach0data.h"
+#include "trx0undo.h"
+#include "btr0btr.h"
+#include "btr0cur.h"
+#include "que0que.h"
+#include "row0ins.h"
+#include "row0sel.h"
+#include "row0row.h"
+#include "rem0cmp.h"
+#include "lock0lock.h"
+#include "log0log.h"
+#include "pars0sym.h"
+#include "eval0eval.h"
+
+
+/* What kind of latch and lock can we assume when the control comes to
+ -------------------------------------------------------------------
+an update node?
+--------------
+Efficiency of massive updates would require keeping an x-latch on a
+clustered index page through many updates, and not setting an explicit
+x-lock on clustered index records, as they anyway will get an implicit
+x-lock when they are updated. A problem is that the read nodes in the
+graph should know that they must keep the latch when passing the control
+up to the update node, and not set any record lock on the record which
+will be updated. Another problem occurs if the execution is stopped,
+as the kernel switches to another query thread, or the transaction must
+wait for a lock. Then we should be able to release the latch and, maybe,
+acquire an explicit x-lock on the record.
+ Because this seems too complicated, we conclude that the less
+efficient solution of releasing all the latches when the control is
+transferred to another node, and acquiring explicit x-locks, is better. */
+
+/* How is a delete performed? If there is a delete without an
+explicit cursor, i.e., a searched delete, there are at least
+two different situations:
+the implicit select cursor may run on (1) the clustered index or
+on (2) a secondary index. The delete is performed by setting
+the delete bit in the record and substituting the id of the
+deleting transaction for the original trx id, and substituting a
+new roll ptr for previous roll ptr. The old trx id and roll ptr
+are saved in the undo log record. Thus, no physical changes occur
+in the index tree structure at the time of the delete. Only
+when the undo log is purged, the index records will be physically
+deleted from the index trees.
+
+The query graph executing a searched delete would consist of
+a delete node which has as a subtree a select subgraph.
+The select subgraph should return a (persistent) cursor
+in the clustered index, placed on page which is x-latched.
+The delete node should look for all secondary index records for
+this clustered index entry and mark them as deleted. When is
+the x-latch freed? The most efficient way for performing a
+searched delete is obviously to keep the x-latch for several
+steps of query graph execution. */
+
+/*************************************************************************
+Creates an update node for a query graph. */
+
+upd_node_t*
+upd_node_create(
+/*============*/
+ /* out, own: update node */
+ mem_heap_t* heap) /* in: mem heap where created */
+{
+ upd_node_t* node;
+
+ node = mem_heap_alloc(heap, sizeof(upd_node_t));
+ node->common.type = QUE_NODE_UPDATE;
+
+ node->state = UPD_NODE_UPDATE_CLUSTERED;
+ node->select_will_do_update = FALSE;
+ node->in_mysql_interface = FALSE;
+
+ node->row = NULL;
+ node->index = NULL;
+
+ node->select = NULL;
+
+ node->heap = mem_heap_create(128);
+ node->magic_n = UPD_NODE_MAGIC_N;
+
+ node->cmpl_info = 0;
+
+ return(node);
+}
+
+/*************************************************************************
+Updates the trx id and roll ptr field in a clustered index record in database
+recovery. */
+
+void
+row_upd_rec_sys_fields_in_recovery(
+/*===============================*/
+ rec_t* rec, /* in: record */
+ ulint pos, /* in: TRX_ID position in rec */
+ dulint trx_id, /* in: transaction id */
+ dulint roll_ptr)/* in: roll ptr of the undo log record */
+{
+ byte* field;
+ ulint len;
+
+ field = rec_get_nth_field(rec, pos, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+ trx_write_trx_id(field, trx_id);
+
+ field = rec_get_nth_field(rec, pos + 1, &len);
+ ut_ad(len == DATA_ROLL_PTR_LEN);
+ trx_write_roll_ptr(field, roll_ptr);
+}
+
+/*************************************************************************
+Sets the trx id or roll ptr field of a clustered index entry. */
+
+void
+row_upd_index_entry_sys_field(
+/*==========================*/
+ dtuple_t* entry, /* in: index entry, where the memory buffers
+ for sys fields are already allocated:
+ the function just copies the new values to
+ them */
+ dict_index_t* index, /* in: clustered index */
+ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
+ dulint val) /* in: value to write */
+{
+ dfield_t* dfield;
+ byte* field;
+ ulint pos;
+
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ pos = dict_index_get_sys_col_pos(index, type);
+
+ dfield = dtuple_get_nth_field(entry, pos);
+ field = dfield_get_data(dfield);
+
+ if (type == DATA_TRX_ID) {
+ trx_write_trx_id(field, val);
+ } else {
+ ut_ad(type == DATA_ROLL_PTR);
+ trx_write_roll_ptr(field, val);
+ }
+}
+
+/***************************************************************
+Returns TRUE if row update changes size of some field in index. */
+
+ibool
+row_upd_changes_field_size(
+/*=======================*/
+ /* out: TRUE if the update changes the size of
+ some field in index */
+ rec_t* rec, /* in: record in clustered index */
+ dict_index_t* index, /* in: clustered index */
+ upd_t* update) /* in: update vector */
+{
+ upd_field_t* upd_field;
+ dfield_t* new_val;
+ ulint old_len;
+ ulint new_len;
+ ulint n_fields;
+ ulint i;
+
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ n_fields = upd_get_n_fields(update);
+
+ for (i = 0; i < n_fields; i++) {
+ upd_field = upd_get_nth_field(update, i);
+
+ new_val = &(upd_field->new_val);
+ new_len = new_val->len;
+
+ if (new_len == UNIV_SQL_NULL) {
+ new_len = dtype_get_sql_null_size(
+ dict_index_get_nth_type(index, i));
+ }
+
+ old_len = rec_get_nth_field_size(rec, upd_field->field_no);
+
+ if (old_len != new_len) {
+
+ return(TRUE);
+ }
+ }
+
+ return(FALSE);
+}
+
+/***************************************************************
+Replaces the new column values stored in the update vector to the record
+given. No field size changes are allowed. This function is used only for
+a clustered index */
+
+void
+row_upd_rec_in_place(
+/*=================*/
+ rec_t* rec, /* in/out: record where replaced */
+ upd_t* update) /* in: update vector */
+{
+ upd_field_t* upd_field;
+ dfield_t* new_val;
+ ulint n_fields;
+ ulint i;
+
+ rec_set_info_bits(rec, update->info_bits);
+
+ n_fields = upd_get_n_fields(update);
+
+ for (i = 0; i < n_fields; i++) {
+ upd_field = upd_get_nth_field(update, i);
+ new_val = &(upd_field->new_val);
+
+ rec_set_nth_field(rec, upd_field->field_no,
+ dfield_get_data(new_val),
+ dfield_get_len(new_val));
+ }
+}
+
+/*************************************************************************
+Writes into the redo log the values of trx id and roll ptr and enough info
+to determine their positions within a clustered index record. */
+
+byte*
+row_upd_write_sys_vals_to_log(
+/*==========================*/
+ /* out: new pointer to mlog */
+ dict_index_t* index, /* in: clustered index */
+ trx_t* trx, /* in: transaction */
+ dulint roll_ptr,/* in: roll ptr of the undo log record */
+ byte* log_ptr,/* pointer to a buffer of size > 20 opened
+ in mlog */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(mtr);
+
+ log_ptr += mach_write_compressed(log_ptr,
+ dict_index_get_sys_col_pos(index, DATA_TRX_ID));
+
+ trx_write_roll_ptr(log_ptr, roll_ptr);
+ log_ptr += DATA_ROLL_PTR_LEN;
+
+ log_ptr += mach_dulint_write_compressed(log_ptr, trx->id);
+
+ return(log_ptr);
+}
+
+/*************************************************************************
+Parses the log data of system field values. */
+
+byte*
+row_upd_parse_sys_vals(
+/*===================*/
+ /* out: log data end or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ ulint* pos, /* out: TRX_ID position in record */
+ dulint* trx_id, /* out: trx id */
+ dulint* roll_ptr)/* out: roll ptr */
+{
+ ptr = mach_parse_compressed(ptr, end_ptr, pos);
+
+ if (ptr == NULL) {
+
+ return(NULL);
+ }
+
+ if (end_ptr < ptr + DATA_ROLL_PTR_LEN) {
+
+ return(NULL);
+ }
+
+ *roll_ptr = trx_read_roll_ptr(ptr);
+ ptr += DATA_ROLL_PTR_LEN;
+
+ ptr = mach_dulint_parse_compressed(ptr, end_ptr, trx_id);
+
+ return(ptr);
+}
+
+/***************************************************************
+Writes to the redo log the new values of the fields occurring in the index. */
+
+void
+row_upd_index_write_log(
+/*====================*/
+ upd_t* update, /* in: update vector */
+ byte* log_ptr,/* in: pointer to mlog buffer: must contain at least
+ MLOG_BUF_MARGIN bytes of free space; the buffer is
+ closed within this function */
+ mtr_t* mtr) /* in: mtr into whose log to write */
+{
+ upd_field_t* upd_field;
+ dfield_t* new_val;
+ ulint len;
+ ulint n_fields;
+ byte* buf_end;
+ ulint i;
+
+ n_fields = upd_get_n_fields(update);
+
+ buf_end = log_ptr + MLOG_BUF_MARGIN;
+
+ mach_write_to_1(log_ptr, update->info_bits);
+ log_ptr++;
+ log_ptr += mach_write_compressed(log_ptr, n_fields);
+
+ for (i = 0; i < n_fields; i++) {
+
+ ut_ad(MLOG_BUF_MARGIN > 30);
+
+ if (log_ptr + 30 > buf_end) {
+ mlog_close(mtr, log_ptr);
+
+ log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN);
+ buf_end = log_ptr + MLOG_BUF_MARGIN;
+ }
+
+ upd_field = upd_get_nth_field(update, i);
+
+ new_val = &(upd_field->new_val);
+
+ len = new_val->len;
+
+ log_ptr += mach_write_compressed(log_ptr, upd_field->field_no);
+ log_ptr += mach_write_compressed(log_ptr, len);
+
+ if (len != UNIV_SQL_NULL) {
+ if (log_ptr + len < buf_end) {
+ ut_memcpy(log_ptr, new_val->data, len);
+
+ log_ptr += len;
+ } else {
+ mlog_close(mtr, log_ptr);
+
+ mlog_catenate_string(mtr, new_val->data, len);
+
+ log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN);
+ buf_end = log_ptr + MLOG_BUF_MARGIN;
+ }
+ }
+ }
+
+ mlog_close(mtr, log_ptr);
+}
+
+/*************************************************************************
+Parses the log data written by row_upd_index_write_log. */
+
+byte*
+row_upd_index_parse(
+/*================*/
+ /* out: log data end or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ mem_heap_t* heap, /* in: memory heap where update vector is
+ built */
+ upd_t** update_out)/* out: update vector */
+{
+ upd_t* update;
+ upd_field_t* upd_field;
+ dfield_t* new_val;
+ ulint len;
+ ulint n_fields;
+ byte* buf;
+ ulint info_bits;
+ ulint i;
+
+ if (end_ptr < ptr + 1) {
+
+ return(NULL);
+ }
+
+ info_bits = mach_read_from_1(ptr);
+ ptr++;
+ ptr = mach_parse_compressed(ptr, end_ptr, &n_fields);
+
+ if (ptr == NULL) {
+
+ return(NULL);
+ }
+
+ update = upd_create(n_fields, heap);
+ update->info_bits = info_bits;
+
+ for (i = 0; i < n_fields; i++) {
+ upd_field = upd_get_nth_field(update, i);
+ new_val = &(upd_field->new_val);
+
+ ptr = mach_parse_compressed(ptr, end_ptr,
+ &(upd_field->field_no));
+ if (ptr == NULL) {
+
+ return(NULL);
+ }
+
+ ptr = mach_parse_compressed(ptr, end_ptr, &len);
+
+ if (ptr == NULL) {
+
+ return(NULL);
+ }
+
+ new_val->len = len;
+
+ if (len != UNIV_SQL_NULL) {
+
+ if (end_ptr < ptr + len) {
+
+ return(NULL);
+ } else {
+ buf = mem_heap_alloc(heap, len);
+ ut_memcpy(buf, ptr, len);
+
+ ptr += len;
+
+ new_val->data = buf;
+ }
+ }
+ }
+
+ *update_out = update;
+
+ return(ptr);
+}
+
+/*******************************************************************
+Builds an update vector from those fields, excluding the roll ptr and
+trx id fields, which in an index entry differ from a record that has
+the equal ordering fields. */
+
+upd_t*
+row_upd_build_difference(
+/*=====================*/
+ /* out, own: update vector of differing
+ fields, excluding roll ptr and trx id */
+ dict_index_t* index, /* in: clustered index */
+ dtuple_t* entry, /* in: entry to insert */
+ rec_t* rec, /* in: clustered index record */
+ mem_heap_t* heap) /* in: memory heap from which allocated */
+{
+ upd_field_t* upd_field;
+ dfield_t* dfield;
+ byte* data;
+ ulint len;
+ upd_t* update;
+ ulint n_diff;
+ ulint roll_ptr_pos;
+ ulint trx_id_pos;
+ ulint i;
+
+ /* This function is used only for a clustered index */
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ update = upd_create(dtuple_get_n_fields(entry), heap);
+
+ n_diff = 0;
+
+ roll_ptr_pos = dict_index_get_sys_col_pos(index, DATA_ROLL_PTR);
+ trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+
+ for (i = 0; i < dtuple_get_n_fields(entry); i++) {
+
+ data = rec_get_nth_field(rec, i, &len);
+ dfield = dtuple_get_nth_field(entry, i);
+
+ if ((i != trx_id_pos) && (i != roll_ptr_pos)
+ && !dfield_data_is_equal(dfield, len, data)) {
+
+ upd_field = upd_get_nth_field(update, n_diff);
+
+ dfield_copy(&(upd_field->new_val), dfield);
+
+ upd_field_set_field_no(upd_field, i, index);
+
+ n_diff++;
+ }
+ }
+
+ update->n_fields = n_diff;
+
+ return(update);
+}
+
+/***************************************************************
+Replaces the new column values stored in the update vector to the index entry
+given. */
+
+void
+row_upd_index_replace_new_col_vals(
+/*===============================*/
+ dtuple_t* entry, /* in/out: index entry where replaced */
+ dict_index_t* index, /* in: index; NOTE that may also be a
+ non-clustered index */
+ upd_t* update) /* in: update vector */
+{
+ upd_field_t* upd_field;
+ dfield_t* dfield;
+ dfield_t* new_val;
+ ulint field_no;
+ dict_index_t* clust_index;
+ ulint i;
+
+ ut_ad(index);
+
+ clust_index = dict_table_get_first_index(index->table);
+
+ dtuple_set_info_bits(entry, update->info_bits);
+
+ for (i = 0; i < upd_get_n_fields(update); i++) {
+
+ upd_field = upd_get_nth_field(update, i);
+
+ field_no = dict_index_get_nth_col_pos(index,
+ dict_index_get_nth_col_no(clust_index,
+ upd_field->field_no));
+ if (field_no != ULINT_UNDEFINED) {
+ dfield = dtuple_get_nth_field(entry, field_no);
+
+ new_val = &(upd_field->new_val);
+
+ dfield_set_data(dfield, new_val->data, new_val->len);
+ }
+ }
+}
+
+/***************************************************************
+Replaces the new column values stored in the update vector to the
+clustered index entry given. */
+
+void
+row_upd_clust_index_replace_new_col_vals(
+/*=====================================*/
+ dtuple_t* entry, /* in/out: index entry where replaced */
+ upd_t* update) /* in: update vector */
+{
+ upd_field_t* upd_field;
+ dfield_t* dfield;
+ dfield_t* new_val;
+ ulint field_no;
+ ulint i;
+
+ dtuple_set_info_bits(entry, update->info_bits);
+
+ for (i = 0; i < upd_get_n_fields(update); i++) {
+
+ upd_field = upd_get_nth_field(update, i);
+
+ field_no = upd_field->field_no;
+
+ dfield = dtuple_get_nth_field(entry, field_no);
+
+ new_val = &(upd_field->new_val);
+
+ dfield_set_data(dfield, new_val->data, new_val->len);
+ }
+}
+
+/***************************************************************
+Checks if an update vector changes an ordering field of an index record.
+This function is fast if the update vector is short or the number of ordering
+fields in the index is small. Otherwise, this can be quadratic. */
+
+ibool
+row_upd_changes_ord_field(
+/*======================*/
+ /* out: TRUE if update vector changes
+ an ordering field in the index record */
+ dtuple_t* row, /* in: old value of row, or NULL if the
+ row and the data values in update are not
+ known when this function is called, e.g., at
+ compile time */
+ dict_index_t* index, /* in: index of the record */
+ upd_t* update) /* in: update vector for the row */
+{
+ upd_field_t* upd_field;
+ dict_field_t* ind_field;
+ dict_col_t* col;
+ ulint n_unique;
+ ulint n_upd_fields;
+ ulint col_pos;
+ ulint col_no;
+ ulint i, j;
+
+ ut_ad(update && index);
+
+ n_unique = dict_index_get_n_unique(index);
+ n_upd_fields = upd_get_n_fields(update);
+
+ for (i = 0; i < n_unique; i++) {
+
+ ind_field = dict_index_get_nth_field(index, i);
+ col = dict_field_get_col(ind_field);
+ col_pos = dict_col_get_clust_pos(col);
+ col_no = dict_col_get_no(col);
+
+ for (j = 0; j < n_upd_fields; j++) {
+
+ upd_field = upd_get_nth_field(update, j);
+
+ if (col_pos == upd_field->field_no
+ && (row == NULL
+ || !dfield_datas_are_equal(
+ dtuple_get_nth_field(row, col_no),
+ &(upd_field->new_val)))) {
+ return(TRUE);
+ }
+ }
+ }
+
+ return(FALSE);
+}
+
+/***************************************************************
+Checks if an update vector changes an ordering field of an index record.
+This function is fast if the update vector is short or the number of ordering
+fields in the index is small. Otherwise, this can be quadratic. */
+
+ibool
+row_upd_changes_some_index_ord_field(
+/*=================================*/
+ /* out: TRUE if update vector may change
+ an ordering field in an index record */
+ dict_table_t* table, /* in: table */
+ upd_t* update) /* in: update vector for the row */
+{
+ dict_index_t* index;
+
+ index = dict_table_get_first_index(table);
+
+ while (index) {
+ if (row_upd_changes_ord_field(NULL, index, update)) {
+
+ return(TRUE);
+ }
+
+ index = dict_table_get_next_index(index);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Copies the column values from a record. */
+UNIV_INLINE
+void
+row_upd_copy_columns(
+/*=================*/
+ rec_t* rec, /* in: record in a clustered index */
+ sym_node_t* column) /* in: first column in a column list, or
+ NULL */
+{
+ byte* data;
+ ulint len;
+
+ while (column) {
+ data = rec_get_nth_field(rec,
+ column->field_nos[SYM_CLUST_FIELD_NO],
+ &len);
+ eval_node_copy_and_alloc_val(column, data, len);
+
+ column = UT_LIST_GET_NEXT(col_var_list, column);
+ }
+}
+
+/*************************************************************************
+Calculates the new values for fields to update. Note that row_upd_copy_columns
+must have been called first. */
+UNIV_INLINE
+void
+row_upd_eval_new_vals(
+/*==================*/
+ upd_t* update) /* in: update vector */
+{
+ que_node_t* exp;
+ upd_field_t* upd_field;
+ ulint n_fields;
+ ulint i;
+
+ n_fields = upd_get_n_fields(update);
+
+ for (i = 0; i < n_fields; i++) {
+ upd_field = upd_get_nth_field(update, i);
+
+ exp = upd_field->exp;
+
+ eval_exp(exp);
+
+ dfield_copy_data(&(upd_field->new_val), que_node_get_val(exp));
+ }
+}
+
+/***************************************************************
+Stores to the heap the row on which the node->pcur is positioned. */
+UNIV_INLINE
+void
+row_upd_store_row(
+/*==============*/
+ upd_node_t* node) /* in: row update node */
+{
+ dict_index_t* clust_index;
+
+ ut_ad((node->pcur)->latch_mode != BTR_NO_LATCHES);
+
+ if (node->row != NULL) {
+ mem_heap_empty(node->heap);
+ node->row = NULL;
+ }
+
+ clust_index = dict_table_get_first_index(node->table);
+
+ node->row = row_build(ROW_COPY_DATA, clust_index,
+ btr_pcur_get_rec(node->pcur), node->heap);
+}
+
+/***************************************************************
+Updates a secondary index entry of a row. */
+static
+ulint
+row_upd_sec_index_entry(
+/*====================*/
+ /* out: DB_SUCCESS if operation successfully
+ completed, else error code or DB_LOCK_WAIT */
+ upd_node_t* node, /* in: row update node */
+ que_thr_t* thr) /* in: query thread */
+{
+ ibool found;
+ dict_index_t* index;
+ dtuple_t* entry;
+ mtr_t mtr;
+ btr_pcur_t pcur;
+ btr_cur_t* btr_cur;
+ mem_heap_t* heap;
+ rec_t* rec;
+ ulint err;
+
+ index = node->index;
+
+ heap = mem_heap_create(1024);
+
+ /* Build old index entry */
+ entry = row_build_index_entry(node->row, index, heap);
+
+ log_free_check();
+ mtr_start(&mtr);
+
+ found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur,
+ &mtr);
+ ut_ad(found);
+
+ btr_cur = btr_pcur_get_btr_cur(&pcur);
+
+ rec = btr_cur_get_rec(btr_cur);
+
+ /* Delete mark the old index record; it can already be delete marked if
+ we return after a lock wait in row_ins_index_entry below */
+
+ if (!rec_get_deleted_flag(rec)) {
+ err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, thr,
+ &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ if (node->is_delete || (err != DB_SUCCESS)) {
+
+ mem_heap_free(heap);
+
+ return(err);
+ }
+
+ /* Build a new index entry */
+ row_upd_index_replace_new_col_vals(entry, index, node->update);
+
+ /* Insert new index entry */
+ err = row_ins_index_entry(index, entry, thr);
+
+ mem_heap_free(heap);
+
+ return(err);
+}
+
+/***************************************************************
+Updates secondary index record if it is changed in the row update. This
+should be quite rare in database applications. */
+UNIV_INLINE
+ulint
+row_upd_sec_step(
+/*=============*/
+ /* out: DB_SUCCESS if operation successfully
+ completed, else error code or DB_LOCK_WAIT */
+ upd_node_t* node, /* in: row update node */
+ que_thr_t* thr) /* in: query thread */
+{
+ ulint err;
+
+ ut_ad((node->state == UPD_NODE_UPDATE_ALL_SEC)
+ || (node->state == UPD_NODE_UPDATE_SOME_SEC));
+ ut_ad(!(node->index->type & DICT_CLUSTERED));
+
+ if ((node->state == UPD_NODE_UPDATE_ALL_SEC)
+ || row_upd_changes_ord_field(node->row, node->index,
+ node->update)) {
+ err = row_upd_sec_index_entry(node, thr);
+
+ return(err);
+ }
+
+ return(DB_SUCCESS);
+}
+
+/***************************************************************
+Marks the clustered index record deleted and inserts the updated version
+of the record to the index. This function should be used when the ordering
+fields of the clustered index record change. This should be quite rare in
+database applications. */
+static
+ulint
+row_upd_clust_rec_by_insert(
+/*========================*/
+ /* out: DB_SUCCESS if operation successfully
+ completed, else error code or DB_LOCK_WAIT */
+ upd_node_t* node, /* in: row update node */
+ dict_index_t* index, /* in: clustered index of the record */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr) /* in: mtr; gets committed here */
+{
+ btr_pcur_t* pcur;
+ btr_cur_t* btr_cur;
+ trx_t* trx;
+ dict_table_t* table;
+ mem_heap_t* heap;
+ dtuple_t* entry;
+ ulint err;
+
+ ut_ad(node);
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ trx = thr_get_trx(thr);
+ table = node->table;
+ pcur = node->pcur;
+ btr_cur = btr_pcur_get_btr_cur(pcur);
+
+ if (node->state != UPD_NODE_INSERT_CLUSTERED) {
+
+ err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
+ btr_cur, TRUE, thr, mtr);
+ if (err != DB_SUCCESS) {
+ mtr_commit(mtr);
+
+ return(err);
+ }
+ }
+
+ mtr_commit(mtr);
+
+ node->state = UPD_NODE_INSERT_CLUSTERED;
+
+ heap = mem_heap_create(1024);
+
+ entry = row_build_index_entry(node->row, index, heap);
+
+ row_upd_clust_index_replace_new_col_vals(entry, node->update);
+
+ row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
+
+ err = row_ins_index_entry(index, entry, thr);
+
+ mem_heap_free(heap);
+
+ return(err);
+}
+
+/***************************************************************
+Updates a clustered index record of a row when the ordering fields do
+not change. */
+static
+ulint
+row_upd_clust_rec(
+/*==============*/
+ /* out: DB_SUCCESS if operation successfully
+ completed, else error code or DB_LOCK_WAIT */
+ upd_node_t* node, /* in: row update node */
+ dict_index_t* index, /* in: clustered index */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr) /* in: mtr; gets committed here */
+{
+ btr_pcur_t* pcur;
+ btr_cur_t* btr_cur;
+ ulint err;
+
+ ut_ad(node);
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ pcur = node->pcur;
+ btr_cur = btr_pcur_get_btr_cur(pcur);
+
+ ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
+
+ /* Try optimistic updating of the record, keeping changes within
+ the page; we do not check locks because we assume the x-lock on the
+ record to update */
+
+ if (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE) {
+ err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG,
+ btr_cur, node->update,
+ node->cmpl_info, thr, mtr);
+ } else {
+ err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG,
+ btr_cur, node->update,
+ node->cmpl_info, thr, mtr);
+ }
+
+ mtr_commit(mtr);
+
+ if (err == DB_SUCCESS) {
+
+ return(err);
+ }
+
+ /* We may have to modify the tree structure: do a pessimistic descent
+ down the index tree */
+
+ mtr_start(mtr);
+
+ /* NOTE: this transaction has an s-lock or x-lock on the record and
+ therefore other transactions cannot modify the record when we have no
+ latch on the page. In addition, we assume that other query threads of
+ the same transaction do not modify the record in the meantime.
+ Therefore we can assert that the restoration of the cursor succeeds. */
+
+ ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
+
+ ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
+
+ err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
+ node->update, node->cmpl_info, thr, mtr);
+ mtr_commit(mtr);
+
+ return(err);
+}
+
+/***************************************************************
+Delete marks a clustered index record. */
+static
+ulint
+row_upd_del_mark_clust_rec(
+/*=======================*/
+ /* out: DB_SUCCESS if operation successfully
+ completed, else error code or DB_LOCK_WAIT */
+ upd_node_t* node, /* in: row update node */
+ dict_index_t* index, /* in: clustered index */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr) /* in: mtr; gets committed here */
+{
+ btr_pcur_t* pcur;
+ btr_cur_t* btr_cur;
+ ulint err;
+
+ ut_ad(node);
+ ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(node->is_delete);
+
+ pcur = node->pcur;
+ btr_cur = btr_pcur_get_btr_cur(pcur);
+
+ ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
+
+ /* Store row because we have to build also the secondary index
+ entries */
+
+ row_upd_store_row(node);
+
+ /* Mark the clustered index record deleted; we do not have to check
+ locks, because we assume that we have an x-lock on the record */
+
+ err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, btr_cur,
+ TRUE, thr, mtr);
+ mtr_commit(mtr);
+
+ return(err);
+}
+
+/***************************************************************
+Updates the clustered index record. */
+static
+ulint
+row_upd_clust_step(
+/*===============*/
+ /* out: DB_SUCCESS if operation successfully
+ completed, DB_LOCK_WAIT in case of a lock wait,
+ else error code */
+ upd_node_t* node, /* in: row update node */
+ que_thr_t* thr) /* in: query thread */
+{
+ dict_index_t* index;
+ btr_pcur_t* pcur;
+ ibool success;
+ ulint err;
+ mtr_t mtr_buf;
+ mtr_t* mtr;
+
+ index = dict_table_get_first_index(node->table);
+
+ pcur = node->pcur;
+
+ /* We have to restore the cursor to its position */
+ mtr = &mtr_buf;
+
+ mtr_start(mtr);
+
+ /* If the restoration does not succeed, then the same
+ transaction has deleted the record on which the cursor was,
+ and that is an SQL error. If the restoration succeeds, it may
+ still be that the same transaction has successively deleted
+ and inserted a record with the same ordering fields, but in
+ that case we know that the transaction has at least an
+ implicit x-lock on the record. */
+
+ ut_a(pcur->rel_pos == BTR_PCUR_ON);
+
+ success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
+
+ if (!success) {
+ err = DB_RECORD_NOT_FOUND;
+
+ mtr_commit(mtr);
+
+ return(err);
+ }
+
+ /* If this is a row in SYS_INDEXES table of the data dictionary,
+ then we have to free the file segments of the index tree associated
+ with the index */
+
+ if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {
+
+ dict_drop_index_tree(btr_pcur_get_rec(pcur), mtr);
+
+ mtr_commit(mtr);
+
+ mtr_start(mtr);
+
+ success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur,
+ mtr);
+ if (!success) {
+ err = DB_ERROR;
+
+ mtr_commit(mtr);
+
+ return(err);
+ }
+ }
+
+ if (!node->has_clust_rec_x_lock) {
+ err = lock_clust_rec_modify_check_and_lock(0,
+ btr_pcur_get_rec(pcur),
+ index, thr);
+ if (err != DB_SUCCESS) {
+ mtr_commit(mtr);
+
+ return(err);
+ }
+ }
+
+ /* NOTE: the following function calls will also commit mtr */
+
+ if (node->is_delete) {
+ err = row_upd_del_mark_clust_rec(node, index, thr, mtr);
+
+ if (err != DB_SUCCESS) {
+
+ return(err);
+ }
+
+ node->state = UPD_NODE_UPDATE_ALL_SEC;
+ node->index = dict_table_get_next_index(index);
+
+ return(err);
+ }
+
+ /* If the update is made for MySQL, we already have the update vector
+ ready, else we have to do some evaluation: */
+
+ if (!node->in_mysql_interface) {
+ /* Copy the necessary columns from clust_rec and calculate the
+ new values to set */
+
+ row_upd_copy_columns(btr_pcur_get_rec(pcur),
+ UT_LIST_GET_FIRST(node->columns));
+ row_upd_eval_new_vals(node->update);
+ }
+
+ if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
+
+ err = row_upd_clust_rec(node, index, thr, mtr);
+
+ return(err);
+ }
+
+ row_upd_store_row(node);
+
+ if (row_upd_changes_ord_field(node->row, index, node->update)) {
+
+ /* Update causes an ordering field (ordering fields within
+ the B-tree) of the clustered index record to change: perform
+ the update by delete marking and inserting.
+
+ TODO! What to do to the 'Halloween problem', where an update
+ moves the record forward in index so that it is again
+ updated when the cursor arrives there? Solution: the
+ read operation must check the undo record undo number when
+ choosing records to update. MySQL solves now the problem
+ externally! */
+
+ err = row_upd_clust_rec_by_insert(node, index, thr, mtr);
+
+ if (err != DB_SUCCESS) {
+
+ return(err);
+ }
+
+ node->state = UPD_NODE_UPDATE_ALL_SEC;
+ } else {
+ err = row_upd_clust_rec(node, index, thr, mtr);
+
+ if (err != DB_SUCCESS) {
+
+ return(err);
+ }
+
+ node->state = UPD_NODE_UPDATE_SOME_SEC;
+ }
+
+ node->index = dict_table_get_next_index(index);
+
+ return(err);
+}
+
+/***************************************************************
+Updates the affected index records of a row. When the control is transferred
+to this node, we assume that we have a persistent cursor which was on a
+record, and the position of the cursor is stored in the cursor. */
+static
+ulint
+row_upd(
+/*====*/
+ /* out: DB_SUCCESS if operation successfully
+ completed, else error code or DB_LOCK_WAIT */
+ upd_node_t* node, /* in: row update node */
+ que_thr_t* thr) /* in: query thread */
+{
+ ulint err;
+
+ ut_ad(node && thr);
+
+ if (node->in_mysql_interface) {
+ /* We do not get the cmpl_info value from the MySQL
+ interpreter: we must calculate it on the fly: */
+
+ if (row_upd_changes_some_index_ord_field(node->table,
+ node->update)) {
+ node->cmpl_info = 0;
+ } else {
+ node->cmpl_info = UPD_NODE_NO_ORD_CHANGE;
+ }
+ }
+
+ if (node->state == UPD_NODE_UPDATE_CLUSTERED
+ || node->state == UPD_NODE_INSERT_CLUSTERED) {
+
+ err = row_upd_clust_step(node, thr);
+
+ if (err != DB_SUCCESS) {
+
+ goto function_exit;
+ }
+ }
+
+ if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
+
+ goto function_exit;
+ }
+
+ while (node->index != NULL) {
+ err = row_upd_sec_step(node, thr);
+
+ if (err != DB_SUCCESS) {
+
+ goto function_exit;
+ }
+
+ node->index = dict_table_get_next_index(node->index);
+ }
+
+function_exit:
+ if (err == DB_SUCCESS) {
+ /* Do some cleanup */
+
+ if (node->row != NULL) {
+ mem_heap_empty(node->heap);
+ node->row = NULL;
+ }
+
+ node->state = UPD_NODE_UPDATE_CLUSTERED;
+ }
+
+ return(err);
+}
+
+/***************************************************************
+Updates a row in a table. This is a high-level function used in SQL execution
+graphs. */
+
+que_thr_t*
+row_upd_step(
+/*=========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr) /* in: query thread */
+{
+ upd_node_t* node;
+ sel_node_t* sel_node;
+ que_node_t* parent;
+ ulint err = DB_SUCCESS;
+ trx_t* trx;
+
+ ut_ad(thr);
+
+ trx = thr_get_trx(thr);
+
+ node = thr->run_node;
+
+ sel_node = node->select;
+
+ parent = que_node_get_parent(node);
+
+ ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE);
+
+ if (thr->prev_node == parent) {
+ node->state = UPD_NODE_SET_IX_LOCK;
+ }
+
+ if (node->state == UPD_NODE_SET_IX_LOCK) {
+
+ if (!node->has_clust_rec_x_lock) {
+ /* It may be that the current session has not yet
+ started its transaction, or it has been committed: */
+
+ trx_start_if_not_started(thr_get_trx(thr));
+
+ err = lock_table(0, node->table, LOCK_IX, thr);
+
+ if (err != DB_SUCCESS) {
+
+ goto error_handling;
+ }
+ }
+
+ node->state = UPD_NODE_UPDATE_CLUSTERED;
+
+ if (node->searched_update) {
+ /* Reset the cursor */
+ sel_node->state = SEL_NODE_OPEN;
+
+ /* Fetch a row to update */
+
+ thr->run_node = sel_node;
+
+ return(thr);
+ }
+ }
+
+ /* sel_node is NULL if we are in the MySQL interface */
+
+ if (sel_node && (sel_node->state != SEL_NODE_FETCH)) {
+
+ if (!node->searched_update) {
+ /* An explicit cursor should be positioned on a row
+ to update */
+
+ ut_error;
+
+ err = DB_ERROR;
+
+ goto error_handling;
+ }
+
+ ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
+
+ /* No more rows to update, or the select node performed the
+ updates directly in-place */
+
+ thr->run_node = parent;
+
+ return(thr);
+ }
+
+ /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
+
+ err = row_upd(node, thr);
+
+error_handling:
+ trx->error_state = err;
+
+ if (err == DB_SUCCESS) {
+ /* Ok: do nothing */
+ } else if (err == DB_LOCK_WAIT) {
+
+ return(NULL);
+ } else {
+ return(NULL);
+ }
+
+ /* DO THE TRIGGER ACTIONS HERE */
+
+ if (node->searched_update) {
+ /* Fetch next row to update */
+
+ thr->run_node = sel_node;
+ } else {
+ /* It was an explicit cursor update */
+
+ thr->run_node = parent;
+ }
+
+ node->state = UPD_NODE_UPDATE_CLUSTERED;
+
+ return(thr);
+}
+
+/*************************************************************************
+Performs an in-place update for the current clustered index record in
+select. */
+
+void
+row_upd_in_place_in_select(
+/*=======================*/
+ sel_node_t* sel_node, /* in: select node */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr) /* in: mtr */
+{
+ upd_node_t* node;
+ btr_pcur_t* pcur;
+ btr_cur_t* btr_cur;
+ ulint err;
+
+ ut_ad(sel_node->select_will_do_update);
+ ut_ad(sel_node->latch_mode == BTR_MODIFY_LEAF);
+ ut_ad(sel_node->asc);
+
+ node = que_node_get_parent(sel_node);
+
+ ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE);
+
+ pcur = node->pcur;
+ btr_cur = btr_pcur_get_btr_cur(pcur);
+
+ /* Copy the necessary columns from clust_rec and calculate the new
+ values to set */
+
+ row_upd_copy_columns(btr_pcur_get_rec(pcur),
+ UT_LIST_GET_FIRST(node->columns));
+ row_upd_eval_new_vals(node->update);
+
+ ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
+
+ ut_ad(node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE);
+ ut_ad(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE);
+ ut_ad(node->select_will_do_update);
+
+ err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG, btr_cur,
+ node->update, node->cmpl_info,
+ thr, mtr);
+ ut_ad(err == DB_SUCCESS);
+}