1 files changed, 1394 insertions, 0 deletions
diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c
new file mode 100644
index 00000000000..44843494247
--- /dev/null
+++ b/innobase/row/row0upd.c
@@ -0,0 +1,1394 @@
+/******************************************************
+Update of a row
+
+(c) 1996 Innobase Oy
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#include "row0upd.h"
+
+#ifdef UNIV_NONINL
+#include "row0upd.ic"
+#endif
+
+#include "dict0dict.h"
+#include "dict0boot.h"
+#include "dict0crea.h"
+#include "mach0data.h"
+#include "trx0undo.h"
+#include "btr0btr.h"
+#include "btr0cur.h"
+#include "que0que.h"
+#include "row0ins.h"
+#include "row0sel.h"
+#include "row0row.h"
+#include "rem0cmp.h"
+#include "lock0lock.h"
+#include "log0log.h"
+#include "pars0sym.h"
+#include "eval0eval.h"
+
+
+/* What kind of latch and lock can we assume when the control comes to
+   -------------------------------------------------------------------
+an update node?
+--------------
+Efficiency of massive updates would require keeping an x-latch on a
+clustered index page through many updates, and not setting an explicit
+x-lock on clustered index records, as they anyway will get an implicit
+x-lock when they are updated. A problem is that the read nodes in the
+graph should know that they must keep the latch when passing the control
+up to the update node, and not set any record lock on the record which
+will be updated. Another problem occurs if the execution is stopped,
+as the kernel switches to another query thread, or the transaction must
+wait for a lock. Then we should be able to release the latch and, maybe,
+acquire an explicit x-lock on the record.
+	Because this seems too complicated, we conclude that the less
+efficient solution of releasing all the latches when the control is
+transferred to another node, and acquiring explicit x-locks, is better. */
+
+/* How is a delete performed? If there is a delete without an
+explicit cursor, i.e., a searched delete, there are at least
+two different situations:
+the implicit select cursor may run on (1) the clustered index or
+on (2) a secondary index. The delete is performed by setting
+the delete bit in the record and substituting the id of the
+deleting transaction for the original trx id, and substituting a
+new roll ptr for previous roll ptr. The old trx id and roll ptr
+are saved in the undo log record. Thus, no physical changes occur
+in the index tree structure at the time of the delete. Only
+when the undo log is purged, the index records will be physically
+deleted from the index trees.
+
+The query graph executing a searched delete would consist of
+a delete node which has as a subtree a select subgraph.
+The select subgraph should return a (persistent) cursor
+in the clustered index, placed on page which is x-latched.
+The delete node should look for all secondary index records for
+this clustered index entry and mark them as deleted. When is
+the x-latch freed? The most efficient way for performing a
+searched delete is obviously to keep the x-latch for several
+steps of query graph execution. */
+
+/*************************************************************************
+Creates an update node for a query graph. */
+
+upd_node_t*
+upd_node_create(
+/*============*/
+				/* out, own: update node */
+	mem_heap_t*	heap)	/* in: mem heap where created */
+{
+	upd_node_t*	node;
+
+	node = mem_heap_alloc(heap, sizeof(upd_node_t));
+	node->common.type = QUE_NODE_UPDATE;
+
+	node->state = UPD_NODE_UPDATE_CLUSTERED;
+	node->select_will_do_update = FALSE;
+	node->in_mysql_interface = FALSE;
+
+	node->row = NULL;
+	node->index = NULL;
+
+	node->select = NULL;
+	
+	node->heap = mem_heap_create(128);
+	node->magic_n = UPD_NODE_MAGIC_N;	
+
+	node->cmpl_info = 0;
+	
+	return(node);
+}
+
+/*************************************************************************
+Updates the trx id and roll ptr field in a clustered index record in database
+recovery. */
+
+void
+row_upd_rec_sys_fields_in_recovery(
+/*===============================*/
+	rec_t*	rec,	/* in: record */
+	ulint	pos,	/* in: TRX_ID position in rec */
+	dulint	trx_id,	/* in: transaction id */
+	dulint	roll_ptr)/* in: roll ptr of the undo log record */
+{
+	byte*	field;
+	ulint	len;
+
+	field = rec_get_nth_field(rec, pos, &len);
+	ut_ad(len == DATA_TRX_ID_LEN);
+	trx_write_trx_id(field, trx_id);
+
+	field = rec_get_nth_field(rec, pos + 1, &len);
+	ut_ad(len == DATA_ROLL_PTR_LEN);
+	trx_write_roll_ptr(field, roll_ptr);
+}
+
+/*************************************************************************
+Sets the trx id or roll ptr field of a clustered index entry. */
+
+void
+row_upd_index_entry_sys_field(
+/*==========================*/
+	dtuple_t*	entry,	/* in: index entry, where the memory buffers
+				for sys fields are already allocated:
+				the function just copies the new values to
+				them */
+	dict_index_t*	index,	/* in: clustered index */
+	ulint		type,	/* in: DATA_TRX_ID or DATA_ROLL_PTR */
+	dulint		val)	/* in: value to write */
+{
+	dfield_t*	dfield;
+	byte*		field;
+	ulint		pos;
+
+	ut_ad(index->type & DICT_CLUSTERED);
+
+	pos = dict_index_get_sys_col_pos(index, type);
+
+	dfield = dtuple_get_nth_field(entry, pos);
+	field = dfield_get_data(dfield);
+
+	if (type == DATA_TRX_ID) {
+		trx_write_trx_id(field, val);
+	} else {
+		ut_ad(type == DATA_ROLL_PTR);
+		trx_write_roll_ptr(field, val);
+	}
+}
+
+/***************************************************************
+Returns TRUE if row update changes size of some field in index. */
+
+ibool
+row_upd_changes_field_size(
+/*=======================*/
+				/* out: TRUE if the update changes the size of
+				some field in index */		
+	rec_t*		rec,	/* in: record in clustered index */
+	dict_index_t*	index,	/* in: clustered index */
+	upd_t*		update)	/* in: update vector */
+{
+	upd_field_t*	upd_field;
+	dfield_t*	new_val;
+	ulint		old_len;
+	ulint		new_len;
+	ulint		n_fields;
+	ulint		i;
+
+	ut_ad(index->type & DICT_CLUSTERED);
+
+	n_fields = upd_get_n_fields(update);
+
+	for (i = 0; i < n_fields; i++) {
+		upd_field = upd_get_nth_field(update, i);
+
+		new_val = &(upd_field->new_val);
+		new_len = new_val->len;
+
+		if (new_len == UNIV_SQL_NULL) {
+			new_len = dtype_get_sql_null_size(
+					dict_index_get_nth_type(index, i));
+		}
+
+		old_len = rec_get_nth_field_size(rec, upd_field->field_no);
+		
+		if (old_len != new_len) {
+
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/***************************************************************
+Replaces the new column values stored in the update vector to the record
+given. No field size changes are allowed. This function is used only for
+a clustered index */
+
+void
+row_upd_rec_in_place(
+/*=================*/
+	rec_t*	rec,	/* in/out: record where replaced */
+	upd_t*	update)	/* in: update vector */
+{
+	upd_field_t*	upd_field;
+	dfield_t*	new_val;
+	ulint		n_fields;
+	ulint		i;
+
+	rec_set_info_bits(rec, update->info_bits);
+
+	n_fields = upd_get_n_fields(update);
+
+	for (i = 0; i < n_fields; i++) {
+		upd_field = upd_get_nth_field(update, i);
+		new_val = &(upd_field->new_val);
+		
+		rec_set_nth_field(rec, upd_field->field_no,
+						dfield_get_data(new_val),
+						dfield_get_len(new_val));
+	}
+}
+
+/*************************************************************************
+Writes into the redo log the values of trx id and roll ptr and enough info
+to determine their positions within a clustered index record. */
+
+byte*
+row_upd_write_sys_vals_to_log(
+/*==========================*/
+				/* out: new pointer to mlog */
+	dict_index_t*	index,	/* in: clustered index */
+	trx_t*		trx,	/* in: transaction */
+	dulint		roll_ptr,/* in: roll ptr of the undo log record */
+	byte*		log_ptr,/* pointer to a buffer of size > 20 opened
+				in mlog */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(mtr);
+
+	log_ptr += mach_write_compressed(log_ptr,
+			dict_index_get_sys_col_pos(index, DATA_TRX_ID));
+
+	trx_write_roll_ptr(log_ptr, roll_ptr);
+	log_ptr += DATA_ROLL_PTR_LEN;	
+
+	log_ptr += mach_dulint_write_compressed(log_ptr, trx->id);
+
+	return(log_ptr);
+}
+
+/*************************************************************************
+Parses the log data of system field values. */
+
+byte*
+row_upd_parse_sys_vals(
+/*===================*/
+			/* out: log data end or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	ulint*	pos,	/* out: TRX_ID position in record */
+	dulint*	trx_id,	/* out: trx id */
+	dulint*	roll_ptr)/* out: roll ptr */
+{
+	ptr = mach_parse_compressed(ptr, end_ptr, pos);
+
+	if (ptr == NULL) {
+
+		return(NULL);
+	}
+
+	if (end_ptr < ptr + DATA_ROLL_PTR_LEN) {
+
+		return(NULL);
+	}
+	
+	*roll_ptr = trx_read_roll_ptr(ptr);
+	ptr += DATA_ROLL_PTR_LEN;	
+
+	ptr = mach_dulint_parse_compressed(ptr, end_ptr, trx_id);
+
+	return(ptr);
+}
+
+/***************************************************************
+Writes to the redo log the new values of the fields occurring in the index. */
+
+void
+row_upd_index_write_log(
+/*====================*/
+	upd_t*	update,	/* in: update vector */
+	byte*	log_ptr,/* in: pointer to mlog buffer: must contain at least
+			MLOG_BUF_MARGIN bytes of free space; the buffer is
+			closed within this function */
+	mtr_t*	mtr)	/* in: mtr into whose log to write */
+{
+	upd_field_t*	upd_field;
+	dfield_t*	new_val;
+	ulint		len;
+	ulint		n_fields;
+	byte*		buf_end;
+	ulint		i;
+
+	n_fields = upd_get_n_fields(update);
+
+	buf_end = log_ptr + MLOG_BUF_MARGIN;
+	
+	mach_write_to_1(log_ptr, update->info_bits);
+	log_ptr++;
+	log_ptr += mach_write_compressed(log_ptr, n_fields);
+	
+	for (i = 0; i < n_fields; i++) {
+
+		ut_ad(MLOG_BUF_MARGIN > 30);
+
+		if (log_ptr + 30 > buf_end) {
+			mlog_close(mtr, log_ptr);
+			
+			log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN);
+			buf_end = log_ptr + MLOG_BUF_MARGIN;
+		}
+
+		upd_field = upd_get_nth_field(update, i);
+
+		new_val = &(upd_field->new_val);
+
+		len = new_val->len;
+
+		log_ptr += mach_write_compressed(log_ptr, upd_field->field_no);
+		log_ptr += mach_write_compressed(log_ptr, len);
+
+		if (len != UNIV_SQL_NULL) {
+			if (log_ptr + len < buf_end) {
+				ut_memcpy(log_ptr, new_val->data, len);
+
+				log_ptr += len;
+			} else {
+				mlog_close(mtr, log_ptr);
+			
+				mlog_catenate_string(mtr, new_val->data, len);
+
+				log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN);
+				buf_end = log_ptr + MLOG_BUF_MARGIN;
+			}
+		}
+	}
+
+	mlog_close(mtr, log_ptr);
+}
+
+/*************************************************************************
+Parses the log data written by row_upd_index_write_log. */
+
+byte*
+row_upd_index_parse(
+/*================*/
+				/* out: log data end or NULL */
+	byte*		ptr,	/* in: buffer */
+	byte*		end_ptr,/* in: buffer end */
+	mem_heap_t*	heap,	/* in: memory heap where update vector is
+				built */
+	upd_t**		update_out)/* out: update vector */
+{
+	upd_t*		update;
+	upd_field_t*	upd_field;
+	dfield_t*	new_val;
+	ulint		len;
+	ulint		n_fields;
+	byte*		buf;
+	ulint		info_bits;
+	ulint		i;
+
+	if (end_ptr < ptr + 1) {
+
+		return(NULL);
+	}
+
+	info_bits = mach_read_from_1(ptr);
+	ptr++;
+	ptr = mach_parse_compressed(ptr, end_ptr, &n_fields);
+
+	if (ptr == NULL) {
+
+		return(NULL);
+	}
+
+	update = upd_create(n_fields, heap);
+	update->info_bits = info_bits;
+
+	for (i = 0; i < n_fields; i++) {
+		upd_field = upd_get_nth_field(update, i);
+		new_val = &(upd_field->new_val);
+
+		ptr = mach_parse_compressed(ptr, end_ptr,
+						&(upd_field->field_no));
+		if (ptr == NULL) {
+
+			return(NULL);
+		}
+
+		ptr = mach_parse_compressed(ptr, end_ptr, &len);
+
+		if (ptr == NULL) {
+
+			return(NULL);
+		}
+
+		new_val->len = len;
+
+		if (len != UNIV_SQL_NULL) {
+
+			if (end_ptr < ptr + len) {
+
+				return(NULL);
+			} else {
+				buf = mem_heap_alloc(heap, len);
+				ut_memcpy(buf, ptr, len);
+
+				ptr += len;
+
+				new_val->data = buf;
+			}
+		}
+	}
+
+	*update_out = update;
+
+	return(ptr);
+}
+	
+/*******************************************************************
+Builds an update vector from those fields, excluding the roll ptr and
+trx id fields, which in an index entry differ from a record that has
+the equal ordering fields. */
+
+upd_t*
+row_upd_build_difference(
+/*=====================*/
+				/* out, own: update vector of differing
+				fields, excluding roll ptr and trx id */
+	dict_index_t*	index,	/* in: clustered index */
+	dtuple_t*	entry,	/* in: entry to insert */
+	rec_t*		rec,	/* in: clustered index record */
+	mem_heap_t*	heap)	/* in: memory heap from which allocated */
+{
+	upd_field_t*	upd_field;
+	dfield_t*	dfield;
+	byte*		data;
+	ulint		len;
+	upd_t*		update;
+	ulint		n_diff;
+	ulint		roll_ptr_pos;
+	ulint		trx_id_pos;
+	ulint		i;
+
+	/* This function is used only for a clustered index */
+	ut_ad(index->type & DICT_CLUSTERED);
+
+	update = upd_create(dtuple_get_n_fields(entry), heap);
+
+	n_diff = 0;
+
+	roll_ptr_pos = dict_index_get_sys_col_pos(index, DATA_ROLL_PTR);
+	trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+
+	for (i = 0; i < dtuple_get_n_fields(entry); i++) {
+
+		data = rec_get_nth_field(rec, i, &len);
+		dfield = dtuple_get_nth_field(entry, i);
+
+		if ((i != trx_id_pos) && (i != roll_ptr_pos)
+		   		&& !dfield_data_is_equal(dfield, len, data)) {
+
+			upd_field = upd_get_nth_field(update, n_diff);
+
+			dfield_copy(&(upd_field->new_val), dfield);
+
+			upd_field_set_field_no(upd_field, i, index);
+				
+			n_diff++;
+		}
+	}
+
+	update->n_fields = n_diff;
+
+	return(update);
+}
+
+/***************************************************************
+Replaces the new column values stored in the update vector to the index entry
+given. */
+
+void
+row_upd_index_replace_new_col_vals(
+/*===============================*/
+	dtuple_t*	entry,	/* in/out: index entry where replaced */
+	dict_index_t*	index,	/* in: index; NOTE that may also be a
+				non-clustered index */
+	upd_t*		update)	/* in: update vector */
+{
+	upd_field_t*	upd_field;
+	dfield_t*	dfield;
+	dfield_t*	new_val;
+	ulint		field_no;
+	dict_index_t*	clust_index;
+	ulint		i;
+
+	ut_ad(index);
+
+	clust_index = dict_table_get_first_index(index->table);
+
+	dtuple_set_info_bits(entry, update->info_bits);
+
+	for (i = 0; i < upd_get_n_fields(update); i++) {
+
+		upd_field = upd_get_nth_field(update, i);
+
+		field_no = dict_index_get_nth_col_pos(index,
+				dict_index_get_nth_col_no(clust_index,
+							upd_field->field_no));
+		if (field_no != ULINT_UNDEFINED) {
+			dfield = dtuple_get_nth_field(entry, field_no);
+
+			new_val = &(upd_field->new_val);
+
+			dfield_set_data(dfield, new_val->data, new_val->len);
+		}
+	}
+}
+
+/***************************************************************
+Replaces the new column values stored in the update vector to the
+clustered index entry given. */
+
+void
+row_upd_clust_index_replace_new_col_vals(
+/*=====================================*/
+	dtuple_t*	entry,	/* in/out: index entry where replaced */
+	upd_t*		update)	/* in: update vector */
+{
+	upd_field_t*	upd_field;
+	dfield_t*	dfield;
+	dfield_t*	new_val;
+	ulint		field_no;
+	ulint		i;
+
+	dtuple_set_info_bits(entry, update->info_bits);
+
+	for (i = 0; i < upd_get_n_fields(update); i++) {
+
+		upd_field = upd_get_nth_field(update, i);
+
+		field_no = upd_field->field_no;
+
+		dfield = dtuple_get_nth_field(entry, field_no);
+
+		new_val = &(upd_field->new_val);
+
+		dfield_set_data(dfield, new_val->data, new_val->len);
+	}
+}
+
+/***************************************************************
+Checks if an update vector changes an ordering field of an index record.
+This function is fast if the update vector is short or the number of ordering
+fields in the index is small. Otherwise, this can be quadratic. */
+
+ibool
+row_upd_changes_ord_field(
+/*======================*/
+				/* out: TRUE if update vector changes
+				an ordering field in the index record */
+	dtuple_t*	row,	/* in: old value of row, or NULL if the
+				row and the data values in update are not
+				known when this function is called, e.g., at
+				compile time */
+	dict_index_t*	index,	/* in: index of the record */
+	upd_t*		update)	/* in: update vector for the row */
+{
+	upd_field_t*	upd_field;
+	dict_field_t*	ind_field;
+	dict_col_t*	col;
+	ulint		n_unique;
+	ulint		n_upd_fields;
+	ulint		col_pos;
+	ulint		col_no;
+	ulint		i, j;
+	
+	ut_ad(update && index);
+
+	n_unique = dict_index_get_n_unique(index);
+	n_upd_fields = upd_get_n_fields(update);
+
+	for (i = 0; i < n_unique; i++) {
+
+		ind_field = dict_index_get_nth_field(index, i);
+		col = dict_field_get_col(ind_field);
+		col_pos = dict_col_get_clust_pos(col);
+		col_no = dict_col_get_no(col);
+
+		for (j = 0; j < n_upd_fields; j++) {
+
+			upd_field = upd_get_nth_field(update, j);
+
+			if (col_pos == upd_field->field_no
+			     && (row == NULL
+				 || !dfield_datas_are_equal(
+					dtuple_get_nth_field(row, col_no),
+						&(upd_field->new_val)))) {
+				return(TRUE);
+			}
+		}
+	}
+
+	return(FALSE);
+}
+
+/***************************************************************
+Checks if an update vector changes an ordering field of an index record.
+This function is fast if the update vector is short or the number of ordering
+fields in the index is small. Otherwise, this can be quadratic. */
+
+ibool
+row_upd_changes_some_index_ord_field(
+/*=================================*/
+				/* out: TRUE if update vector may change
+				an ordering field in an index record */
+	dict_table_t*	table,	/* in: table */
+	upd_t*		update)	/* in: update vector for the row */
+{
+	dict_index_t*	index;
+
+	index = dict_table_get_first_index(table);
+	
+	while (index) {
+		if (row_upd_changes_ord_field(NULL, index, update)) {
+
+			return(TRUE);
+		}
+
+		index = dict_table_get_next_index(index);
+	}	
+
+	return(FALSE);
+}
+
+/*************************************************************************
+Copies the column values from a record. */
+UNIV_INLINE
+void
+row_upd_copy_columns(
+/*=================*/
+	rec_t*		rec,	/* in: record in a clustered index */
+	sym_node_t*	column)	/* in: first column in a column list, or
+				NULL */
+{
+	byte*	data;
+	ulint	len;
+
+	while (column) {
+		data = rec_get_nth_field(rec,
+					column->field_nos[SYM_CLUST_FIELD_NO],
+									&len);
+		eval_node_copy_and_alloc_val(column, data, len);
+
+		column = UT_LIST_GET_NEXT(col_var_list, column);
+	}
+}
+
+/*************************************************************************
+Calculates the new values for fields to update. Note that row_upd_copy_columns
+must have been called first. */
+UNIV_INLINE
+void
+row_upd_eval_new_vals(
+/*==================*/
+	upd_t*	update)	/* in: update vector */
+{
+	que_node_t*	exp;
+	upd_field_t*	upd_field;
+	ulint		n_fields;
+	ulint		i;
+
+	n_fields = upd_get_n_fields(update);
+
+	for (i = 0; i < n_fields; i++) {
+		upd_field = upd_get_nth_field(update, i);
+
+		exp = upd_field->exp;
+
+		eval_exp(exp);
+
+		dfield_copy_data(&(upd_field->new_val), que_node_get_val(exp));
+	}
+}
+
+/***************************************************************
+Stores to the heap the row on which the node->pcur is positioned. */
+UNIV_INLINE
+void
+row_upd_store_row(
+/*==============*/
+	upd_node_t*	node)	/* in: row update node */
+{
+	dict_index_t*	clust_index;
+	
+	ut_ad((node->pcur)->latch_mode != BTR_NO_LATCHES);
+
+	if (node->row != NULL) {
+		mem_heap_empty(node->heap);
+		node->row = NULL;
+	}
+	
+	clust_index = dict_table_get_first_index(node->table);
+
+	node->row = row_build(ROW_COPY_DATA, clust_index,
+				btr_pcur_get_rec(node->pcur), node->heap);
+}
+
+/***************************************************************
+Updates a secondary index entry of a row. */
+static
+ulint
+row_upd_sec_index_entry(
+/*====================*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, else error code or DB_LOCK_WAIT */
+	upd_node_t*	node,	/* in: row update node */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ibool		found;
+	dict_index_t*	index;
+	dtuple_t*	entry;
+	mtr_t		mtr;
+	btr_pcur_t	pcur;
+	btr_cur_t*	btr_cur;
+	mem_heap_t*	heap;
+	rec_t*		rec;
+	ulint		err;
+	
+	index = node->index;
+	
+	heap = mem_heap_create(1024);
+
+	/* Build old index entry */
+	entry = row_build_index_entry(node->row, index, heap);
+
+	log_free_check();
+	mtr_start(&mtr);
+	
+	found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur,
+									&mtr);
+	ut_ad(found);
+
+	btr_cur = btr_pcur_get_btr_cur(&pcur);
+
+	rec = btr_cur_get_rec(btr_cur);
+
+ 	/* Delete mark the old index record; it can already be delete marked if
+ 	we return after a lock wait in row_ins_index_entry below */
+
+	if (!rec_get_deleted_flag(rec)) {
+		err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, thr,
+									&mtr);
+	}
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	if (node->is_delete || (err != DB_SUCCESS)) {
+
+		mem_heap_free(heap);	
+
+        	return(err);
+	}
+
+	/* Build a new index entry */
+	row_upd_index_replace_new_col_vals(entry, index, node->update);
+
+	/* Insert new index entry */
+	err = row_ins_index_entry(index, entry, thr);
+
+	mem_heap_free(heap);	
+
+        return(err);
+}
+
+/***************************************************************
+Updates secondary index record if it is changed in the row update. This
+should be quite rare in database applications. */
+UNIV_INLINE
+ulint
+row_upd_sec_step(
+/*=============*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, else error code or DB_LOCK_WAIT */
+	upd_node_t*	node,	/* in: row update node */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ulint	err;
+
+	ut_ad((node->state == UPD_NODE_UPDATE_ALL_SEC)
+				|| (node->state == UPD_NODE_UPDATE_SOME_SEC));
+	ut_ad(!(node->index->type & DICT_CLUSTERED));
+	
+	if ((node->state == UPD_NODE_UPDATE_ALL_SEC)
+			|| row_upd_changes_ord_field(node->row, node->index,
+							node->update)) {
+		err = row_upd_sec_index_entry(node, thr);
+
+		return(err);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/***************************************************************
+Marks the clustered index record deleted and inserts the updated version
+of the record to the index. This function should be used when the ordering
+fields of the clustered index record change. This should be quite rare in
+database applications. */
+static
+ulint
+row_upd_clust_rec_by_insert(
+/*========================*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, else error code or DB_LOCK_WAIT */
+	upd_node_t*	node,	/* in: row update node */
+	dict_index_t*	index,	/* in: clustered index of the record */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr)	/* in: mtr; gets committed here */
+{	
+	btr_pcur_t*	pcur;
+	btr_cur_t*	btr_cur;
+	trx_t*		trx;
+	dict_table_t*	table;
+	mem_heap_t*	heap;
+	dtuple_t*	entry;
+	ulint		err;
+	
+	ut_ad(node);
+	ut_ad(index->type & DICT_CLUSTERED);
+
+	trx = thr_get_trx(thr);
+	table = node->table;
+	pcur = node->pcur;
+	btr_cur	= btr_pcur_get_btr_cur(pcur);
+	
+	if (node->state != UPD_NODE_INSERT_CLUSTERED) {
+
+		err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
+						btr_cur, TRUE, thr, mtr);
+		if (err != DB_SUCCESS) {
+			mtr_commit(mtr);
+
+			return(err);
+		}
+	} 
+
+	mtr_commit(mtr);
+
+	node->state = UPD_NODE_INSERT_CLUSTERED;
+
+	heap = mem_heap_create(1024);
+
+	entry = row_build_index_entry(node->row, index, heap);
+
+	row_upd_clust_index_replace_new_col_vals(entry, node->update);
+
+	row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
+	
+	err = row_ins_index_entry(index, entry, thr);
+
+	mem_heap_free(heap);	
+
+	return(err);
+}
+
+/***************************************************************
+Updates a clustered index record of a row when the ordering fields do
+not change. */
+static
+ulint
+row_upd_clust_rec(
+/*==============*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, else error code or DB_LOCK_WAIT */
+	upd_node_t*	node,	/* in: row update node */
+	dict_index_t*	index,	/* in: clustered index */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr)	/* in: mtr; gets committed here */
+{
+	btr_pcur_t*	pcur;
+	btr_cur_t*	btr_cur;
+	ulint		err;
+	
+	ut_ad(node);
+	ut_ad(index->type & DICT_CLUSTERED);
+
+	pcur = node->pcur;
+	btr_cur = btr_pcur_get_btr_cur(pcur);
+
+	ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
+	
+	/* Try optimistic updating of the record, keeping changes within
+	the page; we do not check locks because we assume the x-lock on the
+	record to update */
+
+	if (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE) {
+		err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG,
+						btr_cur, node->update,
+						node->cmpl_info, thr, mtr);
+	} else {
+		err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG,
+						btr_cur, node->update,
+						node->cmpl_info, thr, mtr);
+	}
+
+	mtr_commit(mtr);
+	
+	if (err == DB_SUCCESS) {
+
+		return(err);
+	}
+
+	/* We may have to modify the tree structure: do a pessimistic descent
+	down the index tree */
+
+	mtr_start(mtr);
+	
+	/* NOTE: this transaction has an s-lock or x-lock on the record and
+	therefore other transactions cannot modify the record when we have no
+	latch on the page. In addition, we assume that other query threads of
+	the same transaction do not modify the record in the meantime.
+	Therefore we can assert that the restoration of the cursor succeeds. */
+
+	ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
+
+	ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
+	
+	err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
+				node->update, node->cmpl_info, thr, mtr);
+	mtr_commit(mtr);
+
+	return(err);
+}
+
+/***************************************************************
+Delete marks a clustered index record. */
+static
+ulint
+row_upd_del_mark_clust_rec(
+/*=======================*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, else error code or DB_LOCK_WAIT */
+	upd_node_t*	node,	/* in: row update node */
+	dict_index_t*	index,	/* in: clustered index */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr)	/* in: mtr; gets committed here */
+{
+	btr_pcur_t*	pcur;
+	btr_cur_t*	btr_cur;
+	ulint		err;
+	
+	ut_ad(node);
+	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(node->is_delete);
+
+	pcur = node->pcur;
+	btr_cur = btr_pcur_get_btr_cur(pcur);
+
+	ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
+
+	/* Store row because we have to build also the secondary index
+	entries */
+	
+	row_upd_store_row(node);
+
+	/* Mark the clustered index record deleted; we do not have to check
+	locks, because we assume that we have an x-lock on the record */
+
+	err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, btr_cur,
+							TRUE, thr, mtr);
+	mtr_commit(mtr);
+	
+	return(err);
+}
+
+/***************************************************************
+Updates the clustered index record. */
+static
+ulint
+row_upd_clust_step(
+/*===============*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, DB_LOCK_WAIT in case of a lock wait,
+				else error code */
+	upd_node_t*	node,	/* in: row update node */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	dict_index_t*	index;
+	btr_pcur_t*	pcur;
+	ibool		success;
+	ulint		err;
+	mtr_t		mtr_buf;
+	mtr_t*		mtr;
+	
+	index = dict_table_get_first_index(node->table);
+
+	pcur = node->pcur;
+
+	/* We have to restore the cursor to its position */
+	mtr = &mtr_buf;
+
+	mtr_start(mtr);
+	
+	/* If the restoration does not succeed, then the same
+	transaction has deleted the record on which the cursor was,
+	and that is an SQL error. If the restoration succeeds, it may
+	still be that the same transaction has successively deleted
+	and inserted a record with the same ordering fields, but in
+	that case we know that the transaction has at least an
+	implicit x-lock on the record. */
+	
+	ut_a(pcur->rel_pos == BTR_PCUR_ON);
+
+	success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
+
+	if (!success) {
+		err = DB_RECORD_NOT_FOUND;
+
+		mtr_commit(mtr);
+
+		return(err);
+	}
+
+	/* If this is a row in SYS_INDEXES table of the data dictionary,
+	then we have to free the file segments of the index tree associated
+	with the index */
+
+	if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {
+
+		dict_drop_index_tree(btr_pcur_get_rec(pcur), mtr);
+
+		mtr_commit(mtr);
+
+		mtr_start(mtr);
+
+		success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur,
+									mtr);
+		if (!success) {
+			err = DB_ERROR;
+
+			mtr_commit(mtr);
+
+			return(err);
+		}
+	} 
+
+	if (!node->has_clust_rec_x_lock) {
+		err = lock_clust_rec_modify_check_and_lock(0,
+						btr_pcur_get_rec(pcur),
+						index, thr);
+		if (err != DB_SUCCESS) {
+			mtr_commit(mtr);
+
+			return(err);
+		}
+	}
+
+	/* NOTE: the following function calls will also commit mtr */
+
+	if (node->is_delete) {
+		err = row_upd_del_mark_clust_rec(node, index, thr, mtr);
+
+		if (err != DB_SUCCESS) {
+
+			return(err);
+		}
+
+		node->state = UPD_NODE_UPDATE_ALL_SEC;
+		node->index = dict_table_get_next_index(index);
+
+		return(err);
+	}
+	
+	/* If the update is made for MySQL, we already have the update vector
+	ready, else we have to do some evaluation: */
+ 
+	if (!node->in_mysql_interface) {
+		/* Copy the necessary columns from clust_rec and calculate the
+		new values to set */
+
+		row_upd_copy_columns(btr_pcur_get_rec(pcur),
+					UT_LIST_GET_FIRST(node->columns));
+		row_upd_eval_new_vals(node->update);
+	}
+		
+	if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
+
+		err = row_upd_clust_rec(node, index, thr, mtr);
+
+		return(err);
+	}
+	
+	row_upd_store_row(node);
+
+	if (row_upd_changes_ord_field(node->row, index, node->update)) {
+
+		/* Update causes an ordering field (ordering fields within
+		the B-tree) of the clustered index record to change: perform
+		the update by delete marking and inserting.
+
+		TODO! What to do to the 'Halloween problem', where an update
+		moves the record forward in index so that it is again
+		updated when the cursor arrives there? Solution: the
+		read operation must check the undo record undo number when
+		choosing records to update. MySQL solves now the problem
+		externally! */
+
+		err = row_upd_clust_rec_by_insert(node, index, thr, mtr);
+
+		if (err != DB_SUCCESS) {
+
+			return(err);
+		}
+
+		node->state = UPD_NODE_UPDATE_ALL_SEC;
+	} else {
+		err = row_upd_clust_rec(node, index, thr, mtr);
+
+		if (err != DB_SUCCESS) {
+
+			return(err);
+		}
+
+		node->state = UPD_NODE_UPDATE_SOME_SEC;
+	}
+
+	node->index = dict_table_get_next_index(index);
+
+	return(err);
+}
+
+/***************************************************************
+Updates the affected index records of a row. When the control is transferred
+to this node, we assume that we have a persistent cursor which was on a
+record, and the position of the cursor is stored in the cursor. */
+static
+ulint
+row_upd(
+/*====*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, else error code or DB_LOCK_WAIT */
+	upd_node_t*	node,	/* in: row update node */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ulint	err;
+	
+	ut_ad(node && thr);
+
+	if (node->in_mysql_interface) {
+		/* We do not get the cmpl_info value from the MySQL
+		interpreter: we must calculate it on the fly: */
+		
+		if (row_upd_changes_some_index_ord_field(node->table,
+							node->update)) {
+			node->cmpl_info = 0; 
+		} else {
+			node->cmpl_info = UPD_NODE_NO_ORD_CHANGE;
+		}
+	}
+
+	if (node->state == UPD_NODE_UPDATE_CLUSTERED
+				|| node->state == UPD_NODE_INSERT_CLUSTERED) {
+
+		err = row_upd_clust_step(node, thr);
+		
+		if (err != DB_SUCCESS) {
+
+			goto function_exit;
+		}
+	}
+
+	if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
+
+		goto function_exit;
+	}
+
+	while (node->index != NULL) {
+		err = row_upd_sec_step(node, thr);
+
+		if (err != DB_SUCCESS) {
+
+			goto function_exit;
+		}
+
+		node->index = dict_table_get_next_index(node->index);
+        }
+
+function_exit:
+	if (err == DB_SUCCESS) {
+		/* Do some cleanup */
+
+		if (node->row != NULL) {
+			mem_heap_empty(node->heap);
+			node->row = NULL;
+		}
+
+		node->state = UPD_NODE_UPDATE_CLUSTERED;
+	}
+
+        return(err);
+}
+
+/***************************************************************
+Updates a row in a table. This is a high-level function used in SQL execution
+graphs. */
+
+que_thr_t*
+row_upd_step(
+/*=========*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	upd_node_t*	node;
+	sel_node_t*	sel_node;
+	que_node_t*	parent;
+	ulint		err		= DB_SUCCESS;
+	trx_t*		trx;
+
+	ut_ad(thr);
+	
+	trx = thr_get_trx(thr);
+
+	node = thr->run_node;
+	
+	sel_node = node->select;
+
+	parent = que_node_get_parent(node);
+	
+	ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE);
+	
+	if (thr->prev_node == parent) {
+		node->state = UPD_NODE_SET_IX_LOCK;
+	}
+
+	if (node->state == UPD_NODE_SET_IX_LOCK) {
+
+		if (!node->has_clust_rec_x_lock) {
+			/* It may be that the current session has not yet
+			started its transaction, or it has been committed: */
+
+			trx_start_if_not_started(thr_get_trx(thr));
+
+			err = lock_table(0, node->table, LOCK_IX, thr);
+
+			if (err != DB_SUCCESS) {
+
+				goto error_handling;
+			}
+		}
+	
+		node->state = UPD_NODE_UPDATE_CLUSTERED;
+
+		if (node->searched_update) {
+			/* Reset the cursor */
+			sel_node->state = SEL_NODE_OPEN;
+		
+			/* Fetch a row to update */
+		
+			thr->run_node = sel_node;
+	
+			return(thr);
+		}
+	}
+
+	/* sel_node is NULL if we are in the MySQL interface */
+	
+	if (sel_node && (sel_node->state != SEL_NODE_FETCH)) {
+
+		if (!node->searched_update) {
+			/* An explicit cursor should be positioned on a row
+			to update */
+
+			ut_error;
+			
+			err = DB_ERROR;
+
+			goto error_handling;
+		}
+
+		ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
+
+		/* No more rows to update, or the select node performed the
+		updates directly in-place */
+
+		thr->run_node = parent;
+	
+		return(thr);
+	}
+
+	/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
+	
+	err = row_upd(node, thr);
+
+error_handling:
+	trx->error_state = err;
+
+	if (err == DB_SUCCESS) {
+		/* Ok: do nothing */
+	} else if (err == DB_LOCK_WAIT) {
+
+		return(NULL);
+	} else {
+		return(NULL);
+	}
+
+	/* DO THE TRIGGER ACTIONS HERE */
+
+	if (node->searched_update) {
+		/* Fetch next row to update */
+
+		thr->run_node = sel_node;
+	} else {
+		/* It was an explicit cursor update */
+
+		thr->run_node = parent;
+	}
+
+	node->state = UPD_NODE_UPDATE_CLUSTERED;
+
+	return(thr);
+} 
+
+/*************************************************************************
+Performs an in-place update for the current clustered index record in
+select. */
+
+void
+row_upd_in_place_in_select(
+/*=======================*/
+	sel_node_t*	sel_node,	/* in: select node */
+	que_thr_t*	thr,		/* in: query thread */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	upd_node_t*	node;
+	btr_pcur_t*	pcur;
+	btr_cur_t*	btr_cur;
+	ulint		err;
+
+	ut_ad(sel_node->select_will_do_update);
+	ut_ad(sel_node->latch_mode == BTR_MODIFY_LEAF);
+	ut_ad(sel_node->asc);
+
+	node = que_node_get_parent(sel_node);
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE);
+
+	pcur = node->pcur;
+	btr_cur = btr_pcur_get_btr_cur(pcur);
+
+	/* Copy the necessary columns from clust_rec and calculate the new
+	values to set */
+
+	row_upd_copy_columns(btr_pcur_get_rec(pcur),
+					UT_LIST_GET_FIRST(node->columns));
+	row_upd_eval_new_vals(node->update);
+
+	ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
+	
+	ut_ad(node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE);
+	ut_ad(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE);
+	ut_ad(node->select_will_do_update);
+
+	err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG, btr_cur,
+						node->update, node->cmpl_info,
+						thr, mtr);
+	ut_ad(err == DB_SUCCESS);
+}