summary | refs | log | tree | commit | diff
path: root/src/backend/executor/execMain.c
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2005-08-20 00:40:32 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2005-08-20 00:40:32 +0000
commit f57e3f4cf36f3fdd89cae8d566479ad747809b2f (patch)
tree 6cb7a1b32271692929ecef6be67a823c9d29cf3f /src/backend/executor/execMain.c
parent 97bb6e89beed326aae1990d798107b4af9a9af02 (diff)
download postgresql-f57e3f4cf36f3fdd89cae8d566479ad747809b2f.tar.gz
Repair problems with VACUUM destroying t_ctid chains too soon, and with
insufficient paranoia in code that follows t_ctid links. (We must do both because even with VACUUM doing it properly, the intermediate state with a dangling t_ctid link is visible concurrently during lazy VACUUM, and could be seen afterwards if either type of VACUUM crashes partway through.) Also try to improve documentation about what's going on. Patch is a bit bulky because passing the XMAX information around required changing the APIs of some low-level heapam.c routines, but it's not conceptually very complicated. Per trouble report from Teodor and subsequent analysis. This needs to be back-patched, but I'll do that after 8.1 beta is out.
Diffstat (limited to 'src/backend/executor/execMain.c')
-rw-r--r-- src/backend/executor/execMain.c 145
1 files changed, 107 insertions, 38 deletions
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index eb485ad547..9f5c008fa9 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -26,7 +26,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.253 2005/08/18 21:34:20 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.254 2005/08/20 00:39:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1165,8 +1165,10 @@ lnext: ;
foreach(l, estate->es_rowMarks)
{
execRowMark *erm = lfirst(l);
- Buffer buffer;
HeapTupleData tuple;
+ Buffer buffer;
+ ItemPointerData update_ctid;
+ TransactionId update_xmax;
TupleTableSlot *newSlot;
LockTupleMode lockmode;
HTSU_Result test;
@@ -1183,15 +1185,17 @@ lnext: ;
if (isNull)
elog(ERROR, "\"%s\" is NULL", erm->resname);
+ tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
+
if (estate->es_forUpdate)
lockmode = LockTupleExclusive;
else
lockmode = LockTupleShared;
- tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
test = heap_lock_tuple(erm->relation, &tuple, &buffer,
- estate->es_snapshot->curcid,
- lockmode, estate->es_rowNoWait);
+ &update_ctid, &update_xmax,
+ estate->es_snapshot->curcid,
+ lockmode, estate->es_rowNoWait);
ReleaseBuffer(buffer);
switch (test)
{
@@ -1207,11 +1211,15 @@ lnext: ;
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
- if (!(ItemPointerEquals(&(tuple.t_self),
- (ItemPointer) DatumGetPointer(datum))))
+ if (!ItemPointerEquals(&update_ctid,
+ &tuple.t_self))
{
- newSlot = EvalPlanQual(estate, erm->rti, &(tuple.t_self));
- if (!(TupIsNull(newSlot)))
+ /* updated, so look at updated version */
+ newSlot = EvalPlanQual(estate,
+ erm->rti,
+ &update_ctid,
+ update_xmax);
+ if (!TupIsNull(newSlot))
{
slot = newSlot;
estate->es_useEvalPlan = true;
@@ -1454,8 +1462,9 @@ ExecDelete(TupleTableSlot *slot,
{
ResultRelInfo *resultRelInfo;
Relation resultRelationDesc;
- ItemPointerData ctid;
HTSU_Result result;
+ ItemPointerData update_ctid;
+ TransactionId update_xmax;
/*
* get information on the (current) result relation
@@ -1486,7 +1495,7 @@ ExecDelete(TupleTableSlot *slot,
*/
ldelete:;
result = heap_delete(resultRelationDesc, tupleid,
- &ctid,
+ &update_ctid, &update_xmax,
estate->es_snapshot->curcid,
estate->es_crosscheck_snapshot,
true /* wait for commit */ );
@@ -1504,14 +1513,17 @@ ldelete:;
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
- else if (!(ItemPointerEquals(tupleid, &ctid)))
+ else if (!ItemPointerEquals(tupleid, &update_ctid))
{
- TupleTableSlot *epqslot = EvalPlanQual(estate,
- resultRelInfo->ri_RangeTableIndex, &ctid);
+ TupleTableSlot *epqslot;
+ epqslot = EvalPlanQual(estate,
+ resultRelInfo->ri_RangeTableIndex,
+ &update_ctid,
+ update_xmax);
if (!TupIsNull(epqslot))
{
- *tupleid = ctid;
+ *tupleid = update_ctid;
goto ldelete;
}
}
@@ -1558,8 +1570,9 @@ ExecUpdate(TupleTableSlot *slot,
HeapTuple tuple;
ResultRelInfo *resultRelInfo;
Relation resultRelationDesc;
- ItemPointerData ctid;
HTSU_Result result;
+ ItemPointerData update_ctid;
+ TransactionId update_xmax;
/*
* abort the operation if not running transactions
@@ -1627,7 +1640,7 @@ lreplace:;
* referential integrity updates in serializable transactions.
*/
result = heap_update(resultRelationDesc, tupleid, tuple,
- &ctid,
+ &update_ctid, &update_xmax,
estate->es_snapshot->curcid,
estate->es_crosscheck_snapshot,
true /* wait for commit */ );
@@ -1645,14 +1658,17 @@ lreplace:;
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
- else if (!(ItemPointerEquals(tupleid, &ctid)))
+ else if (!ItemPointerEquals(tupleid, &update_ctid))
{
- TupleTableSlot *epqslot = EvalPlanQual(estate,
- resultRelInfo->ri_RangeTableIndex, &ctid);
+ TupleTableSlot *epqslot;
+ epqslot = EvalPlanQual(estate,
+ resultRelInfo->ri_RangeTableIndex,
+ &update_ctid,
+ update_xmax);
if (!TupIsNull(epqslot))
{
- *tupleid = ctid;
+ *tupleid = update_ctid;
slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
tuple = ExecMaterializeSlot(slot);
goto lreplace;
@@ -1791,9 +1807,21 @@ ExecConstraints(ResultRelInfo *resultRelInfo,
* under READ COMMITTED rules.
*
* See backend/executor/README for some info about how this works.
+ *
+ * estate - executor state data
+ * rti - rangetable index of table containing tuple
+ * *tid - t_ctid from the outdated tuple (ie, next updated version)
+ * priorXmax - t_xmax from the outdated tuple
+ *
+ * *tid is also an output parameter: it's modified to hold the TID of the
+ * latest version of the tuple (note this may be changed even on failure)
+ *
+ * Returns a slot containing the new candidate update/delete tuple, or
+ * NULL if we determine we shouldn't process the row.
*/
TupleTableSlot *
-EvalPlanQual(EState *estate, Index rti, ItemPointer tid)
+EvalPlanQual(EState *estate, Index rti,
+ ItemPointer tid, TransactionId priorXmax)
{
evalPlanQual *epq;
EState *epqstate;
@@ -1837,11 +1865,24 @@ EvalPlanQual(EState *estate, Index rti, ItemPointer tid)
{
Buffer buffer;
- if (heap_fetch(relation, SnapshotDirty, &tuple, &buffer, false, NULL))
+ if (heap_fetch(relation, SnapshotDirty, &tuple, &buffer, true, NULL))
{
- TransactionId xwait = SnapshotDirty->xmax;
+ /*
+ * If xmin isn't what we're expecting, the slot must have been
+ * recycled and reused for an unrelated tuple. This implies
+ * that the latest version of the row was deleted, so we need
+ * do nothing. (Should be safe to examine xmin without getting
+ * buffer's content lock, since xmin never changes in an existing
+ * tuple.)
+ */
+ if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
+ priorXmax))
+ {
+ ReleaseBuffer(buffer);
+ return NULL;
+ }
- /* xmin should not be dirty... */
+ /* otherwise xmin should not be dirty... */
if (TransactionIdIsValid(SnapshotDirty->xmin))
elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
@@ -1849,11 +1890,11 @@ EvalPlanQual(EState *estate, Index rti, ItemPointer tid)
* If tuple is being updated by other transaction then we have
* to wait for its commit/abort.
*/
- if (TransactionIdIsValid(xwait))
+ if (TransactionIdIsValid(SnapshotDirty->xmax))
{
ReleaseBuffer(buffer);
- XactLockTableWait(xwait);
- continue;
+ XactLockTableWait(SnapshotDirty->xmax);
+ continue; /* loop back to repeat heap_fetch */
}
/*
@@ -1865,22 +1906,50 @@ EvalPlanQual(EState *estate, Index rti, ItemPointer tid)
}
/*
- * Oops! Invalid tuple. Have to check is it updated or deleted.
- * Note that it's possible to get invalid SnapshotDirty->tid if
- * tuple updated by this transaction. Have we to check this ?
+ * If the referenced slot was actually empty, the latest version
+ * of the row must have been deleted, so we need do nothing.
*/
- if (ItemPointerIsValid(&(SnapshotDirty->tid)) &&
- !(ItemPointerEquals(&(tuple.t_self), &(SnapshotDirty->tid))))
+ if (tuple.t_data == NULL)
{
- /* updated, so look at the updated copy */
- tuple.t_self = SnapshotDirty->tid;
- continue;
+ ReleaseBuffer(buffer);
+ return NULL;
}
/*
- * Deleted or updated by this transaction; forget it.
+ * As above, if xmin isn't what we're expecting, do nothing.
*/
- return NULL;
+ if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
+ priorXmax))
+ {
+ ReleaseBuffer(buffer);
+ return NULL;
+ }
+
+ /*
+ * If we get here, the tuple was found but failed SnapshotDirty.
+ * Assuming the xmin is either a committed xact or our own xact
+ * (as it certainly should be if we're trying to modify the tuple),
+ * this must mean that the row was updated or deleted by either
+ * a committed xact or our own xact. If it was deleted, we can
+ * ignore it; if it was updated then chain up to the next version
+ * and repeat the whole test.
+ *
+ * As above, it should be safe to examine xmax and t_ctid without
+ * the buffer content lock, because they can't be changing.
+ */
+ if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
+ {
+ /* deleted, so forget about it */
+ ReleaseBuffer(buffer);
+ return NULL;
+ }
+
+ /* updated, so look at the updated row */
+ tuple.t_self = tuple.t_data->t_ctid;
+ /* updated row should have xmin matching this xmax */
+ priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
+ ReleaseBuffer(buffer);
+ /* loop back to fetch next in chain */
}
/*