diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2004-08-11 04:09:14 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2004-08-11 04:09:14 +0000 |
commit | d0b776b2be53b8081dfafa6f35d2268e604d921a (patch) | |
tree | 04391f7161e5106c9b0c7d952fe80b6d03a8702d | |
parent | fbec0d7e9459aa4e302bb029f248eef46de11c04 (diff) | |
download | postgresql-d0b776b2be53b8081dfafa6f35d2268e604d921a.tar.gz |
Fix failure to guarantee that a checkpoint will write out pg_clog updates
for transaction commits that occurred just before the checkpoint. This is
an EXTREMELY serious bug --- kudos to Satoshi Okada for creating a
reproducible test case to prove its existence.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/backend/access/transam/xact.c | 26 |
| -rw-r--r-- | src/backend/access/transam/xlog.c | 18 |
| -rw-r--r-- | src/include/storage/lwlock.h | 3 |

3 files changed, 40 insertions, 7 deletions
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index b3d51a49ec..edbf9fb5d5 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.115.2.1 2002/03/15 19:20:43 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.115.2.2 2004/08/11 04:09:12 tgl Exp $ * * NOTES * Transaction aborts can now occur two ways: @@ -557,13 +557,27 @@ RecordTransactionCommit(void) */ if (MyXactMadeXLogEntry) { + bool madeTCentries; XLogRecPtr recptr; BufmgrCommit(); START_CRIT_SECTION(); - if (MyLastRecPtr.xrecoff != 0) + madeTCentries = (MyLastRecPtr.xrecoff != 0); + + /* + * We need to lock out checkpoint start between writing our XLOG + * record and updating pg_clog. Otherwise it is possible for the + * checkpoint to set REDO after the XLOG record but fail to flush the + * pg_clog update to disk, leading to loss of the transaction commit + * if we crash a little later. Slightly klugy fix for problem + * discovered 2004-08-10. + */ + if (madeTCentries) + LWLockAcquire(CheckpointStartLock, LW_SHARED); + + if (madeTCentries) { /* Need to emit a commit record */ XLogRecData rdata; @@ -610,9 +624,13 @@ RecordTransactionCommit(void) XLogFlush(recptr); /* Mark the transaction committed in clog, if needed */ - if (MyLastRecPtr.xrecoff != 0) + if (madeTCentries) TransactionIdCommit(xid); + /* Unlock checkpoint lock if we acquired it */ + if (madeTCentries) + LWLockRelease(CheckpointStartLock); + END_CRIT_SECTION(); } @@ -712,6 +730,8 @@ RecordTransactionAbort(void) * nowhere in permanent storage, so no one will ever care if it * committed.) We do not flush XLOG to disk in any case, since the * default assumption after a crash would be that we aborted, anyway. + * For the same reason, we don't need to worry about interlocking + * against checkpoint start. 
* * Extra check here is to catch case that we aborted partway through * RecordTransactionCommit ... diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index c8aef1ad9d..26e52899f5 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.86.2.3 2003/01/21 19:51:42 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.86.2.4 2004/08/11 04:09:12 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2945,6 +2945,15 @@ CreateCheckPoint(bool shutdown, bool force) checkPoint.ThisStartUpID = ThisStartUpID; checkPoint.time = time(NULL); + /* + * We must hold CheckpointStartLock while determining the checkpoint + * REDO pointer. This ensures that any concurrent transaction commits + * will be either not yet logged, or logged and recorded in pg_clog. + * See notes in RecordTransactionCommit(). + */ + LWLockAcquire(CheckpointStartLock, LW_EXCLUSIVE); + + /* And we need WALInsertLock too */ LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); /* @@ -2976,6 +2985,7 @@ CreateCheckPoint(bool shutdown, bool force) ControlFile->checkPointCopy.redo.xrecoff) { LWLockRelease(WALInsertLock); + LWLockRelease(CheckpointStartLock); LWLockRelease(CheckpointLock); END_CRIT_SECTION(); return; @@ -3035,11 +3045,13 @@ CreateCheckPoint(bool shutdown, bool force) #endif /* - * Now we can release insert lock, allowing other xacts to proceed - * even while we are flushing disk buffers. + * Now we can release insert lock and checkpoint start lock, allowing + * other xacts to proceed even while we are flushing disk buffers. 
*/ LWLockRelease(WALInsertLock); + LWLockRelease(CheckpointStartLock); + LWLockAcquire(XidGenLock, LW_SHARED); checkPoint.nextXid = ShmemVariableCache->nextXid; LWLockRelease(XidGenLock); diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 4db2d7e2c0..773d473437 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: lwlock.h,v 1.4 2001/11/05 17:46:35 momjian Exp $ + * $Id: lwlock.h,v 1.4.2.1 2004/08/11 04:09:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,6 +37,7 @@ typedef enum LWLockId WALWriteLock, ControlFileLock, CheckpointLock, + CheckpointStartLock, CLogControlLock, NumFixedLWLocks, /* must be last except for |