summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andres Freund <andres@anarazel.de> 2015-12-10 16:25:12 +0100
committer Andres Freund <andres@anarazel.de> 2015-12-10 16:29:26 +0100
commit c6a67bbc7077f652b59c79c2dd5bf9774755db48 (patch)
tree bbc45d0b4fe3f127d91d5bfee9b6cfcfd152ad60
parent ee0df4d77c9f3e3aa6c5e23fa2dbc66e9cd6deae (diff)
download postgresql-c6a67bbc7077f652b59c79c2dd5bf9774755db48.tar.gz
Fix bug leading to restoring unlogged relations from empty files.
At the end of crash recovery, unlogged relations are reset to the empty state, using their init fork as the template. The init fork is copied to the main fork without going through shared buffers. Unfortunately WAL replay so far has not necessarily flushed writes from shared buffers to disk at that point. In normal crash recovery, and before the introduction of 'fast promotions' in fd4ced523 / 9.3, the END_OF_RECOVERY checkpoint flushes the buffers out in time. But with fast promotions that's not the case anymore. To fix, force WAL writes targeting the init fork to be flushed immediately (using the new FlushOneBuffer() function). In 9.5+ that flush can centrally be triggered from the code dealing with restoring full page writes (XLogReadBufferForRedoExtended), in earlier releases that responsibility is in the hands of XLOG_HEAP_NEWPAGE's replay function. Backpatch to 9.1, even if this currently is only known to trigger in 9.3+. Flushing earlier is more robust, and it is advantageous to keep the branches similar. Typical symptoms of this bug are errors like 'ERROR: index "..." contains unexpected zero page at block 0' shortly after promoting a node. Reported-By: Thom Brown Author: Andres Freund and Michael Paquier Discussion: 20150326175024.GJ451@alap3.anarazel.de Backpatch: 9.1-
-rw-r--r-- src/backend/access/heap/heapam.c 10
-rw-r--r-- src/backend/storage/buffer/bufmgr.c 21
-rw-r--r-- src/include/storage/bufmgr.h 1
3 files changed, 32 insertions, 0 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 0b397a86ac..5ed3cb52c4 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -7642,6 +7642,16 @@ heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
}
MarkBufferDirty(buffer);
+
+ /*
+ * At the end of crash recovery the init forks of unlogged relations are
+ * copied, without going through shared buffers. So we need to force the
+ * on-disk state of init forks to always be in sync with the state in
+ * shared buffers.
+ */
+ if (xlrec->forknum == INIT_FORKNUM)
+ FlushOneBuffer(buffer);
+
UnlockReleaseBuffer(buffer);
}
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 18013d59eb..3b2c40ce42 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -2549,6 +2549,27 @@ FlushDatabaseBuffers(Oid dbid)
}
/*
+ * Flush a buffer that the caller has already pinned and locked (in either
+ * shared or exclusive mode) to the OS.
+ */
+void
+FlushOneBuffer(Buffer buffer)
+{
+ volatile BufferDesc *bufHdr;
+
+ /* currently not needed, but no fundamental reason not to support */
+ Assert(!BufferIsLocal(buffer));
+
+ Assert(BufferIsPinned(buffer));
+
+ bufHdr = &BufferDescriptors[buffer - 1];
+ /* the caller must already hold this buffer's content lock; verify it */
+ Assert(LWLockHeldByMe(bufHdr->content_lock));
+
+ FlushBuffer(bufHdr, NULL);
+}
+
+/*
* ReleaseBuffer -- release the pin on a buffer
*/
void
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 921e4edde2..0c3fc55541 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -191,6 +191,7 @@ extern void CheckPointBuffers(int flags);
extern BlockNumber BufferGetBlockNumber(Buffer buffer);
extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation,
ForkNumber forkNum);
+extern void FlushOneBuffer(Buffer buffer);
extern void FlushRelationBuffers(Relation rel);
extern void FlushDatabaseBuffers(Oid dbid);
extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode,