summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/access/nbtree/nbtpage.c42
-rw-r--r--src/backend/storage/buffer/bufmgr.c33
-rw-r--r--src/include/storage/bufmgr.h3
3 files changed, 67 insertions, 11 deletions
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index b0aff28a59..23bc2fb062 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.69 2003/08/08 21:41:27 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.70 2003/08/10 19:48:08 tgl Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -409,6 +409,22 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
* that the page is still free. (For example, an already-free
* page could have been re-used between the time the last VACUUM
* scanned it and the time the VACUUM made its FSM updates.)
+ *
+ * In fact, it's worse than that: we can't even assume that it's
+ * safe to take a lock on the reported page. If somebody else
+ * has a lock on it, or even worse our own caller does, we could
+ * deadlock. (The own-caller scenario is actually not improbable.
+ * Consider an index on a serial or timestamp column. Nearly all
+ * splits will be at the rightmost page, so it's entirely likely
+ * that _bt_split will call us while holding a lock on the page most
+ * recently acquired from FSM. A VACUUM running concurrently with
+ * the previous split could well have placed that page back in FSM.)
+ *
+ * To get around that, we ask for only a conditional lock on the
+ * reported page. If we fail, then someone else is using the page,
+ * and we may reasonably assume it's not free. (If we happen to be
+ * wrong, the worst consequence is the page will be lost to use till
+ * the next VACUUM, which is no big problem.)
*/
for (;;)
{
@@ -416,16 +432,24 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
if (blkno == InvalidBlockNumber)
break;
buf = ReadBuffer(rel, blkno);
- LockBuffer(buf, access);
- page = BufferGetPage(buf);
- if (_bt_page_recyclable(page))
+ if (ConditionalLockBuffer(buf))
{
- /* Okay to use page. Re-initialize and return it */
- _bt_pageinit(page, BufferGetPageSize(buf));
- return buf;
+ page = BufferGetPage(buf);
+ if (_bt_page_recyclable(page))
+ {
+ /* Okay to use page. Re-initialize and return it */
+ _bt_pageinit(page, BufferGetPageSize(buf));
+ return buf;
+ }
+ elog(DEBUG2, "FSM returned nonrecyclable page");
+ _bt_relbuf(rel, buf);
+ }
+ else
+ {
+ elog(DEBUG2, "FSM returned nonlockable page");
+ /* couldn't get lock, so just drop pin */
+ ReleaseBuffer(buf);
}
- elog(DEBUG2, "FSM returned nonrecyclable page");
- _bt_relbuf(rel, buf);
}
/*
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 2da3b4e364..1be30d30e2 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.139 2003/08/04 02:40:03 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.140 2003/08/10 19:48:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1937,6 +1937,37 @@ LockBuffer(Buffer buffer, int mode)
}
/*
+ * Acquire the cntx_lock for the buffer, but only if we don't have to wait.
+ *
+ * This assumes the caller wants BUFFER_LOCK_EXCLUSIVE mode.
+ */
+bool
+ConditionalLockBuffer(Buffer buffer)
+{
+ BufferDesc *buf;
+
+ Assert(BufferIsValid(buffer));
+ if (BufferIsLocal(buffer))
+ return true; /* act as though we got it */
+
+ buf = &(BufferDescriptors[buffer - 1]);
+
+ if (LWLockConditionalAcquire(buf->cntx_lock, LW_EXCLUSIVE))
+ {
+ /*
+ * This is not the best place to set cntxDirty flag (eg indices do
+ * not always change buffer they lock in excl mode). But please
+ * remember that it's critical to set cntxDirty *before* logging
+ * changes with XLogInsert() - see comments in BufferSync().
+ */
+ buf->cntxDirty = true;
+
+ return true;
+ }
+ return false;
+}
+
+/*
* LockBufferForCleanup - lock a buffer in preparation for deleting items
*
* Items may be deleted from a disk page only when the caller (a) holds an
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 5d78a3b9b2..bfbf64e207 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: bufmgr.h,v 1.69 2003/08/04 02:40:14 momjian Exp $
+ * $Id: bufmgr.h,v 1.70 2003/08/10 19:48:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -180,6 +180,7 @@ extern void SetBufferCommitInfoNeedsSave(Buffer buffer);
extern void UnlockBuffers(void);
extern void LockBuffer(Buffer buffer, int mode);
+extern bool ConditionalLockBuffer(Buffer buffer);
extern void LockBufferForCleanup(Buffer buffer);
extern void AbortBufferIO(void);