diff options
-rw-r--r-- | src/backend/access/nbtree/nbtpage.c | 42 | ||||
-rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 33 | ||||
-rw-r--r-- | src/include/storage/bufmgr.h | 3 |
3 files changed, 67 insertions, 11 deletions
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index b0aff28a59..23bc2fb062 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.69 2003/08/08 21:41:27 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.70 2003/08/10 19:48:08 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -409,6 +409,22 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) * that the page is still free. (For example, an already-free * page could have been re-used between the time the last VACUUM * scanned it and the time the VACUUM made its FSM updates.) + * + * In fact, it's worse than that: we can't even assume that it's + * safe to take a lock on the reported page. If somebody else + * has a lock on it, or even worse our own caller does, we could + * deadlock. (The own-caller scenario is actually not improbable. + * Consider an index on a serial or timestamp column. Nearly all + * splits will be at the rightmost page, so it's entirely likely + * that _bt_split will call us while holding a lock on the page most + * recently acquired from FSM. A VACUUM running concurrently with + * the previous split could well have placed that page back in FSM.) + * + * To get around that, we ask for only a conditional lock on the + * reported page. If we fail, then someone else is using the page, + * and we may reasonably assume it's not free. (If we happen to be + * wrong, the worst consequence is the page will be lost to use till + * the next VACUUM, which is no big problem.) */ for (;;) { @@ -416,16 +432,24 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) if (blkno == InvalidBlockNumber) break; buf = ReadBuffer(rel, blkno); - LockBuffer(buf, access); - page = BufferGetPage(buf); - if (_bt_page_recyclable(page)) + if (ConditionalLockBuffer(buf)) { - /* Okay to use page. Re-initialize and return it */ - _bt_pageinit(page, BufferGetPageSize(buf)); - return buf; + page = BufferGetPage(buf); + if (_bt_page_recyclable(page)) + { + /* Okay to use page. Re-initialize and return it */ + _bt_pageinit(page, BufferGetPageSize(buf)); + return buf; + } + elog(DEBUG2, "FSM returned nonrecyclable page"); + _bt_relbuf(rel, buf); + } + else + { + elog(DEBUG2, "FSM returned nonlockable page"); + /* couldn't get lock, so just drop pin */ + ReleaseBuffer(buf); } - elog(DEBUG2, "FSM returned nonrecyclable page"); - _bt_relbuf(rel, buf); } /* diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 2da3b4e364..1be30d30e2 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.139 2003/08/04 02:40:03 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.140 2003/08/10 19:48:08 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1937,6 +1937,37 @@ LockBuffer(Buffer buffer, int mode) } /* + * Acquire the cntx_lock for the buffer, but only if we don't have to wait. + * + * This assumes the caller wants BUFFER_LOCK_EXCLUSIVE mode. + */ +bool +ConditionalLockBuffer(Buffer buffer) +{ + BufferDesc *buf; + + Assert(BufferIsValid(buffer)); + if (BufferIsLocal(buffer)) + return true; /* act as though we got it */ + + buf = &(BufferDescriptors[buffer - 1]); + + if (LWLockConditionalAcquire(buf->cntx_lock, LW_EXCLUSIVE)) + { + /* + * This is not the best place to set cntxDirty flag (eg indices do + * not always change buffer they lock in excl mode). But please + * remember that it's critical to set cntxDirty *before* logging + * changes with XLogInsert() - see comments in BufferSync(). + */ + buf->cntxDirty = true; + + return true; + } + return false; +} + +/* * LockBufferForCleanup - lock a buffer in preparation for deleting items * * Items may be deleted from a disk page only when the caller (a) holds an diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 5d78a3b9b2..bfbf64e207 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: bufmgr.h,v 1.69 2003/08/04 02:40:14 momjian Exp $ + * $Id: bufmgr.h,v 1.70 2003/08/10 19:48:08 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -180,6 +180,7 @@ extern void SetBufferCommitInfoNeedsSave(Buffer buffer); extern void UnlockBuffers(void); extern void LockBuffer(Buffer buffer, int mode); +extern bool ConditionalLockBuffer(Buffer buffer); extern void LockBufferForCleanup(Buffer buffer); extern void AbortBufferIO(void); |