diff options
Diffstat (limited to 'src/backend/storage/buffer/freelist.c')
-rw-r--r-- | src/backend/storage/buffer/freelist.c | 925 |
1 files changed, 116 insertions, 809 deletions
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index b960208f74..247a9abf59 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -1,15 +1,7 @@ /*------------------------------------------------------------------------- * * freelist.c - * routines for manipulating the buffer pool's replacement strategy. - * - * The name "freelist.c" is now a bit of a misnomer, since this module - * controls not only the list of free buffers per se, but the entire - * mechanism for looking up existing shared buffers and the strategy - * for choosing replacement victims when needed. - * - * Note: all routines in this file assume that the BufMgrLock is held - * by the caller, so no synchronization is needed. + * routines for managing the buffer pool's replacement strategy. * * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group @@ -17,758 +9,185 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.50 2005/02/03 23:29:11 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.51 2005/03/04 20:21:06 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include <time.h> - -#include "access/xact.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" /* - * Definitions for the buffer replacement strategy - */ -#define STRAT_LIST_UNUSED (-1) -#define STRAT_LIST_B1 0 -#define STRAT_LIST_T1 1 -#define STRAT_LIST_T2 2 -#define STRAT_LIST_B2 3 -#define STRAT_NUM_LISTS 4 - -/* - * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC) + * The shared freelist control information. */ typedef struct { - int prev; /* list links */ - int next; - short list; /* ID of list it is currently in */ - bool t1_vacuum; /* t => present only because of VACUUM */ - TransactionId t1_xid; /* the xid this entry went onto T1 */ - BufferTag buf_tag; /* page identifier */ - int buf_id; /* currently assigned data buffer, or -1 */ -} BufferStrategyCDB; + /* Clock sweep hand: index of next buffer to consider grabbing */ + int nextVictimBuffer; -/* - * The shared ARC control information. - */ -typedef struct -{ - int target_T1_size; /* What T1 size are we aiming for */ - int listUnusedCDB; /* All unused StrategyCDB */ - int listHead[STRAT_NUM_LISTS]; /* ARC lists B1, T1, T2 - * and B2 */ - int listTail[STRAT_NUM_LISTS]; - int listSize[STRAT_NUM_LISTS]; - Buffer listFreeBuffers; /* List of unused buffers */ - - long num_lookup; /* Some hit statistics */ - long num_hit[STRAT_NUM_LISTS]; - time_t stat_report; - - /* Array of CDB's starts here */ - BufferStrategyCDB cdb[1]; /* VARIABLE SIZE ARRAY */ -} BufferStrategyControl; + int firstFreeBuffer; /* Head of list of unused buffers */ + int lastFreeBuffer; /* Tail of list of unused buffers */ -/* GUC variable: time in seconds between statistics reports */ -int DebugSharedBuffers = 0; + /* + * NOTE: lastFreeBuffer is undefined when firstFreeBuffer is -1 + * (that is, when the list is empty) + */ +} BufferStrategyControl; /* Pointers to shared state */ static BufferStrategyControl *StrategyControl = NULL; -static BufferStrategyCDB *StrategyCDB = NULL; /* Backend-local state about whether currently vacuuming */ -static bool strategy_hint_vacuum = false; -static TransactionId strategy_vacuum_xid; - +bool strategy_hint_vacuum = false; -#define T1_TARGET (StrategyControl->target_T1_size) -#define B1_LENGTH (StrategyControl->listSize[STRAT_LIST_B1]) -#define T1_LENGTH (StrategyControl->listSize[STRAT_LIST_T1]) -#define T2_LENGTH (StrategyControl->listSize[STRAT_LIST_T2]) -#define B2_LENGTH (StrategyControl->listSize[STRAT_LIST_B2]) - - -/* - * Macro to remove a CDB from whichever list it currently is on - */ -#define STRAT_LIST_REMOVE(cdb) \ -do { \ - Assert((cdb)->list >= 0 && (cdb)->list < STRAT_NUM_LISTS); \ - if ((cdb)->prev < 0) \ - StrategyControl->listHead[(cdb)->list] = (cdb)->next; \ - else \ - StrategyCDB[(cdb)->prev].next = (cdb)->next; \ - if ((cdb)->next < 0) \ - StrategyControl->listTail[(cdb)->list] = (cdb)->prev; \ - else \ - StrategyCDB[(cdb)->next].prev = (cdb)->prev; \ - StrategyControl->listSize[(cdb)->list]--; \ - (cdb)->list = STRAT_LIST_UNUSED; \ -} while(0) - -/* - * Macro to add a CDB to the tail of a list (MRU position) - */ -#define STRAT_MRU_INSERT(cdb,l) \ -do { \ - Assert((cdb)->list == STRAT_LIST_UNUSED); \ - if (StrategyControl->listTail[(l)] < 0) \ - { \ - (cdb)->prev = (cdb)->next = -1; \ - StrategyControl->listHead[(l)] = \ - StrategyControl->listTail[(l)] = \ - ((cdb) - StrategyCDB); \ - } \ - else \ - { \ - (cdb)->next = -1; \ - (cdb)->prev = StrategyControl->listTail[(l)]; \ - StrategyCDB[StrategyControl->listTail[(l)]].next = \ - ((cdb) - StrategyCDB); \ - StrategyControl->listTail[(l)] = \ - ((cdb) - StrategyCDB); \ - } \ - StrategyControl->listSize[(l)]++; \ - (cdb)->list = (l); \ -} while(0) /* - * Macro to add a CDB to the head of a list (LRU position) - */ -#define STRAT_LRU_INSERT(cdb,l) \ -do { \ - Assert((cdb)->list == STRAT_LIST_UNUSED); \ - if (StrategyControl->listHead[(l)] < 0) \ - { \ - (cdb)->prev = (cdb)->next = -1; \ - StrategyControl->listHead[(l)] = \ - StrategyControl->listTail[(l)] = \ - ((cdb) - StrategyCDB); \ - } \ - else \ - { \ - (cdb)->prev = -1; \ - (cdb)->next = StrategyControl->listHead[(l)]; \ - StrategyCDB[StrategyControl->listHead[(l)]].prev = \ - ((cdb) - StrategyCDB); \ - StrategyControl->listHead[(l)] = \ - ((cdb) - StrategyCDB); \ - } \ - StrategyControl->listSize[(l)]++; \ - (cdb)->list = (l); \ -} while(0) - - -/* - * Printout for use when DebugSharedBuffers is enabled - */ -static void -StrategyStatsDump(void) -{ - time_t now = time(NULL); - - if (StrategyControl->stat_report + DebugSharedBuffers < now) - { - long all_hit, - b1_hit, - t1_hit, - t2_hit, - b2_hit; - int id, - t1_clean, - t2_clean; - ErrorContextCallback *errcxtold; - - id = StrategyControl->listHead[STRAT_LIST_T1]; - t1_clean = 0; - while (id >= 0) - { - if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY) - break; - t1_clean++; - id = StrategyCDB[id].next; - } - id = StrategyControl->listHead[STRAT_LIST_T2]; - t2_clean = 0; - while (id >= 0) - { - if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY) - break; - t2_clean++; - id = StrategyCDB[id].next; - } - - if (StrategyControl->num_lookup == 0) - all_hit = b1_hit = t1_hit = t2_hit = b2_hit = 0; - else - { - b1_hit = (StrategyControl->num_hit[STRAT_LIST_B1] * 100 / - StrategyControl->num_lookup); - t1_hit = (StrategyControl->num_hit[STRAT_LIST_T1] * 100 / - StrategyControl->num_lookup); - t2_hit = (StrategyControl->num_hit[STRAT_LIST_T2] * 100 / - StrategyControl->num_lookup); - b2_hit = (StrategyControl->num_hit[STRAT_LIST_B2] * 100 / - StrategyControl->num_lookup); - all_hit = b1_hit + t1_hit + t2_hit + b2_hit; - } - - errcxtold = error_context_stack; - error_context_stack = NULL; - elog(DEBUG1, "ARC T1target=%5d B1len=%5d T1len=%5d T2len=%5d B2len=%5d", - T1_TARGET, B1_LENGTH, T1_LENGTH, T2_LENGTH, B2_LENGTH); - elog(DEBUG1, "ARC total =%4ld%% B1hit=%4ld%% T1hit=%4ld%% T2hit=%4ld%% B2hit=%4ld%%", - all_hit, b1_hit, t1_hit, t2_hit, b2_hit); - elog(DEBUG1, "ARC clean buffers at LRU T1= %5d T2= %5d", - t1_clean, t2_clean); - error_context_stack = errcxtold; - - StrategyControl->num_lookup = 0; - StrategyControl->num_hit[STRAT_LIST_B1] = 0; - StrategyControl->num_hit[STRAT_LIST_T1] = 0; - StrategyControl->num_hit[STRAT_LIST_T2] = 0; - StrategyControl->num_hit[STRAT_LIST_B2] = 0; - StrategyControl->stat_report = now; - } -} - -/* - * StrategyBufferLookup - * - * Lookup a page request in the cache directory. A buffer is only - * returned for a T1 or T2 cache hit. B1 and B2 hits are just - * remembered here, to possibly affect the behaviour later. + * StrategyGetBuffer * - * recheck indicates we are rechecking after I/O wait; do not change - * internal status in this case. + * Called by the bufmgr to get the next candidate buffer to use in + * BufferAlloc(). The only hard requirement BufferAlloc() has is that + * the selected buffer must not currently be pinned by anyone. * - * *cdb_found_index is set to the index of the found CDB, or -1 if none. - * This is not intended to be used by the caller, except to pass to - * StrategyReplaceBuffer(). + * To ensure that no one else can pin the buffer before we do, we must + * return the buffer with the buffer header spinlock still held. That + * means that we return with the BufFreelistLock still held, as well; + * the caller must release that lock once the spinlock is dropped. */ BufferDesc * -StrategyBufferLookup(BufferTag *tagPtr, bool recheck, - int *cdb_found_index) +StrategyGetBuffer(void) { - BufferStrategyCDB *cdb; - - /* Optional stats printout */ - if (DebugSharedBuffers > 0) - StrategyStatsDump(); - - /* - * Count lookups - */ - StrategyControl->num_lookup++; - - /* - * Lookup the block in the shared hash table - */ - *cdb_found_index = BufTableLookup(tagPtr); - - /* - * Done if complete CDB lookup miss - */ - if (*cdb_found_index < 0) - return NULL; + BufferDesc *buf; + int trycounter; - /* - * We found a CDB - */ - cdb = &StrategyCDB[*cdb_found_index]; + LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); /* - * Count hits + * Try to get a buffer from the freelist. Note that the freeNext fields + * are considered to be protected by the BufFreelistLock not the + * individual buffer spinlocks, so it's OK to manipulate them without + * holding the spinlock. */ - StrategyControl->num_hit[cdb->list]++; - - /* - * If this is a T2 hit, we simply move the CDB to the T2 MRU position - * and return the found buffer. - * - * A CDB in T2 cannot have t1_vacuum set, so we needn't check. However, - * if the current process is VACUUM then it doesn't promote to MRU. - */ - if (cdb->list == STRAT_LIST_T2) + while (StrategyControl->firstFreeBuffer >= 0) { - if (!strategy_hint_vacuum) - { - STRAT_LIST_REMOVE(cdb); - STRAT_MRU_INSERT(cdb, STRAT_LIST_T2); - } - - return &BufferDescriptors[cdb->buf_id]; - } + buf = &BufferDescriptors[StrategyControl->firstFreeBuffer]; + Assert(buf->freeNext != FREENEXT_NOT_IN_LIST); - /* - * If this is a T1 hit, we move the buffer to the T2 MRU only if - * another transaction had read it into T1, *and* neither transaction - * is a VACUUM. This is required because any UPDATE or DELETE in - * PostgreSQL does multiple ReadBuffer(), first during the scan, later - * during the heap_update() or heap_delete(). Otherwise move to T1 - * MRU. VACUUM doesn't even get to make that happen. - */ - if (cdb->list == STRAT_LIST_T1) - { - if (!strategy_hint_vacuum) - { - if (!cdb->t1_vacuum && - !TransactionIdEquals(cdb->t1_xid, GetTopTransactionId())) - { - STRAT_LIST_REMOVE(cdb); - STRAT_MRU_INSERT(cdb, STRAT_LIST_T2); - } - else - { - STRAT_LIST_REMOVE(cdb); - STRAT_MRU_INSERT(cdb, STRAT_LIST_T1); - - /* - * If a non-VACUUM process references a page recently - * loaded by VACUUM, clear the stigma; the state will now - * be the same as if this process loaded it originally. - */ - if (cdb->t1_vacuum) - { - cdb->t1_xid = GetTopTransactionId(); - cdb->t1_vacuum = false; - } - } - } + /* Unconditionally remove buffer from freelist */ + StrategyControl->firstFreeBuffer = buf->freeNext; + buf->freeNext = FREENEXT_NOT_IN_LIST; - return &BufferDescriptors[cdb->buf_id]; + /* + * If the buffer is pinned or has a nonzero usage_count, + * we cannot use it; discard it and retry. (This can only happen + * if VACUUM put a valid buffer in the freelist and then someone + * else used it before we got to it.) + */ + LockBufHdr(buf); + if (buf->refcount == 0 && buf->usage_count == 0) + return buf; + UnlockBufHdr(buf); } - /* - * In the case of a recheck we don't care about B1 or B2 hits here. - * The bufmgr does this call only to make sure no-one faulted in the - * block while we where busy flushing another; we don't want to doubly - * adjust the T1target. - * - * Now for this really to end up as a B1 or B2 cache hit, we must have - * been flushing for quite some time as the block not only must have - * been read, but also traveled through the queue and evicted from the - * T cache again already. - * - * VACUUM re-reads shouldn't adjust the target either. - */ - if (recheck || strategy_hint_vacuum) - return NULL; - - /* - * Adjust the target size of the T1 cache depending on if this is a B1 - * or B2 hit. - */ - switch (cdb->list) + /* Nothing on the freelist, so run the "clock sweep" algorithm */ + trycounter = NBuffers; + for (;;) { - case STRAT_LIST_B1: - - /* - * B1 hit means that the T1 cache is probably too small. - * Adjust the T1 target size and continue below. - */ - T1_TARGET = Min(T1_TARGET + Max(B2_LENGTH / B1_LENGTH, 1), - NBuffers); - break; - - case STRAT_LIST_B2: - - /* - * B2 hit means that the T2 cache is probably too small. - * Adjust the T1 target size and continue below. - */ - T1_TARGET = Max(T1_TARGET - Max(B1_LENGTH / B2_LENGTH, 1), 0); - break; - - default: - elog(ERROR, "buffer hash table corrupted: CDB->list = %d", - cdb->list); - } - - /* - * Even though we had seen the block in the past, its data is not - * currently in memory ... cache miss to the bufmgr. - */ - return NULL; -} + buf = &BufferDescriptors[StrategyControl->nextVictimBuffer]; + if (++StrategyControl->nextVictimBuffer >= NBuffers) + StrategyControl->nextVictimBuffer = 0; -/* - * StrategyGetBuffer - * - * Called by the bufmgr to get the next candidate buffer to use in - * BufferAlloc(). The only hard requirement BufferAlloc() has is that - * this buffer must not currently be pinned. - * - * *cdb_replace_index is set to the index of the candidate CDB, or -1 if - * none (meaning we are using a previously free buffer). This is not - * intended to be used by the caller, except to pass to - * StrategyReplaceBuffer(). - */ -BufferDesc * -StrategyGetBuffer(int *cdb_replace_index) -{ - int cdb_id; - BufferDesc *buf; - - if (StrategyControl->listFreeBuffers < 0) - { /* - * We don't have a free buffer, must take one from T1 or T2. - * Choose based on trying to converge T1len to T1target. + * If the buffer is pinned or has a nonzero usage_count, + * we cannot use it; decrement the usage_count and keep scanning. */ - if (T1_LENGTH >= Max(1, T1_TARGET)) + LockBufHdr(buf); + if (buf->refcount == 0 && buf->usage_count == 0) + return buf; + if (buf->usage_count > 0) { - /* - * We should take the first unpinned buffer from T1. - */ - cdb_id = StrategyControl->listHead[STRAT_LIST_T1]; - while (cdb_id >= 0) - { - buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id]; - if (buf->refcount == 0) - { - *cdb_replace_index = cdb_id; - Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T1); - return buf; - } - cdb_id = StrategyCDB[cdb_id].next; - } - - /* - * No unpinned T1 buffer found - try T2 cache. - */ - cdb_id = StrategyControl->listHead[STRAT_LIST_T2]; - while (cdb_id >= 0) - { - buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id]; - if (buf->refcount == 0) - { - *cdb_replace_index = cdb_id; - Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T2); - return buf; - } - cdb_id = StrategyCDB[cdb_id].next; - } - - /* - * No unpinned buffers at all!!! - */ - elog(ERROR, "no unpinned buffers available"); + buf->usage_count--; + trycounter = NBuffers; } - else + else if (--trycounter == 0) { /* - * We should take the first unpinned buffer from T2. - */ - cdb_id = StrategyControl->listHead[STRAT_LIST_T2]; - while (cdb_id >= 0) - { - buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id]; - if (buf->refcount == 0) - { - *cdb_replace_index = cdb_id; - Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T2); - return buf; - } - cdb_id = StrategyCDB[cdb_id].next; - } - - /* - * No unpinned T2 buffer found - try T1 cache. - */ - cdb_id = StrategyControl->listHead[STRAT_LIST_T1]; - while (cdb_id >= 0) - { - buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id]; - if (buf->refcount == 0) - { - *cdb_replace_index = cdb_id; - Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T1); - return buf; - } - cdb_id = StrategyCDB[cdb_id].next; - } - - /* - * No unpinned buffers at all!!! + * We've scanned all the buffers without making any state + * changes, so all the buffers are pinned (or were when we + * looked at them). We could hope that someone will free + * one eventually, but it's probably better to fail than to + * risk getting stuck in an infinite loop. */ + UnlockBufHdr(buf); elog(ERROR, "no unpinned buffers available"); } - } - else - { - /* There is a completely free buffer available - take it */ - - /* - * Note: This code uses the side effect that a free buffer can - * never be pinned or dirty and therefore the call to - * StrategyReplaceBuffer() will happen without the bufmgr - * releasing the bufmgr-lock in the meantime. That means, that - * there will never be any reason to recheck. Otherwise we would - * leak shared buffers here! - */ - *cdb_replace_index = -1; - buf = &BufferDescriptors[StrategyControl->listFreeBuffers]; - - StrategyControl->listFreeBuffers = buf->bufNext; - buf->bufNext = -1; - - /* Buffer in freelist cannot be pinned */ - Assert(buf->refcount == 0); - Assert(!(buf->flags & BM_DIRTY)); - - return buf; + UnlockBufHdr(buf); } /* not reached */ return NULL; } - /* - * StrategyReplaceBuffer - * - * Called by the buffer manager to inform us that he flushed a buffer - * and is now about to replace the content. Prior to this call, - * the cache algorithm still reports the buffer as in the cache. After - * this call we report the new block, even if IO might still need to - * be done to bring in the new content. + * StrategyFreeBuffer: put a buffer on the freelist * - * cdb_found_index and cdb_replace_index must be the auxiliary values - * returned by previous calls to StrategyBufferLookup and StrategyGetBuffer. + * The buffer is added either at the head or the tail, according to the + * at_head parameter. This allows a small amount of control over how + * quickly the buffer is reused. */ void -StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag, - int cdb_found_index, int cdb_replace_index) +StrategyFreeBuffer(BufferDesc *buf, bool at_head) { - BufferStrategyCDB *cdb_found; - BufferStrategyCDB *cdb_replace; - - if (cdb_found_index >= 0) - { - /* This must have been a ghost buffer cache hit (B1 or B2) */ - cdb_found = &StrategyCDB[cdb_found_index]; - - /* Assert that the buffer remembered in cdb_found is the one */ - /* the buffer manager is currently faulting in */ - Assert(BUFFERTAGS_EQUAL(cdb_found->buf_tag, *newTag)); - - if (cdb_replace_index >= 0) - { - /* We are satisfying it with an evicted T buffer */ - cdb_replace = &StrategyCDB[cdb_replace_index]; - - /* Assert that the buffer remembered in cdb_replace is */ - /* the one the buffer manager has just evicted */ - Assert(cdb_replace->list == STRAT_LIST_T1 || - cdb_replace->list == STRAT_LIST_T2); - Assert(cdb_replace->buf_id == buf->buf_id); - Assert(BUFFERTAGS_EQUAL(cdb_replace->buf_tag, buf->tag)); - - /* - * Under normal circumstances we move the evicted T list entry - * to the corresponding B list. However, T1 entries that - * exist only because of VACUUM are just thrown into the - * unused list instead. We don't expect them to be touched - * again by the VACUUM, and if we put them into B1 then VACUUM - * would skew T1_target adjusting. - */ - if (cdb_replace->t1_vacuum) - { - BufTableDelete(&(cdb_replace->buf_tag)); - STRAT_LIST_REMOVE(cdb_replace); - cdb_replace->next = StrategyControl->listUnusedCDB; - StrategyControl->listUnusedCDB = cdb_replace_index; - } - else - { - if (cdb_replace->list == STRAT_LIST_T1) - { - STRAT_LIST_REMOVE(cdb_replace); - STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1); - } - else - { - STRAT_LIST_REMOVE(cdb_replace); - STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2); - } - } - /* And clear its block reference */ - cdb_replace->buf_id = -1; - } - else - { - /* We are satisfying it with an unused buffer */ - } + LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); - /* Now the found B CDB gets the buffer and is moved to T2 */ - cdb_found->buf_id = buf->buf_id; - STRAT_LIST_REMOVE(cdb_found); - STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T2); - } - else + /* + * It is possible that we are told to put something in the freelist + * that is already in it; don't screw up the list if so. + */ + if (buf->freeNext == FREENEXT_NOT_IN_LIST) { - /* - * This was a complete cache miss, so we need to create a new CDB. - * The goal is to keep T1len+B1len <= c. - */ - if (B1_LENGTH > 0 && (T1_LENGTH + B1_LENGTH) >= NBuffers) - { - /* So if B1 isn't empty and T1len+B1len >= c we take B1-LRU */ - cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]]; - - BufTableDelete(&(cdb_found->buf_tag)); - STRAT_LIST_REMOVE(cdb_found); - } - else - { - /* Otherwise, we try to use a free one */ - if (StrategyControl->listUnusedCDB >= 0) - { - cdb_found = &StrategyCDB[StrategyControl->listUnusedCDB]; - StrategyControl->listUnusedCDB = cdb_found->next; - } - else - { - /* If there isn't, we take B2-LRU ... except if */ - /* T1len+B1len+T2len = c ... oh my */ - if (B2_LENGTH > 0) - cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B2]]; - else - cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]]; - - BufTableDelete(&(cdb_found->buf_tag)); - STRAT_LIST_REMOVE(cdb_found); - } - } - - /* Set the CDB's buf_tag and insert it into the hash table */ - cdb_found->buf_tag = *newTag; - BufTableInsert(&(cdb_found->buf_tag), (cdb_found - StrategyCDB)); - - if (cdb_replace_index >= 0) + if (at_head) { - /* - * The buffer was formerly in a T list, move its CDB to the - * corresponding B list - */ - cdb_replace = &StrategyCDB[cdb_replace_index]; - - Assert(cdb_replace->list == STRAT_LIST_T1 || - cdb_replace->list == STRAT_LIST_T2); - Assert(cdb_replace->buf_id == buf->buf_id); - Assert(BUFFERTAGS_EQUAL(cdb_replace->buf_tag, buf->tag)); - - if (cdb_replace->list == STRAT_LIST_T1) - { - STRAT_LIST_REMOVE(cdb_replace); - STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1); - } - else - { - STRAT_LIST_REMOVE(cdb_replace); - STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2); - } - /* And clear its block reference */ - cdb_replace->buf_id = -1; + buf->freeNext = StrategyControl->firstFreeBuffer; + if (buf->freeNext < 0) + StrategyControl->lastFreeBuffer = buf->buf_id; + StrategyControl->firstFreeBuffer = buf->buf_id; } else { - /* We are satisfying it with an unused buffer */ - } - - /* Assign the buffer id to the new CDB */ - cdb_found->buf_id = buf->buf_id; - - /* - * Specialized VACUUM optimization. If this complete cache miss - * happened because vacuum needed the page, we place it at the LRU - * position of T1; normally it goes at the MRU position. - */ - if (strategy_hint_vacuum) - { - if (TransactionIdEquals(strategy_vacuum_xid, - GetTopTransactionId())) - STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1); + buf->freeNext = FREENEXT_END_OF_LIST; + if (StrategyControl->firstFreeBuffer < 0) + StrategyControl->firstFreeBuffer = buf->buf_id; else - { - /* VACUUM must have been aborted by error, reset flag */ - strategy_hint_vacuum = false; - STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1); - } + BufferDescriptors[StrategyControl->lastFreeBuffer].freeNext = buf->buf_id; + StrategyControl->lastFreeBuffer = buf->buf_id; } - else - STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1); - - /* - * Remember the Xid when this buffer went onto T1 to avoid a - * single UPDATE promoting a newcomer straight into T2. Also - * remember if it was loaded for VACUUM. - */ - cdb_found->t1_xid = GetTopTransactionId(); - cdb_found->t1_vacuum = strategy_hint_vacuum; } -} + LWLockRelease(BufFreelistLock); +} /* - * StrategyInvalidateBuffer + * StrategySyncStart -- tell BufferSync where to start syncing * - * Called by the buffer manager to inform us that a buffer content - * is no longer valid. We simply throw away any eventual existing - * buffer hash entry and move the CDB and buffer to the free lists. + * The result is the buffer index of the best buffer to sync first. + * BufferSync() will proceed circularly around the buffer array from there. */ -void -StrategyInvalidateBuffer(BufferDesc *buf) +int +StrategySyncStart(void) { - int cdb_id; - BufferStrategyCDB *cdb; - - /* The buffer cannot be dirty or pinned */ - Assert(!(buf->flags & BM_DIRTY) || !(buf->flags & BM_VALID)); - Assert(buf->refcount == 0); - - /* - * Lookup the cache directory block for this buffer - */ - cdb_id = BufTableLookup(&(buf->tag)); - if (cdb_id < 0) - elog(ERROR, "buffer %d not in buffer hash table", buf->buf_id); - cdb = &StrategyCDB[cdb_id]; + int result; /* - * Remove the CDB from the hashtable and the ARC queue it is currently - * on. + * We could probably dispense with the locking here, but just to be + * safe ... */ - BufTableDelete(&(cdb->buf_tag)); - STRAT_LIST_REMOVE(cdb); - - /* - * Clear out the CDB's buffer tag and association with the buffer and - * add it to the list of unused CDB's - */ - CLEAR_BUFFERTAG(cdb->buf_tag); - cdb->buf_id = -1; - cdb->next = StrategyControl->listUnusedCDB; - StrategyControl->listUnusedCDB = cdb_id; - - /* - * Clear out the buffer's tag and add it to the list of currently - * unused buffers. We must do this to ensure that linear scans of the - * buffer array don't think the buffer is valid. - */ - CLEAR_BUFFERTAG(buf->tag); - buf->flags &= ~(BM_VALID | BM_DIRTY); - buf->cntxDirty = false; - buf->bufNext = StrategyControl->listFreeBuffers; - StrategyControl->listFreeBuffers = buf->buf_id; + LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); + result = StrategyControl->nextVictimBuffer; + LWLockRelease(BufFreelistLock); + return result; } /* @@ -778,87 +197,6 @@ void StrategyHintVacuum(bool vacuum_active) { strategy_hint_vacuum = vacuum_active; - strategy_vacuum_xid = GetTopTransactionId(); -} - -/* - * StrategyDirtyBufferList - * - * Returns a list of dirty buffers, in priority order for writing. - * Note that the caller may choose not to write them all. - * - * The caller must beware of the possibility that a buffer is no longer dirty, - * or even contains a different page, by the time he reaches it. If it no - * longer contains the same page it need not be written, even if it is (again) - * dirty. - * - * Buffer pointers are stored into buffers[], and corresponding tags into - * buftags[], both of size max_buffers. The function returns the number of - * buffer IDs stored. - */ -int -StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags, - int max_buffers) -{ - int num_buffer_dirty = 0; - int cdb_id_t1; - int cdb_id_t2; - int buf_id; - BufferDesc *buf; - - /* - * Traverse the T1 and T2 list LRU to MRU in "parallel" and add all - * dirty buffers found in that order to the list. The ARC strategy - * keeps all used buffers including pinned ones in the T1 or T2 list. - * So we cannot miss any dirty buffers. - */ - cdb_id_t1 = StrategyControl->listHead[STRAT_LIST_T1]; - cdb_id_t2 = StrategyControl->listHead[STRAT_LIST_T2]; - - while (cdb_id_t1 >= 0 || cdb_id_t2 >= 0) - { - if (cdb_id_t1 >= 0) - { - buf_id = StrategyCDB[cdb_id_t1].buf_id; - buf = &BufferDescriptors[buf_id]; - - if (buf->flags & BM_VALID) - { - if ((buf->flags & BM_DIRTY) || (buf->cntxDirty)) - { - buffers[num_buffer_dirty] = buf; - buftags[num_buffer_dirty] = buf->tag; - num_buffer_dirty++; - if (num_buffer_dirty >= max_buffers) - break; - } - } - - cdb_id_t1 = StrategyCDB[cdb_id_t1].next; - } - - if (cdb_id_t2 >= 0) - { - buf_id = StrategyCDB[cdb_id_t2].buf_id; - buf = &BufferDescriptors[buf_id]; - - if (buf->flags & BM_VALID) - { - if ((buf->flags & BM_DIRTY) || (buf->cntxDirty)) - { - buffers[num_buffer_dirty] = buf; - buftags[num_buffer_dirty] = buf->tag; - num_buffer_dirty++; - if (num_buffer_dirty >= max_buffers) - break; - } - } - - cdb_id_t2 = StrategyCDB[cdb_id_t2].next; - } - } - - return num_buffer_dirty; } @@ -866,21 +204,21 @@ StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags, * StrategyShmemSize * * estimate the size of shared memory used by the freelist-related structures. + * + * Note: for somewhat historical reasons, the buffer lookup hashtable size + * is also determined here. */ int StrategyShmemSize(void) { int size = 0; - /* size of CDB lookup hash table */ - size += BufTableShmemSize(NBuffers * 2); + /* size of lookup hash table */ + size += BufTableShmemSize(NBuffers); /* size of the shared replacement strategy control block */ size += MAXALIGN(sizeof(BufferStrategyControl)); - /* size of the ARC directory blocks */ - size += MAXALIGN(NBuffers * 2 * sizeof(BufferStrategyCDB)); - return size; } @@ -888,29 +226,26 @@ StrategyShmemSize(void) * StrategyInitialize -- initialize the buffer cache replacement * strategy. * - * Assume: All of the buffers are already building a linked list. + * Assumes: All of the buffers are already built into a linked list. * Only called by postmaster and only during initialization. */ void StrategyInitialize(bool init) { bool found; - int i; /* - * Initialize the shared CDB lookup hashtable + * Initialize the shared buffer lookup hashtable. */ - InitBufTable(NBuffers * 2); + InitBufTable(NBuffers); /* - * Get or create the shared strategy control block and the CDB's + * Get or create the shared strategy control block */ StrategyControl = (BufferStrategyControl *) ShmemInitStruct("Buffer Strategy Status", - sizeof(BufferStrategyControl) + - sizeof(BufferStrategyCDB) * (NBuffers * 2 - 1), + sizeof(BufferStrategyControl), &found); - StrategyCDB = &(StrategyControl->cdb[0]); if (!found) { @@ -923,39 +258,11 @@ StrategyInitialize(bool init) * Grab the whole linked list of free buffers for our strategy. We * assume it was previously set up by InitBufferPool(). */ - StrategyControl->listFreeBuffers = 0; - - /* - * We start off with a target T1 list size of half the available - * cache blocks. - */ - StrategyControl->target_T1_size = NBuffers / 2; - - /* - * Initialize B1, T1, T2 and B2 lists to be empty - */ - for (i = 0; i < STRAT_NUM_LISTS; i++) - { - StrategyControl->listHead[i] = -1; - StrategyControl->listTail[i] = -1; - StrategyControl->listSize[i] = 0; - StrategyControl->num_hit[i] = 0; - } - StrategyControl->num_lookup = 0; - StrategyControl->stat_report = 0; + StrategyControl->firstFreeBuffer = 0; + StrategyControl->lastFreeBuffer = NBuffers - 1; - /* - * All CDB's are linked as the listUnusedCDB - */ - for (i = 0; i < NBuffers * 2; i++) - { - StrategyCDB[i].next = i + 1; - StrategyCDB[i].list = STRAT_LIST_UNUSED; - CLEAR_BUFFERTAG(StrategyCDB[i].buf_tag); - StrategyCDB[i].buf_id = -1; - } - StrategyCDB[NBuffers * 2 - 1].next = -1; - StrategyControl->listUnusedCDB = 0; + /* Initialize the clock sweep pointer */ + StrategyControl->nextVictimBuffer = 0; } else Assert(!init); |