summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/backend/access/nbtree/nbtpage.c 40
-rw-r--r-- src/backend/access/nbtree/nbtree.c 12
-rw-r--r-- src/include/access/nbtree.h 4
3 files changed, 38 insertions, 18 deletions
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index f30c965428..1e1d8a7e2f 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -37,8 +37,10 @@ static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf);
static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf,
BTStack stack);
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
+ BlockNumber scanblkno,
bool *rightsib_empty,
- TransactionId *oldestBtpoXact);
+ TransactionId *oldestBtpoXact,
+ uint32 *ndeleted);
static bool _bt_lock_branch_parent(Relation rel, BlockNumber child,
BTStack stack, Buffer *topparent, OffsetNumber *topoff,
BlockNumber *target, BlockNumber *rightsib);
@@ -1301,7 +1303,9 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
*
* Returns the number of pages successfully deleted (zero if page cannot
* be deleted now; could be more than one if parent or right sibling pages
- * were deleted too).
+ * were deleted too). Note that this does not include pages that we delete
+ * that the btvacuumscan scan has yet to reach; they'll get counted later
+ * instead.
*
* Maintains *oldestBtpoXact for any pages that get deleted. Caller is
* responsible for maintaining *oldestBtpoXact in the case of pages that were
@@ -1311,16 +1315,22 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
* carefully, it's better to run it in a temp context that can be reset
* frequently.
*/
-int
+uint32
_bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
{
- int ndeleted = 0;
+ uint32 ndeleted = 0;
BlockNumber rightsib;
bool rightsib_empty;
Page page;
BTPageOpaque opaque;
/*
+ * Save original leafbuf block number from caller. Only deleted blocks
+ * that are <= scanblkno get counted in ndeleted return value.
+ */
+ BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
+
+ /*
* "stack" is a search stack leading (approximately) to the target page.
* It is initially NULL, but when iterating, we keep it to avoid
* duplicated search effort.
@@ -1370,8 +1380,9 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
if (P_ISDELETED(opaque))
ereport(LOG,
(errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg_internal("found deleted block %u while following right link in index \"%s\"",
+ errmsg_internal("found deleted block %u while following right link from block %u in index \"%s\"",
BufferGetBlockNumber(leafbuf),
+ scanblkno,
RelationGetRelationName(rel))));
_bt_relbuf(rel, leafbuf);
@@ -1521,13 +1532,13 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
while (P_ISHALFDEAD(opaque))
{
/* Check for interrupts in _bt_unlink_halfdead_page */
- if (!_bt_unlink_halfdead_page(rel, leafbuf, &rightsib_empty,
- oldestBtpoXact))
+ if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
+ &rightsib_empty, oldestBtpoXact,
+ &ndeleted))
{
/* _bt_unlink_halfdead_page failed, released buffer */
return ndeleted;
}
- ndeleted++;
}
Assert(P_ISLEAF(opaque) && P_ISDELETED(opaque));
@@ -1779,8 +1790,9 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
* to avoid having to reacquire a lock we already released).
*/
static bool
-_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
- TransactionId *oldestBtpoXact)
+_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
+ bool *rightsib_empty, TransactionId *oldestBtpoXact,
+ uint32 *ndeleted)
{
BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
BlockNumber leafleftsib;
@@ -2167,6 +2179,14 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
*oldestBtpoXact = opaque->btpo.xact;
/*
+ * If btvacuumscan won't revisit this page in a future btvacuumpage call
+ * and count it as deleted then, we count it as deleted by the current
+ * btvacuumpage call.
+ */
+ if (target <= scanblkno)
+ (*ndeleted)++;
+
+ /*
* Release the target, if it was not the leaf block. The leaf is always
* kept locked.
*/
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 8da25a71d8..9b53e12181 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -1349,17 +1349,17 @@ restart:
if (delete_now)
{
MemoryContext oldcontext;
- int ndel;
/* Run pagedel in a temp context to avoid memory leakage */
MemoryContextReset(vstate->pagedelcontext);
oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
- ndel = _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
-
- /* count only this page, else may double-count parent */
- if (ndel)
- stats->pages_deleted++;
+ /*
+ * We trust the _bt_pagedel return value because it does not include
+ * any page that a future call here from btvacuumscan is expected to
+ * count. There will be no double-counting.
+ */
+ stats->pages_deleted += _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
MemoryContextSwitchTo(oldcontext);
/* pagedel released buffer, so we shouldn't */
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 6640581fd6..e8b7a5fde1 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -764,8 +764,8 @@ extern void _bt_delitems_delete(Relation rel, Buffer buf,
extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
OffsetNumber *itemnos, int nitems,
BlockNumber lastBlockVacuumed);
-extern int _bt_pagedel(Relation rel, Buffer leafbuf,
- TransactionId *oldestBtpoXact);
+extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf,
+ TransactionId *oldestBtpoXact);
/*
* prototypes for functions in nbtsearch.c