summary | refs | log | tree | commit | diff
path: root/src/backend/access/nbtree/nbtpage.c
diff options
context:
space:
mode:
author: Peter Geoghegan <pg@bowt.ie> 2020-05-01 09:51:08 -0700
committer: Peter Geoghegan <pg@bowt.ie> 2020-05-01 09:51:08 -0700
commit: 40708eaaf3fd8536c245e1131e892ddbd9029218 (patch)
tree: 0c86fa5aa9bf080186260f22c4fd54a8cc284605 /src/backend/access/nbtree/nbtpage.c
parent: 05b73261cc26bf06d0c3896f67b3eaaee3b50496 (diff)
download: postgresql-40708eaaf3fd8536c245e1131e892ddbd9029218.tar.gz
Fix undercounting in VACUUM VERBOSE output.
The logic for determining how many nbtree pages in an index are deleted pages sometimes undercounted pages. Pages that were deleted by the current VACUUM operation (as opposed to some previous VACUUM operation whose deleted pages have yet to be reused) were sometimes overlooked. The final count is exposed to users through VACUUM VERBOSE's "%u index pages have been deleted" output.

btvacuumpage() avoided double-counting when _bt_pagedel() deleted more than one page by assuming that only one page was deleted, and that the additional deleted pages would get picked up during a future call to btvacuumpage() by the same VACUUM operation. _bt_pagedel() can legitimately delete pages that the btvacuumscan() scan will not visit again, though, so that assumption was slightly faulty.

Fix the accounting by teaching _bt_pagedel() about its caller's requirements. It now only reports on pages that it knows btvacuumscan() won't visit again (including the current btvacuumpage() page), so everything works out in the end.

This bug has been around forever. Only backpatch to v11, though, to keep _bt_pagedel() in sync on the branches that have today's bugfix commit b0229f26da. Note that this commit changes the signature of _bt_pagedel(), just like commit b0229f26da.

Author: Peter Geoghegan
Reviewed-By: Masahiko Sawada
Discussion: https://postgr.es/m/CAH2-WzkrXBcMQWAYUJMFTTvzx_r4q=pYSjDe07JnUXhe+OZnJA@mail.gmail.com
Backpatch: 11-
Diffstat (limited to 'src/backend/access/nbtree/nbtpage.c')
-rw-r--r-- src/backend/access/nbtree/nbtpage.c 40
1 file changed, 30 insertions, 10 deletions
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index f30c965428..1e1d8a7e2f 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -37,8 +37,10 @@ static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf);
static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf,
BTStack stack);
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
+ BlockNumber scanblkno,
bool *rightsib_empty,
- TransactionId *oldestBtpoXact);
+ TransactionId *oldestBtpoXact,
+ uint32 *ndeleted);
static bool _bt_lock_branch_parent(Relation rel, BlockNumber child,
BTStack stack, Buffer *topparent, OffsetNumber *topoff,
BlockNumber *target, BlockNumber *rightsib);
@@ -1301,7 +1303,9 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
*
* Returns the number of pages successfully deleted (zero if page cannot
* be deleted now; could be more than one if parent or right sibling pages
- * were deleted too).
+ * were deleted too). Note that this does not include pages that we delete
+ * that the btvacuumscan scan has yet to reach; they'll get counted later
+ * instead.
*
* Maintains *oldestBtpoXact for any pages that get deleted. Caller is
* responsible for maintaining *oldestBtpoXact in the case of pages that were
@@ -1311,16 +1315,22 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
* carefully, it's better to run it in a temp context that can be reset
* frequently.
*/
-int
+uint32
_bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
{
- int ndeleted = 0;
+ uint32 ndeleted = 0;
BlockNumber rightsib;
bool rightsib_empty;
Page page;
BTPageOpaque opaque;
/*
+ * Save original leafbuf block number from caller. Only deleted blocks
+ * that are <= scanblkno get counted in ndeleted return value.
+ */
+ BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
+
+ /*
* "stack" is a search stack leading (approximately) to the target page.
* It is initially NULL, but when iterating, we keep it to avoid
* duplicated search effort.
@@ -1370,8 +1380,9 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
if (P_ISDELETED(opaque))
ereport(LOG,
(errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg_internal("found deleted block %u while following right link in index \"%s\"",
+ errmsg_internal("found deleted block %u while following right link from block %u in index \"%s\"",
BufferGetBlockNumber(leafbuf),
+ scanblkno,
RelationGetRelationName(rel))));
_bt_relbuf(rel, leafbuf);
@@ -1521,13 +1532,13 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
while (P_ISHALFDEAD(opaque))
{
/* Check for interrupts in _bt_unlink_halfdead_page */
- if (!_bt_unlink_halfdead_page(rel, leafbuf, &rightsib_empty,
- oldestBtpoXact))
+ if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
+ &rightsib_empty, oldestBtpoXact,
+ &ndeleted))
{
/* _bt_unlink_halfdead_page failed, released buffer */
return ndeleted;
}
- ndeleted++;
}
Assert(P_ISLEAF(opaque) && P_ISDELETED(opaque));
@@ -1779,8 +1790,9 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
* to avoid having to reacquire a lock we already released).
*/
static bool
-_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
- TransactionId *oldestBtpoXact)
+_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
+ bool *rightsib_empty, TransactionId *oldestBtpoXact,
+ uint32 *ndeleted)
{
BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
BlockNumber leafleftsib;
@@ -2167,6 +2179,14 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
*oldestBtpoXact = opaque->btpo.xact;
/*
+ * If btvacuumscan won't revisit this page in a future btvacuumpage call
+ * and count it as deleted then, we count it as deleted by current
+ * btvacuumpage call
+ */
+ if (target <= scanblkno)
+ (*ndeleted)++;
+
+ /*
* Release the target, if it was not the leaf block. The leaf is always
* kept locked.
*/