summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>2011-03-08 20:13:52 +0200
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>2011-03-08 20:30:53 +0200
commit93d888232e80e4d676e24fe93ae6d27459d966be (patch)
tree97be0432179bc53c6dc87b5c787461eec7fc9bef
parent915cd10c164be27dac0134efb16358f7d7564e8d (diff)
downloadpostgresql-93d888232e80e4d676e24fe93ae6d27459d966be.tar.gz
Don't throw a warning if vacuum sees PD_ALL_VISIBLE flag set on a page that
contains newly-inserted tuples that according to our OldestXmin are not yet visible to everyone. The value returned by GetOldestXmin() is conservative, and it can move backwards on repeated calls, so if we see that contradiction between the PD_ALL_VISIBLE flag and status of tuples on the page, we have to assume it's because an earlier vacuum calculated a higher OldestXmin value, and all the tuples really are visible to everyone. We have received several reports of this bug, with the "PD_ALL_VISIBLE flag was incorrectly set in relation ..." warning appearing in logs. We were finally able to hunt it down with David Gould's help to run extra diagnostics in an environment where this happened frequently. Also reword the warning, per Robert Haas' suggestion, to not imply that the PD_ALL_VISIBLE flag is necessarily at fault, as it might also be a symptom of corruption on a tuple header. Backpatch to 8.4, where the PD_ALL_VISIBLE flag was introduced.
-rw-r--r--src/backend/commands/vacuumlazy.c20
-rw-r--r--src/backend/storage/ipc/procarray.c12
2 files changed, 30 insertions, 2 deletions
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 515ea9173d..a5c024cc19 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -349,6 +349,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
Size freespace;
bool all_visible_according_to_vm = false;
bool all_visible;
+ bool has_dead_tuples;
/*
* Skip pages that don't require vacuuming according to the visibility
@@ -500,6 +501,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* requiring freezing.
*/
all_visible = true;
+ has_dead_tuples = false;
nfrozen = 0;
hastup = false;
prev_dead_count = vacrelstats->num_dead_tuples;
@@ -640,6 +642,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
&vacrelstats->latestRemovedXid);
tups_vacuumed += 1;
+ has_dead_tuples = true;
}
else
{
@@ -702,9 +705,22 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
PageSetAllVisible(page);
SetBufferCommitInfoNeedsSave(buf);
}
- else if (PageIsAllVisible(page) && !all_visible)
+ /*
+ * It's possible for the value returned by GetOldestXmin() to move
+ * backwards, so it's not wrong for us to see tuples that appear to
+ * not be visible to everyone yet, while PD_ALL_VISIBLE is already
+ * set. The real safe xmin value never moves backwards, but
+ * GetOldestXmin() is conservative and sometimes returns a value
+ * that's unnecessarily small, so if we see that contradiction it
+ * just means that the tuples that we think are not visible to
+ * everyone yet actually are, and the PD_ALL_VISIBLE flag is correct.
+ *
+ * There should never be dead tuples on a page with PD_ALL_VISIBLE
+ * set, however.
+ */
+ else if (PageIsAllVisible(page) && has_dead_tuples)
{
- elog(WARNING, "PD_ALL_VISIBLE flag was incorrectly set in relation \"%s\" page %u",
+ elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
relname, blkno);
PageClearAllVisible(page);
SetBufferCommitInfoNeedsSave(buf);
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 2473881e8f..65fca7171a 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -1000,6 +1000,18 @@ TransactionIdIsActive(TransactionId xid)
* This is also used to determine where to truncate pg_subtrans. allDbs
* must be TRUE for that case, and ignoreVacuum FALSE.
*
+ * Note: it's possible for the calculated value to move backwards on repeated
+ * calls. The calculated value is conservative, so that anything older is
+ * definitely not considered as running by anyone anymore, but the exact
+ * value calculated depends on a number of things. For example, if allDbs is
+ * TRUE and there are no transactions running in the current database,
+ * GetOldestXmin() returns latestCompletedXid. If a transaction begins after
+ * that, its xmin will include in-progress transactions in other databases
+ * that started earlier, so another call will return an lower value. The
+ * return value is also adjusted with vacuum_defer_cleanup_age, so increasing
+ * that setting on the fly is an easy way to have GetOldestXmin() move
+ * backwards.
+ *
* Note: we include all currently running xids in the set of considered xids.
* This ensures that if a just-started xact has not yet set its snapshot,
* when it does set the snapshot it cannot set xmin less than what we compute.