summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/access/heap/tuptoaster.c67
-rw-r--r--src/backend/commands/cluster.c11
-rw-r--r--src/include/utils/rel.h3
3 files changed, 62 insertions, 19 deletions
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 2f0676f425..5bf690d78c 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -76,7 +76,8 @@ do { \
static void toast_delete_datum(Relation rel, Datum value);
static Datum toast_save_datum(Relation rel, Datum value,
struct varlena *oldexternal, int options);
-static bool toast_valueid_exists(Oid toastrelid, Oid valueid);
+static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
+static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
static struct varlena *toast_fetch_datum(struct varlena * attr);
static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
int32 sliceoffset, int32 length);
@@ -1342,7 +1343,34 @@ toast_save_datum(Relation rel, Datum value,
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
+ {
+ /* This value came from the old toast table; reuse its OID */
toast_pointer.va_valueid = old_toast_pointer.va_valueid;
+
+ /*
+ * There is a corner case here: the table rewrite might have
+ * to copy both live and recently-dead versions of a row, and
+ * those versions could easily reference the same toast value.
+ * When we copy the second or later version of such a row,
+ * reusing the OID will mean we select an OID that's already
+ * in the new toast table. Check for that, and if so, just
+ * fall through without writing the data again.
+ *
+ * While annoying and ugly-looking, this is a good thing
+ * because it ensures that we wind up with only one copy of
+ * the toast value when there is only one copy in the old
+ * toast table. Before we detected this case, we'd have made
+ * multiple copies, wasting space; and what's worse, the
+ * copies belonging to already-deleted heap tuples would not
+ * be reclaimed by VACUUM.
+ */
+ if (toastrel_valueid_exists(toastrel,
+ toast_pointer.va_valueid))
+ {
+ /* Match, so short-circuit the data storage loop below */
+ data_todo = 0;
+ }
+ }
}
if (toast_pointer.va_valueid == InvalidOid)
{
@@ -1356,8 +1384,8 @@ toast_save_datum(Relation rel, Datum value,
GetNewOidWithIndex(toastrel,
RelationGetRelid(toastidx),
(AttrNumber) 1);
- } while (toast_valueid_exists(rel->rd_toastoid,
- toast_pointer.va_valueid));
+ } while (toastid_valueid_exists(rel->rd_toastoid,
+ toast_pointer.va_valueid));
}
}
@@ -1495,25 +1523,19 @@ toast_delete_datum(Relation rel, Datum value)
/* ----------
- * toast_valueid_exists -
+ * toastrel_valueid_exists -
*
* Test whether a toast value with the given ID exists in the toast relation
* ----------
*/
static bool
-toast_valueid_exists(Oid toastrelid, Oid valueid)
+toastrel_valueid_exists(Relation toastrel, Oid valueid)
{
bool result = false;
- Relation toastrel;
ScanKeyData toastkey;
SysScanDesc toastscan;
/*
- * Open the toast relation
- */
- toastrel = heap_open(toastrelid, AccessShareLock);
-
- /*
* Setup a scan key to find chunks with matching va_valueid
*/
ScanKeyInit(&toastkey,
@@ -1530,10 +1552,27 @@ toast_valueid_exists(Oid toastrelid, Oid valueid)
if (systable_getnext(toastscan) != NULL)
result = true;
- /*
- * End scan and close relations
- */
systable_endscan(toastscan);
+
+ return result;
+}
+
+/* ----------
+ * toastid_valueid_exists -
+ *
+ * As above, but work from toast rel's OID not an open relation
+ * ----------
+ */
+static bool
+toastid_valueid_exists(Oid toastrelid, Oid valueid)
+{
+ bool result;
+ Relation toastrel;
+
+ toastrel = heap_open(toastrelid, AccessShareLock);
+
+ result = toastrel_valueid_exists(toastrel, valueid);
+
heap_close(toastrel, AccessShareLock);
return result;
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 76ea020512..5dec4298e2 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -800,16 +800,19 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
* When doing swap by content, any toast pointers written into NewHeap
* must use the old toast table's OID, because that's where the toast
* data will eventually be found. Set this up by setting rd_toastoid.
- * This also tells tuptoaster.c to preserve the toast value OIDs,
- * which we want so as not to invalidate toast pointers in system
- * catalog caches.
+ * This also tells toast_save_datum() to preserve the toast value
+ * OIDs, which we want so as not to invalidate toast pointers in
+ * system catalog caches, and to avoid making multiple copies of a
+ * single toast value.
*
* Note that we must hold NewHeap open until we are done writing data,
* since the relcache will not guarantee to remember this setting once
* the relation is closed. Also, this technique depends on the fact
* that no one will try to read from the NewHeap until after we've
* finished writing it and swapping the rels --- otherwise they could
- * follow the toast pointers to the wrong place.
+ * follow the toast pointers to the wrong place. (It would actually
+ * work for values copied over from the old toast table, but not for
+ * any values that we toast which were previously not toasted.)
*/
NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
}
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index e2c2fa9ae4..57cdc3b940 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -211,7 +211,8 @@ typedef struct RelationData
* have the existing toast table's OID, not the OID of the transient toast
* table. If rd_toastoid isn't InvalidOid, it is the OID to place in
* toast pointers inserted into this rel. (Note it's set on the new
- * version of the main heap, not the toast table itself.)
+ * version of the main heap, not the toast table itself.) This also
+ * causes toast_save_datum() to try to preserve toast value OIDs.
*/
Oid rd_toastoid; /* Real TOAST table's OID, or InvalidOid */