summaryrefslogtreecommitdiff
path: root/src/backend/partitioning
diff options
context:
space:
mode:
authorAlvaro Herrera <alvherre@alvh.no-ip.org>2021-04-22 15:13:25 -0400
committerAlvaro Herrera <alvherre@alvh.no-ip.org>2021-04-22 15:13:25 -0400
commit8aba9322511f718f12b618470d8c07f0ee5f0700 (patch)
tree89d4a6f405b4088e7c8767410e3e7c9c97c598c5 /src/backend/partitioning
parent82b13dbc4d4b46f71ca95ce1cc15c425deff5957 (diff)
downloadpostgresql-8aba9322511f718f12b618470d8c07f0ee5f0700.tar.gz
Fix relcache inconsistency hazard in partition detach
During queries coming from ri_triggers.c, we need to omit partitions that are marked pending detach -- otherwise, the RI query is tricked into allowing a row into the referencing table whose corresponding row is in the detached partition. Which is bogus: once the detach operation completes, the row becomes an orphan. However, the code was not doing that in repeatable-read transactions, because relcache kept a copy of the partition descriptor that included the partition, and used it in the RI query. This commit changes the partdesc cache code to only keep descriptors that aren't dependent on a snapshot (namely: those where no detached partition exist, and those where detached partitions are included). When a partdesc-without- detached-partitions is requested, we create one afresh each time; also, those partdescs are stored in PortalContext instead of CacheMemoryContext. find_inheritance_children gets a new output *detached_exist boolean, which indicates whether any partition marked pending-detach is found. Its "include_detached" input flag is changed to "omit_detached", because that name captures desired the semantics more naturally. CreatePartitionDirectory() and RelationGetPartitionDesc() arguments are identically renamed. This was noticed because a buildfarm member that runs with relcache clobbering, which would not keep the improperly cached partdesc, broke one test, which led us to realize that the expected output of that test was bogus. This commit also corrects that expected output. Author: Amit Langote <amitlangote09@gmail.com> Author: Álvaro Herrera <alvherre@alvh.no-ip.org> Discussion: https://postgr.es/m/3269784.1617215412@sss.pgh.pa.us
Diffstat (limited to 'src/backend/partitioning')
-rw-r--r--src/backend/partitioning/partbounds.c6
-rw-r--r--src/backend/partitioning/partdesc.c100
2 files changed, 72 insertions, 34 deletions
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index 1290d45963..c9c789297d 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -2798,7 +2798,7 @@ check_new_partition_bound(char *relname, Relation parent,
PartitionBoundSpec *spec, ParseState *pstate)
{
PartitionKey key = RelationGetPartitionKey(parent);
- PartitionDesc partdesc = RelationGetPartitionDesc(parent, true);
+ PartitionDesc partdesc = RelationGetPartitionDesc(parent, false);
PartitionBoundInfo boundinfo = partdesc->boundinfo;
int with = -1;
bool overlap = false;
@@ -3991,7 +3991,7 @@ get_qual_for_list(Relation parent, PartitionBoundSpec *spec)
{
int i;
int ndatums = 0;
- PartitionDesc pdesc = RelationGetPartitionDesc(parent, true); /* XXX correct? */
+ PartitionDesc pdesc = RelationGetPartitionDesc(parent, false);
PartitionBoundInfo boundinfo = pdesc->boundinfo;
if (boundinfo)
@@ -4191,7 +4191,7 @@ get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
if (spec->is_default)
{
List *or_expr_args = NIL;
- PartitionDesc pdesc = RelationGetPartitionDesc(parent, true); /* XXX correct? */
+ PartitionDesc pdesc = RelationGetPartitionDesc(parent, false);
Oid *inhoids = pdesc->oids;
int nparts = pdesc->nparts,
i;
diff --git a/src/backend/partitioning/partdesc.c b/src/backend/partitioning/partdesc.c
index 58570fecfd..12ef36a73e 100644
--- a/src/backend/partitioning/partdesc.c
+++ b/src/backend/partitioning/partdesc.c
@@ -37,7 +37,7 @@ typedef struct PartitionDirectoryData
{
MemoryContext pdir_mcxt;
HTAB *pdir_hash;
- bool include_detached;
+ bool omit_detached;
} PartitionDirectoryData;
typedef struct PartitionDirectoryEntry
@@ -47,7 +47,8 @@ typedef struct PartitionDirectoryEntry
PartitionDesc pd;
} PartitionDirectoryEntry;
-static void RelationBuildPartitionDesc(Relation rel, bool include_detached);
+static PartitionDesc RelationBuildPartitionDesc(Relation rel,
+ bool omit_detached);
/*
@@ -60,18 +61,29 @@ static void RelationBuildPartitionDesc(Relation rel, bool include_detached);
* for callers to continue to use that pointer as long as (a) they hold the
* relation open, and (b) they hold a relation lock strong enough to ensure
* that the data doesn't become stale.
+ *
+ * The above applies to partition descriptors that are complete regarding
+ * partitions concurrently being detached. When a descriptor that omits
+ * partitions being detached is requested (and such partitions are present),
+ * said descriptor is not part of relcache and so it isn't freed by
+ * invalidations either. Caller must not use such a descriptor beyond the
+ * current Portal.
*/
PartitionDesc
-RelationGetPartitionDesc(Relation rel, bool include_detached)
+RelationGetPartitionDesc(Relation rel, bool omit_detached)
{
- if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
- return NULL;
+ Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
- if (unlikely(rel->rd_partdesc == NULL ||
- rel->rd_partdesc->includes_detached != include_detached))
- RelationBuildPartitionDesc(rel, include_detached);
+ /*
+ * If relcache has a partition descriptor, use that. However, we can only
+ * do so when we are asked to include all partitions including detached;
+ * and also when we know that there are no detached partitions.
+ */
+ if (likely(rel->rd_partdesc &&
+ (!rel->rd_partdesc->detached_exist || !omit_detached)))
+ return rel->rd_partdesc;
- return rel->rd_partdesc;
+ return RelationBuildPartitionDesc(rel, omit_detached);
}
/*
@@ -88,9 +100,15 @@ RelationGetPartitionDesc(Relation rel, bool include_detached)
* context the current context except in very brief code sections, out of fear
* that some of our callees allocate memory on their own which would be leaked
* permanently.
+ *
+ * As a special case, partition descriptors that are requested to omit
+ * partitions being detached (and which contain such partitions) are transient
+ * and are not associated with the relcache entry. Such descriptors only last
+ * through the requesting Portal, so we use the corresponding memory context
+ * for them.
*/
-static void
-RelationBuildPartitionDesc(Relation rel, bool include_detached)
+static PartitionDesc
+RelationBuildPartitionDesc(Relation rel, bool omit_detached)
{
PartitionDesc partdesc;
PartitionBoundInfo boundinfo = NULL;
@@ -98,6 +116,7 @@ RelationBuildPartitionDesc(Relation rel, bool include_detached)
PartitionBoundSpec **boundspecs = NULL;
Oid *oids = NULL;
bool *is_leaf = NULL;
+ bool detached_exist;
ListCell *cell;
int i,
nparts;
@@ -112,8 +131,8 @@ RelationBuildPartitionDesc(Relation rel, bool include_detached)
* concurrently, whatever this function returns will be accurate as of
* some well-defined point in time.
*/
- inhoids = find_inheritance_children(RelationGetRelid(rel), include_detached,
- NoLock);
+ inhoids = find_inheritance_children(RelationGetRelid(rel), omit_detached,
+ NoLock, &detached_exist);
nparts = list_length(inhoids);
/* Allocate working arrays for OIDs, leaf flags, and boundspecs. */
@@ -234,6 +253,7 @@ RelationBuildPartitionDesc(Relation rel, bool include_detached)
partdesc = (PartitionDescData *)
MemoryContextAllocZero(new_pdcxt, sizeof(PartitionDescData));
partdesc->nparts = nparts;
+ partdesc->detached_exist = detached_exist;
/* If there are no partitions, the rest of the partdesc can stay zero */
if (nparts > 0)
{
@@ -241,7 +261,6 @@ RelationBuildPartitionDesc(Relation rel, bool include_detached)
partdesc->boundinfo = partition_bounds_copy(boundinfo, key);
partdesc->oids = (Oid *) palloc(nparts * sizeof(Oid));
partdesc->is_leaf = (bool *) palloc(nparts * sizeof(bool));
- partdesc->includes_detached = include_detached;
/*
* Assign OIDs from the original array into mapped indexes of the
@@ -261,22 +280,41 @@ RelationBuildPartitionDesc(Relation rel, bool include_detached)
}
/*
- * We have a fully valid partdesc ready to store into the relcache.
- * Reparent it so it has the right lifespan.
+ * We have a fully valid partdesc. Reparent it so that it has the right
+ * lifespan, and if appropriate put it into the relation's relcache entry.
*/
- MemoryContextSetParent(new_pdcxt, CacheMemoryContext);
+ if (omit_detached && detached_exist)
+ {
+ /*
+ * A transient partition descriptor is only good for the current
+ * statement, so make it a child of the current portal's context.
+ */
+ MemoryContextSetParent(new_pdcxt, PortalContext);
+ }
+ else
+ {
+ /*
+ * This partdesc goes into relcache.
+ */
- /*
- * But first, a kluge: if there's an old rd_pdcxt, it contains an old
- * partition descriptor that may still be referenced somewhere. Preserve
- * it, while not leaking it, by reattaching it as a child context of the
- * new rd_pdcxt. Eventually it will get dropped by either RelationClose
- * or RelationClearRelation.
- */
- if (rel->rd_pdcxt != NULL)
- MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt);
- rel->rd_pdcxt = new_pdcxt;
- rel->rd_partdesc = partdesc;
+ MemoryContextSetParent(new_pdcxt, CacheMemoryContext);
+
+ /*
+ * But first, a kluge: if there's an old rd_pdcxt, it contains an old
+ * partition descriptor that may still be referenced somewhere.
+ * Preserve it, while not leaking it, by reattaching it as a child
+ * context of the new rd_pdcxt. Eventually it will get dropped by
+ * either RelationClose or RelationClearRelation.
+ */
+ if (rel->rd_pdcxt != NULL)
+ MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt);
+ rel->rd_pdcxt = new_pdcxt;
+
+ /* Store it into relcache */
+ rel->rd_partdesc = partdesc;
+ }
+
+ return partdesc;
}
/*
@@ -284,7 +322,7 @@ RelationBuildPartitionDesc(Relation rel, bool include_detached)
* Create a new partition directory object.
*/
PartitionDirectory
-CreatePartitionDirectory(MemoryContext mcxt, bool include_detached)
+CreatePartitionDirectory(MemoryContext mcxt, bool omit_detached)
{
MemoryContext oldcontext = MemoryContextSwitchTo(mcxt);
PartitionDirectory pdir;
@@ -299,7 +337,7 @@ CreatePartitionDirectory(MemoryContext mcxt, bool include_detached)
pdir->pdir_hash = hash_create("partition directory", 256, &ctl,
HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
- pdir->include_detached = include_detached;
+ pdir->omit_detached = omit_detached;
MemoryContextSwitchTo(oldcontext);
return pdir;
@@ -332,7 +370,7 @@ PartitionDirectoryLookup(PartitionDirectory pdir, Relation rel)
*/
RelationIncrementReferenceCount(rel);
pde->rel = rel;
- pde->pd = RelationGetPartitionDesc(rel, pdir->include_detached);
+ pde->pd = RelationGetPartitionDesc(rel, pdir->omit_detached);
Assert(pde->pd != NULL);
}
return pde->pd;